From 27d5281641917f2f411a59d1cb922ebaff67fd80 Mon Sep 17 00:00:00 2001
From: Lixia Chen
Date: Tue, 25 Aug 2020 19:52:53 -0400
Subject: [PATCH] Change epoch count to 1 for python testcases
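
Python dataset test cases now create their iterators with an explicit
num_epochs=1 instead of relying on the default epoch control, and the
worker loops in MapOp and TFReaderOp gain an interruption check
(RETURN_IF_INTERRUPTED()). The change applied across the test files
follows roughly this pattern (an illustrative sketch, not an excerpt
from any single test; process() is a hypothetical placeholder):

    # before: iterator created with the default epoch setting
    for item in data.create_dict_iterator():
        process(item)

    # after: tests pin the iterator to a single epoch
    for item in data.create_dict_iterator(num_epochs=1):
        process(item)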
---
 .../engine/datasetops/map_op/map_op.cc         |   1 +
 .../engine/datasetops/source/tf_reader_op.cc   |   1 +
 mindspore/dataset/engine/datasets.py           |  10 +-
 mindspore/dataset/text/utils.py                |   4 +-
 tests/ut/python/dataset/test_HWC2CHW.py        |   4 +-
 tests/ut/python/dataset/test_apply.py          |  18 +-
 .../dataset/test_bounding_box_augment.py       |  10 +-
 .../dataset/test_bucket_batch_by_length.py     |  20 ++--
 tests/ut/python/dataset/test_c_compose.py      |   2 +-
 .../ut/python/dataset/test_c_random_apply.py   |   2 +-
 .../ut/python/dataset/test_c_random_choice.py  |   2 +-
 tests/ut/python/dataset/test_cache_map.py      |   6 +-
 tests/ut/python/dataset/test_cache_nomap.py    |  32 +++---
 tests/ut/python/dataset/test_callbacks.py      |   6 +-
 tests/ut/python/dataset/test_center_crop.py    |   6 +-
 tests/ut/python/dataset/test_config.py         |   8 +-
 tests/ut/python/dataset/test_cut_out.py        |   6 +-
 .../dataset/test_dataset_numpy_slices.py       |   2 +-
 .../ut/python/dataset/test_datasets_celeba.py  |   8 +-
 .../python/dataset/test_datasets_cifarop.py    |  36 +++---
 tests/ut/python/dataset/test_datasets_clue.py  |  42 +++----
 tests/ut/python/dataset/test_datasets_coco.py  |  14 +--
 tests/ut/python/dataset/test_datasets_csv.py   |  26 ++---
 .../python/dataset/test_datasets_generator.py  |  70 ++++++------
 .../dataset/test_datasets_imagefolder.py       |  42 +++----
 .../dataset/test_datasets_manifestop.py        |  12 +-
 .../ut/python/dataset/test_datasets_mnist.py   |  18 +-
 .../python/dataset/test_datasets_sharding.py   |  16 +--
 .../dataset/test_datasets_textfileop.py        |  24 ++--
 .../python/dataset/test_datasets_tfrecord.py   |  20 ++--
 tests/ut/python/dataset/test_datasets_voc.py   |  28 ++---
 tests/ut/python/dataset/test_decode.py         |   6 +-
 tests/ut/python/dataset/test_duplicate_op.py   |   2 +-
 tests/ut/python/dataset/test_epoch_ctrl.py     |   8 +-
 tests/ut/python/dataset/test_exceptions.py     |   2 +-
 tests/ut/python/dataset/test_filterop.py       |  42 +++----
 tests/ut/python/dataset/test_five_crop.py      |   2 +-
 tests/ut/python/dataset/test_from_dataset.py   |   8 +-
 tests/ut/python/dataset/test_graphdata.py      |   2 +-
 .../dataset/test_graphdata_distributed.py      |   2 +-
 tests/ut/python/dataset/test_iterator.py       |  17 +--
 .../dataset/test_linear_transformation.py      |   2 +-
 tests/ut/python/dataset/test_minddataset.py    |  92 +++++++--------
 .../dataset/test_minddataset_exception.py      |  22 ++--
 .../dataset/test_minddataset_multi_images.py   |   4 +-
 ...st_minddataset_multi_images_and_ndarray.py  |   2 +-
 .../python/dataset/test_minddataset_padded.py  |  22 ++--
 .../dataset/test_minddataset_sampler.py        |  56 ++++-----
 .../dataset/test_mixup_label_smoothing.py      |   6 +-
 tests/ut/python/dataset/test_ngram_op.py       |   6 +-
 tests/ut/python/dataset/test_nlp.py            |   4 +-
 tests/ut/python/dataset/test_noop_mode.py      |   4 +-
 tests/ut/python/dataset/test_normalizeOp.py    |  10 +-
 tests/ut/python/dataset/test_onehot_op.py      |   2 +-
 tests/ut/python/dataset/test_opt.py            |   2 +-
 tests/ut/python/dataset/test_opt_pass.py       |   4 +-
 tests/ut/python/dataset/test_pad.py            |   6 +-
 tests/ut/python/dataset/test_pad_batch.py      |  22 ++--
 tests/ut/python/dataset/test_paddeddataset.py  |  30 ++---
 tests/ut/python/dataset/test_pair_truncate.py  |   2 +-
 tests/ut/python/dataset/test_pyfunc.py         |  20 ++--
 .../python/dataset/test_python_tokenizer.py    |   2 +-
 tests/ut/python/dataset/test_random_affine.py  |   6 +-
 tests/ut/python/dataset/test_random_apply.py   |   4 +-
 tests/ut/python/dataset/test_random_choice.py  |   6 +-
 tests/ut/python/dataset/test_random_color.py   |   4 +-
 .../dataset/test_random_color_adjust.py        |   4 +-
 tests/ut/python/dataset/test_random_crop.py    |  12 +-
 .../dataset/test_random_crop_and_resize.py     |   6 +-
 .../test_random_crop_and_resize_with_bbox.py   |  10 +-
 .../dataset/test_random_crop_decode_resize.py  |   2 +-
 .../dataset/test_random_crop_with_bbox.py      |  16 +--
 .../ut/python/dataset/test_random_dataset.py   |   6 +-
 .../ut/python/dataset/test_random_erasing.py   |   2 +-
 .../python/dataset/test_random_grayscale.py    |   4 +-
 .../dataset/test_random_horizontal_flip.py     |   4 +-
 .../test_random_horizontal_flip_with_bbox.py   |   8 +-
 tests/ut/python/dataset/test_random_order.py   |   2 +-
 .../python/dataset/test_random_perspective.py  |   2 +-
 .../python/dataset/test_random_posterize.py    |   4 +-
 tests/ut/python/dataset/test_random_resize.py  |   2 +-
 .../dataset/test_random_resize_with_bbox.py    |   6 +-
 .../ut/python/dataset/test_random_rotation.py  |   8 +-
 .../dataset/test_random_select_subpolicy.py    |   2 +-
 .../python/dataset/test_random_solarize_op.py  |   2 +-
 .../dataset/test_random_vertical_flip.py       |   4 +-
 .../test_random_vertical_flip_with_bbox.py     |  10 +-
 tests/ut/python/dataset/test_rename.py         |   2 +-
 tests/ut/python/dataset/test_repeat.py         |   6 +-
 tests/ut/python/dataset/test_rescale_op.py     |   4 +-
 tests/ut/python/dataset/test_resize.py         |   4 +-
 .../python/dataset/test_resize_with_bbox.py    |   6 +-
 tests/ut/python/dataset/test_rgb_hsv.py        |   2 +-
 tests/ut/python/dataset/test_sampler.py        |  12 +-
 tests/ut/python/dataset/test_save_op.py        |  14 +--
 .../dataset/test_sentencepiece_tokenizer.py    |  20 ++--
 .../ut/python/dataset/test_serdes_dataset.py   |  18 +-
 tests/ut/python/dataset/test_shuffle.py        |   2 +-
 tests/ut/python/dataset/test_skip.py           |   4 +-
 .../ut/python/dataset/test_sliding_window.py   |  10 +-
 tests/ut/python/dataset/test_soft_dvpp.py      |   6 +-
 tests/ut/python/dataset/test_split.py          | 106 +++++++++---------
 tests/ut/python/dataset/test_sync_wait.py      |  14 +--
 tests/ut/python/dataset/test_ten_crop.py       |   4 +-
 tests/ut/python/dataset/test_tensor_string.py  |  18 +-
 .../dataset/test_text_basic_tokenizer.py       |   4 +-
 .../dataset/test_text_bert_tokenizer.py        |   4 +-
 .../dataset/test_text_jieba_tokenizer.py       |  42 +++----
 .../ut/python/dataset/test_text_tokenizer.py   |  26 ++---
 .../dataset/test_text_wordpiece_tokenizer.py   |   4 +-
 tests/ut/python/dataset/test_to_number_op.py   |  26 ++---
 tests/ut/python/dataset/test_to_type.py        |   2 +-
 tests/ut/python/dataset/test_type_cast.py      |   4 +-
 .../ut/python/dataset/test_uniform_augment.py  |   2 +-
 tests/ut/python/dataset/test_var_batch_map.py  |  24 ++--
 tests/ut/python/dataset/test_vocab.py          |  10 +-
 tests/ut/python/dataset/test_zip.py            |  12 +-
 tests/ut/python/dataset/util.py                |   8 +-
 118 files changed, 735 insertions(+), 728 deletions(-)

diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.cc
index bc8cf22e87c..2556cc120b2 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.cc
@@ -308,6 +308,7 @@ Status MapOp::WorkerCompute(DataBuffer *in_buffer, TensorQTable *new_tensor_tabl
   std::vector<TensorRow> result_table;
   // Executing the list of jobs.
   for (size_t i = 0; i < job_list.size(); i++) {
+    RETURN_IF_INTERRUPTED();
     // Execute MapWorkerJob.
     RETURN_IF_NOT_OK(job_list[i]->Run(job_input_table, &result_table));
     // Assign the processed data as an input for the next job processing, except for the last TensorOp in the list.
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc
index e4b3015ffcd..bcfa045796b 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc
@@ -581,6 +581,7 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off
     if (!load_jagged_connector_) {
       break;
     }
+    RETURN_IF_INTERRUPTED();

    // read length
    int64_t record_length = 0;
diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 97133e6e6eb..bf478e52726 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -1181,7 +1181,7 @@ class Dataset:

     def __iter__(self):
         """Create an Iterator over the dataset."""
-        return self.create_tuple_iterator()
+        return self.create_tuple_iterator(num_epochs=1)

     @property
     def input_indexs(self):
@@ -1598,7 +1598,7 @@ class BucketBatchByLengthDataset(DatasetOp):
         """
         if self.dataset_size is None:
             num_rows = 0
-            for _ in self.create_dict_iterator():
+            for _ in self.create_dict_iterator(num_epochs=1):
                 num_rows += 1
             self.dataset_size = num_rows
         return self.dataset_size
@@ -2130,7 +2130,7 @@ class FilterDataset(DatasetOp):
         """
         if self.dataset_size is None:
             num_rows = 0
-            for _ in self.create_dict_iterator():
+            for _ in self.create_dict_iterator(num_epochs=1):
                 num_rows += 1
             self.dataset_size = num_rows
         return self.dataset_size
@@ -2367,7 +2367,7 @@ class ConcatDataset(DatasetOp):
         """
         if self.dataset_size is None:
             num_rows = 0
-            for _ in self.create_dict_iterator():
+            for _ in self.create_dict_iterator(num_epochs=1):
                 num_rows += 1
             self.dataset_size = num_rows
         return self.dataset_size
@@ -3463,7 +3463,7 @@ class GeneratorDataset(MappableDataset):
             self.dataset_size = rows_from_sampler
         else:
             num_rows = 0
-            for _ in self.create_dict_iterator():
+            for _ in self.create_dict_iterator(num_epochs=1):
                 num_rows += 1
             self.dataset_size = num_rows
         return self.dataset_size
diff --git a/mindspore/dataset/text/utils.py b/mindspore/dataset/text/utils.py
index 22328ad5432..d8ff7fcc43a 100644
--- a/mindspore/dataset/text/utils.py
+++ b/mindspore/dataset/text/utils.py
@@ -80,7 +80,7 @@ class Vocab(cde.Vocab):
         if special_tokens is None:
             special_tokens = []
         root = copy.deepcopy(dataset).build_vocab(vocab, columns, freq_range, top_k, special_tokens, special_first)
-        for d in root.create_dict_iterator():
+        for d in root.create_dict_iterator(num_epochs=1):
             if d is not None:
                 raise ValueError("from_dataset should receive data other than None.")
         return vocab
@@ -167,7 +167,7 @@ class SentencePieceVocab(cde.SentencePieceVocab):
         vocab = SentencePieceVocab()
         root = copy.deepcopy(dataset).build_sentencepiece_vocab(vocab, col_names, vocab_size, character_coverage,
                                                                 model_type, params)
-        for d in root.create_dict_iterator():
+        for d in root.create_dict_iterator(num_epochs=1):
             if d is None:
                 raise ValueError("from_dataset should receive data other than None.")
         return vocab
diff --git a/tests/ut/python/dataset/test_HWC2CHW.py b/tests/ut/python/dataset/test_HWC2CHW.py
index 63d5b33b617..b14d0a99904 100644
--- a/tests/ut/python/dataset/test_HWC2CHW.py
+++ b/tests/ut/python/dataset/test_HWC2CHW.py
@@ -47,7 +47,7 @@ def test_HWC2CHW(plot=False):

     image_transposed = []
     image = []
-    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
+    for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1),
data2.create_dict_iterator(num_epochs=1)): transposed_item = item1["image"].copy() original_item = item2["image"].copy() image_transposed.append(transposed_item.transpose(1, 2, 0)) @@ -104,7 +104,7 @@ def test_HWC2CHW_comp(plot=False): image_c_transposed = [] image_py_transposed = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) diff --git a/tests/ut/python/dataset/test_apply.py b/tests/ut/python/dataset/test_apply.py index e731ddcbc18..abc5511830c 100644 --- a/tests/ut/python/dataset/test_apply.py +++ b/tests/ut/python/dataset/test_apply.py @@ -40,7 +40,7 @@ def test_apply_generator_case(): data2 = data2.repeat(2) data2 = data2.batch(4) - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): np.testing.assert_array_equal(item1["data"], item2["data"]) @@ -63,7 +63,7 @@ def test_apply_imagefolder_case(): data2 = data2.map(operations=normalize_op) data2 = data2.repeat(2) - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): np.testing.assert_array_equal(item1["image"], item2["image"]) @@ -85,7 +85,7 @@ def test_apply_flow_case_0(id_=0): data1 = data1.apply(dataset_fn) num_iter = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter = num_iter + 1 if id_ == 0: @@ -116,7 +116,7 @@ def test_apply_flow_case_1(id_=1): data1 = data1.apply(dataset_fn) num_iter = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter = num_iter + 1 if id_ == 0: @@ -147,7 +147,7 @@ def test_apply_flow_case_2(id_=2): data1 = data1.apply(dataset_fn) num_iter = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter = num_iter + 1 if id_ == 0: @@ -178,7 +178,7 @@ def test_apply_flow_case_3(id_=3): data1 = data1.apply(dataset_fn) num_iter = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter = num_iter + 1 if id_ == 0: @@ -204,7 +204,7 @@ def test_apply_exception_case(): try: data1 = data1.apply("123") - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): pass assert False except TypeError: @@ -212,7 +212,7 @@ def test_apply_exception_case(): try: data1 = data1.apply(exception_fn) - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): pass assert False except TypeError: @@ -221,7 +221,7 @@ def test_apply_exception_case(): try: data2 = data1.apply(dataset_fn) _ = data1.apply(dataset_fn) - for _, _ in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for _, _ in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): pass assert False except ValueError as e: diff --git a/tests/ut/python/dataset/test_bounding_box_augment.py b/tests/ut/python/dataset/test_bounding_box_augment.py index 90bfae7bb85..f7ac962d919 100644 --- a/tests/ut/python/dataset/test_bounding_box_augment.py +++ b/tests/ut/python/dataset/test_bounding_box_augment.py @@ -59,7 +59,7 @@ def 
test_bounding_box_augment_with_rotation_op(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -98,7 +98,7 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -136,7 +136,7 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -170,7 +170,7 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -214,7 +214,7 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) diff --git a/tests/ut/python/dataset/test_bucket_batch_by_length.py b/tests/ut/python/dataset/test_bucket_batch_by_length.py index fb0d1bc25e6..d0f102b8aef 100644 --- a/tests/ut/python/dataset/test_bucket_batch_by_length.py +++ b/tests/ut/python/dataset/test_bucket_batch_by_length.py @@ -130,7 +130,7 @@ def test_bucket_batch_multi_bucket_no_padding(): [[1], [5], [9]]] output = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): output.append(data["col1"].tolist()) assert output == expected_output @@ -161,7 +161,7 @@ def test_bucket_batch_multi_bucket_with_padding(): [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]] output = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): output.append(data["col1"].tolist()) assert output == expected_output @@ -182,7 +182,7 @@ def test_bucket_batch_single_bucket_no_padding(): [[5], [6], [7], [8], [9]]] output = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): output.append(data["col1"].tolist()) assert output == expected_output @@ -212,7 +212,7 @@ def test_bucket_batch_single_bucket_with_padding(): [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0]]] output = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): output.append(data["col1"].tolist()) assert output == expected_output @@ -243,7 +243,7 @@ def test_bucket_batch_pad_to_bucket_boundary(): [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0]]] output = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): output.append(data["col1"].tolist()) assert output == expected_output @@ -279,7 +279,7 @@ def test_bucket_batch_default_pad(): 
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]]] output = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): output.append(data["col1"].tolist()) assert output == expected_output @@ -310,7 +310,7 @@ def test_bucket_batch_drop_remainder(): [[19], [22], [25]]] output = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): output.append(data["col1"].tolist()) assert output == expected_output @@ -340,7 +340,7 @@ def test_bucket_batch_default_length_function(): [0, 1, 2, 3, 4, 5, 6, 7, 8]]] output = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): output.append(data["col1"].tolist()) assert output == expected_output @@ -375,7 +375,7 @@ def test_bucket_batch_multi_column(): same_shape_output = [] variable_shape_output = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): same_shape_output.append(data["same_shape"].tolist()) variable_shape_output.append(data["variable_shape"].tolist()) @@ -396,7 +396,7 @@ def test_bucket_batch_get_dataset_size(): data_size = dataset.get_dataset_size() num_rows = 0 - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): num_rows += 1 assert data_size == num_rows diff --git a/tests/ut/python/dataset/test_c_compose.py b/tests/ut/python/dataset/test_c_compose.py index 906d787f219..2119bac78fc 100644 --- a/tests/ut/python/dataset/test_c_compose.py +++ b/tests/ut/python/dataset/test_c_compose.py @@ -27,7 +27,7 @@ def test_compose(): data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) data = data.map(input_columns=["col"], operations=ops.Compose(op_list)) res = [] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): res.append(i["col"].tolist()) return res except (TypeError, ValueError) as e: diff --git a/tests/ut/python/dataset/test_c_random_apply.py b/tests/ut/python/dataset/test_c_random_apply.py index 8b4851aab57..dfa96b9767c 100644 --- a/tests/ut/python/dataset/test_c_random_apply.py +++ b/tests/ut/python/dataset/test_c_random_apply.py @@ -26,7 +26,7 @@ def test_random_apply(): data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) data = data.map(input_columns=["col"], operations=ops.RandomApply(op_list, prob)) res = [] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): res.append(i["col"].tolist()) return res except (TypeError, ValueError) as e: diff --git a/tests/ut/python/dataset/test_c_random_choice.py b/tests/ut/python/dataset/test_c_random_choice.py index 3faedeb26e0..568a7ad560f 100644 --- a/tests/ut/python/dataset/test_c_random_choice.py +++ b/tests/ut/python/dataset/test_c_random_choice.py @@ -26,7 +26,7 @@ def test_random_choice(): data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) data = data.map(input_columns=["col"], operations=ops.RandomChoice(op_list)) res = [] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): res.append(i["col"].tolist()) return res except (TypeError, ValueError) as e: diff --git a/tests/ut/python/dataset/test_cache_map.py b/tests/ut/python/dataset/test_cache_map.py index 5fcc5d08665..d8218629ebc 100644 --- a/tests/ut/python/dataset/test_cache_map.py +++ b/tests/ut/python/dataset/test_cache_map.py @@ -111,7 +111,7 @@ def test_cache_map_basic3(): logger.info("ds1.dataset_size is ", 
ds1.get_dataset_size()) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): logger.info("get data from dataset") num_iter += 1 @@ -136,7 +136,7 @@ def test_cache_map_basic4(): shape = ds1.output_shapes() logger.info(shape) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): logger.info("get data from dataset") num_iter += 1 @@ -172,7 +172,7 @@ def test_cache_map_failure1(): try: num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_iter += 1 except RuntimeError as e: logger.info("Got an exception in DE: {}".format(str(e))) diff --git a/tests/ut/python/dataset/test_cache_nomap.py b/tests/ut/python/dataset/test_cache_nomap.py index 010d32e3703..74fb0939a54 100644 --- a/tests/ut/python/dataset/test_cache_nomap.py +++ b/tests/ut/python/dataset/test_cache_nomap.py @@ -48,7 +48,7 @@ def test_cache_nomap_basic1(): ds1 = ds1.repeat(4) num_iter = 0 - for data in ds1.create_dict_iterator(): + for data in ds1.create_dict_iterator(num_epochs=1): logger.info("printing the label: {}".format(data["label"])) num_iter += 1 @@ -80,7 +80,7 @@ def test_cache_nomap_basic2(): ds1 = ds1.repeat(2) num_iter = 0 - for data in ds1.create_dict_iterator(): + for data in ds1.create_dict_iterator(num_epochs=1): logger.info("printing the label: {}".format(data["label"])) num_iter += 1 @@ -112,7 +112,7 @@ def test_cache_nomap_basic3(): ds1 = ds1.repeat(4) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_iter += 1 logger.info("Number of data in ds1: {} ".format(num_iter)) @@ -164,7 +164,7 @@ def test_cache_nomap_basic4(): ds1 = ds1.repeat(4) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_iter += 1 logger.info("Number of data in ds1: {} ".format(num_iter)) @@ -201,7 +201,7 @@ def test_cache_nomap_basic5(): ds1 = ds1.repeat(4) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_iter += 1 logger.info("Number of data in ds1: {} ".format(num_iter)) @@ -241,7 +241,7 @@ def test_cache_nomap_basic6(): ds1 = ds1.repeat(4) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_iter += 1 logger.info("Number of data in ds1: {} ".format(num_iter)) @@ -277,7 +277,7 @@ def test_cache_nomap_basic7(): ds1 = ds1.repeat(4) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_iter += 1 logger.info("Number of data in ds1: {} ".format(num_iter)) @@ -309,13 +309,13 @@ def test_cache_nomap_allowed_share1(): ds2 = ds2.shuffle(buffer_size=2) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 12 logger.info("Number of data in ds1: {} ".format(num_iter)) num_iter = 0 - for _ in ds2.create_dict_iterator(): + for _ in ds2.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 3 logger.info("test_cache_nomap_allowed_share1 Ended.\n") @@ -351,13 +351,13 @@ def test_cache_nomap_allowed_share2(): ds2 = ds2.shuffle(buffer_size=2) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_iter += 1 logger.info("Number of data in ds1: {} ".format(num_iter)) assert num_iter == 12 num_iter = 0 - for _ in ds2.create_dict_iterator(): + for _ in ds2.create_dict_iterator(num_epochs=1): num_iter 
+= 1 assert num_iter == 3 logger.info("test_cache_nomap_allowed_share2 Ended.\n") @@ -387,13 +387,13 @@ def test_cache_nomap_allowed_share3(): ds2 = ds2.repeat(4) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_iter += 1 logger.info("Number of data in ds1: {} ".format(num_iter)) assert num_iter == 12 num_iter = 0 - for _ in ds2.create_dict_iterator(): + for _ in ds2.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 12 logger.info("test_cache_nomap_allowed_share3 Ended.\n") @@ -424,13 +424,13 @@ def test_cache_nomap_allowed_share4(): ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache, num_parallel_workers=2) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_iter += 1 logger.info("Number of data in ds1: {} ".format(num_iter)) assert num_iter == 3 num_iter = 0 - for _ in ds2.create_dict_iterator(): + for _ in ds2.create_dict_iterator(num_epochs=1): num_iter += 1 logger.info("Number of data in ds2: {} ".format(num_iter)) assert num_iter == 3 @@ -464,7 +464,7 @@ def test_cache_nomap_disallowed_share1(): ds2 = ds2.map(input_columns=["image"], operations=rescale_op, cache=some_cache) num_iter = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_iter += 1 logger.info("Number of data in ds1: {} ".format(num_iter)) assert num_iter == 3 diff --git a/tests/ut/python/dataset/test_callbacks.py b/tests/ut/python/dataset/test_callbacks.py index 77c9d23953e..744ed6c0975 100644 --- a/tests/ut/python/dataset/test_callbacks.py +++ b/tests/ut/python/dataset/test_callbacks.py @@ -279,7 +279,7 @@ def test_callbacks_non_sink(): 'ms_step_end_2_6', 'ds_step_begin_2_7', 'ms_step_end_2_7', 'ds_step_begin_2_8', 'ms_step_end_2_8', 'ms_epoch_end_2_8'] - assert events == expected_synced_events + assert events[:18] == expected_synced_events def test_callbacks_non_sink_batch_size2(): @@ -303,7 +303,7 @@ def test_callbacks_non_sink_batch_size2(): 'ds_step_begin_2_5', 'ms_step_end_2_3', 'ds_step_begin_2_7', 'ms_step_end_2_4', 'ms_epoch_end_2_4'] - assert events == expected_synced_events + assert events[:10] == expected_synced_events def test_callbacks_non_sink_mismatch_size(): @@ -443,7 +443,7 @@ def test_callbacks_one_cb(): data = data.map(operations=(lambda x: x), callbacks=[my_epoch_begin, my_step_end]) data = data.map(operations=(lambda x: x), callbacks=[my_epoch_end, my_step_begin]) - itr = data.create_tuple_iterator() + itr = data.create_tuple_iterator(num_epochs=2) for _ in range(2): for _ in itr: pass diff --git a/tests/ut/python/dataset/test_center_crop.py b/tests/ut/python/dataset/test_center_crop.py index 03b8079e1ee..86961857a5b 100644 --- a/tests/ut/python/dataset/test_center_crop.py +++ b/tests/ut/python/dataset/test_center_crop.py @@ -48,7 +48,7 @@ def test_center_crop_op(height=375, width=375, plot=False): image_cropped = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image_cropped.append(item1["image"].copy()) image.append(item2["image"].copy()) if plot: @@ -98,7 +98,7 @@ def test_center_crop_comp(height=375, width=375, plot=False): image_c_cropped = [] image_py_cropped = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), 
data2.create_dict_iterator(num_epochs=1)): c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) # Note: The images aren't exactly the same due to rounding error @@ -131,7 +131,7 @@ def test_crop_grayscale(height=375, width=375): crop_gray = vision.CenterCrop([height, width]) data1 = data1.map(input_columns=["image"], operations=crop_gray) - for item1 in data1.create_dict_iterator(): + for item1 in data1.create_dict_iterator(num_epochs=1): c_image = item1["image"] # Check that the image is grayscale diff --git a/tests/ut/python/dataset/test_config.py b/tests/ut/python/dataset/test_config.py index 8a76df559b6..2bff8166598 100644 --- a/tests/ut/python/dataset/test_config.py +++ b/tests/ut/python/dataset/test_config.py @@ -287,7 +287,7 @@ def test_deterministic_python_seed(): data1 = data1.map(input_columns=["image"], operations=transform()) data1_output = [] # config.set_seed() calls random.seed() - for data_one in data1.create_dict_iterator(): + for data_one in data1.create_dict_iterator(num_epochs=1): data1_output.append(data_one["image"]) # Second dataset @@ -297,7 +297,7 @@ def test_deterministic_python_seed(): ds.config.set_seed(0) data2_output = [] - for data_two in data2.create_dict_iterator(): + for data_two in data2.create_dict_iterator(num_epochs=1): data2_output.append(data_two["image"]) np.testing.assert_equal(data1_output, data2_output) @@ -330,7 +330,7 @@ def test_deterministic_python_seed_multi_thread(): data1 = data1.map(input_columns=["image"], operations=transform(), python_multiprocessing=True) data1_output = [] # config.set_seed() calls random.seed() - for data_one in data1.create_dict_iterator(): + for data_one in data1.create_dict_iterator(num_epochs=1): data1_output.append(data_one["image"]) # Second dataset @@ -341,7 +341,7 @@ def test_deterministic_python_seed_multi_thread(): ds.config.set_seed(0) data2_output = [] - for data_two in data2.create_dict_iterator(): + for data_two in data2.create_dict_iterator(num_epochs=1): data2_output.append(data_two["image"]) try: diff --git a/tests/ut/python/dataset/test_cut_out.py b/tests/ut/python/dataset/test_cut_out.py index 862b1f33c8d..1e9cf4090dc 100644 --- a/tests/ut/python/dataset/test_cut_out.py +++ b/tests/ut/python/dataset/test_cut_out.py @@ -59,7 +59,7 @@ def test_cut_out_op(plot=False): data2 = data2.map(input_columns=["image"], operations=transforms_2) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) # C image doesn't require transpose @@ -106,7 +106,7 @@ def test_cut_out_op_multicut(plot=False): num_iter = 0 image_list_1, image_list_2 = [], [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) # C image doesn't require transpose @@ -187,7 +187,7 @@ def test_cut_out_comp(plot=False): num_iter = 0 image_list_1, image_list_2 = [], [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) # C 
image doesn't require transpose diff --git a/tests/ut/python/dataset/test_dataset_numpy_slices.py b/tests/ut/python/dataset/test_dataset_numpy_slices.py index 791a5674088..60930b5a146 100644 --- a/tests/ut/python/dataset/test_dataset_numpy_slices.py +++ b/tests/ut/python/dataset/test_dataset_numpy_slices.py @@ -62,7 +62,7 @@ def test_numpy_slices_list_append(): data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True), resize_op]) res = [] - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): res.append(data["image"]) ds = de.NumpySlicesDataset(res, column_names=["col1"], shuffle=False) diff --git a/tests/ut/python/dataset/test_datasets_celeba.py b/tests/ut/python/dataset/test_datasets_celeba.py index 26f18e87727..409041b1a8f 100644 --- a/tests/ut/python/dataset/test_datasets_celeba.py +++ b/tests/ut/python/dataset/test_datasets_celeba.py @@ -27,7 +27,7 @@ def test_celeba_dataset_label(): [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1]] count = 0 - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): logger.info("----------image--------") logger.info(item["image"]) logger.info("----------attr--------") @@ -50,7 +50,7 @@ def test_celeba_dataset_op(): data = data.map(input_columns=["image"], operations=resize_op) count = 0 - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): logger.info("----------image--------") logger.info(item["image"]) count = count + 1 @@ -63,7 +63,7 @@ def test_celeba_dataset_ext(): expect_labels = [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1], count = 0 - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): logger.info("----------image--------") logger.info(item["image"]) logger.info("----------attr--------") @@ -77,7 +77,7 @@ def test_celeba_dataset_ext(): def test_celeba_dataset_distribute(): data = ds.CelebADataset(DATA_DIR, decode=True, num_shards=2, shard_id=0) count = 0 - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): logger.info("----------image--------") logger.info(item["image"]) logger.info("----------attr--------") diff --git a/tests/ut/python/dataset/test_datasets_cifarop.py b/tests/ut/python/dataset/test_datasets_cifarop.py index 9b485cac8c2..cc3494f629f 100644 --- a/tests/ut/python/dataset/test_datasets_cifarop.py +++ b/tests/ut/python/dataset/test_datasets_cifarop.py @@ -75,7 +75,7 @@ def test_cifar10_content_check(): images, labels = load_cifar(DATA_DIR_10) num_iter = 0 # in this example, each dictionary has keys "image" and "label" - for i, d in enumerate(data1.create_dict_iterator()): + for i, d in enumerate(data1.create_dict_iterator(num_epochs=1)): np.testing.assert_array_equal(d["image"], images[i]) np.testing.assert_array_equal(d["label"], labels[i]) num_iter += 1 @@ -91,21 +91,21 @@ def test_cifar10_basic(): # case 0: test loading the whole dataset data0 = ds.Cifar10Dataset(DATA_DIR_10) num_iter0 = 0 - for _ in data0.create_dict_iterator(): + for _ in data0.create_dict_iterator(num_epochs=1): num_iter0 += 1 assert num_iter0 == 10000 # case 1: test num_samples data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100) num_iter1 = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter1 += 1 assert num_iter1 == 100 # 
case 2: test num_parallel_workers data2 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=50, num_parallel_workers=1) num_iter2 = 0 - for _ in data2.create_dict_iterator(): + for _ in data2.create_dict_iterator(num_epochs=1): num_iter2 += 1 assert num_iter2 == 50 @@ -113,7 +113,7 @@ def test_cifar10_basic(): data3 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100) data3 = data3.repeat(3) num_iter3 = 0 - for _ in data3.create_dict_iterator(): + for _ in data3.create_dict_iterator(num_epochs=1): num_iter3 += 1 assert num_iter3 == 300 @@ -125,7 +125,7 @@ def test_cifar10_basic(): assert data4.get_dataset_size() == 15 assert data4.get_batch_size() == 7 num_iter4 = 0 - for _ in data4.create_dict_iterator(): + for _ in data4.create_dict_iterator(num_epochs=1): num_iter4 += 1 assert num_iter4 == 15 @@ -137,7 +137,7 @@ def test_cifar10_basic(): assert data5.get_dataset_size() == 14 assert data5.get_batch_size() == 7 num_iter5 = 0 - for _ in data5.create_dict_iterator(): + for _ in data5.create_dict_iterator(num_epochs=1): num_iter5 += 1 assert num_iter5 == 14 @@ -153,7 +153,7 @@ def test_cifar10_pk_sampler(): data = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler) num_iter = 0 label_list = [] - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): label_list.append(item["label"]) num_iter += 1 np.testing.assert_array_equal(golden, label_list) @@ -170,7 +170,7 @@ def test_cifar10_sequential_sampler(): data1 = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler) data2 = ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_samples=num_samples) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): np.testing.assert_equal(item1["label"], item2["label"]) num_iter += 1 assert num_iter == num_samples @@ -225,7 +225,7 @@ def test_cifar10_visualize(plot=False): data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False) num_iter = 0 image_list, label_list = [], [] - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): image = item["image"] label = item["label"] image_list.append(image) @@ -251,7 +251,7 @@ def test_cifar100_content_check(): images, labels = load_cifar(DATA_DIR_100, kind="cifar100") num_iter = 0 # in this example, each dictionary has keys "image", "coarse_label" and "fine_image" - for i, d in enumerate(data1.create_dict_iterator()): + for i, d in enumerate(data1.create_dict_iterator(num_epochs=1)): np.testing.assert_array_equal(d["image"], images[i]) np.testing.assert_array_equal(d["coarse_label"], labels[i][0]) np.testing.assert_array_equal(d["fine_label"], labels[i][1]) @@ -268,21 +268,21 @@ def test_cifar100_basic(): # case 1: test num_samples data1 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=100) num_iter1 = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter1 += 1 assert num_iter1 == 100 # case 2: test repeat data1 = data1.repeat(2) num_iter2 = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter2 += 1 assert num_iter2 == 200 # case 3: test num_parallel_workers data2 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=100, num_parallel_workers=1) num_iter3 = 0 - for _ in data2.create_dict_iterator(): + for _ in data2.create_dict_iterator(num_epochs=1): num_iter3 += 1 assert num_iter3 == 100 @@ -294,7 +294,7 @@ def test_cifar100_basic(): assert 
data3.get_dataset_size() == 34 assert data3.get_batch_size() == 3 num_iter4 = 0 - for _ in data3.create_dict_iterator(): + for _ in data3.create_dict_iterator(num_epochs=1): num_iter4 += 1 assert num_iter4 == 34 @@ -304,7 +304,7 @@ def test_cifar100_basic(): assert data4.get_dataset_size() == 33 assert data4.get_batch_size() == 3 num_iter5 = 0 - for _ in data4.create_dict_iterator(): + for _ in data4.create_dict_iterator(num_epochs=1): num_iter5 += 1 assert num_iter5 == 33 @@ -319,7 +319,7 @@ def test_cifar100_pk_sampler(): data = ds.Cifar100Dataset(DATA_DIR_100, sampler=sampler) num_iter = 0 label_list = [] - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): label_list.append(item["coarse_label"]) num_iter += 1 np.testing.assert_array_equal(golden, label_list) @@ -375,7 +375,7 @@ def test_cifar100_visualize(plot=False): data1 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=10, shuffle=False) num_iter = 0 image_list, label_list = [], [] - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): image = item["image"] coarse_label = item["coarse_label"] fine_label = item["fine_label"] diff --git a/tests/ut/python/dataset/test_datasets_clue.py b/tests/ut/python/dataset/test_datasets_clue.py index 0d8a60f5d11..d9f33041d49 100644 --- a/tests/ut/python/dataset/test_datasets_clue.py +++ b/tests/ut/python/dataset/test_datasets_clue.py @@ -26,7 +26,7 @@ def test_clue(): data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=False) data = data.repeat(2) data = data.skip(3) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'label': d['label'].item().decode("utf8"), 'sentence1': d['sentence1'].item().decode("utf8"), @@ -43,7 +43,7 @@ def test_clue_num_shards(): buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', num_shards=3, shard_id=1) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'label': d['label'].item().decode("utf8"), 'sentence1': d['sentence1'].item().decode("utf8"), @@ -60,7 +60,7 @@ def test_clue_num_samples(): data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', num_samples=2) count = 0 - for _ in data.create_dict_iterator(): + for _ in data.create_dict_iterator(num_epochs=1): count += 1 assert count == 2 @@ -87,7 +87,7 @@ def test_clue_afqmc(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'label': d['label'].item().decode("utf8"), 'sentence1': d['sentence1'].item().decode("utf8"), @@ -98,7 +98,7 @@ def test_clue_afqmc(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='AFQMC', usage='test', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'id': d['id'], 'sentence1': d['sentence1'].item().decode("utf8"), @@ -109,7 +109,7 @@ def test_clue_afqmc(): # evaluation buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='AFQMC', usage='eval', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'label': d['label'].item().decode("utf8"), 'sentence1': d['sentence1'].item().decode("utf8"), @@ -129,7 +129,7 @@ def test_clue_cmnli(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='CMNLI', usage='train', shuffle=False) - for d in 
data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'label': d['label'].item().decode("utf8"), 'sentence1': d['sentence1'].item().decode("utf8"), @@ -140,7 +140,7 @@ def test_clue_cmnli(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='CMNLI', usage='test', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'id': d['id'], 'sentence1': d['sentence1'], @@ -151,7 +151,7 @@ def test_clue_cmnli(): # eval buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='CMNLI', usage='eval', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'label': d['label'], 'sentence1': d['sentence1'], @@ -171,7 +171,7 @@ def test_clue_csl(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='CSL', usage='train', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'id': d['id'], 'abst': d['abst'].item().decode("utf8"), @@ -183,7 +183,7 @@ def test_clue_csl(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='CSL', usage='test', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'id': d['id'], 'abst': d['abst'].item().decode("utf8"), @@ -194,7 +194,7 @@ def test_clue_csl(): # eval buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='CSL', usage='eval', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'id': d['id'], 'abst': d['abst'].item().decode("utf8"), @@ -215,7 +215,7 @@ def test_clue_iflytek(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='IFLYTEK', usage='train', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'label': d['label'].item().decode("utf8"), 'label_des': d['label_des'].item().decode("utf8"), @@ -226,7 +226,7 @@ def test_clue_iflytek(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='IFLYTEK', usage='test', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'id': d['id'], 'sentence': d['sentence'].item().decode("utf8") @@ -236,7 +236,7 @@ def test_clue_iflytek(): # eval buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='IFLYTEK', usage='eval', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'label': d['label'].item().decode("utf8"), 'label_des': d['label_des'].item().decode("utf8"), @@ -256,7 +256,7 @@ def test_clue_tnews(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='TNEWS', usage='train', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'label': d['label'].item().decode("utf8"), 'label_desc': d['label_desc'].item().decode("utf8"), @@ -269,7 +269,7 @@ def test_clue_tnews(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='TNEWS', usage='test', shuffle=False) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'id': d['id'], 'sentence': d['sentence'].item().decode("utf8"), @@ -281,7 +281,7 @@ def test_clue_tnews(): # eval buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='TNEWS', usage='eval', shuffle=False) - for d in data.create_dict_iterator(): + for d in 
data.create_dict_iterator(num_epochs=1): buffer.append({ 'label': d['label'].item().decode("utf8"), 'label_desc': d['label_desc'].item().decode("utf8"), @@ -303,7 +303,7 @@ def test_clue_wsc(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='WSC', usage='train') - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'span1_index': d['span1_index'], 'span2_index': d['span2_index'], @@ -318,7 +318,7 @@ def test_clue_wsc(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='WSC', usage='test') - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'span1_index': d['span1_index'], 'span2_index': d['span2_index'], @@ -332,7 +332,7 @@ def test_clue_wsc(): # eval buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='WSC', usage='eval') - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append({ 'span1_index': d['span1_index'], 'span2_index': d['span2_index'], diff --git a/tests/ut/python/dataset/test_datasets_coco.py b/tests/ut/python/dataset/test_datasets_coco.py index fd7430ccd29..deff626333f 100644 --- a/tests/ut/python/dataset/test_datasets_coco.py +++ b/tests/ut/python/dataset/test_datasets_coco.py @@ -32,7 +32,7 @@ def test_coco_detection(): image_shape = [] bbox = [] category_id = [] - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): image_shape.append(data["image"].shape) bbox.append(data["bbox"]) category_id.append(data["category_id"]) @@ -64,7 +64,7 @@ def test_coco_stuff(): image_shape = [] segmentation = [] iscrowd = [] - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): image_shape.append(data["image"].shape) segmentation.append(data["segmentation"]) iscrowd.append(data["iscrowd"]) @@ -104,7 +104,7 @@ def test_coco_keypoint(): image_shape = [] keypoints = [] num_keypoints = [] - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): image_shape.append(data["image"].shape) keypoints.append(data["keypoints"]) num_keypoints.append(data["num_keypoints"]) @@ -132,7 +132,7 @@ def test_coco_panoptic(): category_id = [] iscrowd = [] area = [] - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): image_shape.append(data["image"].shape) bbox.append(data["bbox"]) category_id.append(data["category_id"]) @@ -175,7 +175,7 @@ def test_coco_case_0(): data1 = data1.shuffle(10) data1 = data1.batch(3, pad_info={}) num_iter = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 2 @@ -186,11 +186,11 @@ def test_coco_case_1(): dataset1, dataset2 = data1.split(sizes=sizes, randomize=randomize) num_iter = 0 - for _ in dataset1.create_dict_iterator(): + for _ in dataset1.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 3 num_iter = 0 - for _ in dataset2.create_dict_iterator(): + for _ in dataset2.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 3 diff --git a/tests/ut/python/dataset/test_datasets_csv.py b/tests/ut/python/dataset/test_datasets_csv.py index f998e9774db..ca2eb8d1338 100644 --- a/tests/ut/python/dataset/test_datasets_csv.py +++ b/tests/ut/python/dataset/test_datasets_csv.py @@ -33,7 +33,7 @@ def test_csv_dataset_basic(): shuffle=False) data = data.repeat(2) data = data.skip(2) - for d in data.create_dict_iterator(): + for d in 
data.create_dict_iterator(num_epochs=1): buffer.append(d) assert len(buffer) == 4 @@ -45,7 +45,7 @@ def test_csv_dataset_one_file(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append(d) assert len(buffer) == 3 @@ -58,7 +58,7 @@ def test_csv_dataset_all_file(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.append(d) assert len(buffer) == 10 @@ -70,7 +70,7 @@ def test_csv_dataset_num_samples(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False, num_samples=2) count = 0 - for _ in data.create_dict_iterator(): + for _ in data.create_dict_iterator(num_epochs=1): count += 1 assert count == 2 @@ -83,7 +83,7 @@ def test_csv_dataset_distribution(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False, num_shards=2, shard_id=0) count = 0 - for _ in data.create_dict_iterator(): + for _ in data.create_dict_iterator(num_epochs=1): count += 1 assert count == 2 @@ -96,7 +96,7 @@ def test_csv_dataset_quoted(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.extend([d['col1'].item().decode("utf8"), d['col2'].item().decode("utf8"), d['col3'].item().decode("utf8"), @@ -113,7 +113,7 @@ def test_csv_dataset_separated(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.extend([d['col1'].item().decode("utf8"), d['col2'].item().decode("utf8"), d['col3'].item().decode("utf8"), @@ -129,7 +129,7 @@ def test_csv_dataset_embedded(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.extend([d['col1'].item().decode("utf8"), d['col2'].item().decode("utf8"), d['col3'].item().decode("utf8"), @@ -145,7 +145,7 @@ def test_csv_dataset_chinese(): column_names=['col1', 'col2', 'col3', 'col4', 'col5'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.extend([d['col1'].item().decode("utf8"), d['col2'].item().decode("utf8"), d['col3'].item().decode("utf8"), @@ -161,7 +161,7 @@ def test_csv_dataset_header(): column_defaults=["", "", "", ""], shuffle=False) buffer = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.extend([d['col1'].item().decode("utf8"), d['col2'].item().decode("utf8"), d['col3'].item().decode("utf8"), @@ -177,7 +177,7 @@ def test_csv_dataset_number(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): buffer.extend([d['col1'].item(), d['col2'].item(), d['col3'].item(), @@ -203,7 +203,7 @@ def test_csv_dataset_exception(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) with pytest.raises(Exception) as err: - for _ in data.create_dict_iterator(): + for _ in data.create_dict_iterator(num_epochs=1): pass assert "Failed to parse file" in str(err.value) @@ -216,7 +216,7 @@ def test_csv_dataset_type_error(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) with pytest.raises(Exception) as err: - for _ in 
data.create_dict_iterator(): + for _ in data.create_dict_iterator(num_epochs=1): pass assert "type does not match" in str(err.value) diff --git a/tests/ut/python/dataset/test_datasets_generator.py b/tests/ut/python/dataset/test_datasets_generator.py index 512f746b89a..d78b55aa178 100644 --- a/tests/ut/python/dataset/test_datasets_generator.py +++ b/tests/ut/python/dataset/test_datasets_generator.py @@ -46,7 +46,7 @@ def test_generator_0(): data1 = ds.GeneratorDataset(generator_1d, ["data"]) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -68,7 +68,7 @@ def test_generator_1(): data1 = ds.GeneratorDataset(generator_md, ["data"]) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([[i, i + 1], [i + 2, i + 3]]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -90,7 +90,7 @@ def test_generator_2(): data1 = ds.GeneratorDataset(generator_mc, ["col0", "col1"]) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["col0"], golden) golden = np.array([[i, i + 1], [i + 2, i + 3]]) @@ -110,7 +110,7 @@ def test_generator_3(): data1 = data1.repeat(4) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -130,7 +130,7 @@ def test_generator_4(): data1 = data1.batch(4) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]]) np.testing.assert_array_equal(item["data"], golden) i = i + 4 @@ -150,7 +150,7 @@ def type_tester(t): data1 = data1.batch(4) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) np.testing.assert_array_equal(item["data"], golden) i = i + 4 @@ -177,7 +177,7 @@ def type_tester_with_type_check(t, c): data1 = data1.batch(4) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) np.testing.assert_array_equal(item["data"], golden) i = i + 4 @@ -212,7 +212,7 @@ def type_tester_with_type_check_2c(t, c): data1 = data1.batch(4) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) np.testing.assert_array_equal(item["data0"], golden) i = i + 4 @@ -249,7 +249,7 @@ def test_generator_8(): num_parallel_workers=2) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i * 3]) np.testing.assert_array_equal(item["out0"], golden) 
golden = np.array([[i * 7, (i + 1) * 7], [(i + 2) * 7, (i + 3) * 7]]) @@ -303,7 +303,7 @@ def test_generator_10(): # Expected column order is |col0|out1|out2| i = 0 - for item in data1.create_tuple_iterator(): + for item in data1.create_tuple_iterator(num_epochs=1): golden = np.array([i]) np.testing.assert_array_equal(item[0], golden) golden = np.array([[i, i + 1], [i + 2, i + 3]]) @@ -327,7 +327,7 @@ def test_generator_11(): # Expected column order is |out1|out2| i = 0 - for item in data1.create_tuple_iterator(): + for item in data1.create_tuple_iterator(num_epochs=1): # len should be 2 because col0 is dropped (not included in columns_order) assert len(item) == 2 golden = np.array([[i, i + 1], [i + 2, i + 3]]) @@ -349,7 +349,7 @@ def test_generator_12(): # Expected column order is |col0|col1| i = 0 - for item in data1.create_tuple_iterator(): + for item in data1.create_tuple_iterator(num_epochs=1): assert len(item) == 2 golden = np.array([i * 5]) np.testing.assert_array_equal(item[0], golden) @@ -362,7 +362,7 @@ def test_generator_12(): # Expected column order is |col0|col1| i = 0 - for item in data1.create_tuple_iterator(): + for item in data1.create_tuple_iterator(num_epochs=1): assert len(item) == 2 golden = np.array([i * 5]) np.testing.assert_array_equal(item[1], golden) @@ -383,7 +383,7 @@ def test_generator_13(): # Expected column order is |out0|col1| i = 0 - for item in data1.create_tuple_iterator(): + for item in data1.create_tuple_iterator(num_epochs=1): assert len(item) == 2 golden = np.array([i * 5]) np.testing.assert_array_equal(item[0], golden) @@ -391,7 +391,7 @@ def test_generator_13(): np.testing.assert_array_equal(item[1], golden) i = i + 1 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # len should be 2 because col0 is dropped (not included in columns_order) assert len(item) == 2 golden = np.array([i * 5]) @@ -410,7 +410,7 @@ def test_generator_14(): source = [(np.array([x]),) for x in range(256)] ds1 = ds.GeneratorDataset(source, ["data"], sampler=ds.SequentialSampler(), num_parallel_workers=4).repeat(2) i = 0 - for data in ds1.create_dict_iterator(): # each data is a dictionary + for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(data["data"], golden) i = i + 1 @@ -428,7 +428,7 @@ def test_generator_15(): source = [(np.array([x]),) for x in range(256)] ds1 = ds.GeneratorDataset(source, ["data"], sampler=sampler, num_parallel_workers=4).repeat(2) i = 0 - for data in ds1.create_dict_iterator(): # each data is a dictionary + for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(data["data"], golden) i = i + 1 @@ -447,7 +447,7 @@ def test_generator_16(): data1 = ds.GeneratorDataset(source, ["col0", "col1"], sampler=ds.SequentialSampler()) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["col0"], golden) golden = np.array([i + 1]) @@ -467,7 +467,7 @@ def test_generator_17(): data1 = ds.GeneratorDataset(source, ["col0", "col1"], sampler=sampler) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) 
np.testing.assert_array_equal(item["col0"], golden) golden = np.array([i + 1]) @@ -527,7 +527,7 @@ def test_generator_sequential_sampler(): source = [(np.array([x]),) for x in range(64)] ds1 = ds.GeneratorDataset(source, ["data"], sampler=ds.SequentialSampler()) i = 0 - for data in ds1.create_dict_iterator(): # each data is a dictionary + for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(data["data"], golden) i = i + 1 @@ -536,7 +536,7 @@ def test_generator_sequential_sampler(): def test_generator_random_sampler(): source = [(np.array([x]),) for x in range(64)] ds1 = ds.GeneratorDataset(source, ["data"], shuffle=True) - for _ in ds1.create_dict_iterator(): # each data is a dictionary + for _ in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary pass @@ -545,7 +545,7 @@ def test_generator_distributed_sampler(): for sid in range(8): ds1 = ds.GeneratorDataset(source, ["data"], shuffle=False, num_shards=8, shard_id=sid) i = sid - for data in ds1.create_dict_iterator(): # each data is a dictionary + for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(data["data"], golden) i = i + 8 @@ -559,17 +559,17 @@ def test_generator_num_samples(): ds3 = ds.GeneratorDataset(generator_1d, ["data"], num_samples=num_samples) count = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): count = count + 1 assert count == num_samples count = 0 - for _ in ds2.create_dict_iterator(): + for _ in ds2.create_dict_iterator(num_epochs=1): count = count + 1 assert count == num_samples count = 0 - for _ in ds3.create_dict_iterator(): + for _ in ds3.create_dict_iterator(num_epochs=1): count = count + 1 assert count == num_samples @@ -581,12 +581,12 @@ def test_generator_num_samples_underflow(): ds3 = ds.GeneratorDataset(generator_1d, ["data"], num_samples=num_samples) count = 0 - for _ in ds2.create_dict_iterator(): + for _ in ds2.create_dict_iterator(num_epochs=1): count = count + 1 assert count == 64 count = 0 - for _ in ds3.create_dict_iterator(): + for _ in ds3.create_dict_iterator(num_epochs=1): count = count + 1 assert count == 64 @@ -604,7 +604,7 @@ def type_tester_with_type_check_2c_schema(t, c): data1 = data1.batch(4) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) np.testing.assert_array_equal(item["data0"], golden) i = i + 4 @@ -635,7 +635,7 @@ def test_generator_dataset_size_0(): data_size = data1.get_dataset_size() num_rows = 0 - for _ in data1.create_dict_iterator(): # each data is a dictionary + for _ in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary num_rows = num_rows + 1 assert data_size == num_rows @@ -652,7 +652,7 @@ def test_generator_dataset_size_1(): data_size = data1.get_dataset_size() num_rows = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_rows = num_rows + 1 assert data_size == num_rows @@ -669,7 +669,7 @@ def test_generator_dataset_size_2(): data_size = data1.get_dataset_size() num_rows = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_rows = num_rows + 1 assert data_size == num_rows @@ -686,7 +686,7 @@ def test_generator_dataset_size_3(): data_size = data1.get_dataset_size() 
num_rows = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_rows += 1 assert data_size == num_rows @@ -702,7 +702,7 @@ def test_generator_dataset_size_4(): data_size = data1.get_dataset_size() num_rows = 0 - for _ in data1.create_dict_iterator(): # each data is a dictionary + for _ in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary num_rows = num_rows + 1 assert data_size == num_rows @@ -716,7 +716,7 @@ def test_generator_dataset_size_5(): data1 = ds.GeneratorDataset(dataset_generator, ["data"], num_shards=3, shard_id=0) num_rows = 0 - for _ in data1.create_dict_iterator(): # each data is a dictionary + for _ in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary num_rows = num_rows + 1 data_size = data1.get_dataset_size() assert data_size == num_rows @@ -737,7 +737,7 @@ def manual_test_generator_keyboard_interrupt(): return 1024 ds1 = ds.GeneratorDataset(MyDS(), ["data"], num_parallel_workers=4).repeat(2) - for _ in ds1.create_dict_iterator(): # each data is a dictionary + for _ in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary pass diff --git a/tests/ut/python/dataset/test_datasets_imagefolder.py b/tests/ut/python/dataset/test_datasets_imagefolder.py index 8e5679076d4..f1e0d160f0d 100644 --- a/tests/ut/python/dataset/test_datasets_imagefolder.py +++ b/tests/ut/python/dataset/test_datasets_imagefolder.py @@ -28,7 +28,7 @@ def test_imagefolder_basic(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -48,7 +48,7 @@ def test_imagefolder_numsamples(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -61,7 +61,7 @@ def test_imagefolder_numsamples(): data1 = ds.ImageFolderDatasetV2(DATA_DIR, num_parallel_workers=2, sampler=random_sampler) num_iter = 0 - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 3 @@ -70,7 +70,7 @@ def test_imagefolder_numsamples(): data1 = ds.ImageFolderDatasetV2(DATA_DIR, num_parallel_workers=2, sampler=random_sampler) num_iter = 0 - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 3 @@ -86,7 +86,7 @@ def test_imagefolder_numshards(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -106,7 +106,7 @@ def test_imagefolder_shardid(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each 
dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -126,7 +126,7 @@ def test_imagefolder_noshuffle(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -147,7 +147,7 @@ def test_imagefolder_extrashuffle(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -171,7 +171,7 @@ def test_imagefolder_classindex(): 333, 333, 333, 333, 333, 333, 333, 333, 333, 333, 333] num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -196,7 +196,7 @@ def test_imagefolder_negative_classindex(): -333, -333, -333, -333, -333, -333, -333, -333, -333, -333, -333] num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -218,7 +218,7 @@ def test_imagefolder_extensions(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -239,7 +239,7 @@ def test_imagefolder_decode(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -267,7 +267,7 @@ def test_sequential_sampler(): result = [] num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" result.append(item["label"]) num_iter += 1 @@ -287,7 +287,7 @@ def test_random_sampler(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -308,7 +308,7 @@ def test_distributed_sampler(): data1 = 
data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -329,7 +329,7 @@ def test_pk_sampler(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -351,7 +351,7 @@ def test_subset_random_sampler(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -373,7 +373,7 @@ def test_weighted_random_sampler(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -393,7 +393,7 @@ def test_imagefolder_rename(): data1 = data1.repeat(repeat_count) num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -405,7 +405,7 @@ def test_imagefolder_rename(): data1 = data1.rename(input_columns=["image"], output_columns="image2") num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image2"])) logger.info("label is {}".format(item["label"])) @@ -430,7 +430,7 @@ def test_imagefolder_zip(): data3 = ds.zip((data1, data2)) num_iter = 0 - for item in data3.create_dict_iterator(): # each data is a dictionary + for item in data3.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) diff --git a/tests/ut/python/dataset/test_datasets_manifestop.py b/tests/ut/python/dataset/test_datasets_manifestop.py index a6493c4061a..e28e4b4ab71 100644 --- a/tests/ut/python/dataset/test_datasets_manifestop.py +++ b/tests/ut/python/dataset/test_datasets_manifestop.py @@ -26,7 +26,7 @@ def test_manifest_dataset_train(): count = 0 cat_count = 0 dog_count = 0 - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): logger.info("item[image] is {}".format(item["image"])) count = count + 1 if item["label"].size == 1 and item["label"] == 0: @@ -41,7 +41,7 @@ def test_manifest_dataset_train(): def 
test_manifest_dataset_eval(): data = ds.ManifestDataset(DATA_FILE, "eval", decode=True) count = 0 - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): logger.info("item[image] is {}".format(item["image"])) count = count + 1 if item["label"] != 0 and item["label"] != 1: @@ -55,7 +55,7 @@ def test_manifest_dataset_class_index(): out_class_indexing = data.get_class_indexing() assert out_class_indexing == {"dog": 11} count = 0 - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): logger.info("item[image] is {}".format(item["image"])) count = count + 1 if item["label"] != 11: @@ -71,7 +71,7 @@ def test_manifest_dataset_get_class_index(): class_indexing = data.get_class_indexing() assert class_indexing == {'cat': 0, 'dog': 1, 'flower': 2} count = 0 - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): logger.info("item[image] is {}".format(item["image"])) count = count + 1 assert count == 4 @@ -81,7 +81,7 @@ def test_manifest_dataset_multi_label(): data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False) count = 0 expect_label = [1, 0, 0, [0, 2]] - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): assert item["label"].tolist() == expect_label[count] logger.info("item[image] is {}".format(item["image"])) count = count + 1 @@ -107,7 +107,7 @@ def test_manifest_dataset_multi_label_onehot(): data = data.map(input_columns=["label"], operations=multi_label_hot) data = data.batch(2) count = 0 - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): assert item["label"].tolist() == expect_label[count] logger.info("item[image] is {}".format(item["image"])) count = count + 1 diff --git a/tests/ut/python/dataset/test_datasets_mnist.py b/tests/ut/python/dataset/test_datasets_mnist.py index dfd6f7c6fc6..a78ff702b6a 100644 --- a/tests/ut/python/dataset/test_datasets_mnist.py +++ b/tests/ut/python/dataset/test_datasets_mnist.py @@ -64,7 +64,7 @@ def test_mnist_content_check(): num_iter = 0 # in this example, each dictionary has keys "image" and "label" image_list, label_list = [], [] - for i, data in enumerate(data1.create_dict_iterator()): + for i, data in enumerate(data1.create_dict_iterator(num_epochs=1)): image_list.append(data["image"]) label_list.append("label {}".format(data["label"])) np.testing.assert_array_equal(data["image"], images[i]) @@ -82,14 +82,14 @@ def test_mnist_basic(): # case 1: test loading whole dataset data1 = ds.MnistDataset(DATA_DIR) num_iter1 = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter1 += 1 assert num_iter1 == 10000 # case 2: test num_samples data2 = ds.MnistDataset(DATA_DIR, num_samples=500) num_iter2 = 0 - for _ in data2.create_dict_iterator(): + for _ in data2.create_dict_iterator(num_epochs=1): num_iter2 += 1 assert num_iter2 == 500 @@ -97,7 +97,7 @@ def test_mnist_basic(): data3 = ds.MnistDataset(DATA_DIR, num_samples=200) data3 = data3.repeat(5) num_iter3 = 0 - for _ in data3.create_dict_iterator(): + for _ in data3.create_dict_iterator(num_epochs=1): num_iter3 += 1 assert num_iter3 == 1000 @@ -109,7 +109,7 @@ def test_mnist_basic(): assert data4.get_dataset_size() == 15 assert data4.get_batch_size() == 7 num_iter4 = 0 - for _ in data4.create_dict_iterator(): + for _ in data4.create_dict_iterator(num_epochs=1): num_iter4 += 1 assert num_iter4 == 15 @@ -121,7 +121,7 @@ def 
test_mnist_basic(): assert data5.get_dataset_size() == 14 assert data5.get_batch_size() == 7 num_iter5 = 0 - for _ in data5.create_dict_iterator(): + for _ in data5.create_dict_iterator(num_epochs=1): num_iter5 += 1 assert num_iter5 == 14 @@ -137,7 +137,7 @@ def test_mnist_pk_sampler(): data = ds.MnistDataset(DATA_DIR, sampler=sampler) num_iter = 0 label_list = [] - for item in data.create_dict_iterator(): + for item in data.create_dict_iterator(num_epochs=1): label_list.append(item["label"]) num_iter += 1 np.testing.assert_array_equal(golden, label_list) @@ -155,7 +155,7 @@ def test_mnist_sequential_sampler(): data2 = ds.MnistDataset(DATA_DIR, shuffle=False, num_samples=num_samples) label_list1, label_list2 = [], [] num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): label_list1.append(item1["label"]) label_list2.append(item2["label"]) num_iter += 1 @@ -214,7 +214,7 @@ def test_mnist_visualize(plot=False): data1 = ds.MnistDataset(DATA_DIR, num_samples=10, shuffle=False) num_iter = 0 image_list, label_list = [], [] - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): image = item["image"] label = item["label"] image_list.append(image) diff --git a/tests/ut/python/dataset/test_datasets_sharding.py b/tests/ut/python/dataset/test_datasets_sharding.py index ce6a30077fc..db819885f0b 100644 --- a/tests/ut/python/dataset/test_datasets_sharding.py +++ b/tests/ut/python/dataset/test_datasets_sharding.py @@ -25,7 +25,7 @@ def test_imagefolder_shardings(print_res=False): shuffle=shuffle, class_indexing=class_index, decode=True) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary res.append(item["label"].item()) if print_res: logger.info("labels of dataset: {}".format(res)) @@ -59,7 +59,7 @@ def test_tfrecord_shardings1(print_res=False): shuffle=ds.Shuffle.FILES, num_parallel_workers=1) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary res.append(item["scalars"][0]) if print_res: logger.info("scalars of dataset: {}".format(res)) @@ -97,7 +97,7 @@ def test_tfrecord_shardings4(print_res=False): shuffle=ds.Shuffle.FILES, num_parallel_workers=4) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary res.append(item["scalars"][0]) if print_res: logger.info("scalars of dataset: {}".format(res)) @@ -141,7 +141,7 @@ def test_manifest_shardings(print_res=False): shuffle=shuffle, decode=True) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary res.append(item["label"].item()) if print_res: logger.info("labels of dataset: {}".format(res)) @@ -166,7 +166,7 @@ def test_voc_shardings(print_res=False): data1 = ds.VOCDataset(voc_dir, decode=True, sampler=sampler) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data 
is a dictionary res.append(item["image"].shape[0]) if print_res: logger.info("labels of dataset: {}".format(res)) @@ -194,7 +194,7 @@ def test_cifar10_shardings(print_res=False): shuffle=shuffle) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary res.append(item["label"].item()) if print_res: logger.info("labels of dataset: {}".format(res)) @@ -214,7 +214,7 @@ def test_cifar100_shardings(print_res=False): shuffle=shuffle) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary res.append(item["coarse_label"].item()) if print_res: logger.info("labels of dataset: {}".format(res)) @@ -233,7 +233,7 @@ def test_mnist_shardings(print_res=False): shuffle=shuffle) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary res.append(item["label"].item()) if print_res: logger.info("labels of dataset: {}".format(res)) diff --git a/tests/ut/python/dataset/test_datasets_textfileop.py b/tests/ut/python/dataset/test_datasets_textfileop.py index fb115e68295..40f58a5cf5b 100644 --- a/tests/ut/python/dataset/test_datasets_textfileop.py +++ b/tests/ut/python/dataset/test_datasets_textfileop.py @@ -25,7 +25,7 @@ DATA_ALL_FILE = "../data/dataset/testTextFileDataset/*" def test_textline_dataset_one_file(): data = ds.TextFileDataset(DATA_FILE) count = 0 - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): logger.info("{}".format(i["text"])) count += 1 assert count == 3 @@ -34,7 +34,7 @@ def test_textline_dataset_one_file(): def test_textline_dataset_all_file(): data = ds.TextFileDataset(DATA_ALL_FILE) count = 0 - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): logger.info("{}".format(i["text"])) count += 1 assert count == 5 @@ -43,7 +43,7 @@ def test_textline_dataset_all_file(): def test_textline_dataset_num_samples_zero(): data = ds.TextFileDataset(DATA_FILE, num_samples=0) count = 0 - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): logger.info("{}".format(i["text"])) count += 1 assert count == 3 @@ -56,7 +56,7 @@ def test_textline_dataset_shuffle_false4(): count = 0 line = ["This is a text file.", "Another file.", "Be happy every day.", "End of file.", "Good luck to everyone."] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -73,7 +73,7 @@ def test_textline_dataset_shuffle_false1(): count = 0 line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.", "Another file.", "End of file."] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -90,7 +90,7 @@ def test_textline_dataset_shuffle_files4(): count = 0 line = ["This is a text file.", "Another file.", "Be happy every day.", "End of file.", "Good luck to everyone."] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -107,7 +107,7 @@ def 
test_textline_dataset_shuffle_files1(): count = 0 line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.", "Another file.", "End of file."] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -124,7 +124,7 @@ def test_textline_dataset_shuffle_global4(): count = 0 line = ["Another file.", "Good luck to everyone.", "End of file.", "This is a text file.", "Be happy every day."] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -141,7 +141,7 @@ def test_textline_dataset_shuffle_global1(): count = 0 line = ["Another file.", "Good luck to everyone.", "This is a text file.", "End of file.", "Be happy every day."] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -154,7 +154,7 @@ def test_textline_dataset_shuffle_global1(): def test_textline_dataset_num_samples(): data = ds.TextFileDataset(DATA_FILE, num_samples=2) count = 0 - for _ in data.create_dict_iterator(): + for _ in data.create_dict_iterator(num_epochs=1): count += 1 assert count == 2 @@ -162,7 +162,7 @@ def test_textline_dataset_num_samples(): def test_textline_dataset_distribution(): data = ds.TextFileDataset(DATA_ALL_FILE, num_shards=2, shard_id=1) count = 0 - for _ in data.create_dict_iterator(): + for _ in data.create_dict_iterator(num_epochs=1): count += 1 assert count == 3 @@ -174,7 +174,7 @@ def test_textline_dataset_repeat(): line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.", "This is a text file.", "Be happy every day.", "Good luck to everyone.", "This is a text file.", "Be happy every day.", "Good luck to everyone."] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 diff --git a/tests/ut/python/dataset/test_datasets_tfrecord.py b/tests/ut/python/dataset/test_datasets_tfrecord.py index 36791ac4c68..d134c38f0d2 100644 --- a/tests/ut/python/dataset/test_datasets_tfrecord.py +++ b/tests/ut/python/dataset/test_datasets_tfrecord.py @@ -39,7 +39,7 @@ def test_tfrecord_shape(): schema_file = "../data/dataset/testTFTestAllTypes/datasetSchemaRank0.json" ds1 = ds.TFRecordDataset(FILES, schema_file) ds1 = ds1.batch(2) - for data in ds1.create_dict_iterator(): + for data in ds1.create_dict_iterator(num_epochs=1): logger.info(data) output_shape = ds1.output_shapes() assert len(output_shape[-1]) == 1 @@ -51,7 +51,7 @@ def test_tfrecord_read_all_dataset(): ds1 = ds.TFRecordDataset(FILES, schema_file) assert ds1.get_dataset_size() == 12 count = 0 - for _ in ds1.create_tuple_iterator(): + for _ in ds1.create_tuple_iterator(num_epochs=1): count += 1 assert count == 12 @@ -62,7 +62,7 @@ def test_tfrecord_num_samples(): ds1 = ds.TFRecordDataset(FILES, schema_file, num_samples=8) assert ds1.get_dataset_size() == 8 count = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): count += 1 assert count == 8 @@ -73,7 +73,7 @@ def test_tfrecord_num_samples2(): ds1 = ds.TFRecordDataset(FILES, schema_file) assert ds1.get_dataset_size() == 7 count = 0 - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): count += 1 assert count == 7 @@ -139,7 +139,7 @@ def 
test_tfrecord_multi_files(): data1 = ds.TFRecordDataset(DATA_FILES2, SCHEMA_FILE2, shuffle=False) data1 = data1.repeat(1) num_iter = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 12 @@ -187,7 +187,7 @@ def test_tfrecord_shard(): shuffle=ds.Shuffle.FILES) data1 = data1.repeat(num_repeats) res = list() - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(item["scalars"][0]) return res @@ -215,7 +215,7 @@ def test_tfrecord_shard_equal_rows(): ds1 = ds.TFRecordDataset(tf_files, num_shards=num_shards, shard_id=shard_id, shard_equal_rows=True) ds1 = ds1.repeat(num_repeats) res = list() - for data in ds1.create_dict_iterator(): + for data in ds1.create_dict_iterator(num_epochs=1): res.append(data["scalars"][0]) return res @@ -238,7 +238,7 @@ def test_tfrecord_shard_equal_rows(): def test_tfrecord_no_schema_columns_list(): logger.info("test_tfrecord_no_schema_columns_list") data = ds.TFRecordDataset(FILES, shuffle=False, columns_list=["col_sint16"]) - row = data.create_dict_iterator().__next__() + row = data.create_dict_iterator(num_epochs=1).__next__() assert row["col_sint16"] == [-32768] with pytest.raises(KeyError) as info: @@ -258,7 +258,7 @@ def test_tfrecord_schema_columns_list(): schema.add_column('col_sint32', de_type=mstype.int64, shape=[1]) schema.add_column('col_sint64', de_type=mstype.int64, shape=[1]) data = ds.TFRecordDataset(FILES, schema=schema, shuffle=False, columns_list=["col_sint16"]) - row = data.create_dict_iterator().__next__() + row = data.create_dict_iterator(num_epochs=1).__next__() assert row["col_sint16"] == [-32768] with pytest.raises(KeyError) as info: @@ -275,7 +275,7 @@ def test_tfrecord_invalid_files(): data = ds.TFRecordDataset(files, SCHEMA_FILE, shuffle=ds.Shuffle.FILES) with pytest.raises(RuntimeError) as info: - _ = data.create_dict_iterator().get_next() + _ = data.create_dict_iterator(num_epochs=1).get_next() assert "cannot be opened" in str(info.value) assert "not valid tfrecord files" in str(info.value) assert valid_file not in str(info.value) diff --git a/tests/ut/python/dataset/test_datasets_voc.py b/tests/ut/python/dataset/test_datasets_voc.py index 1978b7005f4..107292eeccb 100644 --- a/tests/ut/python/dataset/test_datasets_voc.py +++ b/tests/ut/python/dataset/test_datasets_voc.py @@ -23,7 +23,7 @@ TARGET_SHAPE = [680, 680, 680, 680, 642, 607, 561, 596, 612, 680] def test_voc_segmentation(): data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", mode="train", decode=True, shuffle=False) num = 0 - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): assert item["image"].shape[0] == IMAGE_SHAPE[num] assert item["target"].shape[0] == TARGET_SHAPE[num] num += 1 @@ -34,7 +34,7 @@ def test_voc_detection(): data1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) num = 0 count = [0, 0, 0, 0, 0, 0] - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): assert item["image"].shape[0] == IMAGE_SHAPE[num] for label in item["label"]: count[label[0]] += 1 @@ -53,7 +53,7 @@ def test_voc_class_index(): assert (class_index2 == {'car': 0, 'cat': 1, 'train': 5}) num = 0 count = [0, 0, 0, 0, 0, 0] - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): for label in item["label"]: count[label[0]] += 1 assert label[0] in (0, 1, 5) @@ -71,7 +71,7 @@ def 
test_voc_get_class_indexing(): assert (class_index2 == {'car': 0, 'cat': 1, 'chair': 2, 'dog': 3, 'person': 4, 'train': 5}) num = 0 count = [0, 0, 0, 0, 0, 0] - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): for label in item["label"]: count[label[0]] += 1 assert label[0] in (0, 1, 2, 3, 4, 5) @@ -93,7 +93,7 @@ def test_case_0(): data1 = data1.batch(batch_size, drop_remainder=True) num = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num += 1 assert num == 20 @@ -110,7 +110,7 @@ def test_case_1(): data1 = data1.batch(batch_size, drop_remainder=True, pad_info={}) num = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num += 1 assert num == 18 @@ -122,12 +122,12 @@ def test_case_2(): dataset1, dataset2 = data1.split(sizes=sizes, randomize=randomize) num_iter = 0 - for _ in dataset1.create_dict_iterator(): + for _ in dataset1.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 5 num_iter = 0 - for _ in dataset2.create_dict_iterator(): + for _ in dataset2.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 5 @@ -135,7 +135,7 @@ def test_case_2(): def test_voc_exception(): try: data1 = ds.VOCDataset(DATA_DIR, task="InvalidTask", mode="train", decode=True) - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): pass assert False except ValueError: @@ -143,7 +143,7 @@ def test_voc_exception(): try: data2 = ds.VOCDataset(DATA_DIR, task="Segmentation", mode="train", class_indexing={"cat": 0}, decode=True) - for _ in data2.create_dict_iterator(): + for _ in data2.create_dict_iterator(num_epochs=1): pass assert False except ValueError: @@ -151,7 +151,7 @@ def test_voc_exception(): try: data3 = ds.VOCDataset(DATA_DIR, task="Detection", mode="notexist", decode=True) - for _ in data3.create_dict_iterator(): + for _ in data3.create_dict_iterator(num_epochs=1): pass assert False except ValueError: @@ -159,7 +159,7 @@ def test_voc_exception(): try: data4 = ds.VOCDataset(DATA_DIR, task="Detection", mode="xmlnotexist", decode=True) - for _ in data4.create_dict_iterator(): + for _ in data4.create_dict_iterator(num_epochs=1): pass assert False except RuntimeError: @@ -167,7 +167,7 @@ def test_voc_exception(): try: data5 = ds.VOCDataset(DATA_DIR, task="Detection", mode="invalidxml", decode=True) - for _ in data5.create_dict_iterator(): + for _ in data5.create_dict_iterator(num_epochs=1): pass assert False except RuntimeError: @@ -175,7 +175,7 @@ def test_voc_exception(): try: data6 = ds.VOCDataset(DATA_DIR, task="Detection", mode="xmlnoobject", decode=True) - for _ in data6.create_dict_iterator(): + for _ in data6.create_dict_iterator(num_epochs=1): pass assert False except RuntimeError: diff --git a/tests/ut/python/dataset/test_decode.py b/tests/ut/python/dataset/test_decode.py index ac4995fc231..7b732d8263a 100644 --- a/tests/ut/python/dataset/test_decode.py +++ b/tests/ut/python/dataset/test_decode.py @@ -40,7 +40,7 @@ def test_decode_op(): # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): actual = item1["image"] expected = cv2.imdecode(item2["image"], cv2.IMREAD_COLOR) expected = cv2.cvtColor(expected, cv2.COLOR_BGR2RGB) @@ -59,13 +59,13 @@ def 
test_decode_op_tf_file_dataset(): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.FILES) data1 = data1.map(input_columns=["image"], operations=vision.Decode(True)) - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): logger.info('decode == {}'.format(item['image'])) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): actual = item1["image"] expected = cv2.imdecode(item2["image"], cv2.IMREAD_COLOR) expected = cv2.cvtColor(expected, cv2.COLOR_BGR2RGB) diff --git a/tests/ut/python/dataset/test_duplicate_op.py b/tests/ut/python/dataset/test_duplicate_op.py index 9de3453a7eb..97a858bfa24 100644 --- a/tests/ut/python/dataset/test_duplicate_op.py +++ b/tests/ut/python/dataset/test_duplicate_op.py @@ -26,7 +26,7 @@ def compare(array): array = np.array(array) data = data.map(input_columns=["x"], output_columns=["x", "y"], columns_order=["x", "y"], operations=ops.Duplicate()) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(array, d["x"]) np.testing.assert_array_equal(array, d["y"]) diff --git a/tests/ut/python/dataset/test_epoch_ctrl.py b/tests/ut/python/dataset/test_epoch_ctrl.py index 3a5ddb3b8cd..17fc9b78f22 100644 --- a/tests/ut/python/dataset/test_epoch_ctrl.py +++ b/tests/ut/python/dataset/test_epoch_ctrl.py @@ -134,7 +134,7 @@ def test_generator_dict_0(): i = 0 # create the iterator inside the loop declaration - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -152,7 +152,7 @@ def test_generator_dict_1(): i = 0 # BAD. Do not create iterator every time inside. # Create iterator outside the epoch for loop. - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -318,7 +318,7 @@ def test_generator_tuple_0(): i = 0 # create the iterator inside the loop declaration - for item in data1.create_tuple_iterator(): # each data is a dictionary + for item in data1.create_tuple_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item[0], golden) i = i + 1 @@ -336,7 +336,7 @@ def test_generator_tuple_1(): i = 0 # BAD. Do not create iterator every time inside. # Create iterator outside the epoch for loop. 
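The comments above spell out the pattern these epoch-control tests exercise: build the iterator once, with num_epochs set to the number of passes you intend to make, and reuse it across epochs rather than constructing a fresh iterator inside the loop. A minimal sketch of that pattern, assuming the GeneratorDataset / create_dict_iterator API used throughout these tests (the generator and column name below are illustrative, not taken from the patch):

    import numpy as np
    import mindspore.dataset as ds

    def gen_1d():
        # illustrative source: 64 rows, one integer per row
        for i in range(64):
            yield (np.array([i]),)

    num_epochs = 2
    data1 = ds.GeneratorDataset(gen_1d, ["data"])
    # Create the iterator once, outside the epoch loop, sized for all passes.
    itr = data1.create_dict_iterator(num_epochs=num_epochs)
    for _ in range(num_epochs):
        for item in itr:  # each item is a dictionary keyed by column name
            assert item["data"].shape == (1,)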
- for item in data1.create_tuple_iterator(): # each data is a dictionary + for item in data1.create_tuple_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item[0], golden) i = i + 1 diff --git a/tests/ut/python/dataset/test_exceptions.py b/tests/ut/python/dataset/test_exceptions.py index 253eb564aeb..0e0c2d1b420 100644 --- a/tests/ut/python/dataset/test_exceptions.py +++ b/tests/ut/python/dataset/test_exceptions.py @@ -50,7 +50,7 @@ def test_exception_02(): # Confirm 1 sample in dataset assert sum([1 for _ in data]) == 1 num_iters = 0 - for _ in data.create_dict_iterator(): + for _ in data.create_dict_iterator(num_epochs=1): num_iters += 1 assert num_iters == 1 diff --git a/tests/ut/python/dataset/test_filterop.py b/tests/ut/python/dataset/test_filterop.py index 1c99b7864bf..f5514ded867 100644 --- a/tests/ut/python/dataset/test_filterop.py +++ b/tests/ut/python/dataset/test_filterop.py @@ -35,7 +35,7 @@ def test_diff_predicate_func(): num_iter = 0 label_list = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): num_iter += 1 label = data["label"] label_list.append(label) @@ -64,7 +64,7 @@ def test_filter_by_generator_with_no(): dataset_f = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4) num_iter = 0 expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - for item in dataset_f.create_dict_iterator(): + for item in dataset_f.create_dict_iterator(num_epochs=1): assert item["data"] == expected_rs[num_iter] num_iter += 1 @@ -77,7 +77,7 @@ def test_filter_by_generator_with_repeat(): num_iter = 0 ret_data = [] expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - for item in dataset_f.create_dict_iterator(): + for item in dataset_f.create_dict_iterator(num_epochs=1): num_iter += 1 ret_data.append(item["data"]) assert num_iter == 44 @@ -95,7 +95,7 @@ def test_filter_by_generator_with_repeat_after(): num_iter = 0 ret_data = [] expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - for item in dataset_r.create_dict_iterator(): + for item in dataset_r.create_dict_iterator(num_epochs=1): num_iter += 1 ret_data.append(item["data"]) assert num_iter == 44 @@ -120,7 +120,7 @@ def test_filter_by_generator_with_batch(): dataset_f = dataset_b.filter(predicate=filter_func_batch, num_parallel_workers=4) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(): + for item in dataset_f.create_dict_iterator(num_epochs=1): num_iter += 1 ret_data.append(item["data"]) assert num_iter == 3 @@ -136,7 +136,7 @@ def test_filter_by_generator_with_batch_after(): dataset_b = dataset_f.batch(4) num_iter = 0 ret_data = [] - for item in dataset_b.create_dict_iterator(): + for item in dataset_b.create_dict_iterator(num_epochs=1): num_iter += 1 ret_data.append(item["data"]) assert num_iter == 6 @@ -155,7 +155,7 @@ def test_filter_by_generator_with_shuffle(): dataset_s = dataset.shuffle(4) dataset_f = dataset_s.filter(predicate=filter_func_shuffle, num_parallel_workers=4) num_iter = 0 - for _ in dataset_f.create_dict_iterator(): + for _ in dataset_f.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 21 @@ -170,7 +170,7 @@ def test_filter_by_generator_with_shuffle_after(): dataset_f = dataset.filter(predicate=filter_func_shuffle_after, num_parallel_workers=4) dataset_s = dataset_f.shuffle(4) num_iter = 0 - for _ in dataset_s.create_dict_iterator(): + for _ in dataset_s.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 21 @@ -202,7 +202,7 @@ def 
test_filter_by_generator_with_zip(): dataset_f = dataz.filter(predicate=filter_func_zip, num_parallel_workers=1) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(): + for item in dataset_f.create_dict_iterator(num_epochs=1): num_iter += 1 ret_data.append({"data1": item["data1"], "data2": item["data2"]}) assert num_iter == 21 @@ -221,7 +221,7 @@ def test_filter_by_generator_with_zip_after(): dataz = ds.zip((dt1, dt2)) num_iter = 0 ret_data = [] - for item in dataz.create_dict_iterator(): + for item in dataz.create_dict_iterator(num_epochs=1): num_iter += 1 ret_data.append({"data1": item["data1"], "data2": item["data2"]}) assert num_iter == 21 @@ -266,7 +266,7 @@ def test_filter_by_generator_with_map_all_col(): dataset_f = dataset_map.filter(input_columns=["col1"], predicate=filter_func_map_part, num_parallel_workers=1) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(): + for item in dataset_f.create_dict_iterator(num_epochs=1): num_iter += 1 ret_data.append(item["col1"]) assert num_iter == 3 @@ -282,7 +282,7 @@ def test_filter_by_generator_with_map_part_col(): dataset_f = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_map, num_parallel_workers=4) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(): + for item in dataset_f.create_dict_iterator(num_epochs=1): num_iter += 1 print(item) ret_data.append(item["out1"]) @@ -302,7 +302,7 @@ def test_filter_by_generator_with_rename(): dataset_f = dataset_b.filter(predicate=filter_func_rename, num_parallel_workers=4) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(): + for item in dataset_f.create_dict_iterator(num_epochs=1): num_iter += 1 ret_data.append(item["col1"]) assert num_iter == 55 @@ -336,7 +336,7 @@ def test_filter_by_generator_with_input_column(): dataset_f4 = dataset_f3.filter(predicate=filter_func_input_column1, num_parallel_workers=4) num_iter = 0 ret_data = [] - for item in dataset_f4.create_dict_iterator(): + for item in dataset_f4.create_dict_iterator(num_epochs=1): num_iter += 1 ret_data.append(item["out1"]) assert num_iter == 8 @@ -370,7 +370,7 @@ def test_filter_by_generator_Partial0(): dataset_zip = ds.zip((dataset1, dataset2)) dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2) ret = [] - for item in dataset_f1.create_dict_iterator(): + for item in dataset_f1.create_dict_iterator(num_epochs=1): ret.append(item["col1"]) assert ret[0] == 5 assert ret[6] == 12 @@ -384,7 +384,7 @@ def test_filter_by_generator_Partial1(): dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2) dataset_map = dataset_f1.map(input_columns=["col1"], output_columns=["out1"], operations=lambda x1: x1 + 400) ret = [] - for item in dataset_map.create_dict_iterator(): + for item in dataset_map.create_dict_iterator(num_epochs=1): ret.append(item["out1"]) assert ret[0] == 405 assert ret[6] == 412 @@ -403,7 +403,7 @@ def test_filter_by_generator_Partial2(): operations=lambda x1, x3: (x1 + 400, x3 + 500)) ret1 = [] ret3 = [] - for item in dataset_map.create_dict_iterator(): + for item in dataset_map.create_dict_iterator(num_epochs=1): ret1.append(item["out1"]) ret3.append(item["out3"]) assert ret1[0] == 400 @@ -428,7 +428,7 @@ def test_filter_by_generator_Partial(): dataset_s = dataset.shuffle(4) dataset_f1 = dataset_s.filter(input_columns=["col1", "col2"], predicate=filter_func_Partial, num_parallel_workers=1) - for item in dataset_f1.create_dict_iterator(): + for item in 
dataset_f1.create_dict_iterator(num_epochs=1): assert item["col1"] % 3 == 0 @@ -442,7 +442,7 @@ def test_filte_case_dataset_cifar10(): DATA_DIR_10 = "../data/dataset/testCifar10Data" dataset_c = ds.Cifar10Dataset(dataset_dir=DATA_DIR_10, num_samples=100000, shuffle=False) dataset_f1 = dataset_c.filter(input_columns=["image", "label"], predicate=filter_func_cifar, num_parallel_workers=1) - for item in dataset_f1.create_dict_iterator(): + for item in dataset_f1.create_dict_iterator(num_epochs=1): # in this example, each dictionary has keys "image" and "label" assert item["label"] % 3 == 0 @@ -476,7 +476,7 @@ def test_filter_by_generator_with_map_all_sort(): dataset_f = dataz.filter(predicate=filter_func_part_sort, num_parallel_workers=1) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(): + for item in dataset_f.create_dict_iterator(num_epochs=1): num_iter += 1 ret_data.append(item) @@ -490,7 +490,7 @@ def test_filter_by_generator_get_dataset_size(): data_sie = dataset.get_dataset_size() num_iter = 0 - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): num_iter += 1 assert data_sie == num_iter diff --git a/tests/ut/python/dataset/test_five_crop.py b/tests/ut/python/dataset/test_five_crop.py index 1c0289a4443..103a4aade70 100644 --- a/tests/ut/python/dataset/test_five_crop.py +++ b/tests/ut/python/dataset/test_five_crop.py @@ -53,7 +53,7 @@ def test_five_crop_op(plot=False): data2 = data2.map(input_columns=["image"], operations=transform_2()) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_2 = item2["image"] diff --git a/tests/ut/python/dataset/test_from_dataset.py b/tests/ut/python/dataset/test_from_dataset.py index 7b6333ba656..e285f1fc216 100644 --- a/tests/ut/python/dataset/test_from_dataset.py +++ b/tests/ut/python/dataset/test_from_dataset.py @@ -28,7 +28,7 @@ def test_demo_basic_from_dataset(): special_first=True) data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "")) res = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): res.append(d["text"].item()) assert res == [4, 5, 3, 6, 7, 2], res @@ -41,7 +41,7 @@ def test_demo_basic_from_dataset_with_tokenizer(): special_first=True) data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "")) res = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): res.append(list(d["text"])) assert res == [[13, 3, 7, 14, 9, 17, 3, 2, 19, 9, 2, 11, 3, 4, 16, 4, 8, 6, 5], [21, 20, 10, 25, 23, 26], [24, 22, 10, 12, 8, 6, 7, 4, 18, 15, 5], [2, 2]] @@ -62,7 +62,7 @@ def test_from_dataset(): special_first=True) corpus_dataset = corpus_dataset.map(input_columns="text", operations=text.Lookup(vocab, "")) res = [] - for d in corpus_dataset.create_dict_iterator(): + for d in corpus_dataset.create_dict_iterator(num_epochs=1): res.append(list(d["text"])) return res @@ -110,7 +110,7 @@ def test_from_dataset_special_token(): data = ds.GeneratorDataset(gen_input(texts), column_names=["text"]) data = data.map(input_columns="text", operations=text.Lookup(vocab, "")) res = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): res.append(d["text"].item()) return res diff --git 
a/tests/ut/python/dataset/test_graphdata.py b/tests/ut/python/dataset/test_graphdata.py index 0f78cfd03a8..7bee945df0d 100644 --- a/tests/ut/python/dataset/test_graphdata.py +++ b/tests/ut/python/dataset/test_graphdata.py @@ -186,7 +186,7 @@ def test_graphdata_generatordataset(): dataset = ds.GeneratorDataset(source=GNNGraphDataset(g, batch_num), column_names=out_column_names, sampler=RandomBatchedSampler(edge_num, batch_num), num_parallel_workers=4) dataset = dataset.repeat(2) - itr = dataset.create_dict_iterator() + itr = dataset.create_dict_iterator(num_epochs=1) i = 0 for data in itr: assert data['neighbors'].shape == (2, 7) diff --git a/tests/ut/python/dataset/test_graphdata_distributed.py b/tests/ut/python/dataset/test_graphdata_distributed.py index 9762b3e8f77..5e695c02139 100644 --- a/tests/ut/python/dataset/test_graphdata_distributed.py +++ b/tests/ut/python/dataset/test_graphdata_distributed.py @@ -112,7 +112,7 @@ def test_graphdata_distributed(): sampler=RandomBatchedSampler(edge_num, batch_num), num_parallel_workers=4, python_multiprocessing=False) dataset = dataset.repeat(2) - itr = dataset.create_dict_iterator() + itr = dataset.create_dict_iterator(num_epochs=1) i = 0 for data in itr: assert data['neighbors'].shape == (2, 7) diff --git a/tests/ut/python/dataset/test_iterator.py b/tests/ut/python/dataset/test_iterator.py index 70da93a0ccc..86b50c3f516 100644 --- a/tests/ut/python/dataset/test_iterator.py +++ b/tests/ut/python/dataset/test_iterator.py @@ -28,7 +28,8 @@ def check(project_columns): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=COLUMNS, shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=project_columns, shuffle=False) - for data_actual, data_expected in zip(data1.create_tuple_iterator(project_columns), data2.create_tuple_iterator()): + for data_actual, data_expected in zip(data1.create_tuple_iterator(project_columns, num_epochs=1), + data2.create_tuple_iterator(num_epochs=1)): assert len(data_actual) == len(data_expected) assert all([np.array_equal(d1, d2) for d1, d2 in zip(data_actual, data_expected)]) @@ -48,9 +49,9 @@ def test_iterator_create_tuple(): def test_iterator_weak_ref(): ITERATORS_LIST.clear() data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR) - itr1 = data.create_tuple_iterator() - itr2 = data.create_tuple_iterator() - itr3 = data.create_tuple_iterator() + itr1 = data.create_tuple_iterator(num_epochs=1) + itr2 = data.create_tuple_iterator(num_epochs=1) + itr3 = data.create_tuple_iterator(num_epochs=1) assert len(ITERATORS_LIST) == 3 assert sum(itr() is not None for itr in ITERATORS_LIST) == 3 @@ -67,9 +68,9 @@ def test_iterator_weak_ref(): assert len(ITERATORS_LIST) == 3 assert sum(itr() is not None for itr in ITERATORS_LIST) == 0 - itr1 = data.create_tuple_iterator() - itr2 = data.create_tuple_iterator() - itr3 = data.create_tuple_iterator() + itr1 = data.create_tuple_iterator(num_epochs=1) + itr2 = data.create_tuple_iterator(num_epochs=1) + itr3 = data.create_tuple_iterator(num_epochs=1) _cleanup() with pytest.raises(AttributeError) as info: @@ -102,7 +103,7 @@ def test_tree_copy(): data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=COLUMNS) data1 = data.map(operations=[MyDict()]) - itr = data1.create_tuple_iterator() + itr = data1.create_tuple_iterator(num_epochs=1) assert id(data1) != id(itr.dataset) assert id(data) != id(itr.dataset.children[0]) diff --git a/tests/ut/python/dataset/test_linear_transformation.py b/tests/ut/python/dataset/test_linear_transformation.py index f932916ed83..6d1c7829a2f 
100644 --- a/tests/ut/python/dataset/test_linear_transformation.py +++ b/tests/ut/python/dataset/test_linear_transformation.py @@ -62,7 +62,7 @@ def test_linear_transformation_op(plot=False): image_transformed = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_transformed.append(image1) diff --git a/tests/ut/python/dataset/test_minddataset.py b/tests/ut/python/dataset/test_minddataset.py index 465475c2a20..0aa56d9206f 100644 --- a/tests/ut/python/dataset/test_minddataset.py +++ b/tests/ut/python/dataset/test_minddataset.py @@ -186,7 +186,7 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file): NLP_FILE_NAME + "0", None, num_readers, shuffle=False) assert data_set.get_dataset_size() == 16 num_iter = 0 - for x, item in zip(data, data_set.create_dict_iterator()): + for x, item in zip(data, data_set.create_dict_iterator(num_epochs=1)): assert (item["array_a"] == x["array_a"]).all() assert (item["array_b"] == x["array_b"]).all() assert item["array_c"].tobytes() == x["array_c"] @@ -205,7 +205,7 @@ def test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file): OLD_NLP_FILE_NAME + "0", None, num_readers, shuffle=False) assert old_data_set.get_dataset_size() == 16 num_iter = 0 - for x, item in zip(old_data_set.create_dict_iterator(), data_set.create_dict_iterator()): + for x, item in zip(old_data_set.create_dict_iterator(num_epochs=1), data_set.create_dict_iterator(num_epochs=1)): assert (item["array_a"] == x["array_a"]).all() assert (item["array_b"] == x["array_b"]).all() assert (item["array_c"] == x["array_c"]).all() @@ -254,7 +254,7 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file): data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id) num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) @@ -276,7 +276,7 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): num_shards=num_shards, shard_id=partition_id, num_samples=1) num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) @@ -298,7 +298,7 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): num_shards=num_shards, shard_id=partition_id, num_samples=2) num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} 
-----------------------".format(item["label"])) @@ -320,7 +320,7 @@ def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file): num_shards=num_shards, shard_id=partition_id, num_samples=3) num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) @@ -348,7 +348,7 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c data_set = data_set.repeat(3) num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) @@ -387,7 +387,7 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc data_set = data_set.repeat(3) num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) @@ -420,7 +420,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): data_set = data_set.repeat(3) num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) num_iter += 1 @@ -446,7 +446,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): data_set2 = data_set2.repeat(3) num_iter = 0 - for item in data_set2.create_dict_iterator(): + for item in data_set2.create_dict_iterator(num_epochs=1): logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) num_iter += 1 @@ -477,7 +477,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): data_set3 = data_set3.repeat(3) num_iter = 0 - for item in data_set3.create_dict_iterator(): + for item in data_set3.create_dict_iterator(num_epochs=1): logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) num_iter += 1 @@ -509,7 +509,7 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file): repeat_num = 2 data_set = data_set.repeat(repeat_num) num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- get dataset size {} -----------------".format(num_iter)) logger.info( @@ -538,7 +538,7 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file): data_set = data_set.repeat(2) num_iter = 0 labels = [] - for item in data_set.create_dict_iterator(): + for 
item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- get dataset size {} -----------------".format(num_iter)) logger.info( @@ -567,7 +567,7 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file): operations=resize_op, num_parallel_workers=2) data_set = data_set.batch(32, drop_remainder=True) num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- get dataset size {} -----------------".format(num_iter)) logger.info( @@ -586,7 +586,7 @@ def test_cv_minddataset_issue_888(add_and_remove_cv_file): data_set = data_set.shuffle(2) data_set = data_set.repeat(9) num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 18 @@ -599,7 +599,7 @@ def test_cv_minddataset_reader_file_list(add_and_remove_cv_file): for x in range(FILES_NUM)], columns_list, num_readers) assert data_set.get_dataset_size() == 10 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -621,7 +621,7 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file): data_set = ds.MindDataset([CV_FILE_NAME + "0"], columns_list, num_readers) assert data_set.get_dataset_size() < 10 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -674,7 +674,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): columns_list, num_readers) assert data_set.get_dataset_size() == 30 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -734,7 +734,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): columns_list, num_readers) assert data_set.get_dataset_size() < 20 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -764,7 +764,7 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) assert data_set.get_dataset_size() == 10 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -784,7 +784,7 @@ def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file): data_set = ds.MindDataset(NLP_FILE_NAME + "0", None, num_readers) assert data_set.get_dataset_size() == 10 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -858,7 +858,7 @@ def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file): data_set = ds.MindDataset(CV_FILE_NAME + "0") assert data_set.get_dataset_size() == 10 num_iter = 0 - for item in data_set.create_dict_iterator(): + 
for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -881,7 +881,7 @@ def test_cv_minddataset_reader_repeat_tutorial(add_and_remove_cv_file): repeat_num = 2 data_set = data_set.repeat(repeat_num) num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- repeat two test {} ------------------------".format(num_iter)) logger.info( @@ -1210,7 +1210,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 13 for field in item: if isinstance(item[field], np.ndarray): @@ -1229,7 +1229,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 3 for field in item: if isinstance(item[field], np.ndarray): @@ -1246,7 +1246,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 4 for field in item: if isinstance(item[field], np.ndarray): @@ -1265,7 +1265,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 3 for field in item: if isinstance(item[field], np.ndarray): @@ -1284,7 +1284,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 5 for field in item: if isinstance(item[field], np.ndarray): @@ -1303,7 +1303,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 5 for field in item: if isinstance(item[field], np.ndarray): @@ -1323,7 +1323,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 11 for field in item: if isinstance(item[field], np.ndarray): @@ -1413,7 +1413,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 7 for field in item: if isinstance(item[field], np.ndarray): @@ -1431,7 +1431,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 3 
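A minimal sketch of the iteration pattern these hunks converge on, assuming the mindspore.dataset API exercised throughout this patch; the file name below is a placeholder, not one of the test fixtures:

    import mindspore.dataset as ds

    # Placeholder MindRecord shard; the tests build theirs in add_and_remove_cv_file.
    data_set = ds.MindDataset("example.mindrecord0")

    # num_epochs=1 asks the iterator for exactly one pass over the data,
    # so the pipeline is not held open waiting for further epochs.
    num_iter = 0
    for item in data_set.create_dict_iterator(num_epochs=1):
        num_iter += 1  # item is a dict mapping column names to numpy arrays
    assert num_iter == data_set.get_dataset_size()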
for field in item: if isinstance(item[field], np.ndarray): @@ -1449,7 +1449,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 2 for field in item: if isinstance(item[field], np.ndarray): @@ -1467,7 +1467,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 2 for field in item: if isinstance(item[field], np.ndarray): @@ -1485,7 +1485,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 3 for field in item: if isinstance(item[field], np.ndarray): @@ -1504,7 +1504,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 5 for field in item: if isinstance(item[field], np.ndarray): @@ -1607,7 +1607,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 8 for field in item: if isinstance(item[field], np.ndarray): @@ -1627,7 +1627,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 6 for field in item: if isinstance(item[field], np.ndarray): @@ -1647,7 +1647,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 3 for field in item: if isinstance(item[field], np.ndarray): @@ -1667,7 +1667,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 3 for field in item: if isinstance(item[field], np.ndarray): @@ -1685,7 +1685,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 1 for field in item: if isinstance(item[field], np.ndarray): @@ -1706,7 +1706,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 8 for field in item: if isinstance(item[field], np.ndarray): @@ -1753,7 +1753,7 @@ def test_numpy_generic(): data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, shuffle=False) assert data_set.get_dataset_size() == 10 idx = 0 - for item in 
data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert item['label1'] == item['label1'] assert item['label2'] == item['label2'] assert item['label3'] == item['label3'] @@ -1853,7 +1853,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset( shuffle=False) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 8 for field in item: if isinstance(item[field], np.ndarray): @@ -1875,7 +1875,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset( shuffle=False) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 2 for field in item: if isinstance(item[field], np.ndarray): @@ -1897,7 +1897,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset( shuffle=False) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 2 for field in item: if isinstance(item[field], np.ndarray): diff --git a/tests/ut/python/dataset/test_minddataset_exception.py b/tests/ut/python/dataset/test_minddataset_exception.py index 51621750c8f..4c7a8e7078f 100644 --- a/tests/ut/python/dataset/test_minddataset_exception.py +++ b/tests/ut/python/dataset/test_minddataset_exception.py @@ -97,7 +97,7 @@ def test_invalid_mindrecord(): with pytest.raises(Exception, match="MindRecordOp init failed"): data_set = ds.MindDataset('dummy.mindrecord', columns_list, num_readers) num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 try: assert num_iter == 0 @@ -116,7 +116,7 @@ def test_minddataset_lack_db(): with pytest.raises(Exception, match="MindRecordOp init failed"): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 try: assert num_iter == 0 @@ -135,7 +135,7 @@ def test_cv_minddataset_pk_sample_error_class_column(): with pytest.raises(Exception, match="MindRecordOp launch failed"): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, sampler=sampler) num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) @@ -150,7 +150,7 @@ def test_cv_minddataset_pk_sample_exclusive_shuffle(): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, sampler=sampler, shuffle=False) num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) @@ -165,7 +165,7 @@ def test_cv_minddataset_reader_different_schema(): data_set = ds.MindDataset([CV_FILE_NAME, CV1_FILE_NAME], columns_list, num_readers) num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) @@ -182,7 +182,7 @@ def test_cv_minddataset_reader_different_page_size(): data_set = ds.MindDataset([CV_FILE_NAME, CV1_FILE_NAME], columns_list, num_readers) num_iter = 0 - for _ in 
data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) @@ -197,7 +197,7 @@ def test_minddataset_invalidate_num_shards(): with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 1, 2) num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 try: assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value) @@ -217,7 +217,7 @@ def test_minddataset_invalidate_shard_id(): with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 1, -1) num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 try: assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value) @@ -237,7 +237,7 @@ def test_minddataset_shard_id_bigger_than_num_shard(): with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 2) num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 try: assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value) @@ -249,7 +249,7 @@ def test_minddataset_shard_id_bigger_than_num_shard(): with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 5) num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 try: assert 'Input shard_id is not within the required interval of (0 to 1).' 
in str(error_info.value) @@ -274,7 +274,7 @@ def test_cv_minddataset_partition_num_samples_equals_0(): num_shards=num_shards, shard_id=partition_id, num_samples=0) num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 with pytest.raises(Exception) as error_info: partitions(5) diff --git a/tests/ut/python/dataset/test_minddataset_multi_images.py b/tests/ut/python/dataset/test_minddataset_multi_images.py index 80b94940abe..e1dc485fdac 100644 --- a/tests/ut/python/dataset/test_minddataset_multi_images.py +++ b/tests/ut/python/dataset/test_minddataset_multi_images.py @@ -29,7 +29,7 @@ def test_cv_minddataset_reader_two_png_tutorial(): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 5 logger.info("-------------- cv reader basic is {} -----------------".format(num_iter)) logger.info("-------------- item[id] is {} ------------------------".format(item["id"])) @@ -50,7 +50,7 @@ def test_cv_minddataset_reader_two_png_tutorial_just_image2(): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 2 logger.info("-------------- cv reader basic is {} -----------------".format(num_iter)) logger.info("-------------- item[img_data] is {} ------------------".format(item["img_data"])) diff --git a/tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py b/tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py index 5ef3a7adcb9..a0d206d8d00 100644 --- a/tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py +++ b/tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py @@ -57,7 +57,7 @@ def test_cv_minddataset_reader_multi_image_and_ndarray_tutorial(): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): assert len(item) == 7 logger.info("item: {}".format(item)) assert item["image_0"].dtype == np.uint8 diff --git a/tests/ut/python/dataset/test_minddataset_padded.py b/tests/ut/python/dataset/test_minddataset_padded.py index a05879ab019..b9d74a934ab 100644 --- a/tests/ut/python/dataset/test_minddataset_padded.py +++ b/tests/ut/python/dataset/test_minddataset_padded.py @@ -123,7 +123,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file): assert data_set.get_dataset_size() == 15 num_iter = 0 num_padded_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: {} ------------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} ----------------------------".format(item["label"])) @@ -158,7 +158,7 @@ def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file): padded_sample=padded_sample, num_padded=num_padded) assert data_set.get_dataset_size() == dataset_size - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): 
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -205,7 +205,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f assert data_set.get_dataset_size() == dataset_size data_set = data_set.repeat(repeat_size) local_index = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -266,7 +266,7 @@ def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv padded_sample=padded_sample, num_padded=num_padded) num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 return num_iter @@ -309,7 +309,7 @@ def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_re shard_id=partition_id, padded_sample=padded_sample, num_padded=num_padded) - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -331,7 +331,7 @@ def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_c shard_id=partition_id, padded_sample=padded_sample, num_padded=num_padded) - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -352,7 +352,7 @@ def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv num_shards=num_shards, shard_id=partition_id, padded_sample=padded_sample) - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -373,7 +373,7 @@ def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remov num_shards=num_shards, shard_id=partition_id, num_padded=num_padded) - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -403,7 +403,7 @@ def 
test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file): padded_sample=padded_sample, num_padded=num_padded) assert data_set.get_dataset_size() == dataset_size - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) logger.info("-------------- item[input_ids]: {}, shape: {} -----------------".format(item["input_ids"], item["input_ids"].shape)) @@ -448,7 +448,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_ data_set = data_set.repeat(repeat_size) local_index = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) logger.info("-------------- item[input_ids]: {}, shape: {} -----------------".format(item["input_ids"], item["input_ids"].shape)) @@ -508,7 +508,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_resul assert data_set.get_dataset_size() == dataset_size data_set = data_set.repeat(repeat_size) inner_num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) logger.info("-------------- item[input_ids]: {}, shape: {} -----------------" diff --git a/tests/ut/python/dataset/test_minddataset_sampler.py b/tests/ut/python/dataset/test_minddataset_sampler.py index b60302c3e37..19604bad8bf 100644 --- a/tests/ut/python/dataset/test_minddataset_sampler.py +++ b/tests/ut/python/dataset/test_minddataset_sampler.py @@ -70,7 +70,7 @@ def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file): assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -90,7 +90,7 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file): assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[data]: \ @@ -111,7 +111,7 @@ def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file): assert data_set.get_dataset_size() == 9 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -132,7 +132,7 @@ def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file): assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ 
-152,7 +152,7 @@ def test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file): assert data_set.get_dataset_size() == 9 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -172,7 +172,7 @@ def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 15 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -191,7 +191,7 @@ def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 15 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -210,7 +210,7 @@ def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 10 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -231,7 +231,7 @@ def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -254,7 +254,7 @@ def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -277,7 +277,7 @@ def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 0 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -300,7 +300,7 @@ def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -322,7 +322,7 @@ def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -345,7 +345,7 @@ def 
test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file): assert data_set.get_dataset_size() == 10 num_iter = 0 new_dataset = [] - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -371,7 +371,7 @@ def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file): epoch1_dataset = [] epoch2_dataset = [] epoch3_dataset = [] - for item in ds1.create_dict_iterator(): + for item in ds1.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -400,7 +400,7 @@ def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -422,7 +422,7 @@ def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 4 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -447,7 +447,7 @@ def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file): dataset_size = data_set.get_dataset_size() assert dataset_size == 10 num_iter = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(num_epochs=1): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -473,7 +473,7 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file): assert d1.get_dataset_size() == 8 assert d2.get_dataset_size() == 2 num_iter = 0 - for item in d1.create_dict_iterator(): + for item in d1.create_dict_iterator(num_epochs=1): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -485,7 +485,7 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file): num_iter += 1 assert num_iter == 8 num_iter = 0 - for item in d2.create_dict_iterator(): + for item in d2.create_dict_iterator(num_epochs=1): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -509,7 +509,7 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file): assert d1.get_dataset_size() == 8 assert d2.get_dataset_size() == 2 num_iter = 0 - for item in d1.create_dict_iterator(): + for item in d1.create_dict_iterator(num_epochs=1): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -521,7 +521,7 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file): num_iter += 1 assert num_iter == 8 num_iter = 0 - for item in d2.create_dict_iterator(): + for item in d2.create_dict_iterator(num_epochs=1): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -545,7 +545,7 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): assert d1.get_dataset_size() == 4 assert d2.get_dataset_size() == 6 num_iter = 0 - for item in d1.create_dict_iterator(): + for item in d1.create_dict_iterator(num_epochs=1): logger.info( 
"-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -557,7 +557,7 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): num_iter += 1 assert num_iter == 4 num_iter = 0 - for item in d2.create_dict_iterator(): + for item in d2.create_dict_iterator(num_epochs=1): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -585,7 +585,7 @@ def test_cv_minddataset_split_deterministic(add_and_remove_cv_file): d1_dataset = [] d2_dataset = [] num_iter = 0 - for item in d1.create_dict_iterator(): + for item in d1.create_dict_iterator(num_epochs=1): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -596,7 +596,7 @@ def test_cv_minddataset_split_deterministic(add_and_remove_cv_file): num_iter += 1 assert num_iter == 8 num_iter = 0 - for item in d2.create_dict_iterator(): + for item in d2.create_dict_iterator(num_epochs=1): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -628,7 +628,7 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file): num_iter = 0 d1_shard1 = [] - for item in d1.create_dict_iterator(): + for item in d1.create_dict_iterator(num_epochs=1): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -649,7 +649,7 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file): epoch2_dataset = [] epoch3_dataset = [] num_iter = 0 - for item in d1s.create_dict_iterator(): + for item in d1s.create_dict_iterator(num_epochs=1): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( diff --git a/tests/ut/python/dataset/test_mixup_label_smoothing.py b/tests/ut/python/dataset/test_mixup_label_smoothing.py index c554b022428..dbee6617c98 100644 --- a/tests/ut/python/dataset/test_mixup_label_smoothing.py +++ b/tests/ut/python/dataset/test_mixup_label_smoothing.py @@ -45,7 +45,7 @@ def test_one_hot_op(): golden_label = np.ones(num_classes) * epsilon_para / num_classes golden_label[1] = 1 - epsilon_para / num_classes - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): label = data["label"] logger.info("label is {}".format(label)) logger.info("golden_label is {}".format(golden_label)) @@ -84,7 +84,7 @@ def test_mix_up_single(): ] ds1 = ds1.map(input_columns=["image", "label"], operations=transforms) - for data1, data2 in zip(ds1.create_dict_iterator(), ds2.create_dict_iterator()): + for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): image1 = data1["image"] label = data1["label"] logger.info("label is {}".format(label)) @@ -134,7 +134,7 @@ def test_mix_up_multi(): ds1 = ds1.map(input_columns=["image", "label"], operations=transforms) num_iter = 0 batch1_image1 = 0 - for data1, data2 in zip(ds1.create_dict_iterator(), ds2.create_dict_iterator()): + for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): image1 = data1["image"] label1 = data1["label"] logger.info("label: {}".format(label1)) diff --git a/tests/ut/python/dataset/test_ngram_op.py b/tests/ut/python/dataset/test_ngram_op.py index 777fca8764a..367bcea9bdc 100644 --- a/tests/ut/python/dataset/test_ngram_op.py +++ b/tests/ut/python/dataset/test_ngram_op.py @@ -42,7 +42,7 @@ def test_multiple_ngrams(): dataset = 
dataset.map(input_columns=["text"], operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " ")) i = 0 - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i] i += 1 @@ -64,7 +64,7 @@ def test_simple_ngram(): dataset = dataset.map(input_columns=["text"], operations=text.Ngram(3, separator=" ")) i = 0 - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i], i i += 1 @@ -79,7 +79,7 @@ def test_corner_cases(): try: dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"]) dataset = dataset.map(input_columns=["text"], operations=text.Ngram(n, l_pad, r_pad, separator=sep)) - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): return [d.decode("utf8") for d in data["text"]] except (ValueError, TypeError) as e: return str(e) diff --git a/tests/ut/python/dataset/test_nlp.py b/tests/ut/python/dataset/test_nlp.py index cb517160a19..cab9c03a4fe 100644 --- a/tests/ut/python/dataset/test_nlp.py +++ b/tests/ut/python/dataset/test_nlp.py @@ -38,7 +38,7 @@ def test_on_tokenized_line(): data = data.map(input_columns=["text"], operations=lookup) res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14], [11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32) - for i, d in enumerate(data.create_dict_iterator()): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): np.testing.assert_array_equal(d["text"], res[i]) @@ -56,7 +56,7 @@ def test_on_tokenized_line_with_no_special_tokens(): data = data.map(input_columns=["text"], operations=lookup) res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12], [9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32) - for i, d in enumerate(data.create_dict_iterator()): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): np.testing.assert_array_equal(d["text"], res[i]) diff --git a/tests/ut/python/dataset/test_noop_mode.py b/tests/ut/python/dataset/test_noop_mode.py index 0ea96732001..fbdc3d5d219 100644 --- a/tests/ut/python/dataset/test_noop_mode.py +++ b/tests/ut/python/dataset/test_noop_mode.py @@ -24,7 +24,7 @@ def test_noop_pserver(): os.environ['MS_ROLE'] = 'MS_PSERVER' data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", mode="train", decode=True, shuffle=False) num = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num += 1 assert num == 0 del os.environ['MS_ROLE'] @@ -34,7 +34,7 @@ def test_noop_sched(): os.environ['MS_ROLE'] = 'MS_SCHED' data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", mode="train", decode=True, shuffle=False) num = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num += 1 assert num == 0 del os.environ['MS_ROLE'] diff --git a/tests/ut/python/dataset/test_normalizeOp.py b/tests/ut/python/dataset/test_normalizeOp.py index d5ebc799f91..7c69d7481c7 100644 --- a/tests/ut/python/dataset/test_normalizeOp.py +++ b/tests/ut/python/dataset/test_normalizeOp.py @@ -106,7 +106,7 @@ def test_normalize_op_c(plot=False): data2 = data2.map(input_columns=["image"], operations=decode_op) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image_de_normalized = item1["image"] image_original = 
item2["image"] image_np_normalized = normalize_np(image_original, mean, std) @@ -143,7 +143,7 @@ def test_normalize_op_py(plot=False): data2 = data2.map(input_columns=["image"], operations=transform()) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image_de_normalized = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_np_normalized = (normalize_np(item2["image"].transpose(1, 2, 0), mean, std) * 255).astype(np.uint8) image_original = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) @@ -171,7 +171,7 @@ def test_decode_op(): data1 = data1.map(input_columns=["image"], operations=decode_op) num_iter = 0 - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): logger.info("Looping inside iterator {}".format(num_iter)) _ = item["image"] num_iter += 1 @@ -194,7 +194,7 @@ def test_decode_normalize_op(): data1 = data1.map(input_columns=["image"], operations=[decode_op, normalize_op]) num_iter = 0 - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): logger.info("Looping inside iterator {}".format(num_iter)) _ = item["image"] num_iter += 1 @@ -263,7 +263,7 @@ def test_normalize_exception_invalid_size_py(): logger.info("test_normalize_exception_invalid_size_py") data = util_test_normalize([0.75, 0.25], [0.18, 0.32], "python") try: - _ = data.create_dict_iterator().get_next() + _ = data.create_dict_iterator(num_epochs=1).get_next() except RuntimeError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Length of mean and std must both be 1 or" in str(e) diff --git a/tests/ut/python/dataset/test_onehot_op.py b/tests/ut/python/dataset/test_onehot_op.py index 9020663b06c..ac92ab7fe74 100644 --- a/tests/ut/python/dataset/test_onehot_op.py +++ b/tests/ut/python/dataset/test_onehot_op.py @@ -90,7 +90,7 @@ def test_one_hot_post_aug(): data1 = data1.batch(batch_size, drop_remainder=True) num_iter = 0 - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): logger.info("image is: {}".format(item["image"])) logger.info("label is: {}".format(item["label"])) num_iter += 1 diff --git a/tests/ut/python/dataset/test_opt.py b/tests/ut/python/dataset/test_opt.py index 41e9f252c38..f40458ebcbe 100644 --- a/tests/ut/python/dataset/test_opt.py +++ b/tests/ut/python/dataset/test_opt.py @@ -37,7 +37,7 @@ def test_case_0(): data1 = data1.batch(2) - for _ in data1.create_dict_iterator(): # each data is a dictionary + for _ in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary pass diff --git a/tests/ut/python/dataset/test_opt_pass.py b/tests/ut/python/dataset/test_opt_pass.py index d89ceab73ec..97f3bcb4c26 100644 --- a/tests/ut/python/dataset/test_opt_pass.py +++ b/tests/ut/python/dataset/test_opt_pass.py @@ -30,7 +30,7 @@ def test_map_reorder0(): data0 = data0.map(input_columns="col0", output_columns="out", columns_order=["col1", "out"], operations=(lambda x: x)) - for item in data0.create_tuple_iterator(): # each data is a dictionary + for item in data0.create_tuple_iterator(num_epochs=1): # each data is a dictionary assert item == [np.array(1), np.array(0)] # tests the construction of multiple ops from a single dataset. 
@@ -49,7 +49,7 @@ def test_map_reorder1(): data2 = ds.zip((data0, data1)) data2 = data2.map(input_columns="a0", columns_order=["b2", "a2", "b1", "a1", "b0", "a0"], operations=(lambda x: x)) - for item in data2.create_tuple_iterator(): + for item in data2.create_tuple_iterator(num_epochs=1): assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)] # tests the construction of multiple ops from a single dataset. diff --git a/tests/ut/python/dataset/test_pad.py b/tests/ut/python/dataset/test_pad.py index a3038a4b911..9c81ad5390b 100644 --- a/tests/ut/python/dataset/test_pad.py +++ b/tests/ut/python/dataset/test_pad.py @@ -55,7 +55,7 @@ def test_pad_op(): data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = data2.map(input_columns=["image"], operations=transform()) - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) @@ -93,7 +93,7 @@ def test_pad_grayscale(): pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20)) data1 = data1.map(input_columns=["image"], operations=pad_gray) dataset_shape_1 = [] - for item1 in data1.create_dict_iterator(): + for item1 in data1.create_dict_iterator(num_epochs=1): c_image = item1["image"] dataset_shape_1.append(c_image.shape) @@ -107,7 +107,7 @@ def test_pad_grayscale(): data2 = data2.map(input_columns=["image"], operations=ctrans) - for item2 in data2.create_dict_iterator(): + for item2 in data2.create_dict_iterator(num_epochs=1): c_image = item2["image"] dataset_shape_2.append(c_image.shape) diff --git a/tests/ut/python/dataset/test_pad_batch.py b/tests/ut/python/dataset/test_pad_batch.py index cea3427604c..aae340e8c41 100644 --- a/tests/ut/python/dataset/test_pad_batch.py +++ b/tests/ut/python/dataset/test_pad_batch.py @@ -62,7 +62,7 @@ def test_batch_padding_01(): data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], -2), "col1d": ([2], -1)}) data1 = data1.repeat(2) - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal([[0, -1], [1, -1]], data["col1d"]) np.testing.assert_array_equal([[[100, -2], [200, -2]], [[101, -2], [201, -2]]], data["col2d"]) @@ -71,7 +71,7 @@ def test_batch_padding_02(): data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], -2)}) data1 = data1.repeat(2) - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal([[0], [1]], data["col1d"]) np.testing.assert_array_equal([[[100, -2]], [[101, -2]]], data["col2d"]) @@ -81,7 +81,7 @@ def test_batch_padding_03(): data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, -1)}) # pad automatically data1 = data1.repeat(2) res = dict() - for ind, data in enumerate(data1.create_dict_iterator()): + for ind, data in enumerate(data1.create_dict_iterator(num_epochs=1)): res[ind] = data["col"].copy() np.testing.assert_array_equal(res[0], [[0, -1], [0, 1]]) np.testing.assert_array_equal(res[1], [[0, 1, 2, -1], [0, 1, 2, 3]]) @@ -93,7 +93,7 @@ def test_batch_padding_04(): data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), 
["col1", "col2"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={}) # pad automatically data1 = data1.repeat(2) - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(data["col1"], [[0, 0], [0, 1]]) np.testing.assert_array_equal(data["col2"], [[100, 0], [100, 101]]) @@ -102,7 +102,7 @@ def test_batch_padding_05(): data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"]) data1 = data1.batch(batch_size=3, drop_remainder=False, pad_info={"col2": ([2, None], -2), "col1": (None, -1)}) # pad automatically - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(data["col1"], [[[0, -1, -1]], [[0, 1, -1]], [[0, 1, 2]]]) np.testing.assert_array_equal(data["col2"], [[[100, -2, -2], [-2, -2, -2]], [[100, 101, -2], [-2, -2, -2]], [[100, 101, 102], [-2, -2, -2]]]) @@ -117,7 +117,7 @@ def batch_padding_performance_3d(): data1 = data1.batch(batch_size=24, drop_remainder=True, pad_info=pad_info) start_time = time.time() num_batches = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_batches += 1 _ = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time) # print(res) @@ -133,7 +133,7 @@ def batch_padding_performance_1d(): data1 = data1.batch(batch_size=24, drop_remainder=True, pad_info=pad_info) start_time = time.time() num_batches = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_batches += 1 _ = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time) # print(res) @@ -149,7 +149,7 @@ def batch_pyfunc_padding_3d(): data1 = data1.batch(batch_size=24, drop_remainder=True) start_time = time.time() num_batches = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_batches += 1 _ = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time) # print(res) @@ -164,7 +164,7 @@ def batch_pyfunc_padding_1d(): data1 = data1.batch(batch_size=24, drop_remainder=True) start_time = time.time() num_batches = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_batches += 1 _ = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time) # print(res) @@ -180,7 +180,7 @@ def test_pad_via_map(): data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816)))) data1 = data1.batch(batch_size=25, drop_remainder=True) res = [] - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): res.append(data["image"]) return res @@ -189,7 +189,7 @@ def test_pad_via_map(): data2 = data2.map(input_columns="image", operations=(lambda x: x.reshape(-1))) # reshape to 1d data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)}) res = [] - for data in data2.create_dict_iterator(): + for data in data2.create_dict_iterator(num_epochs=1): res.append(data["image"]) return res diff --git a/tests/ut/python/dataset/test_paddeddataset.py b/tests/ut/python/dataset/test_paddeddataset.py index 4fc4eea5a85..48869918215 100644 --- a/tests/ut/python/dataset/test_paddeddataset.py +++ b/tests/ut/python/dataset/test_paddeddataset.py @@ -52,7 +52,7 @@ def test_TFRecord_Padded(): testsampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, 
shuffle=False, num_samples=None) concat_ds.use_sampler(testsampler) shard_list = [] - for item in concat_ds.create_dict_iterator(): + for item in concat_ds.create_dict_iterator(num_epochs=1): shard_list.append(len(item['image'])) verify_list.append(shard_list) assert verify_list == result_list @@ -74,7 +74,7 @@ def test_GeneratorDataSet_Padded(): distributed_sampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) data3.use_sampler(distributed_sampler) tem_list = [] - for ele in data3.create_dict_iterator(): + for ele in data3.create_dict_iterator(num_epochs=1): tem_list.append(ele['col1'][0]) verify_list.append(tem_list) @@ -98,7 +98,7 @@ def test_Reapeat_afterPadded(): ds3.use_sampler(testsampler) repeat_num = 2 ds3 = ds3.repeat(repeat_num) - for item in ds3.create_dict_iterator(): + for item in ds3.create_dict_iterator(num_epochs=1): verify_list.append(len(item['image'])) assert verify_list == result_list * repeat_num @@ -140,7 +140,7 @@ def test_Unevenly_distributed(): tem_list = [] testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None) ds3.use_sampler(testsampler) - for item in ds3.create_dict_iterator(): + for item in ds3.create_dict_iterator(num_epochs=1): tem_list.append(len(item['image'])) verify_list.append(tem_list) assert verify_list == result_list @@ -164,7 +164,7 @@ def test_three_datasets_connected(): distributed_sampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) data4.use_sampler(distributed_sampler) tem_list = [] - for ele in data4.create_dict_iterator(): + for ele in data4.create_dict_iterator(num_epochs=1): tem_list.append(ele['col1'][0]) verify_list.append(tem_list) @@ -220,7 +220,7 @@ def test_imagefolder_padded(): assert sum([1 for _ in data3]) == 10 verify_list = [] - for ele in data3.create_dict_iterator(): + for ele in data3.create_dict_iterator(num_epochs=1): verify_list.append(len(ele['image'])) assert verify_list[8] == 1 assert verify_list[9] == 6 @@ -246,7 +246,7 @@ def test_imagefolder_padded_with_decode(): data3.use_sampler(testsampler) data3 = data3.map(input_columns="image", operations=V_C.Decode()) shard_sample_count = 0 - for ele in data3.create_dict_iterator(): + for ele in data3.create_dict_iterator(num_epochs=1): print("label: {}".format(ele['label'])) count += 1 shard_sample_count += 1 @@ -275,7 +275,7 @@ def test_imagefolder_padded_with_decode_and_get_dataset_size(): shard_dataset_size = data3.get_dataset_size() data3 = data3.map(input_columns="image", operations=V_C.Decode()) shard_sample_count = 0 - for ele in data3.create_dict_iterator(): + for ele in data3.create_dict_iterator(num_epochs=1): print("label: {}".format(ele['label'])) count += 1 shard_sample_count += 1 @@ -298,7 +298,7 @@ def test_more_shard_padded(): tem_list = [] testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None) data3.use_sampler(testsampler) - for item in data3.create_dict_iterator(): + for item in data3.create_dict_iterator(num_epochs=1): tem_list.append(item['col1']) vertifyList.append(tem_list) @@ -324,7 +324,7 @@ def test_more_shard_padded(): tem_list = [] testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None) ds3.use_sampler(testsampler) - for item in ds3.create_dict_iterator(): + for item in ds3.create_dict_iterator(num_epochs=1): tem_list.append(len(item['image'])) vertifyList1.append(tem_list) @@ -408,7 +408,7 @@ def 
test_Mindrecord_Padded(remove_mindrecord_file): testsampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) ds2.use_sampler(testsampler) tem_list = [] - for ele in ds2.create_dict_iterator(): + for ele in ds2.create_dict_iterator(num_epochs=1): tem_list.append(int(ele['file_name'].tostring().decode().lstrip('image_').rstrip('.jpg'))) result_list.append(tem_list) assert result_list == verify_list @@ -421,7 +421,7 @@ def test_clue_padded_and_skip_with_0_samples(): data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train') count = 0 - for _ in data.create_dict_iterator(): + for _ in data.create_dict_iterator(num_epochs=1): count += 1 assert count == 3 @@ -437,20 +437,20 @@ def test_clue_padded_and_skip_with_0_samples(): dataset.use_sampler(testsampler) assert dataset.get_dataset_size() == 2 count = 0 - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): count += 1 assert count == 2 dataset = dataset.skip(count=2) # dataset2 has none samples count = 0 - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): count += 1 assert count == 0 with pytest.raises(ValueError, match="There is no samples in the "): dataset = dataset.concat(data_copy1) count = 0 - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): count += 1 assert count == 2 diff --git a/tests/ut/python/dataset/test_pair_truncate.py b/tests/ut/python/dataset/test_pair_truncate.py index 8cc40ee1264..784879f4ae7 100644 --- a/tests/ut/python/dataset/test_pair_truncate.py +++ b/tests/ut/python/dataset/test_pair_truncate.py @@ -24,7 +24,7 @@ import mindspore.dataset.text as text def compare(in1, in2, length, out1, out2): data = ds.NumpySlicesDataset({"s1": [in1], "s2": [in2]}) data = data.map(input_columns=["s1", "s2"], operations=text.TruncateSequencePair(length)) - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(out1, d["s1"]) np.testing.assert_array_equal(out2, d["s2"]) diff --git a/tests/ut/python/dataset/test_pyfunc.py b/tests/ut/python/dataset/test_pyfunc.py index b512de52300..0e8aa49dded 100644 --- a/tests/ut/python/dataset/test_pyfunc.py +++ b/tests/ut/python/dataset/test_pyfunc.py @@ -36,7 +36,7 @@ def test_case_0(): data1 = data1.map(input_columns="col0", output_columns="out", operations=(lambda x: x + x)) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]]) np.testing.assert_array_equal(item["out"], golden) @@ -57,7 +57,7 @@ def test_case_1(): columns_order=["out0", "out1"]) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i, i + 1], [i + 2, i + 3]]) np.testing.assert_array_equal(item["out0"], golden) @@ -81,7 +81,7 @@ def test_case_2(): columns_order=["out"]) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]]) 
np.testing.assert_array_equal(item["out"], golden) @@ -103,7 +103,7 @@ def test_case_3(): operations=(lambda x, y: (x, x + y, x + y + 1)), columns_order=["out0", "out1", "out2"]) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i, i + 1], [i + 2, i + 3]]) np.testing.assert_array_equal(item["out0"], golden) @@ -129,7 +129,7 @@ def test_case_4(): operations=(lambda x, y: (x, x + y, x + y + 1)), columns_order=["out0", "out1", "out2"]) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i, i + 1], [i + 2, i + 3]]) np.testing.assert_array_equal(item["out0"], golden) @@ -156,7 +156,7 @@ def test_case_5(): data1 = data1.map(input_columns="col0", output_columns="out", operations=func_5) - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[1, 1], [1, 1]]) np.testing.assert_array_equal(item["out"], golden) @@ -175,7 +175,7 @@ def test_case_6(): operations=[(lambda x: x + x), (lambda x: x + x)]) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i * 4, (i + 1) * 4], [(i + 2) * 4, (i + 3) * 4]]) np.testing.assert_array_equal(item["out"], golden) @@ -195,7 +195,7 @@ def test_case_7(): num_parallel_workers=4, python_multiprocessing=True) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]]) np.testing.assert_array_equal(item["out"], golden) @@ -218,7 +218,7 @@ def test_case_8(): python_multiprocessing=True) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i, i + 1], [i + 2, i + 3]]) np.testing.assert_array_equal(item["out0"], golden) @@ -243,7 +243,7 @@ def test_case_9(): num_parallel_workers=4, python_multiprocessing=True) i = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i * 2 + 3, (i + 1) * 2 + 3], [(i + 2) * 2 + 3, (i + 3) * 2 + 3]]) np.testing.assert_array_equal(item["out"], golden) diff --git a/tests/ut/python/dataset/test_python_tokenizer.py b/tests/ut/python/dataset/test_python_tokenizer.py index 78db5532149..77b13c837bc 100644 --- a/tests/ut/python/dataset/test_python_tokenizer.py +++ b/tests/ut/python/dataset/test_python_tokenizer.py @@ -41,7 +41,7 @@ def test_whitespace_tokenizer_ch(): tokenizer = text.PythonTokenizer(my_tokenizer) dataset = dataset.map(operations=tokenizer, num_parallel_workers=1) tokens = [] - for i in dataset.create_dict_iterator(): + for i in 
dataset.create_dict_iterator(num_epochs=1): s = text.to_str(i['text']).tolist() tokens.append(s) logger.info("The out tokens is : {}".format(tokens)) diff --git a/tests/ut/python/dataset/test_random_affine.py b/tests/ut/python/dataset/test_random_affine.py index 17e5fde4b57..c99fa35e715 100644 --- a/tests/ut/python/dataset/test_random_affine.py +++ b/tests/ut/python/dataset/test_random_affine.py @@ -58,7 +58,7 @@ def test_random_affine_op(plot=False): image_affine = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_affine.append(image1) @@ -91,7 +91,7 @@ def test_random_affine_op_c(plot=False): image_affine = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = item1["image"] image2 = item2["image"] image_affine.append(image1) @@ -193,7 +193,7 @@ def test_random_affine_py_exception_non_pil_images(): py_vision.RandomAffine(degrees=(15, 15))]) dataset = dataset.map(input_columns=["image"], operations=transform(), num_parallel_workers=3, python_multiprocessing=True) - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): break except RuntimeError as e: logger.info("Got an exception in DE: {}".format(str(e))) diff --git a/tests/ut/python/dataset/test_random_apply.py b/tests/ut/python/dataset/test_random_apply.py index 23e5d1a22fd..49d828386fd 100644 --- a/tests/ut/python/dataset/test_random_apply.py +++ b/tests/ut/python/dataset/test_random_apply.py @@ -57,7 +57,7 @@ def test_random_apply_op(plot=False): image_apply = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_apply.append(image1) @@ -118,7 +118,7 @@ def test_random_apply_exception_random_crop_badinput(): data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data = data.map(input_columns=["image"], operations=transform()) try: - _ = data.create_dict_iterator().get_next() + _ = data.create_dict_iterator(num_epochs=1).get_next() except RuntimeError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Crop size" in str(e) diff --git a/tests/ut/python/dataset/test_random_choice.py b/tests/ut/python/dataset/test_random_choice.py index eb0a5c9ebae..df632feb53b 100644 --- a/tests/ut/python/dataset/test_random_choice.py +++ b/tests/ut/python/dataset/test_random_choice.py @@ -54,7 +54,7 @@ def test_random_choice_op(plot=False): image_choice = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_choice.append(image1) @@ -93,7 +93,7 @@ def 
test_random_choice_comp(plot=False): image_choice = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_choice.append(image1) @@ -124,7 +124,7 @@ def test_random_choice_exception_random_crop_badinput(): data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data = data.map(input_columns=["image"], operations=transform()) try: - _ = data.create_dict_iterator().get_next() + _ = data.create_dict_iterator(num_epochs=1).get_next() except RuntimeError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Crop size" in str(e) diff --git a/tests/ut/python/dataset/test_random_color.py b/tests/ut/python/dataset/test_random_color.py index e17df28ce33..98b45d60500 100644 --- a/tests/ut/python/dataset/test_random_color.py +++ b/tests/ut/python/dataset/test_random_color.py @@ -118,7 +118,7 @@ def test_random_color_c(degrees=(0.1, 1.9), plot=False, run_golden=True): image_random_color_op = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): actual = item1["image"] expected = item2["image"] image.append(actual) @@ -193,7 +193,7 @@ def test_compare_random_color_op(degrees=None, plot=False): image_random_color_op = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): actual = item1["image"] expected = item2["image"] image_random_color_op.append(actual) diff --git a/tests/ut/python/dataset/test_random_color_adjust.py b/tests/ut/python/dataset/test_random_color_adjust.py index 3eb55043b4e..3dad5549f54 100644 --- a/tests/ut/python/dataset/test_random_color_adjust.py +++ b/tests/ut/python/dataset/test_random_color_adjust.py @@ -52,7 +52,7 @@ def util_test_random_color_adjust_error(brightness=(1, 1), contrast=(1, 1), satu with pytest.raises(RuntimeError) as info: data1 = data1.map(input_columns=["image"], operations=random_adjust_op) dataset_shape_1 = [] - for item1 in data1.create_dict_iterator(): + for item1 in data1.create_dict_iterator(num_epochs=1): c_image = item1["image"] dataset_shape_1.append(c_image.shape) @@ -91,7 +91,7 @@ def util_test_random_color_adjust_op(brightness=(1, 1), contrast=(1, 1), saturat data2 = data2.map(input_columns=["image"], operations=transform()) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): num_iter += 1 c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) diff --git a/tests/ut/python/dataset/test_random_crop.py b/tests/ut/python/dataset/test_random_crop.py index 89153dee56b..d7bb06867fe 100644 --- a/tests/ut/python/dataset/test_random_crop.py +++ b/tests/ut/python/dataset/test_random_crop.py @@ -51,7 +51,7 @@ def test_random_crop_op_c(plot=False): image_cropped = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in 
zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = item1["image"] image2 = item2["image"] image_cropped.append(image1) @@ -85,7 +85,7 @@ def test_random_crop_op_py(plot=False): crop_images = [] original_images = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): crop = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) original = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) crop_images.append(crop) @@ -254,7 +254,7 @@ def test_random_crop_04_c(): data = data.map(input_columns=["image"], operations=decode_op) data = data.map(input_columns=["image"], operations=random_crop_op) try: - data.create_dict_iterator().get_next() + data.create_dict_iterator(num_epochs=1).get_next() except RuntimeError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Crop size is greater than the image dim" in str(e) @@ -277,7 +277,7 @@ def test_random_crop_04_py(): transform = py_vision.ComposeOp(transforms) data = data.map(input_columns=["image"], operations=transform()) try: - data.create_dict_iterator().get_next() + data.create_dict_iterator(num_epochs=1).get_next() except RuntimeError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "Crop size" in str(e) @@ -497,7 +497,7 @@ def test_random_crop_09(): transform = py_vision.ComposeOp(transforms) data = data.map(input_columns=["image"], operations=transform()) try: - data.create_dict_iterator().get_next() + data.create_dict_iterator(num_epochs=1).get_next() except RuntimeError as e: logger.info("Got an exception in DE: {}".format(str(e))) assert "should be PIL Image" in str(e) @@ -528,7 +528,7 @@ def test_random_crop_comp(plot=False): image_c_cropped = [] image_py_cropped = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_c_cropped.append(c_image) diff --git a/tests/ut/python/dataset/test_random_crop_and_resize.py b/tests/ut/python/dataset/test_random_crop_and_resize.py index 58ae31f4d27..5e5ed23da2a 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize.py @@ -52,7 +52,7 @@ def test_random_crop_and_resize_op_c(plot=False): num_iter = 0 crop_and_resize_images = [] original_images = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): crop_and_resize = item1["image"] original = item2["image"] # Note: resize the original image with the same size as the one applied RandomResizedCrop() @@ -94,7 +94,7 @@ def test_random_crop_and_resize_op_py(plot=False): num_iter = 0 crop_and_resize_images = [] original_images = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): crop_and_resize = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) original = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) original = cv2.resize(original, (512, 256)) @@ -326,7 +326,7 @@ def 
test_random_crop_and_resize_comp(plot=False): image_c_cropped = [] image_py_cropped = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_c_cropped.append(c_image) diff --git a/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py b/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py index 026808e9dee..5048c9dd08e 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py @@ -58,7 +58,7 @@ def test_random_resized_crop_with_bbox_op_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -92,7 +92,7 @@ def test_random_resized_crop_with_bbox_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -127,7 +127,7 @@ def test_random_resized_crop_with_bbox_op_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -154,7 +154,7 @@ def test_random_resized_crop_with_bbox_op_invalid_c(): columns_order=["image", "bbox"], operations=[test_op]) - for _ in dataVoc2.create_dict_iterator(): + for _ in dataVoc2.create_dict_iterator(num_epochs=1): break except ValueError as err: @@ -180,7 +180,7 @@ def test_random_resized_crop_with_bbox_op_invalid2_c(): columns_order=["image", "bbox"], operations=[test_op]) - for _ in dataVoc2.create_dict_iterator(): + for _ in dataVoc2.create_dict_iterator(num_epochs=1): break except ValueError as err: diff --git a/tests/ut/python/dataset/test_random_crop_decode_resize.py b/tests/ut/python/dataset/test_random_crop_decode_resize.py index c6125d4b690..35f9410c43b 100644 --- a/tests/ut/python/dataset/test_random_crop_decode_resize.py +++ b/tests/ut/python/dataset/test_random_crop_decode_resize.py @@ -46,7 +46,7 @@ def test_random_crop_decode_resize_op(plot=False): num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): if num_iter > 0: break image1 = item1["image"] diff --git a/tests/ut/python/dataset/test_random_crop_with_bbox.py b/tests/ut/python/dataset/test_random_crop_with_bbox.py index 28a68a7c38a..186ce0fd633 100644 --- a/tests/ut/python/dataset/test_random_crop_with_bbox.py +++ b/tests/ut/python/dataset/test_random_crop_with_bbox.py @@ -53,7 +53,7 @@ def test_random_crop_with_bbox_op_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), 
dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -83,7 +83,7 @@ def test_random_crop_with_bbox_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -118,7 +118,7 @@ def test_random_crop_with_bbox_op2_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -152,7 +152,7 @@ def test_random_crop_with_bbox_op3_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -190,7 +190,7 @@ def test_random_crop_with_bbox_op_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -217,7 +217,7 @@ def test_random_crop_with_bbox_op_invalid_c(): columns_order=["image", "bbox"], operations=[test_op]) # Add column for "bbox" - for _ in dataVoc2.create_dict_iterator(): + for _ in dataVoc2.create_dict_iterator(num_epochs=1): break except TypeError as err: logger.info("Got an exception in DE: {}".format(str(err))) @@ -257,7 +257,7 @@ def test_random_crop_with_bbox_op_bad_padding(): columns_order=["image", "bbox"], operations=[test_op]) - for _ in dataVoc2.create_dict_iterator(): + for _ in dataVoc2.create_dict_iterator(num_epochs=1): break except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) @@ -271,7 +271,7 @@ def test_random_crop_with_bbox_op_bad_padding(): columns_order=["image", "bbox"], operations=[test_op]) - for _ in dataVoc2.create_dict_iterator(): + for _ in dataVoc2.create_dict_iterator(num_epochs=1): break except RuntimeError as err: logger.info("Got an exception in DE: {}".format(str(err))) diff --git a/tests/ut/python/dataset/test_random_dataset.py b/tests/ut/python/dataset/test_random_dataset.py index 56a2a931134..c66821fea90 100644 --- a/tests/ut/python/dataset/test_random_dataset.py +++ b/tests/ut/python/dataset/test_random_dataset.py @@ -29,7 +29,7 @@ def test_randomdataset_basic1(): ds1 = ds1.repeat(4) num_iter = 0 - for data in ds1.create_dict_iterator(): # each data is a dictionary + for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("{} image: {}".format(num_iter, data["image"])) logger.info("{} label: {}".format(num_iter, data["label"])) @@ -54,7 +54,7 @@ def test_randomdataset_basic2(): ds1 = ds1.repeat(4) num_iter = 0 - for data in ds1.create_dict_iterator(): # each data is a dictionary + for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" # logger.info(data["image"]) logger.info("printing the label: {}".format(data["label"])) @@ -77,7 +77,7 
@@ def test_randomdataset_basic3(): ds1 = ds1.repeat(2) num_iter = 0 - for _ in ds1.create_tuple_iterator(): + for _ in ds1.create_tuple_iterator(num_epochs=1): num_iter += 1 logger.info("Number of data in ds1: {}".format(num_iter)) diff --git a/tests/ut/python/dataset/test_random_erasing.py b/tests/ut/python/dataset/test_random_erasing.py index 3265ac2a631..b4952397918 100644 --- a/tests/ut/python/dataset/test_random_erasing.py +++ b/tests/ut/python/dataset/test_random_erasing.py @@ -55,7 +55,7 @@ def test_random_erasing_op(plot=False): data2 = data2.map(input_columns=["image"], operations=transform_2()) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) diff --git a/tests/ut/python/dataset/test_random_grayscale.py b/tests/ut/python/dataset/test_random_grayscale.py index 4cb25c3a3a9..75e1d18305b 100644 --- a/tests/ut/python/dataset/test_random_grayscale.py +++ b/tests/ut/python/dataset/test_random_grayscale.py @@ -55,7 +55,7 @@ def test_random_grayscale_valid_prob(plot=False): image_gray = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_gray.append(image1) @@ -94,7 +94,7 @@ def test_random_grayscale_input_grayscale_images(): image_gray = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_gray.append(image1) diff --git a/tests/ut/python/dataset/test_random_horizontal_flip.py b/tests/ut/python/dataset/test_random_horizontal_flip.py index ef4f5b8eb6f..9eac83cfb58 100644 --- a/tests/ut/python/dataset/test_random_horizontal_flip.py +++ b/tests/ut/python/dataset/test_random_horizontal_flip.py @@ -58,7 +58,7 @@ def test_random_horizontal_op(plot=False): data2 = data2.map(input_columns=["image"], operations=decode_op) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): # with the seed value, we can only guarantee the first number generated if num_iter > 0: @@ -193,7 +193,7 @@ def test_random_horizontal_comp(plot=False): images_list_c = [] images_list_py = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image_c = item1["image"] image_py = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) images_list_c.append(image_c) diff --git a/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py b/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py index 64c8de1c5e1..5ec00fafb35 100644 --- a/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py +++ 
b/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py @@ -52,7 +52,7 @@ def test_random_horizontal_flip_with_bbox_op_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -82,7 +82,7 @@ def test_random_horizontal_flip_with_bbox_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -121,7 +121,7 @@ def test_random_horizontal_flip_with_bbox_valid_rand_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -164,7 +164,7 @@ def test_random_horizontal_flip_with_bbox_valid_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) diff --git a/tests/ut/python/dataset/test_random_order.py b/tests/ut/python/dataset/test_random_order.py index 863bcd67780..d429f6b8f00 100644 --- a/tests/ut/python/dataset/test_random_order.py +++ b/tests/ut/python/dataset/test_random_order.py @@ -57,7 +57,7 @@ def test_random_order_op(plot=False): image_order = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_order.append(image1) diff --git a/tests/ut/python/dataset/test_random_perspective.py b/tests/ut/python/dataset/test_random_perspective.py index 992bf2b2227..001a57dbd2e 100644 --- a/tests/ut/python/dataset/test_random_perspective.py +++ b/tests/ut/python/dataset/test_random_perspective.py @@ -58,7 +58,7 @@ def test_random_perspective_op(plot=False): image_perspective = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_perspective.append(image1) diff --git a/tests/ut/python/dataset/test_random_posterize.py b/tests/ut/python/dataset/test_random_posterize.py index 9f5013de9b9..27f22a4dcc3 100644 --- a/tests/ut/python/dataset/test_random_posterize.py +++ b/tests/ut/python/dataset/test_random_posterize.py @@ -53,7 +53,7 @@ def test_random_posterize_op_c(plot=False, run_golden=False): image_posterize = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), 
data2.create_dict_iterator(num_epochs=1)): image1 = item1["image"] image2 = item2["image"] image_posterize.append(image1) @@ -99,7 +99,7 @@ def test_random_posterize_op_fixed_point_c(plot=False, run_golden=True): image_posterize = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = item1["image"] image2 = item2["image"] image_posterize.append(image1) diff --git a/tests/ut/python/dataset/test_random_resize.py b/tests/ut/python/dataset/test_random_resize.py index 1ac790ed190..e7cb6328bbe 100644 --- a/tests/ut/python/dataset/test_random_resize.py +++ b/tests/ut/python/dataset/test_random_resize.py @@ -44,7 +44,7 @@ def test_random_resize_op(plot=False): image_original = [] image_resized = [] num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image_1 = item1["image"] image_2 = item2["image"] image_original.append(image_1) diff --git a/tests/ut/python/dataset/test_random_resize_with_bbox.py b/tests/ut/python/dataset/test_random_resize_with_bbox.py index 439a6dc89d7..64db0deab19 100644 --- a/tests/ut/python/dataset/test_random_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_random_resize_with_bbox.py @@ -58,7 +58,7 @@ def test_random_resize_with_bbox_op_voc_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -101,7 +101,7 @@ def test_random_resize_with_bbox_op_rand_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -143,7 +143,7 @@ def test_random_resize_with_bbox_op_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) diff --git a/tests/ut/python/dataset/test_random_rotation.py b/tests/ut/python/dataset/test_random_rotation.py index a6efd3ccec0..84a5b1482af 100644 --- a/tests/ut/python/dataset/test_random_rotation.py +++ b/tests/ut/python/dataset/test_random_rotation.py @@ -50,7 +50,7 @@ def test_random_rotation_op_c(plot=False): data2 = data2.map(input_columns=["image"], operations=decode_op) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): if num_iter > 0: break rotation_de = item1["image"] @@ -86,7 +86,7 @@ def test_random_rotation_op_py(plot=False): data2 = data2.map(input_columns=["image"], operations=transform2()) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): if num_iter > 0: break 
rotation_de = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) @@ -116,7 +116,7 @@ def test_random_rotation_expand(): data1 = data1.map(input_columns=["image"], operations=random_rotation_op) num_iter = 0 - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): rotation = item["image"] logger.info("shape after rotate: {}".format(rotation.shape)) num_iter += 1 @@ -192,7 +192,7 @@ def test_rotation_diff(plot=False): num_iter = 0 image_list_c, image_list_py = [], [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): num_iter += 1 c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) diff --git a/tests/ut/python/dataset/test_random_select_subpolicy.py b/tests/ut/python/dataset/test_random_select_subpolicy.py index 4248f9d0488..4b3ac83c47b 100644 --- a/tests/ut/python/dataset/test_random_select_subpolicy.py +++ b/tests/ut/python/dataset/test_random_select_subpolicy.py @@ -26,7 +26,7 @@ def test_random_select_subpolicy(): data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) data = data.map(input_columns=["col"], operations=visions.RandomSelectSubpolicy(policy)) res = [] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): res.append(i["col"].tolist()) return res except (TypeError, ValueError) as e: diff --git a/tests/ut/python/dataset/test_random_solarize_op.py b/tests/ut/python/dataset/test_random_solarize_op.py index f39abcc1728..f636f814b00 100644 --- a/tests/ut/python/dataset/test_random_solarize_op.py +++ b/tests/ut/python/dataset/test_random_solarize_op.py @@ -62,7 +62,7 @@ def test_random_solarize_op(threshold=(10, 150), plot=False, run_golden=True): image_solarized = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image_solarized.append(item1["image"].copy()) image.append(item2["image"].copy()) if plot: diff --git a/tests/ut/python/dataset/test_random_vertical_flip.py b/tests/ut/python/dataset/test_random_vertical_flip.py index a3d02959fdd..8574bc69702 100644 --- a/tests/ut/python/dataset/test_random_vertical_flip.py +++ b/tests/ut/python/dataset/test_random_vertical_flip.py @@ -58,7 +58,7 @@ def test_random_vertical_op(plot=False): data2 = data2.map(input_columns=["image"], operations=decode_op) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): # with the seed value, we can only guarantee the first number generated if num_iter > 0: @@ -193,7 +193,7 @@ def test_random_vertical_comp(plot=False): images_list_c = [] images_list_py = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image_c = item1["image"] image_py = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) images_list_c.append(image_c) diff --git a/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py b/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py index 1447c31c765..c978bf918c7 100644 --- 
a/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py +++ b/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py @@ -53,7 +53,7 @@ def test_random_vertical_flip_with_bbox_op_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -84,7 +84,7 @@ def test_random_vertical_flip_with_bbox_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -121,7 +121,7 @@ def test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -161,7 +161,7 @@ def test_random_vertical_flip_with_bbox_op_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -186,7 +186,7 @@ def test_random_vertical_flip_with_bbox_op_invalid_c(): columns_order=["image", "bbox"], operations=[test_op]) - for _ in dataVoc2.create_dict_iterator(): + for _ in dataVoc2.create_dict_iterator(num_epochs=1): break except ValueError as err: diff --git a/tests/ut/python/dataset/test_rename.py b/tests/ut/python/dataset/test_rename.py index 8aa3143bb55..0e267daf3fa 100644 --- a/tests/ut/python/dataset/test_rename.py +++ b/tests/ut/python/dataset/test_rename.py @@ -34,7 +34,7 @@ def test_rename(): num_iter = 0 - for _, item in enumerate(data.create_dict_iterator()): + for _, item in enumerate(data.create_dict_iterator(num_epochs=1)): logger.info("item[mask] is {}".format(item["masks"])) np.testing.assert_equal(item["masks"], item["input_ids"]) logger.info("item[seg_ids] is {}".format(item["seg_ids"])) diff --git a/tests/ut/python/dataset/test_repeat.py b/tests/ut/python/dataset/test_repeat.py index 81700dc03eb..0d0567832a5 100644 --- a/tests/ut/python/dataset/test_repeat.py +++ b/tests/ut/python/dataset/test_repeat.py @@ -84,7 +84,7 @@ def test_tf_repeat_03(): data1 = data1.batch(batch_size, drop_remainder=True) num_iter = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter += 1 logger.info("Number of tf data in data1: {}".format(num_iter)) assert num_iter == 2 @@ -267,7 +267,7 @@ def test_repeat_count1(): dataset_size = data1.get_dataset_size() logger.info("dataset repeat then batch's size is {}".format(dataset_size)) num1_iter = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num1_iter += 1 assert data1_size == 3 @@ -289,7 +289,7 @@ def test_repeat_count2(): dataset_size = data1.get_dataset_size() logger.info("dataset batch then repeat's size is {}".format(dataset_size)) num1_iter = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num1_iter += 1 assert data1_size == 
3 diff --git a/tests/ut/python/dataset/test_rescale_op.py b/tests/ut/python/dataset/test_rescale_op.py index a26f9a50f35..a0fac869841 100644 --- a/tests/ut/python/dataset/test_rescale_op.py +++ b/tests/ut/python/dataset/test_rescale_op.py @@ -42,7 +42,7 @@ def get_rescaled(image_id): decode_op = vision.Decode() data1 = data1.map(input_columns=["image"], operations=decode_op) num_iter = 0 - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): image = item["image"] if num_iter == image_id: return rescale_np(image) @@ -68,7 +68,7 @@ def test_rescale_op(plot=False): data2 = data1.map(input_columns=["image"], operations=rescale_op) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image_original = item1["image"] image_de_rescaled = item2["image"] image_np_rescaled = get_rescaled(num_iter) diff --git a/tests/ut/python/dataset/test_resize.py b/tests/ut/python/dataset/test_resize.py index a187e0c53cd..11c2efd8e59 100644 --- a/tests/ut/python/dataset/test_resize.py +++ b/tests/ut/python/dataset/test_resize.py @@ -47,7 +47,7 @@ def test_resize_op(plot=False): data2 = data1.map(input_columns=["image"], operations=resize_op) image_original = [] image_resized = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image_1 = item1["image"] image_2 = item2["image"] image_original.append(image_1) @@ -79,7 +79,7 @@ def test_resize_md5(plot=False): # Compare with expected md5 from images save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN) - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image_1 = item1["image"] image_2 = item2["image"] image_original.append(image_1) diff --git a/tests/ut/python/dataset/test_resize_with_bbox.py b/tests/ut/python/dataset/test_resize_with_bbox.py index af10ed9449c..6cb3c3ab7fd 100644 --- a/tests/ut/python/dataset/test_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_resize_with_bbox.py @@ -58,7 +58,7 @@ def test_resize_with_bbox_op_voc_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -95,7 +95,7 @@ def test_resize_with_bbox_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCOCO1.create_dict_iterator(), dataCOCO2.create_dict_iterator()): + for unAug, Aug in zip(dataCOCO1.create_dict_iterator(num_epochs=1), dataCOCO2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -133,7 +133,7 @@ def test_resize_with_bbox_op_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): unaugSamp.append(unAug) augSamp.append(Aug) diff --git a/tests/ut/python/dataset/test_rgb_hsv.py b/tests/ut/python/dataset/test_rgb_hsv.py index 8f8c13c2498..ad36fe06d16 100644 --- 
a/tests/ut/python/dataset/test_rgb_hsv.py +++ b/tests/ut/python/dataset/test_rgb_hsv.py @@ -154,7 +154,7 @@ def test_rgb_hsv_pipeline(): ds2 = ds2.map(input_columns=["image"], operations=transform2()) num_iter = 0 - for data1, data2 in zip(ds1.create_dict_iterator(), ds2.create_dict_iterator()): + for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): num_iter += 1 ori_img = data1["image"] cvt_img = data2["image"] diff --git a/tests/ut/python/dataset/test_sampler.py b/tests/ut/python/dataset/test_sampler.py index a7ec89c2092..e852ec9bf3a 100644 --- a/tests/ut/python/dataset/test_sampler.py +++ b/tests/ut/python/dataset/test_sampler.py @@ -33,7 +33,7 @@ def test_sequential_sampler(print_res=False): if num_repeats is not None: data1 = data1.repeat(num_repeats) res = [] - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): logger.info("item[image].shape[0]: {}, item[label].item(): {}" .format(item["image"].shape[0], item["label"].item())) res.append(map_[(item["image"].shape[0], item["label"].item())]) @@ -55,7 +55,7 @@ def test_random_sampler(print_res=False): data1 = ds.ManifestDataset(manifest_file, sampler=sampler) data1 = data1.repeat(num_repeats) res = [] - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(map_[(item["image"].shape[0], item["label"].item())]) if print_res: logger.info("image.shapes and labels: {}".format(res)) @@ -78,7 +78,7 @@ def test_random_sampler_multi_iter(print_res=False): data1 = ds.ManifestDataset(manifest_file, sampler=sampler) while num_repeats > 0: res = [] - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(map_[(item["image"].shape[0], item["label"].item())]) if print_res: logger.info("image.shapes and labels: {}".format(res)) @@ -135,7 +135,7 @@ def test_python_sampler(): if num_repeats is not None: data1 = data1.repeat(num_repeats) res = [] - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): logger.info("item[image].shape[0]: {}, item[label].item(): {}" .format(item["image"].shape[0], item["label"].item())) res.append(map_[(item["image"].shape[0], item["label"].item())]) @@ -174,7 +174,7 @@ def test_subset_sampler(): d = ds.ManifestDataset(manifest_file, sampler=sampler) res = [] - for item in d.create_dict_iterator(): + for item in d.create_dict_iterator(num_epochs=1): res.append(map_[(item["image"].shape[0], item["label"].item())]) return res @@ -202,7 +202,7 @@ def test_sampler_chain(): data1 = ds.ManifestDataset(manifest_file, sampler=sampler) res = [] - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): logger.info("item[image].shape[0]: {}, item[label].item(): {}" .format(item["image"].shape[0], item["label"].item())) res.append(map_[(item["image"].shape[0], item["label"].item())]) diff --git a/tests/ut/python/dataset/test_save_op.py b/tests/ut/python/dataset/test_save_op.py index 07f374130e0..967b9ab86b6 100644 --- a/tests/ut/python/dataset/test_save_op.py +++ b/tests/ut/python/dataset/test_save_op.py @@ -109,7 +109,7 @@ def test_case_00(add_and_remove_cv_file): # only bin data shuffle=False) assert d2.get_dataset_size() == 5 num_iter = 0 - for item in d2.create_dict_iterator(): + for item in d2.create_dict_iterator(num_epochs=1): assert len(item) == 5 for field in item: if isinstance(item[field], np.ndarray): @@ -152,7 +152,7 @@ def 
test_case_01(add_and_remove_cv_file): # only raw data shuffle=False) assert d2.get_dataset_size() == 6 num_iter = 0 - for item in d2.create_dict_iterator(): + for item in d2.create_dict_iterator(num_epochs=1): logger.info(item) assert len(item) == 2 for field in item: @@ -289,7 +289,7 @@ def test_case_02(add_and_remove_cv_file): # muti-bytes shuffle=False) assert d2.get_dataset_size() == 6 num_iter = 0 - for item in d2.create_dict_iterator(): + for item in d2.create_dict_iterator(num_epochs=1): assert len(item) == 13 for field in item: if isinstance(item[field], np.ndarray): @@ -322,7 +322,7 @@ def test_case_03(add_and_remove_cv_file): shuffle=False) i = 0 - for item in d2.create_dict_iterator(): # each data is a dictionary + for item in d2.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -351,7 +351,7 @@ def type_tester(t): i = 0 num_repeat = 0 - for item in d2.create_dict_iterator(): # each data is a dictionary + for item in d2.create_dict_iterator(num_epochs=1): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) logger.info(item) np.testing.assert_array_equal(item["data"], golden) @@ -409,14 +409,14 @@ def test_case_07(): os.remove("{}.db".format(CV_FILE_NAME2)) d1 = ds.TFRecordDataset(TFRECORD_FILES, shuffle=False) tf_data = [] - for x in d1.create_dict_iterator(): + for x in d1.create_dict_iterator(num_epochs=1): tf_data.append(x) d1.save(CV_FILE_NAME2, FILES_NUM) d2 = ds.MindDataset(dataset_file=CV_FILE_NAME2, num_parallel_workers=num_readers, shuffle=False) mr_data = [] - for x in d2.create_dict_iterator(): + for x in d2.create_dict_iterator(num_epochs=1): mr_data.append(x) count = 0 for x in tf_data: diff --git a/tests/ut/python/dataset/test_sentencepiece_tokenizer.py b/tests/ut/python/dataset/test_sentencepiece_tokenizer.py index d50ed01e7be..731b19677c5 100644 --- a/tests/ut/python/dataset/test_sentencepiece_tokenizer.py +++ b/tests/ut/python/dataset/test_sentencepiece_tokenizer.py @@ -27,7 +27,7 @@ def test_from_vocab_to_str_UNIGRAM(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = ['▁I', '▁sa', 'w', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'co', 'pe', '.'] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -39,7 +39,7 @@ def test_from_vocab_to_str_BPE(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = ['▁I', '▁saw', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'c', 'ope', '.'] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -52,7 +52,7 @@ def test_from_vocab_to_str_CHAR(): dataset = dataset.map(operations=tokenizer) expect = ['▁', 'I', '▁', 's', 'a', 'w', '▁', 'a', '▁', 'g', 'i', 'r', 'l', '▁', 'w', 'i', 't', 'h',\ '▁', 'a', '▁', 't', 'e', 'l', 'e', 's', 'c', 'o', 'p', 'e', '.'] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -64,7 +64,7 @@ def test_from_vocab_to_str_WORD(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = ['▁I', '▁saw', '▁a', '▁girl', 
'▁with', '▁a', '▁telescope.'] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -76,7 +76,7 @@ def test_from_vocab_to_int(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = [6, 329, 183, 8, 945, 23, 8, 3783, 4382, 4641, 1405, 4] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): ret = i["text"] for key, value in enumerate(ret): assert value == expect[key] @@ -89,7 +89,7 @@ def test_from_file_to_str(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = ['▁I', '▁sa', 'w', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'co', 'pe', '.'] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -102,7 +102,7 @@ def test_from_file_to_int(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = [6, 329, 183, 8, 945, 23, 8, 3783, 4382, 4641, 1405, 4] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): ret = i["text"] for key, value in enumerate(ret): assert value == expect[key] @@ -115,7 +115,7 @@ def test_build_from_dataset(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = ['▁I', '▁sa', 'w', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'co', 'pe', '.'] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -134,7 +134,7 @@ def zip_test(dataset): dataset_1 = dataset_1.apply(apply_func) dataset_zip = ds.zip((dataset_1, dataset_2)) expect = ['▁I', '▁sa', 'w', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'co', 'pe', '.'] - for i in dataset_zip.create_dict_iterator(): + for i in dataset_zip.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -144,7 +144,7 @@ def concat_test(dataset): dataset_1 = copy.deepcopy(dataset) dataset = dataset.concat(dataset_1) expect = ['▁I', '▁sa', 'w', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'co', 'pe', '.'] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] diff --git a/tests/ut/python/dataset/test_serdes_dataset.py b/tests/ut/python/dataset/test_serdes_dataset.py index d041f44bc18..7cb873cba98 100644 --- a/tests/ut/python/dataset/test_serdes_dataset.py +++ b/tests/ut/python/dataset/test_serdes_dataset.py @@ -30,6 +30,7 @@ from mindspore.dataset.transforms.vision import Inter from test_minddataset_sampler import add_and_remove_cv_file, get_data, CV_DIR_NAME, CV_FILE_NAME from util import config_get_set_num_parallel_workers + def test_imagefolder(remove_json_files=True): """ Test simulating resnet50 dataset pipeline. 
@@ -77,8 +78,10 @@ def test_imagefolder(remove_json_files=True): data4 = ds.deserialize(input_dict=ds1_dict) num_samples = 0 # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2) - for item1, item2, item3, item4 in zip(data1.create_dict_iterator(), data2.create_dict_iterator(), - data3.create_dict_iterator(), data4.create_dict_iterator()): + for item1, item2, item3, item4 in zip(data1.create_dict_iterator(num_epochs=1), + data2.create_dict_iterator(num_epochs=1), + data3.create_dict_iterator(num_epochs=1), + data4.create_dict_iterator(num_epochs=1)): np.testing.assert_array_equal(item1['image'], item2['image']) np.testing.assert_array_equal(item1['image'], item3['image']) np.testing.assert_array_equal(item1['label'], item2['label']) @@ -117,8 +120,8 @@ def test_mnist_dataset(remove_json_files=True): data3 = ds.deserialize(json_filepath="mnist_dataset_pipeline_1.json") num = 0 - for data1, data2, data3 in zip(data1.create_dict_iterator(), data2.create_dict_iterator(), - data3.create_dict_iterator()): + for data1, data2, data3 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1), + data3.create_dict_iterator(num_epochs=1)): np.testing.assert_array_equal(data1['image'], data2['image']) np.testing.assert_array_equal(data1['image'], data3['image']) np.testing.assert_array_equal(data1['label'], data2['label']) @@ -197,8 +200,9 @@ def test_random_crop(): data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"]) data2 = data2.map(input_columns="image", operations=decode_op) - for item1, item1_1, item2 in zip(data1.create_dict_iterator(), data1_1.create_dict_iterator(), - data2.create_dict_iterator()): + for item1, item1_1, item2 in zip(data1.create_dict_iterator(num_epochs=1), + data1_1.create_dict_iterator(num_epochs=1), + data2.create_dict_iterator(num_epochs=1)): np.testing.assert_array_equal(item1['image'], item1_1['image']) _ = item2["image"] @@ -251,7 +255,7 @@ def test_minddataset(add_and_remove_cv_file): _ = get_data(CV_DIR_NAME) assert data_set.get_dataset_size() == 5 num_iter = 0 - for _ in data_set.create_dict_iterator(): + for _ in data_set.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 5 diff --git a/tests/ut/python/dataset/test_shuffle.py b/tests/ut/python/dataset/test_shuffle.py index 6da7a1c8856..be488f62f58 100644 --- a/tests/ut/python/dataset/test_shuffle.py +++ b/tests/ut/python/dataset/test_shuffle.py @@ -129,7 +129,7 @@ def test_shuffle_06(): data2 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES) data2 = data2.shuffle(buffer_size=buffer_size) - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): np.testing.assert_equal(item1, item2) diff --git a/tests/ut/python/dataset/test_skip.py b/tests/ut/python/dataset/test_skip.py index 87e4122f848..16d7c369bab 100644 --- a/tests/ut/python/dataset/test_skip.py +++ b/tests/ut/python/dataset/test_skip.py @@ -38,7 +38,7 @@ def test_tf_skip(): data1 = data1.skip(2) num_iter = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter += 1 assert num_iter == 1 @@ -205,7 +205,7 @@ def test_skip_exception_1(): try: data1 = data1.skip(count=-1) num_iter = 0 - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): num_iter += 1 except RuntimeError as e: diff --git 
a/tests/ut/python/dataset/test_sliding_window.py b/tests/ut/python/dataset/test_sliding_window.py index 4fdd7a25c07..ab4eeeacddc 100644 --- a/tests/ut/python/dataset/test_sliding_window.py +++ b/tests/ut/python/dataset/test_sliding_window.py @@ -28,7 +28,7 @@ def test_sliding_window_string(): dataset = dataset.map(input_columns=["text"], operations=text.SlidingWindow(2, 0)) result = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): for i in range(data['text'].shape[0]): result.append([]) for j in range(data['text'].shape[1]): @@ -46,7 +46,7 @@ def test_sliding_window_number(): dataset = ds.GeneratorDataset(gen(inputs), column_names=["number"]) dataset = dataset.map(input_columns=["number"], operations=text.SlidingWindow(1, -1)) - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(data['number'], expect) def test_sliding_window_big_width(): @@ -56,7 +56,7 @@ def test_sliding_window_big_width(): dataset = ds.NumpySlicesDataset(inputs, column_names=["number"], shuffle=False) dataset = dataset.map(input_columns=["number"], operations=text.SlidingWindow(30, 0)) - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(data['number'], expect) def test_sliding_window_exception(): @@ -82,7 +82,7 @@ def test_sliding_window_exception(): inputs = [[1, 2, 3, 4, 5]] dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False) dataset = dataset.map(input_columns=["text"], operations=text.SlidingWindow(3, -100)) - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): pass assert False except RuntimeError as e: @@ -92,7 +92,7 @@ def test_sliding_window_exception(): inputs = ["aa", "bb", "cc"] dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False) dataset = dataset.map(input_columns=["text"], operations=text.SlidingWindow(2, 0)) - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): pass assert False except RuntimeError as e: diff --git a/tests/ut/python/dataset/test_soft_dvpp.py b/tests/ut/python/dataset/test_soft_dvpp.py index 8f1182bddf9..82556f8d059 100644 --- a/tests/ut/python/dataset/test_soft_dvpp.py +++ b/tests/ut/python/dataset/test_soft_dvpp.py @@ -42,7 +42,7 @@ def test_soft_dvpp_decode_resize_jpeg(plot=False): data2 = data2.map(input_columns=["image"], operations=soft_dvpp_decode_resize_op) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): if num_iter > 0: break image1 = item1["image"] @@ -72,7 +72,7 @@ def test_soft_dvpp_decode_random_crop_resize_jpeg(plot=False): data2 = data2.map(input_columns=["image"], operations=soft_dvpp_random_crop_decode_resize_op) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): if num_iter > 0: break image1 = item1["image"] @@ -103,7 +103,7 @@ def test_soft_dvpp_decode_resize_jpeg_supplement(plot=False): data2 = data2.map(input_columns=["image"], operations=soft_dvpp_decode_resize_op) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in 
zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): if num_iter > 0: break image1 = item1["image"] diff --git a/tests/ut/python/dataset/test_split.py b/tests/ut/python/dataset/test_split.py index 7dd140f60e3..bfa12e91889 100644 --- a/tests/ut/python/dataset/test_split.py +++ b/tests/ut/python/dataset/test_split.py @@ -90,11 +90,11 @@ def test_unmappable_split(): s1, s2 = d.split([4, 1], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(item["text"].item().decode("utf8")) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(item["text"].item().decode("utf8")) assert s1_output == text_file_data[0:4] @@ -104,11 +104,11 @@ def test_unmappable_split(): s1, s2 = d.split([0.8, 0.2], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(item["text"].item().decode("utf8")) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(item["text"].item().decode("utf8")) assert s1_output == text_file_data[0:4] @@ -118,11 +118,11 @@ def test_unmappable_split(): s1, s2 = d.split([0.33, 0.67], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(item["text"].item().decode("utf8")) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(item["text"].item().decode("utf8")) assert s1_output == text_file_data[0:2] @@ -143,11 +143,11 @@ def test_unmappable_randomize_deterministic(): for _ in range(10): s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(item["text"].item().decode("utf8")) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(item["text"].item().decode("utf8")) # note no overlap @@ -172,11 +172,11 @@ def test_unmappable_randomize_repeatable(): s2 = s2.repeat(num_epochs) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(item["text"].item().decode("utf8")) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(item["text"].item().decode("utf8")) # note no overlap @@ -208,7 +208,7 @@ def test_unmappable_multi_split(): s1_correct_output = [text_file_data[0], text_file_data[2], text_file_data[1], text_file_data[4]] s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(item["text"].item().decode("utf8")) assert s1_output == s1_correct_output @@ -216,15 +216,15 @@ def test_unmappable_multi_split(): s1s1, s1s2, s1s3 = s1.split([1, 2, 1], randomize=False) s1s1_output = [] - for item in s1s1.create_dict_iterator(): + for item in s1s1.create_dict_iterator(num_epochs=1): s1s1_output.append(item["text"].item().decode("utf8")) s1s2_output = [] - for item in s1s2.create_dict_iterator(): + for item in s1s2.create_dict_iterator(num_epochs=1): s1s2_output.append(item["text"].item().decode("utf8")) s1s3_output = [] - for item in s1s3.create_dict_iterator(): + for item in s1s3.create_dict_iterator(num_epochs=1): 
s1s3_output.append(item["text"].item().decode("utf8")) assert s1s1_output == [s1_correct_output[0]] @@ -232,7 +232,7 @@ def test_unmappable_multi_split(): assert s1s3_output == [s1_correct_output[3]] s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(item["text"].item().decode("utf8")) assert s2_output == [text_file_data[3]] @@ -243,15 +243,15 @@ def test_unmappable_multi_split(): s1s1, s1s2, s1s3 = s1.split([1, 2, 1]) s1s1_output = [] - for item in s1s1.create_dict_iterator(): + for item in s1s1.create_dict_iterator(num_epochs=1): s1s1_output.append(item["text"].item().decode("utf8")) s1s2_output = [] - for item in s1s2.create_dict_iterator(): + for item in s1s2.create_dict_iterator(num_epochs=1): s1s2_output.append(item["text"].item().decode("utf8")) s1s3_output = [] - for item in s1s3.create_dict_iterator(): + for item in s1s3.create_dict_iterator(num_epochs=1): s1s3_output.append(item["text"].item().decode("utf8")) assert s1s1_output == [s1_correct_output[shuffled_ids[0]]] @@ -259,7 +259,7 @@ def test_unmappable_multi_split(): assert s1s3_output == [s1_correct_output[shuffled_ids[3]]] s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(item["text"].item().decode("utf8")) assert s2_output == [text_file_data[3]] @@ -285,11 +285,11 @@ def test_mappable_split_general(): s1, s2 = d.split([4, 1], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1, 2, 3] @@ -299,11 +299,11 @@ def test_mappable_split_general(): s1, s2 = d.split([0.8, 0.2], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1, 2, 3] @@ -313,11 +313,11 @@ def test_mappable_split_general(): s1, s2 = d.split([0.33, 0.67], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1] @@ -331,11 +331,11 @@ def test_mappable_split_optimized(): s1, s2 = d.split([4, 1], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1, 2, 3] @@ -345,11 +345,11 @@ def test_mappable_split_optimized(): s1, s2 = d.split([0.8, 0.2], randomize=False) s1_output = [] - for item in 
s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1, 2, 3] @@ -359,11 +359,11 @@ def test_mappable_split_optimized(): s1, s2 = d.split([0.33, 0.67], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1] @@ -379,11 +379,11 @@ def test_mappable_randomize_deterministic(): for _ in range(10): s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) # note no overlap @@ -403,11 +403,11 @@ def test_mappable_randomize_repeatable(): s2 = s2.repeat(num_epochs) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) # note no overlap @@ -442,12 +442,12 @@ def test_mappable_sharding(): # shard 0 s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) # shard 1 d2s1_output = [] - for item in d2s1.create_dict_iterator(): + for item in d2s1.create_dict_iterator(num_epochs=1): d2s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) rows_per_shard_per_epoch = 2 @@ -468,11 +468,11 @@ def test_mappable_sharding(): # test other split s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) d2s2_output = [] - for item in d2s2.create_dict_iterator(): + for item in d2s2.create_dict_iterator(num_epochs=1): d2s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s2_output == [2] @@ -498,7 +498,7 @@ def test_mappable_multi_split(): s1_correct_output = [0, 1, 3, 4] s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == s1_correct_output @@ -506,15 +506,15 @@ def test_mappable_multi_split(): s1s1, s1s2, s1s3 = s1.split([1, 2, 1], randomize=False) s1s1_output = [] - for item in s1s1.create_dict_iterator(): + for item in s1s1.create_dict_iterator(num_epochs=1): s1s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s1s2_output = [] - for item in s1s2.create_dict_iterator(): + for item in s1s2.create_dict_iterator(num_epochs=1): 
s1s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s1s3_output = [] - for item in s1s3.create_dict_iterator(): + for item in s1s3.create_dict_iterator(num_epochs=1): s1s3_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1s1_output == [s1_correct_output[0]] @@ -522,7 +522,7 @@ def test_mappable_multi_split(): assert s1s3_output == [s1_correct_output[3]] s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s2_output == [2] @@ -533,15 +533,15 @@ def test_mappable_multi_split(): s1s1, s1s2, s1s3 = s1.split([1, 2, 1]) s1s1_output = [] - for item in s1s1.create_dict_iterator(): + for item in s1s1.create_dict_iterator(num_epochs=1): s1s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s1s2_output = [] - for item in s1s2.create_dict_iterator(): + for item in s1s2.create_dict_iterator(num_epochs=1): s1s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s1s3_output = [] - for item in s1s3.create_dict_iterator(): + for item in s1s3.create_dict_iterator(num_epochs=1): s1s3_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1s1_output == [s1_correct_output[random_sampler_ids[0]]] @@ -549,7 +549,7 @@ def test_mappable_multi_split(): assert s1s3_output == [s1_correct_output[random_sampler_ids[3]]] s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s2_output == [2] @@ -561,11 +561,11 @@ def test_rounding(): s1, s2 = d.split([0.5, 0.5], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1, 2] @@ -575,15 +575,15 @@ def test_rounding(): s1, s2, s3 = d.split([0.15, 0.55, 0.3], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(): + for item in s1.create_dict_iterator(num_epochs=1): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(): + for item in s2.create_dict_iterator(num_epochs=1): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s3_output = [] - for item in s3.create_dict_iterator(): + for item in s3.create_dict_iterator(num_epochs=1): s3_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0] diff --git a/tests/ut/python/dataset/test_sync_wait.py b/tests/ut/python/dataset/test_sync_wait.py index eb2261a5d34..b64cae79ac4 100644 --- a/tests/ut/python/dataset/test_sync_wait.py +++ b/tests/ut/python/dataset/test_sync_wait.py @@ -48,7 +48,7 @@ def test_simple_sync_wait(): dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) dataset = dataset.batch(batch_size) count = 0 - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): assert data["input"][0] == count count += batch_size data = {"loss": count} @@ -72,7 +72,7 @@ def test_simple_shuffle_sync(): dataset = 
dataset.batch(batch_size) count = 0 - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): count += 1 data = {"loss": count} dataset.sync_update(condition_name="policy", data=data) @@ -98,7 +98,7 @@ def test_two_sync(): dataset = dataset.batch(batch_size) count = 0 - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): count += 1 data = {"loss": count} dataset.sync_update(condition_name="every batch", data=data) @@ -122,7 +122,7 @@ def test_sync_epoch(): for _ in range(3): aug.update({"loss": 0}) count = 0 - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): assert data["input"][0] == count count += batch_size data = {"loss": count} @@ -149,7 +149,7 @@ def test_multiple_iterators(): dataset2 = dataset2.map(input_columns=["input"], operations=[aug.preprocess]) dataset2 = dataset2.batch(batch_size, drop_remainder=True) - for item1, item2 in zip(dataset.create_dict_iterator(), dataset2.create_dict_iterator()): + for item1, item2 in zip(dataset.create_dict_iterator(num_epochs=1), dataset2.create_dict_iterator(num_epochs=1)): assert item1["input"][0] == item2["input"][0] data1 = {"loss": item1["input"][0]} data2 = {"loss": item2["input"][0]} @@ -222,7 +222,7 @@ def test_sync_exception_04(): dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) count = 0 with pytest.raises(RuntimeError) as e: - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): count += 1 data = {"loss": count} dataset.sync_update(condition_name="every batch", num_batch=-1, data=data) @@ -242,7 +242,7 @@ def test_sync_exception_05(): dataset = dataset.sync_wait(condition_name="every batch", callback=aug.update) dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) with pytest.raises(RuntimeError) as e: - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): dataset.disable_sync() count += 1 data = {"loss": count} diff --git a/tests/ut/python/dataset/test_ten_crop.py b/tests/ut/python/dataset/test_ten_crop.py index d196bc05cf5..b23daa91460 100644 --- a/tests/ut/python/dataset/test_ten_crop.py +++ b/tests/ut/python/dataset/test_ten_crop.py @@ -50,7 +50,7 @@ def util_test_ten_crop(crop_size, vertical_flip=False, plot=False): transform_2 = vision.ComposeOp(transforms_2) data2 = data2.map(input_columns=["image"], operations=transform_2()) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_2 = item2["image"] @@ -173,7 +173,7 @@ def test_ten_crop_wrong_img_error_msg(): data = data.map(input_columns=["image"], operations=transform()) with pytest.raises(RuntimeError) as info: - data.create_tuple_iterator().get_next() + data.create_tuple_iterator(num_epochs=1).get_next() error_msg = "TypeError: img should be PIL Image or Numpy array. 
Got " # error msg comes from ToTensor() diff --git a/tests/ut/python/dataset/test_tensor_string.py b/tests/ut/python/dataset/test_tensor_string.py index c833527bc32..0d821f683d3 100644 --- a/tests/ut/python/dataset/test_tensor_string.py +++ b/tests/ut/python/dataset/test_tensor_string.py @@ -124,7 +124,7 @@ def test_tfrecord1(): data = ds.TFRecordDataset("../data/dataset/testTextTFRecord/text.tfrecord", shuffle=False, schema=s) - for i, d in enumerate(data.create_dict_iterator()): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): assert d["line"].shape == line[i].shape assert d["words"].shape == words[i].shape assert d["chinese"].shape == chinese[i].shape @@ -136,7 +136,7 @@ def test_tfrecord1(): def test_tfrecord2(): data = ds.TFRecordDataset("../data/dataset/testTextTFRecord/text.tfrecord", shuffle=False, schema='../data/dataset/testTextTFRecord/datasetSchema.json') - for i, d in enumerate(data.create_dict_iterator()): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): assert d["line"].shape == line[i].shape assert d["words"].shape == words[i].shape assert d["chinese"].shape == chinese[i].shape @@ -153,7 +153,7 @@ def test_tfrecord3(): data = ds.TFRecordDataset("../data/dataset/testTextTFRecord/text.tfrecord", shuffle=False, schema=s) - for i, d in enumerate(data.create_dict_iterator()): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): assert d["line"].shape == line[i].shape assert d["words"].shape == words[i].reshape([2, 2]).shape assert d["chinese"].shape == chinese[i].shape @@ -186,7 +186,7 @@ def create_text_mindrecord(): def test_mindrecord(): data = ds.MindDataset("../data/dataset/testTextMindRecord/test.mindrecord", shuffle=False) - for i, d in enumerate(data.create_dict_iterator()): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): assert d["english"].shape == line[i].shape assert d["chinese"].shape == chinese[i].shape np.testing.assert_array_equal(line[i], to_str(d["english"])) @@ -231,7 +231,7 @@ def test_batch_padding_01(): data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], b"-2"), "col1d": ([2], b"-1")}) data1 = data1.repeat(2) - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal([[b"0", b"-1"], [b"1", b"-1"]], data["col1d"]) np.testing.assert_array_equal([[[b"100", b"-2"], [b"200", b"-2"]], [[b"101", b"-2"], [b"201", b"-2"]]], data["col2d"]) @@ -241,7 +241,7 @@ def test_batch_padding_02(): data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], "")}) data1 = data1.repeat(2) - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal([[b"0"], [b"1"]], data["col1d"]) np.testing.assert_array_equal([[[b"100", b""]], [[b"101", b""]]], data["col2d"]) @@ -251,7 +251,7 @@ def test_batch_padding_03(): data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, "PAD_VALUE")}) # pad automatically data1 = data1.repeat(2) res = dict() - for ind, data in enumerate(data1.create_dict_iterator()): + for ind, data in enumerate(data1.create_dict_iterator(num_epochs=1)): res[ind] = data["col"].copy() np.testing.assert_array_equal(res[0], [[b"0", b"PAD_VALUE"], [0, 1]]) np.testing.assert_array_equal(res[1], [[b"0", b"1", b"2", b"PAD_VALUE"], [b"0", b"1", b"2", b"3"]]) 
@@ -263,7 +263,7 @@ def test_batch_padding_04(): data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), ["col1", "col2"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={}) # pad automatically data1 = data1.repeat(2) - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(data["col1"], [[b"0", b""], [b"0", b"1"]]) np.testing.assert_array_equal(data["col2"], [[b"100", b""], [b"100", b"101"]]) @@ -272,7 +272,7 @@ def test_batch_padding_05(): data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"]) data1 = data1.batch(batch_size=3, drop_remainder=False, pad_info={"col2": ([2, None], "-2"), "col1": (None, "-1")}) # pad automatically - for data in data1.create_dict_iterator(): + for data in data1.create_dict_iterator(num_epochs=1): np.testing.assert_array_equal(data["col1"], [[[b"0", b"-1", b"-1"]], [[b"0", b"1", b"-1"]], [[b"0", b"1", b"2"]]]) np.testing.assert_array_equal(data["col2"], diff --git a/tests/ut/python/dataset/test_text_basic_tokenizer.py b/tests/ut/python/dataset/test_text_basic_tokenizer.py index 822790fd608..3e143ad698c 100644 --- a/tests/ut/python/dataset/test_text_basic_tokenizer.py +++ b/tests/ut/python/dataset/test_text_basic_tokenizer.py @@ -82,7 +82,7 @@ def check_basic_tokenizer_default(first, last, expected_tokens, expected_offsets dataset = dataset.map(operations=basic_tokenizer) count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['text']) logger.info("Out:", token) logger.info("Exp:", expected_tokens[count]) @@ -108,7 +108,7 @@ def check_basic_tokenizer_with_offsets(first, last, expected_tokens, expected_of dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], columns_order=['token', 'offsets_start', 'offsets_limit'], operations=basic_tokenizer) count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['token']) logger.info("Out:", token) logger.info("Exp:", expected_tokens[count]) diff --git a/tests/ut/python/dataset/test_text_bert_tokenizer.py b/tests/ut/python/dataset/test_text_bert_tokenizer.py index b29f94eb324..a858cd2dc11 100644 --- a/tests/ut/python/dataset/test_text_bert_tokenizer.py +++ b/tests/ut/python/dataset/test_text_bert_tokenizer.py @@ -187,7 +187,7 @@ def check_bert_tokenizer_default(first, last, expect_str, preserve_unused_token=preserve_unused_token) dataset = dataset.map(operations=tokenizer_op) count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['text']) logger.info("Out:", token) logger.info("Exp:", expect_str[count]) @@ -215,7 +215,7 @@ def check_bert_tokenizer_with_offsets(first, last, expect_str, dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op) count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['token']) logger.info("Out:", token) logger.info("Exp:", expect_str[count]) diff --git a/tests/ut/python/dataset/test_text_jieba_tokenizer.py b/tests/ut/python/dataset/test_text_jieba_tokenizer.py index 66665b61e69..7a1518ff9e8 100644 --- a/tests/ut/python/dataset/test_text_jieba_tokenizer.py +++ b/tests/ut/python/dataset/test_text_jieba_tokenizer.py @@ 
-32,7 +32,7 @@ def test_jieba_1(): operations=jieba_op, num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] ret = [] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -45,7 +45,7 @@ def test_jieba_1_1(): data = data.map(input_columns=["text"], operations=jieba_op, num_parallel_workers=1) expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -58,7 +58,7 @@ def test_jieba_1_2(): data = data.map(input_columns=["text"], operations=jieba_op, num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -73,7 +73,7 @@ def test_jieba_2(): expect = ['男默女泪', '市', '长江大桥'] data = data.map(input_columns=["text"], operations=jieba_op, num_parallel_workers=2) - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -88,7 +88,7 @@ def test_jieba_2_1(): data = data.map(input_columns=["text"], operations=jieba_op, num_parallel_workers=2) expect = ['男默女泪', '市', '长江大桥'] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -112,7 +112,7 @@ def test_jieba_2_3(): data = data.map(input_columns=["text"], operations=jieba_op, num_parallel_workers=2) expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -130,7 +130,7 @@ def test_jieba_3(): data = data.map(input_columns=["text"], operations=jieba_op, num_parallel_workers=1) expect = ['男默女泪', '市', '长江大桥'] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -149,7 +149,7 @@ def test_jieba_3_1(): data = data.map(input_columns=["text"], operations=jieba_op, num_parallel_workers=1) expect = ['男默女泪', '市长', '江大桥'] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -165,7 +165,7 @@ def test_jieba_4(): data = data.map(input_columns=["text"], operations=jieba_op, num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -191,7 +191,7 @@ def test_jieba_5(): data = data.map(input_columns=["text"], operations=jieba_op, num_parallel_workers=1) expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -208,7 +208,7 @@ def test_jieba_with_offsets_1(): 
expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] ret = [] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -228,7 +228,7 @@ def test_jieba_with_offsets_1_1(): expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] expected_offsets_start = [0, 6, 12, 15, 18, 21, 27, 33, 36, 42, 45] expected_offsets_limit = [6, 12, 15, 18, 21, 27, 33, 36, 42, 45, 48] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -248,7 +248,7 @@ def test_jieba_with_offsets_1_2(): expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -270,7 +270,7 @@ def test_jieba_with_offsets_2(): operations=jieba_op, num_parallel_workers=2) expected_offsets_start = [0, 12, 15] expected_offsets_limit = [12, 15, 27] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -292,7 +292,7 @@ def test_jieba_with_offsets_2_1(): expect = ['男默女泪', '市', '长江大桥'] expected_offsets_start = [0, 12, 15] expected_offsets_limit = [12, 15, 27] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -314,7 +314,7 @@ def test_jieba_with_offsets_2_2(): expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51] expected_offsets_limit = [6, 12, 21, 27, 30, 42, 45, 51, 57] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -339,7 +339,7 @@ def test_jieba_with_offsets_3(): expect = ['男默女泪', '市', '长江大桥'] expected_offsets_start = [0, 12, 15] expected_offsets_limit = [12, 15, 27] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -365,7 +365,7 @@ def test_jieba_with_offsets_3_1(): expect = ['男默女泪', '市长', '江大桥'] expected_offsets_start = [0, 12, 18] expected_offsets_limit = [12, 18, 27] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -388,7 +388,7 @@ def test_jieba_with_offsets_4(): expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -411,7 +411,7 @@ def test_jieba_with_offsets_5(): expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51] expected_offsets_limit = [6, 12, 21, 27, 30, 
42, 45, 51, 57] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -439,7 +439,7 @@ def test_jieba_6(): data = data.map(input_columns=["text"], operations=pytoken_op, num_parallel_workers=1) expect = ['今天天气太', '好了我们一', '起去外面玩吧'] - for i in data.create_dict_iterator(): + for i in data.create_dict_iterator(num_epochs=1): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] diff --git a/tests/ut/python/dataset/test_text_tokenizer.py b/tests/ut/python/dataset/test_text_tokenizer.py index 2e2b7b741dd..f9854ef1113 100644 --- a/tests/ut/python/dataset/test_text_tokenizer.py +++ b/tests/ut/python/dataset/test_text_tokenizer.py @@ -45,7 +45,7 @@ def test_unicode_char_tokenizer_default(): tokenizer = text.UnicodeCharTokenizer() dataset = dataset.map(operations=tokenizer) tokens = [] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['text']).tolist() tokens.append(token) logger.info("The out tokens is : {}".format(tokens)) @@ -67,7 +67,7 @@ def test_unicode_char_tokenizer_with_offsets(): expected_offsets_limit = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [3, 6, 9, 12, 15, 18], [3, 6, 9, 10, 11, 12, 13, 14, 15, 16, 17], [1, 2]] count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['token']).tolist() tokens.append(token) np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) @@ -89,7 +89,7 @@ def test_whitespace_tokenizer_default(): tokenizer = text.WhitespaceTokenizer() dataset = dataset.map(operations=tokenizer) tokens = [] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['text']).tolist() tokens.append(token) logger.info("The out tokens is : {}".format(tokens)) @@ -112,7 +112,7 @@ def test_whitespace_tokenizer_with_offsets(): expected_offsets_start = [[0, 8, 11], [0], [0], [0]] expected_offsets_limit = [[7, 10, 19], [18], [17], [0]] count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['token']).tolist() tokens.append(token) np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) @@ -136,7 +136,7 @@ def test_unicode_script_tokenizer_default(): dataset = dataset.map(operations=tokenizer) tokens = [] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['text']).tolist() tokens.append(token) logger.info("The out tokens is : {}".format(tokens)) @@ -155,7 +155,7 @@ def test_unicode_script_tokenizer_default2(): tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=True) dataset = dataset.map(operations=tokenizer) tokens = [] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['text']).tolist() tokens.append(token) logger.info("The out tokens is :", tokens) @@ -178,7 +178,7 @@ def test_unicode_script_tokenizer_with_offsets(): expected_offsets_start = [[0, 8, 11, 18], [0, 15], [0, 9, 16], [0]] expected_offsets_limit = [[7, 10, 18, 19], [15, 18], [9, 16, 17], [0]] count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['token']).tolist() tokens.append(token) 
np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) @@ -204,7 +204,7 @@ def test_unicode_script_tokenizer_with_offsets2(): expected_offsets_start = [[0, 7, 8, 10, 11, 18], [0, 15], [0, 9, 16], [0]] expected_offsets_limit = [[7, 8, 10, 11, 18, 19], [15, 18], [9, 16, 17], [2]] count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['token']).tolist() tokens.append(token) np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) @@ -224,7 +224,7 @@ def test_case_fold(): dataset = dataset.map(operations=op) lower_strs = [] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['text']).tolist() lower_strs.append(token) assert lower_strs == expect_strs @@ -241,7 +241,7 @@ def test_normalize_utf8(): dataset = dataset.map(operations=normalize) out_bytes = [] out_texts = [] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): out_bytes.append(i['text']) out_texts.append(text.to_str(i['text']).tolist()) logger.info("The out bytes is : ", out_bytes) @@ -282,7 +282,7 @@ def test_regex_replace(): replace_op = text.RegexReplace(pattern, replace) dataset = dataset.map(operations=replace_op) out_text = [] - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['text']).tolist() out_text.append(token) logger.info("Out:", out_text) @@ -310,7 +310,7 @@ def test_regex_tokenizer_default(): dataset = dataset.map(operations=tokenizer_op) out_text = [] count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['text']).tolist() np.testing.assert_array_equal(token, expect_str[count]) count += 1 @@ -343,7 +343,7 @@ def test_regex_tokenizer_with_offsets(): columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op) out_text = [] count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['token']).tolist() np.testing.assert_array_equal(token, expect_str[count]) np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) diff --git a/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py b/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py index 8b47ec971ef..988ba4b7a6c 100644 --- a/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py +++ b/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py @@ -108,7 +108,7 @@ def check_wordpiece_tokenizer_default(first, last, expect_str, expected_offsets_ max_bytes_per_token=max_bytes_per_token) dataset = dataset.map(operations=tokenizer_op) count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['text']) logger.info("Out:", token) logger.info("Exp:", expect_str[count]) @@ -129,7 +129,7 @@ def check_wordpiece_tokenizer_with_offsets(first, last, expect_str, expected_off dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op) count = 0 - for i in dataset.create_dict_iterator(): + for i in dataset.create_dict_iterator(num_epochs=1): token = text.to_str(i['token']) logger.info("Out:", token) logger.info("Exp:", expect_str[count]) diff --git a/tests/ut/python/dataset/test_to_number_op.py 
b/tests/ut/python/dataset/test_to_number_op.py index 47b39e7a682..60e409b5461 100644 --- a/tests/ut/python/dataset/test_to_number_op.py +++ b/tests/ut/python/dataset/test_to_number_op.py @@ -42,7 +42,7 @@ def test_to_number_typical_case_integral(): expected_output = [int(string) for string in inputs] output = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): output.append(data["strings"]) assert output == expected_output @@ -58,7 +58,7 @@ def test_to_number_typical_case_non_integral(): expected_output = [float(string) for string in inputs] output = [] - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): output.append(data["strings"]) for expected, actual, epsilon in zip(expected_output, output, epsilons): @@ -72,7 +72,7 @@ def out_of_bounds_error_message_check(dataset, np_type, value_to_cast): type_name = str(np.dtype(np_type)) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): pass assert "String input " + value_to_cast + " will be out of bounds if casted to " + type_name in str(info.value) assert "valid range is: [" + type_min + ", " + type_max + "]" in str(info.value) @@ -100,7 +100,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[0])) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): pass assert "outside of valid float16 range" in str(info.value) @@ -109,7 +109,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[1])) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): pass assert "String input " + input_strings[0] + " will be out of bounds if casted to float32" in str(info.value) @@ -118,7 +118,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[2])) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): pass assert "String input " + input_strings[0] + " will be out of bounds if casted to float64" in str(info.value) @@ -129,7 +129,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[0])) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): pass assert "outside of valid float16 range" in str(info.value) @@ -138,7 +138,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[1])) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): pass assert "String input " + input_strings[0] + " will be out of bounds if casted to float32" in str(info.value) @@ -147,7 +147,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_non_integral_types[2])) with pytest.raises(RuntimeError) as info: - for _ in 
dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): pass assert "String input " + input_strings[0] + " will be out of bounds if casted to float64" in str(info.value) @@ -158,19 +158,19 @@ def test_to_number_boundaries_integral(): input_strings = [str(type_info.max)] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_type)) - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): assert data["strings"] == int(input_strings[0]) input_strings = [str(type_info.min)] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_type)) - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): assert data["strings"] == int(input_strings[0]) input_strings = [str(0)] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(ms_type)) - for data in dataset.create_dict_iterator(): + for data in dataset.create_dict_iterator(num_epochs=1): assert data["strings"] == int(input_strings[0]) @@ -180,7 +180,7 @@ def test_to_number_invalid_input(): dataset = dataset.map(input_columns=["strings"], operations=text.ToNumber(mstype.int32)) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(): + for _ in dataset.create_dict_iterator(num_epochs=1): pass assert "It is invalid to convert " + input_strings[0] + " to a number" in str(info.value) diff --git a/tests/ut/python/dataset/test_to_type.py b/tests/ut/python/dataset/test_to_type.py index 0cd42f5e2ad..a1a01a9856f 100644 --- a/tests/ut/python/dataset/test_to_type.py +++ b/tests/ut/python/dataset/test_to_type.py @@ -53,7 +53,7 @@ def test_to_type_op(): transform2 = py_vision.ComposeOp(transforms2) data2 = data2.map(input_columns=["image"], operations=transform2()) - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): image1 = item1["image"] image2 = item2["image"] diff --git a/tests/ut/python/dataset/test_type_cast.py b/tests/ut/python/dataset/test_type_cast.py index 64820250cf0..b47c874d1b8 100644 --- a/tests/ut/python/dataset/test_type_cast.py +++ b/tests/ut/python/dataset/test_type_cast.py @@ -55,7 +55,7 @@ def test_type_cast(): data2 = data2.map(input_columns=["image"], operations=transform()) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): num_iter += 1 c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) @@ -95,7 +95,7 @@ def test_type_cast_string(): data2 = data2.map(input_columns=["image"], operations=transform()) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): num_iter += 1 c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) diff --git a/tests/ut/python/dataset/test_uniform_augment.py b/tests/ut/python/dataset/test_uniform_augment.py index 
a52da41e206..10f896cf386 100644 --- a/tests/ut/python/dataset/test_uniform_augment.py +++ b/tests/ut/python/dataset/test_uniform_augment.py @@ -255,7 +255,7 @@ def test_cpp_uniform_augment_random_crop_badinput(num_ops=1): ds1 = ds1.batch(batch_size, drop_remainder=True, num_parallel_workers=1) num_batches = 0 try: - for _ in ds1.create_dict_iterator(): + for _ in ds1.create_dict_iterator(num_epochs=1): num_batches += 1 except Exception as e: assert "Crop size" in str(e) diff --git a/tests/ut/python/dataset/test_var_batch_map.py b/tests/ut/python/dataset/test_var_batch_map.py index 75979457ced..5d9c0b709e7 100644 --- a/tests/ut/python/dataset/test_var_batch_map.py +++ b/tests/ut/python/dataset/test_var_batch_map.py @@ -25,12 +25,12 @@ def test_batch_corner_cases(): def test_repeat_batch(gen_num, repeats, batch_size, drop, res): data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).repeat(repeats).batch(batch_size, drop) - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(item["num"]) def test_batch_repeat(gen_num, repeats, batch_size, drop, res): data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).batch(batch_size, drop).repeat(repeats) - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(item["num"]) tst1, tst2, tst3, tst4 = [], [], [], [] @@ -81,7 +81,7 @@ def test_variable_size_batch(): def test_repeat_batch(gen_num, r, drop, func, res): data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).repeat(r).batch(batch_size=func, drop_remainder=drop) - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(item["num"]) # same as test_repeat_batch except each row is passed through via a map which makes a copy of each element @@ -89,14 +89,14 @@ def test_variable_size_batch(): res = [] data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).repeat(r) \ .batch(batch_size=func, drop_remainder=drop, input_columns=["num"], per_batch_map=simple_copy) - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(item["num"]) return res def test_batch_repeat(gen_num, r, drop, func, res): data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).batch(batch_size=func, drop_remainder=drop).repeat( r) - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(item["num"]) # same as test_batch_repeat except each row is passed through via a map which makes a copy of each element @@ -104,7 +104,7 @@ def test_variable_size_batch(): res = [] data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]) \ .batch(batch_size=func, drop_remainder=drop, input_columns=["num"], per_batch_map=simple_copy).repeat(r) - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(item["num"]) return res @@ -162,7 +162,7 @@ def test_basic_batch_map(): def batch_map_config(num, r, batch_size, func, res): data1 = ds.GeneratorDataset((lambda: gen(num)), ["num"]) \ .batch(batch_size=batch_size, input_columns=["num"], per_batch_map=func).repeat(r) - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(item["num"]) tst1, tst2, = [], [] @@ -201,7 +201,7 @@ def test_batch_multi_col_map(): def batch_map_config(num, r, batch_size, func, col_names, res): data1 = ds.GeneratorDataset((lambda: gen(num)), ["num", "num_square"]) \ 
.batch(batch_size=batch_size, input_columns=col_names, per_batch_map=func).repeat(r) - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(np.array([item["num"], item["num_square"]])) tst1, tst2, tst3, tst4 = [], [], [], [] @@ -253,7 +253,7 @@ def test_var_batch_multi_col_map(): def batch_map_config(num, r, fbatch, fmap, col_names, res): data1 = ds.GeneratorDataset((lambda: gen_3_cols(num)), ["col1", "col2", "col3"]) \ .batch(batch_size=fbatch, input_columns=col_names, per_batch_map=fmap).repeat(r) - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): res.append(np.array([item["col1"], item["col2"], item["col3"]])) tst1 = [] @@ -277,7 +277,7 @@ def test_var_batch_var_resize(): data1 = data1.batch(batch_size=add_one, drop_remainder=True, input_columns=["image"], per_batch_map=np_psedo_resize) # i-th batch has shape [i, i^2, i^2, 3] i = 1 - for item in data1.create_dict_iterator(): + for item in data1.create_dict_iterator(num_epochs=1): assert item["image"].shape == (i, i ** 2, i ** 2, 3), "\ntest_var_batch_var_resize FAILED\n" i += 1 @@ -297,7 +297,7 @@ def test_exception(): data1 = ds.GeneratorDataset((lambda: gen(100)), ["num"]).batch(bad_batch_size) try: - for _ in data1.create_dict_iterator(): + for _ in data1.create_dict_iterator(num_epochs=1): pass assert False except RuntimeError: @@ -305,7 +305,7 @@ def test_exception(): data2 = ds.GeneratorDataset((lambda: gen(100)), ["num"]).batch(4, input_columns=["num"], per_batch_map=bad_map_func) try: - for _ in data2.create_dict_iterator(): + for _ in data2.create_dict_iterator(num_epochs=1): pass assert False except RuntimeError: diff --git a/tests/ut/python/dataset/test_vocab.py b/tests/ut/python/dataset/test_vocab.py index cf3d457e31b..04cb463eb8c 100644 --- a/tests/ut/python/dataset/test_vocab.py +++ b/tests/ut/python/dataset/test_vocab.py @@ -31,7 +31,7 @@ def test_from_list_tutorial(): data = data.map(input_columns=["text"], operations=lookup) ind = 0 res = [2, 1, 4, 5, 6, 7] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): assert d["text"] == res[ind], ind ind += 1 @@ -43,7 +43,7 @@ def test_from_file_tutorial(): data = data.map(input_columns=["text"], operations=lookup) ind = 0 res = [10, 11, 12, 15, 13, 14] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): assert d["text"] == res[ind], ind ind += 1 @@ -55,7 +55,7 @@ def test_from_dict_tutorial(): data = data.map(input_columns=["text"], operations=lookup) res = [3, 6, 2, 4, 5, 6] ind = 0 - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): assert d["text"] == res[ind], ind ind += 1 @@ -80,7 +80,7 @@ def test_from_list(): data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"]) data = data.map(input_columns=["text"], operations=text.Lookup(vocab, unknown_token)) res = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): res.append(d["text"].item()) return res except (ValueError, RuntimeError, TypeError) as e: @@ -119,7 +119,7 @@ def test_from_file(): data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"]) data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "s2")) res = [] - for d in data.create_dict_iterator(): + for d in data.create_dict_iterator(num_epochs=1): res.append(d["text"].item()) return res except ValueError as e: diff --git a/tests/ut/python/dataset/test_zip.py 
b/tests/ut/python/dataset/test_zip.py index ebfab86aff1..69ea31cd340 100644 --- a/tests/ut/python/dataset/test_zip.py +++ b/tests/ut/python/dataset/test_zip.py @@ -138,7 +138,7 @@ def test_zip_exception_01(): dataz = ds.zip((data1, data1)) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator()): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) @@ -159,7 +159,7 @@ def test_zip_exception_02(): dataz = ds.zip((data1, data2)) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator()): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) @@ -180,7 +180,7 @@ def test_zip_exception_03(): dataz = dataz.repeat(2) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator()): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) @@ -200,7 +200,7 @@ def test_zip_exception_04(): dataz = dataz.repeat(2) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator()): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) @@ -221,7 +221,7 @@ def test_zip_exception_05(): dataz = ds.zip(data1, data2) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator()): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) @@ -241,7 +241,7 @@ def test_zip_exception_06(): dataz = ds.zip(data1) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator()): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) diff --git a/tests/ut/python/dataset/util.py b/tests/ut/python/dataset/util.py index 533b353d838..667350d5e59 100644 --- a/tests/ut/python/dataset/util.py +++ b/tests/ut/python/dataset/util.py @@ -88,7 +88,7 @@ def save_and_check_dict(data, filename, generate_golden=False): num_iter = 0 result_dict = {} - for item in data.create_dict_iterator(): # each data is a dictionary + for item in data.create_dict_iterator(num_epochs=1): # each data is a dictionary for data_key in list(item.keys()): if data_key not in result_dict: result_dict[data_key] = [] @@ -119,7 +119,7 @@ def save_and_check_md5(data, filename, generate_golden=False): num_iter = 0 result_dict = {} - for item in data.create_dict_iterator(): # each data is a dictionary + for item in data.create_dict_iterator(num_epochs=1): # each data is a dictionary for data_key in list(item.keys()): if data_key not in result_dict: result_dict[data_key] = [] @@ -146,7 +146,7 @@ def save_and_check_tuple(data, parameters, filename, generate_golden=False): num_iter = 0 result_dict = {} - for item in data.create_tuple_iterator(): # each data is a dictionary + for item in data.create_tuple_iterator(num_epochs=1): # each data is a dictionary for data_key, _ in 
enumerate(item): if data_key not in result_dict: result_dict[data_key] = [] @@ -393,7 +393,7 @@ def check_bad_bbox(data, test_op, invalid_bbox_type, expected_error): output_columns=["image", "bbox"], columns_order=["image", "bbox"], operations=[test_op]) # Add column for "bbox" - for _, _ in enumerate(data.create_dict_iterator()): + for _, _ in enumerate(data.create_dict_iterator(num_epochs=1)): break except RuntimeError as error: logger.info("Got an exception in DE: {}".format(str(error)))
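
Note (illustrative, not part of the patch): every hunk above applies the same pattern, passing num_epochs=1 to create_dict_iterator / create_tuple_iterator so each test drives its pipeline for exactly one epoch. A minimal sketch of that pattern under the same API as the tests above, using a hypothetical generator-based pipeline (gen and count_rows_once are illustration-only names, not helpers from the test suite):

    import numpy as np
    import mindspore.dataset as ds

    def gen(num):
        # yield one int64 scalar per row
        for i in range(num):
            yield (np.array([i], dtype=np.int64),)

    def count_rows_once(num=5):
        # build a one-column pipeline and iterate it exactly once;
        # num_epochs=1 limits the iterator to a single pass, which is
        # all these unit tests need
        data = ds.GeneratorDataset((lambda: gen(num)), ["num"])
        num_iter = 0
        for _ in data.create_dict_iterator(num_epochs=1):
            num_iter += 1
        return num_iter

    assert count_rows_once(5) == 5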