!5968 [MD] Update map calls to use the correct order for their kwargs

Merge pull request !5968 from nhussain/api_changes2
mindspore-ci-bot 2020-09-11 05:23:00 +08:00 committed by Gitee
commit 12b50bdcc4
154 changed files with 1355 additions and 1307 deletions
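Every hunk below applies the same mechanical change: `operations` moves to the front of the keyword arguments of `dataset.map()`, followed by `input_columns`, `output_columns`, `column_order`, and the remaining options such as `num_parallel_workers`. A minimal sketch of the before/after call pattern is shown here for orientation; the tiny generator dataset and the "label" column are illustrative only and are not taken from the changed files.

import numpy as np
import mindspore.common.dtype as mstype
import mindspore.dataset as de
import mindspore.dataset.transforms.c_transforms as C2


def gen():
    # Three toy samples with a single "label" column, just enough to exercise map().
    for i in range(3):
        yield (np.array(i, dtype=np.int64),)


ds = de.GeneratorDataset(gen, column_names=["label"])
type_cast_op = C2.TypeCast(mstype.int32)

# Old keyword order (before this PR): operations trails the column arguments.
# ds = ds.map(input_columns="label", operations=type_cast_op)

# New keyword order (after this PR): operations is the first keyword argument.
ds = ds.map(operations=type_cast_op, input_columns="label")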

View File

@@ -282,8 +282,9 @@ class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp):
>>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeCharTokenizer(True)
- >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
- >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+ >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+ >>> output_columns=["token", "offsets_start", "offsets_limit"],
+ >>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_with_offsets
@@ -313,8 +314,9 @@ class WordpieceTokenizer(cde.WordpieceTokenizerOp):
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token=['UNK'],
>>> max_bytes_per_token=100, with_offsets=True)
- >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
- >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+ >>> data = data.map(operations=tokenizer_op,
+ >>> input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
+ >>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_wordpiece_tokenizer
@@ -378,8 +380,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WhitespaceTokenizer(True)
- >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
- >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+ >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+ >>> output_columns=["token", "offsets_start", "offsets_limit"],
+ >>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_with_offsets
@@ -404,8 +407,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=True)
- >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
- >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+ >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+ >>> output_columns=["token", "offsets_start", "offsets_limit"],
+ >>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_unicode_script_tokenizer
@@ -497,8 +501,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True)
- >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
- >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+ >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+ >>> output_columns=["token", "offsets_start", "offsets_limit"],
+ >>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_regex_tokenizer
@@ -540,8 +545,9 @@ if platform.system().lower() != 'windows':
>>> normalization_form=NormalizeForm.NONE,
>>> preserve_unused_token=True,
>>> with_offsets=True)
- >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
- >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+ >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+ >>> output_columns=["token", "offsets_start", "offsets_limit"],
+ >>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_basic_tokenizer
@@ -593,8 +599,9 @@ if platform.system().lower() != 'windows':
>>> unknown_token=100, lower_case=False, keep_whitespace=False,
>>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
>>> with_offsets=True)
- >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
- >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+ >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+ >>> output_columns=["token", "offsets_start", "offsets_limit"],
+ >>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_bert_tokenizer

View File

@@ -39,14 +39,14 @@ def create_dataset_cifar10(data_path, batch_size=32, repeat_size=1, status="trai
random_horizontal_op = CV.RandomHorizontalFlip()
channel_swap_op = CV.HWC2CHW()
typecast_op = C.TypeCast(mstype.int32)
- cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op)
+ cifar_ds = cifar_ds.map(operations=typecast_op, input_columns="label")
if status == "train":
- cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op)
- cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op)
- cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op)
- cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op)
- cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op)
- cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op)
+ cifar_ds = cifar_ds.map(operations=random_crop_op, input_columns="image")
+ cifar_ds = cifar_ds.map(operations=random_horizontal_op, input_columns="image")
+ cifar_ds = cifar_ds.map(operations=resize_op, input_columns="image")
+ cifar_ds = cifar_ds.map(operations=rescale_op, input_columns="image")
+ cifar_ds = cifar_ds.map(operations=normalize_op, input_columns="image")
+ cifar_ds = cifar_ds.map(operations=channel_swap_op, input_columns="image")
cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)

View File

@@ -84,8 +84,9 @@ class SegDataset:
shuffle=True, num_parallel_workers=self.num_readers,
num_shards=self.shard_num, shard_id=self.shard_id)
transforms_list = self.preprocess_
- data_set = data_set.map(input_columns=["data", "label"], output_columns=["data", "label"],
- operations=transforms_list, num_parallel_workers=self.num_parallel_calls)
+ data_set = data_set.map(operations=transforms_list, input_columns=["data", "label"],
+ output_columns=["data", "label"],
+ num_parallel_workers=self.num_parallel_calls)
data_set = data_set.shuffle(buffer_size=self.batch_size * 10)
data_set = data_set.batch(self.batch_size, drop_remainder=True)
data_set = data_set.repeat(repeat)

View File

@@ -74,8 +74,10 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
ious = ious.T
return ious
class PhotoMetricDistortion:
"""Photo Metric Distortion"""
def __init__(self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
@@ -134,8 +136,10 @@ class PhotoMetricDistortion:
return img, boxes, labels
class Expand:
"""expand image"""
def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
if to_rgb:
self.mean = mean[::-1]
@@ -158,12 +162,13 @@ class Expand:
boxes += np.tile((left, top), 2)
return img, boxes, labels
def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""rescale operation for image"""
img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
if img_data.shape[0] > config.img_height:
img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_width), return_scale=True)
- scale_factor = scale_factor*scale_factor2
+ scale_factor = scale_factor * scale_factor2
img_shape = np.append(img_shape, scale_factor)
img_shape = np.asarray(img_shape, dtype=np.float32)
gt_bboxes = gt_bboxes * scale_factor
@@ -171,7 +176,8 @@ def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""resize operation for image"""
@@ -188,7 +194,8 @@ def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
"""resize operation for image of eval"""
@@ -205,7 +212,8 @@ def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""impad operation for image"""
@@ -213,12 +221,14 @@ def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num):
img_data = img_data.astype(np.float32)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""imnormalize operation for image"""
img_data = mmcv.imnormalize(img, [123.675, 116.28, 103.53], [58.395, 57.12, 57.375], True)
img_data = img_data.astype(np.float32)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""flip operation for image"""
img_data = img
@@ -229,7 +239,8 @@ def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1
flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1
return (img_data, img_shape, flipped, gt_label, gt_num)
def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num):
"""flipped generation"""
@@ -240,11 +251,13 @@ def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num):
flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1
flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1
return (img_data, img_shape, flipped, gt_label, gt_num)
def image_bgr_rgb(img, img_shape, gt_bboxes, gt_label, gt_num):
img_data = img[:, :, ::-1]
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""transpose operation for image"""
@@ -257,6 +270,7 @@ def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""photo crop operation for image"""
random_photo = PhotoMetricDistortion()
@@ -264,6 +278,7 @@ def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""expand operation for image"""
expand = Expand()
@@ -271,8 +286,10 @@ def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img, img_shape, gt_bboxes, gt_label, gt_num)
def preprocess_fn(image, box, is_training):
"""Preprocess function for dataset."""
def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert):
image_shape = image_shape[:2]
input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert
@@ -325,6 +342,7 @@ def preprocess_fn(image, box, is_training):
return _data_aug(image, box, is_training)
def create_coco_label(is_training):
"""Get image path and annotation from COCO."""
from pycocotools.coco import COCO
@@ -334,7 +352,7 @@ def create_coco_label(is_training):
if is_training:
data_type = config.train_data_type
- #Classes need to train or test.
+ # Classes need to train or test.
train_cls = config.coco_classes
train_cls_dict = {}
for i, cls in enumerate(train_cls):
@@ -375,6 +393,7 @@ def create_coco_label(is_training):
return image_files, image_anno_dict
def anno_parser(annos_str):
"""Parse annotation from string to list."""
annos = []
@@ -383,6 +402,7 @@ def anno_parser(annos_str):
annos.append(anno)
return annos
def filter_valid_data(image_dir, anno_path):
"""Filter valid image file, which both in image_dir and anno_path."""
image_files = []
@@ -404,6 +424,7 @@ def filter_valid_data(image_dir, anno_path):
image_files.append(image_path)
return image_files, image_anno_dict
def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8):
"""Create MindRecord file."""
mindrecord_dir = config.mindrecord_dir
@@ -435,7 +456,7 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank_id,
num_parallel_workers=1, shuffle=is_training)
decode = C.Decode()
- ds = ds.map(input_columns=["image"], operations=decode, num_parallel_workers=1)
+ ds = ds.map(operations=decode, input_columns=["image"], num_parallel_workers=1)
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))
hwc_to_chw = C.HWC2CHW()
@@ -447,38 +468,39 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
type_cast3 = CC.TypeCast(mstype.bool_)
if is_training:
- ds = ds.map(input_columns=["image", "annotation"],
+ ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
- operations=compose_map_func, num_parallel_workers=num_parallel_workers)
+ num_parallel_workers=num_parallel_workers)
flip = (np.random.rand() < config.flip_ratio)
if flip:
- ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0, horizontally_op],
+ ds = ds.map(operations=[normalize_op, type_cast0, horizontally_op], input_columns=["image"],
num_parallel_workers=12)
- ds = ds.map(input_columns=["image", "image_shape", "box", "label", "valid_num"],
- operations=flipped_generation, num_parallel_workers=num_parallel_workers)
+ ds = ds.map(operations=flipped_generation,
+ input_columns=["image", "image_shape", "box", "label", "valid_num"],
+ num_parallel_workers=num_parallel_workers)
else:
- ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0],
+ ds = ds.map(operations=[normalize_op, type_cast0], input_columns=["image"],
num_parallel_workers=12)
- ds = ds.map(input_columns=["image"], operations=[hwc_to_chw, type_cast1],
+ ds = ds.map(operations=[hwc_to_chw, type_cast1], input_columns=["image"],
num_parallel_workers=12)
else:
- ds = ds.map(input_columns=["image", "annotation"],
+ ds = ds.map(operations=compose_map_func,
+ input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
- operations=compose_map_func,
num_parallel_workers=num_parallel_workers)
- ds = ds.map(input_columns=["image"], operations=[normalize_op, hwc_to_chw, type_cast1],
+ ds = ds.map(operations=[normalize_op, hwc_to_chw, type_cast1], input_columns=["image"],
num_parallel_workers=24)
# transpose_column from python to c
- ds = ds.map(input_columns=["image_shape"], operations=[type_cast1])
- ds = ds.map(input_columns=["box"], operations=[type_cast1])
- ds = ds.map(input_columns=["label"], operations=[type_cast2])
- ds = ds.map(input_columns=["valid_num"], operations=[type_cast3])
+ ds = ds.map(operations=[type_cast1], input_columns=["image_shape"])
+ ds = ds.map(operations=[type_cast1], input_columns=["box"])
+ ds = ds.map(operations=[type_cast2], input_columns=["label"])
+ ds = ds.map(operations=[type_cast3], input_columns=["valid_num"])
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)

View File

@@ -55,8 +55,8 @@ def create_dataset_cifar10(data_home, repeat_num=1, training=True):
c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]
# apply map operations on images
- data_set = data_set.map(input_columns="label", operations=type_cast_op)
- data_set = data_set.map(input_columns="image", operations=c_trans)
+ data_set = data_set.map(operations=type_cast_op, input_columns="label")
+ data_set = data_set.map(operations=c_trans, input_columns="image")
# apply batch operations
data_set = data_set.batch(batch_size=cifar_cfg.batch_size, drop_remainder=True)

View File

@@ -60,8 +60,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
C.HWC2CHW()
]
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums)
- ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums)
# apply batch operations
ds = ds.batch(cfg.batch_size, drop_remainder=True)
# apply dataset repeat operation

View File

@@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
- mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
- mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
- mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
- mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
- mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+ mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
+ mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+ mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+ mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+ mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
buffer_size = 10000

View File

@@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
- mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
- mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
- mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
- mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
- mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+ mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
+ mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+ mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+ mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+ mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
buffer_size = 10000

View File

@@ -498,24 +498,24 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id
num_parallel_workers=4, shuffle=is_training)
decode = C.Decode()
- ds = ds.map(input_columns=["image"], operations=decode)
+ ds = ds.map(operations=decode, input_columns=["image"])
compose_map_func = (lambda image, annotation, mask, mask_shape:
preprocess_fn(image, annotation, mask, mask_shape, is_training))
if is_training:
- ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
+ ds = ds.map(operations=compose_map_func,
+ input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
- operations=compose_map_func,
python_multiprocessing=False,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
else:
- ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
+ ds = ds.map(operations=compose_map_func,
+ input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
- operations=compose_map_func,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)

View File

@@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, config, repeat_num=1):
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
- ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply shuffle operations
ds = ds.shuffle(buffer_size=buffer_size)

View File

@@ -89,8 +89,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=16)
- ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=16)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -152,7 +152,7 @@ def create_dataset_py(dataset_path, do_train, config, device_target, repeat_num=
compose = P2.Compose(trans)
- ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
+ ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

View File

@@ -70,8 +70,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
- ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply shuffle operations
ds = ds.shuffle(buffer_size=buffer_size)

View File

@@ -61,8 +61,8 @@ def create_dataset(dataset_path, config, do_train, repeat_num=1):
C.HWC2CHW()
]
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=config.work_nums)
- ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=config.work_nums)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=config.work_nums)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=config.work_nums)
# apply batch operations
ds = ds.batch(config.batch_size, drop_remainder=True)
# apply dataset repeat operation

View File

@@ -22,6 +22,7 @@ import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2
from mindspore.communication.management import init, get_rank, get_group_size
def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
"""
create a train or evaluate cifar10 dataset for resnet50
@@ -65,8 +66,8 @@ def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target=
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
- ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -126,8 +127,8 @@ def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target=
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
- ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -165,7 +166,7 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=
if do_train:
trans = [
C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
- C.RandomHorizontalFlip(rank_id/ (rank_id +1)),
+ C.RandomHorizontalFlip(rank_id / (rank_id + 1)),
C.Normalize(mean=mean, std=std),
C.HWC2CHW()
]
@@ -180,8 +181,8 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
- ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -190,6 +191,7 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=
return ds
def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
"""
create a train or eval imagenet2012 dataset for se-resnet50
@@ -233,8 +235,8 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target=
]
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="image", num_parallel_workers=12, operations=trans)
- ds = ds.map(input_columns="label", num_parallel_workers=12, operations=type_cast_op)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=12)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -244,6 +246,7 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target=
return ds
def _get_rank_info():
"""
get rank size and rank id

View File

@@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
- ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -147,7 +147,7 @@ def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, targe
trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]
compose = P2.Compose(trans)
- ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
+ ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

View File

@@ -75,8 +75,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="image", num_parallel_workers=num_parallels, operations=trans)
- ds = ds.map(input_columns="label", num_parallel_workers=num_parallels, operations=type_cast_op)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=num_parallels)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallels)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

View File

@@ -25,6 +25,7 @@ from src.utils.sampler import DistributedSampler
ImageFile.LOAD_TRUNCATED_IMAGES = True
class TxtDataset():
"""
create txt dataset.
@@ -33,6 +34,7 @@ class TxtDataset():
Returns:
de_dataset.
"""
def __init__(self, root, txt_name):
super(TxtDataset, self).__init__()
self.imgs = []
@@ -142,10 +144,10 @@ def classification_dataset(data_dir, image_size, per_batch_size, max_epoch, rank
sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)
- de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=num_parallel_workers,
- operations=transform_img)
- de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=num_parallel_workers,
- operations=transform_label)
+ de_dataset = de_dataset.map(operations=transform_img, input_columns="image",
+ num_parallel_workers=num_parallel_workers)
+ de_dataset = de_dataset.map(operations=transform_label, input_columns="label",
+ num_parallel_workers=num_parallel_workers)
columns_to_project = ["image", "label"]
de_dataset = de_dataset.project(columns=columns_to_project)

View File

@@ -72,8 +72,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
]
type_cast_op = C2.TypeCast(mstype.int32)
- ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums)
- ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
+ ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums)
+ ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums)
# apply batch operations
ds = ds.batch(cfg.batch_size, drop_remainder=True)

View File

@@ -34,13 +34,15 @@ def _rand(a=0., b=1.):
"""Generate random."""
return np.random.rand() * (b - a) + a
def get_imageId_from_fileName(filename):
"""Get imageID from fileName"""
try:
filename = os.path.splitext(filename)[0]
return int(filename)
except:
- raise NotImplementedError('Filename %s is supposed to be an integer.'%(filename))
+ raise NotImplementedError('Filename %s is supposed to be an integer.' % (filename))
def random_sample_crop(image, boxes):
"""Random Crop the image and boxes"""
@@ -64,7 +66,7 @@ def random_sample_crop(image, boxes):
left = _rand() * (width - w)
top = _rand() * (height - h)
- rect = np.array([int(top), int(left), int(top+h), int(left+w)])
+ rect = np.array([int(top), int(left), int(top + h), int(left + w)])
overlap = jaccard_numpy(boxes, rect)
# dropout some boxes
@@ -103,13 +105,14 @@ def random_sample_crop(image, boxes):
def preprocess_fn(img_id, image, box, is_training):
"""Preprocess function for dataset."""
def _infer_data(image, input_shape):
img_h, img_w, _ = image.shape
input_h, input_w = input_shape
image = cv2.resize(image, (input_w, input_h))
- #When the channels of image is 1
+ # When the channels of image is 1
if len(image.shape) == 2:
image = np.expand_dims(image, axis=-1)
image = np.concatenate([image, image, image], axis=-1)
@@ -150,6 +153,7 @@ def preprocess_fn(img_id, image, box, is_training):
box, label, num_match = ssd_bboxes_encode(box)
return image, box, label, num_match
return _data_aug(image, box, is_training, image_size=config.img_shape)
@@ -158,7 +162,7 @@ def create_voc_label(is_training):
voc_dir = config.voc_dir
cls_map = {name: i for i, name in enumerate(config.coco_classes)}
sub_dir = 'train' if is_training else 'eval'
- #sub_dir = 'train'
+ # sub_dir = 'train'
voc_dir = os.path.join(voc_dir, sub_dir)
if not os.path.isdir(voc_dir):
raise ValueError(f'Cannot find {sub_dir} dataset path.')
@@ -244,6 +248,7 @@ def create_voc_label(is_training):
return images, image_files_dict, image_anno_dict
def create_coco_label(is_training):
"""Get image path and annotation from COCO."""
from pycocotools.coco import COCO
@@ -253,7 +258,7 @@ def create_coco_label(is_training):
if is_training:
data_type = config.train_data_type
- #Classes need to train or test.
+ # Classes need to train or test.
train_cls = config.coco_classes
train_cls_dict = {}
for i, cls in enumerate(train_cls):
@@ -391,9 +396,10 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num
ds = de.MindDataset(mindrecord_file, columns_list=["img_id", "image", "annotation"], num_shards=device_num,
shard_id=rank, num_parallel_workers=num_parallel_workers, shuffle=is_training)
decode = C.Decode()
- ds = ds.map(input_columns=["image"], operations=decode)
+ ds = ds.map(operations=decode, input_columns=["image"])
change_swap_op = C.HWC2CHW()
- normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255])
+ normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
+ std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
color_adjust_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
compose_map_func = (lambda img_id, image, annotation: preprocess_fn(img_id, image, annotation, is_training))
if is_training:
@@ -402,11 +408,11 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num
else:
output_columns = ["img_id", "image", "image_shape"]
trans = [normalize_op, change_swap_op]
- ds = ds.map(input_columns=["img_id", "image", "annotation"],
+ ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"],
output_columns=output_columns, column_order=output_columns,
- operations=compose_map_func, python_multiprocessing=is_training,
+ python_multiprocessing=is_training,
num_parallel_workers=num_parallel_workers)
- ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training,
+ ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=is_training,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)

View File

@@ -54,8 +54,8 @@ def vgg_create_dataset(data_home, image_size, batch_size, rank_id=0, rank_size=1
changeswap_op]
# apply map operations on images
- data_set = data_set.map(input_columns="label", operations=type_cast_op)
- data_set = data_set.map(input_columns="image", operations=c_trans)
+ data_set = data_set.map(operations=type_cast_op, input_columns="label")
+ data_set = data_set.map(operations=c_trans, input_columns="image")
# apply repeat operations
data_set = data_set.repeat(repeat_num)
@@ -157,8 +157,8 @@ def classification_dataset(data_dir, image_size, per_batch_size, rank=0, group_s
sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)
- de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=8, operations=transform_img)
- de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=8, operations=transform_label)
+ de_dataset = de_dataset.map(operations=transform_img, input_columns="image", num_parallel_workers=8)
+ de_dataset = de_dataset.map(operations=transform_label, input_columns="label", num_parallel_workers=8)
columns_to_project = ["image", "label"]
de_dataset = de_dataset.project(columns=columns_to_project)

View File

@@ -90,8 +90,8 @@ def create_dataset(dataset_path, batch_size=1, num_shards=1, shard_id=0, device_
label_trans = [
c.TypeCast(mstype.int32)
]
- ds = ds.map(input_columns=["image"], num_parallel_workers=8, operations=image_trans)
- ds = ds.map(input_columns=["label"], num_parallel_workers=8, operations=label_trans)
+ ds = ds.map(operations=image_trans, input_columns=["image"], num_parallel_workers=8)
+ ds = ds.map(operations=label_trans, input_columns=["label"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)
return ds

View File

@@ -176,11 +176,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"],
sampler=distributed_sampler)
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
- ds = ds.map(input_columns=["image", "img_id"],
+ ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
- operations=compose_map_func, num_parallel_workers=8)
- ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
+ num_parallel_workers=8)
+ ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(max_epoch)

View File

@@ -173,11 +173,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"],
sampler=distributed_sampler)
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
- ds = ds.map(input_columns=["image", "img_id"],
+ ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
- operations=compose_map_func, num_parallel_workers=8)
- ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
+ num_parallel_workers=8)
+ ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(max_epoch)

View File

@@ -296,21 +296,21 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=1, device_num=
ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank,
num_parallel_workers=num_parallel_workers, shuffle=is_training)
decode = C.Decode()
- ds = ds.map(input_columns=["image"], operations=decode)
+ ds = ds.map(operations=decode, input_columns=["image"])
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))
if is_training:
hwc_to_chw = C.HWC2CHW()
- ds = ds.map(input_columns=["image", "annotation"],
+ ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
- operations=compose_map_func, num_parallel_workers=num_parallel_workers)
- ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers)
+ num_parallel_workers=num_parallel_workers)
+ ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
else:
- ds = ds.map(input_columns=["image", "annotation"],
+ ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "annotation"],
column_order=["image", "image_shape", "annotation"],
- operations=compose_map_func, num_parallel_workers=num_parallel_workers)
+ num_parallel_workers=num_parallel_workers)
return ds

View File

@@ -112,12 +112,12 @@ def get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''):
"masked_lm_weights",
"next_sentence_labels"])
type_cast_op = C.TypeCast(mstype.int32)
- ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
- ds = ds.map(input_columns="input_mask", operations=type_cast_op)
- ds = ds.map(input_columns="input_ids", operations=type_cast_op)
- ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
- ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
- ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+ ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
+ ds = ds.map(operations=type_cast_op, input_columns="input_mask")
+ ds = ds.map(operations=type_cast_op, input_columns="input_ids")
+ ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
+ ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
+ ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.repeat(repeat_count)
# apply batch operations

View File

@ -42,30 +42,31 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
usage=data_usage, shuffle=shuffle_dataset) usage=data_usage, shuffle=shuffle_dataset)
### Processing label ### Processing label
if data_usage == 'test': if data_usage == 'test':
dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"], dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"],
column_order=["id", "label_id", "sentence"], operations=ops.Duplicate()) column_order=["id", "label_id", "sentence"])
dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0)) dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"])
else: else:
label_vocab = text.Vocab.from_list(label_list) label_vocab = text.Vocab.from_list(label_list)
label_lookup = text.Lookup(label_vocab) label_lookup = text.Lookup(label_vocab)
dataset = dataset.map(input_columns="label_desc", output_columns="label_id", operations=label_lookup) dataset = dataset.map(operations=label_lookup, input_columns="label_desc", output_columns="label_id")
### Processing sentence ### Processing sentence
vocab = text.Vocab.from_file(bert_vocab_path) vocab = text.Vocab.from_file(bert_vocab_path)
tokenizer = text.BertTokenizer(vocab, lower_case=True) tokenizer = text.BertTokenizer(vocab, lower_case=True)
lookup = text.Lookup(vocab, unknown_token='[UNK]') lookup = text.Lookup(vocab, unknown_token='[UNK]')
dataset = dataset.map(input_columns=["sentence"], operations=tokenizer) dataset = dataset.map(operations=tokenizer, input_columns=["sentence"])
dataset = dataset.map(input_columns=["sentence"], operations=ops.Slice(slice(0, max_seq_len))) dataset = dataset.map(operations=ops.Slice(slice(0, max_seq_len)), input_columns=["sentence"])
dataset = dataset.map(input_columns=["sentence"], dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'), append=np.array(["[SEP]"], dtype='S')), input_columns=["sentence"])
append=np.array(["[SEP]"], dtype='S'))) dataset = dataset.map(operations=lookup, input_columns=["sentence"], output_columns=["text_ids"])
dataset = dataset.map(input_columns=["sentence"], output_columns=["text_ids"], operations=lookup) dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"])
dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0)) dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"], output_columns=["text_ids", "mask_ids"],
column_order=["text_ids", "mask_ids", "label_id"], operations=ops.Duplicate()) column_order=["text_ids", "mask_ids", "label_id"])
dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32)) dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"])
dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "segment_ids"], dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate()) output_columns=["text_ids", "segment_ids"],
dataset = dataset.map(input_columns=["segment_ids"], operations=ops.Fill(0)) column_order=["text_ids", "mask_ids", "segment_ids", "label_id"])
dataset = dataset.map(operations=ops.Fill(0), input_columns=["segment_ids"])
dataset = dataset.batch(batch_size, drop_remainder=drop_remainder) dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
return dataset return dataset
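
The Duplicate plus Mask pair above is what derives the 0/1 mask_ids column from text_ids. A minimal sketch of just that step follows, with one pre-tokenized toy row; the ops and mstype aliases and the toy ids are assumptions, not code from this file.

import numpy as np
import mindspore.dataset as de
import mindspore.dataset.transforms.c_transforms as ops
import mindspore.common.dtype as mstype

def toy_rows():
    yield (np.array([5, 7, 9, 0, 0], dtype=np.int32), np.array([1], dtype=np.int32))

dataset = de.GeneratorDataset(toy_rows, ["text_ids", "label_id"])
# duplicate text_ids into a new mask_ids column, then turn it into a 0/1 mask
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
                      output_columns=["text_ids", "mask_ids"],
                      column_order=["text_ids", "mask_ids", "label_id"])
dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32),
                      input_columns=["mask_ids"])
for row in dataset.create_dict_iterator():
    print(row["mask_ids"])  # 1 where text_ids is non-zero, 0 on the padding
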
@ -86,50 +87,51 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
usage=data_usage, shuffle=shuffle_dataset) usage=data_usage, shuffle=shuffle_dataset)
### Processing label ### Processing label
if data_usage == 'test': if data_usage == 'test':
dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"], dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"],
column_order=["id", "label_id", "sentence1", "sentence2"], operations=ops.Duplicate()) column_order=["id", "label_id", "sentence1", "sentence2"])
dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0)) dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"])
else: else:
label_vocab = text.Vocab.from_list(label_list) label_vocab = text.Vocab.from_list(label_list)
label_lookup = text.Lookup(label_vocab) label_lookup = text.Lookup(label_vocab)
dataset = dataset.map(input_columns="label", output_columns="label_id", operations=label_lookup) dataset = dataset.map(operations=label_lookup, input_columns="label", output_columns="label_id")
### Processing sentence pairs ### Processing sentence pairs
vocab = text.Vocab.from_file(bert_vocab_path) vocab = text.Vocab.from_file(bert_vocab_path)
tokenizer = text.BertTokenizer(vocab, lower_case=True) tokenizer = text.BertTokenizer(vocab, lower_case=True)
lookup = text.Lookup(vocab, unknown_token='[UNK]') lookup = text.Lookup(vocab, unknown_token='[UNK]')
### Tokenizing sentences and truncate sequence pair ### Tokenizing sentences and truncate sequence pair
dataset = dataset.map(input_columns=["sentence1"], operations=tokenizer) dataset = dataset.map(operations=tokenizer, input_columns=["sentence1"])
dataset = dataset.map(input_columns=["sentence2"], operations=tokenizer) dataset = dataset.map(operations=tokenizer, input_columns=["sentence2"])
dataset = dataset.map(input_columns=["sentence1", "sentence2"], dataset = dataset.map(operations=text.TruncateSequencePair(max_seq_len - 3),
operations=text.TruncateSequencePair(max_seq_len-3)) input_columns=["sentence1", "sentence2"])
### Adding special tokens ### Adding special tokens
dataset = dataset.map(input_columns=["sentence1"], dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'), append=np.array(["[SEP]"], dtype='S')),
append=np.array(["[SEP]"], dtype='S'))) input_columns=["sentence1"])
dataset = dataset.map(input_columns=["sentence2"], dataset = dataset.map(operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S')),
operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S'))) input_columns=["sentence2"])
### Generating segment_ids ### Generating segment_ids
dataset = dataset.map(input_columns=["sentence1"], output_columns=["sentence1", "type_sentence1"], dataset = dataset.map(operations=ops.Duplicate(), input_columns=["sentence1"],
column_order=["sentence1", "type_sentence1", "sentence2", "label_id"], output_columns=["sentence1", "type_sentence1"],
operations=ops.Duplicate()) column_order=["sentence1", "type_sentence1", "sentence2", "label_id"])
dataset = dataset.map(input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"], dataset = dataset.map(operations=ops.Duplicate(),
column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"], input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"],
operations=ops.Duplicate()) column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"])
dataset = dataset.map(input_columns=["type_sentence1"], operations=[lookup, ops.Fill(0)]) dataset = dataset.map(operations=[lookup, ops.Fill(0)], input_columns=["type_sentence1"])
dataset = dataset.map(input_columns=["type_sentence2"], operations=[lookup, ops.Fill(1)]) dataset = dataset.map(operations=[lookup, ops.Fill(1)], input_columns=["type_sentence2"])
dataset = dataset.map(input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"], dataset = dataset.map(operations=ops.Concatenate(),
column_order=["sentence1", "sentence2", "segment_ids", "label_id"], input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"],
operations=ops.Concatenate()) column_order=["sentence1", "sentence2", "segment_ids", "label_id"])
dataset = dataset.map(input_columns=["segment_ids"], operations=ops.PadEnd([max_seq_len], 0)) dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["segment_ids"])
### Generating text_ids ### Generating text_ids
dataset = dataset.map(input_columns=["sentence1", "sentence2"], output_columns=["text_ids"], dataset = dataset.map(operations=ops.Concatenate(),
column_order=["text_ids", "segment_ids", "label_id"], input_columns=["sentence1", "sentence2"], output_columns=["text_ids"],
operations=ops.Concatenate()) column_order=["text_ids", "segment_ids", "label_id"])
dataset = dataset.map(input_columns=["text_ids"], operations=lookup) dataset = dataset.map(operations=lookup, input_columns=["text_ids"])
dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0)) dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"])
### Generating mask_ids ### Generating mask_ids
dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"], dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate()) output_columns=["text_ids", "mask_ids"],
dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32)) column_order=["text_ids", "mask_ids", "segment_ids", "label_id"])
dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"])
dataset = dataset.batch(batch_size, drop_remainder=drop_remainder) dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
return dataset return dataset
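
Isolating the segment_ids construction from this hunk: each duplicated sentence is overwritten with 0 or 1 by Fill, the two parts are concatenated, and the result is padded to max_seq_len. The toy ids below stand in for tokenized sentences (vocab lookup omitted), and the ops alias is assumed.

import numpy as np
import mindspore.dataset as de
import mindspore.dataset.transforms.c_transforms as ops

max_seq_len = 8

def toy_pair():
    yield (np.array([2, 3, 4], dtype=np.int32), np.array([5, 6], dtype=np.int32))

dataset = de.GeneratorDataset(toy_pair, ["type_sentence1", "type_sentence2"])
dataset = dataset.map(operations=ops.Fill(0), input_columns=["type_sentence1"])
dataset = dataset.map(operations=ops.Fill(1), input_columns=["type_sentence2"])
dataset = dataset.map(operations=ops.Concatenate(),
                      input_columns=["type_sentence1", "type_sentence2"],
                      output_columns=["segment_ids"],
                      column_order=["segment_ids"])
dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["segment_ids"])
for row in dataset.create_dict_iterator():
    print(row["segment_ids"])  # [0 0 0 1 1 0 0 0]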


@ -39,12 +39,12 @@ def create_bert_dataset(device_num=1, rank=0, do_shuffle="true", data_dir=None,
ori_dataset_size = ds.get_dataset_size() ori_dataset_size = ds.get_dataset_size()
print('origin dataset size: ', ori_dataset_size) print('origin dataset size: ', ori_dataset_size)
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations # apply batch operations
ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True) ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
logger.info("data size: {}".format(ds.get_dataset_size())) logger.info("data size: {}".format(ds.get_dataset_size()))
@ -60,12 +60,12 @@ def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy
columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle) columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle)
if assessment_method == "Spearman_correlation": if assessment_method == "Spearman_correlation":
type_cast_op_float = C.TypeCast(mstype.float32) type_cast_op_float = C.TypeCast(mstype.float32)
ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
else: else:
ds = ds.map(input_columns="label_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="label_ids")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count) ds = ds.repeat(repeat_count)
# apply batch operations # apply batch operations
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)
@ -80,12 +80,12 @@ def create_classification_dataset(batch_size=1, repeat_count=1, assessment_metho
columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle) columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle)
if assessment_method == "Spearman_correlation": if assessment_method == "Spearman_correlation":
type_cast_op_float = C.TypeCast(mstype.float32) type_cast_op_float = C.TypeCast(mstype.float32)
ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
else: else:
ds = ds.map(input_columns="label_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="label_ids")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count) ds = ds.repeat(repeat_count)
# apply batch operations # apply batch operations
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)
@ -101,14 +101,14 @@ def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, sche
columns_list=["input_ids", "input_mask", "segment_ids", "start_positions", columns_list=["input_ids", "input_mask", "segment_ids", "start_positions",
"end_positions", "unique_ids", "is_impossible"], "end_positions", "unique_ids", "is_impossible"],
shuffle=do_shuffle) shuffle=do_shuffle)
ds = ds.map(input_columns="start_positions", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="start_positions")
ds = ds.map(input_columns="end_positions", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="end_positions")
else: else:
ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None,
columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"]) columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"])
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count) ds = ds.repeat(repeat_count)
# apply batch operations # apply batch operations
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)


@ -117,12 +117,12 @@ def get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''):
"masked_lm_weights", "masked_lm_weights",
"next_sentence_labels"]) "next_sentence_labels"])
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.repeat(repeat_count) ds = ds.repeat(repeat_count)
# apply batch operations # apply batch operations


@ -40,12 +40,12 @@ def create_bert_dataset(device_num=1, rank=0, do_shuffle="true", data_dir=None,
ori_dataset_size = ds.get_dataset_size() ori_dataset_size = ds.get_dataset_size()
print('origin dataset size: ', ori_dataset_size) print('origin dataset size: ', ori_dataset_size)
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations # apply batch operations
ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True) ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
logger.info("data size: {}".format(ds.get_dataset_size())) logger.info("data size: {}".format(ds.get_dataset_size()))
@ -61,12 +61,12 @@ def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy
columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"]) columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"])
if assessment_method == "Spearman_correlation": if assessment_method == "Spearman_correlation":
type_cast_op_float = C.TypeCast(mstype.float32) type_cast_op_float = C.TypeCast(mstype.float32)
ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
else: else:
ds = ds.map(input_columns="label_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="label_ids")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count) ds = ds.repeat(repeat_count)
# apply shuffle operation # apply shuffle operation
buffer_size = 960 buffer_size = 960
@ -84,12 +84,12 @@ def create_classification_dataset(batch_size=1, repeat_count=1, assessment_metho
columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"]) columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"])
if assessment_method == "Spearman_correlation": if assessment_method == "Spearman_correlation":
type_cast_op_float = C.TypeCast(mstype.float32) type_cast_op_float = C.TypeCast(mstype.float32)
ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
else: else:
ds = ds.map(input_columns="label_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="label_ids")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count) ds = ds.repeat(repeat_count)
# apply shuffle operation # apply shuffle operation
buffer_size = 960 buffer_size = 960
@ -107,17 +107,17 @@ def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, sche
columns_list=["input_ids", "input_mask", "segment_ids", columns_list=["input_ids", "input_mask", "segment_ids",
"start_positions", "end_positions", "start_positions", "end_positions",
"unique_ids", "is_impossible"]) "unique_ids", "is_impossible"])
ds = ds.map(input_columns="start_positions", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="start_positions")
ds = ds.map(input_columns="end_positions", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="end_positions")
else: else:
ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None,
columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"]) columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"])
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count) ds = ds.repeat(repeat_count)
# apply shuffle operation # apply shuffle operation
buffer_size = 960 buffer_size = 960


@ -60,12 +60,12 @@ def _load_dataset(input_files, batch_size, epoch_count=1,
repeat_count = epoch_count repeat_count = epoch_count
type_cast_op = deC.TypeCast(mstype.int32) type_cast_op = deC.TypeCast(mstype.int32)
ds = ds.map(input_columns="src", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="src")
ds = ds.map(input_columns="src_padding", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="src_padding")
ds = ds.map(input_columns="prev_opt", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="prev_opt")
ds = ds.map(input_columns="prev_padding", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="prev_padding")
ds = ds.map(input_columns="target", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="target")
ds = ds.map(input_columns="tgt_padding", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="tgt_padding")
ds = ds.rename( ds = ds.rename(
input_columns=["src", input_columns=["src",


@ -49,11 +49,11 @@ def create_tinybert_dataset(task='td', batch_size=32, device_num=1, rank=0,
shard_equal_rows=True) shard_equal_rows=True)
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
if task == "td": if task == "td":
ds = ds.map(input_columns="label_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="label_ids")
# apply batch operations # apply batch operations
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)


@ -40,12 +40,12 @@ def load_test_data(batch_size=1, data_file=None):
"target_eos_ids", "target_eos_mask"], "target_eos_ids", "target_eos_mask"],
shuffle=False) shuffle=False)
type_cast_op = deC.TypeCast(mstype.int32) type_cast_op = deC.TypeCast(mstype.int32)
ds = ds.map(input_columns="source_eos_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="source_eos_ids")
ds = ds.map(input_columns="source_eos_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="source_eos_mask")
ds = ds.map(input_columns="target_sos_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="target_sos_ids")
ds = ds.map(input_columns="target_sos_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="target_sos_mask")
ds = ds.map(input_columns="target_eos_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="target_eos_ids")
ds = ds.map(input_columns="target_eos_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="target_eos_mask")
# apply batch operations # apply batch operations
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)
ds.channel_name = 'transformer' ds.channel_name = 'transformer'


@ -30,12 +30,12 @@ def create_transformer_dataset(epoch_count=1, rank_size=1, rank_id=0, do_shuffle
shuffle=(do_shuffle == "true"), num_shards=rank_size, shard_id=rank_id) shuffle=(do_shuffle == "true"), num_shards=rank_size, shard_id=rank_id)
type_cast_op = deC.TypeCast(mstype.int32) type_cast_op = deC.TypeCast(mstype.int32)
ds = ds.map(input_columns="source_eos_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="source_eos_ids")
ds = ds.map(input_columns="source_eos_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="source_eos_mask")
ds = ds.map(input_columns="target_sos_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="target_sos_ids")
ds = ds.map(input_columns="target_sos_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="target_sos_mask")
ds = ds.map(input_columns="target_eos_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="target_eos_ids")
ds = ds.map(input_columns="target_eos_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="target_eos_mask")
# apply batch operations # apply batch operations
ds = ds.batch(transformer_net_cfg.batch_size, drop_remainder=True) ds = ds.batch(transformer_net_cfg.batch_size, drop_remainder=True)


@ -89,8 +89,8 @@ def create_dataset(repeat_num=1, training=True):
changeswap_op] changeswap_op]
# apply map operations on images # apply map operations on images
ds = ds.map(input_columns="label", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="label")
ds = ds.map(input_columns="image", operations=c_trans) ds = ds.map(operations=c_trans, input_columns="image")
# apply repeat operations # apply repeat operations
ds = ds.repeat(repeat_num) ds = ds.repeat(repeat_num)
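
In this hunk c_trans is a Python list of vision transforms, and passing the list as operations applies them left to right inside one map call. A hedged sketch on a synthetic uint8 image follows; the Resize/Rescale/HWC2CHW chain and the module aliases are illustrative, not the exact list from this file.

import numpy as np
import mindspore.dataset as de
import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2
import mindspore.common.dtype as mstype

def toy_images():
    for i in range(4):
        yield (np.random.randint(0, 255, (32, 32, 3), dtype=np.uint8), np.array([i], dtype=np.int64))

data_set = de.GeneratorDataset(toy_images, ["image", "label"])
c_trans = [C.Resize((24, 24)), C.Rescale(1.0 / 255.0, 0.0), C.HWC2CHW()]
type_cast_op = C2.TypeCast(mstype.int32)
# apply map operations on images
data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(operations=c_trans, input_columns="image")
data_set = data_set.repeat(1)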


@ -89,8 +89,8 @@ def create_dataset(repeat_num=1, training=True):
changeswap_op] changeswap_op]
# apply map operations on images # apply map operations on images
ds = ds.map(input_columns="label", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="label")
ds = ds.map(input_columns="image", operations=c_trans) ds = ds.map(operations=c_trans, input_columns="image")
# apply repeat operations # apply repeat operations
ds = ds.repeat(repeat_num) ds = ds.repeat(repeat_num)


@ -298,21 +298,21 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num
ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank, ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank,
num_parallel_workers=num_parallel_workers, shuffle=False) num_parallel_workers=num_parallel_workers, shuffle=False)
decode = C.Decode() decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode) ds = ds.map(operations=decode, input_columns=["image"])
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training)) compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))
if is_training: if is_training:
hwc_to_chw = C.HWC2CHW() hwc_to_chw = C.HWC2CHW()
ds = ds.map(input_columns=["image", "annotation"], ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers) num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num) ds = ds.repeat(repeat_num)
else: else:
ds = ds.map(input_columns=["image", "annotation"], ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "annotation"], output_columns=["image", "image_shape", "annotation"],
column_order=["image", "image_shape", "annotation"], column_order=["image", "image_shape", "annotation"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers) num_parallel_workers=num_parallel_workers)
return ds return ds


@ -102,12 +102,12 @@ def me_de_train_dataset(sink_mode=False):
if sink_mode: if sink_mode:
sink_size = 100 sink_size = 100
new_repeat_count = 3 new_repeat_count = 3
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations # apply batch operations
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)
logger.info("data size: {}".format(ds.get_dataset_size())) logger.info("data size: {}".format(ds.get_dataset_size()))


@ -102,12 +102,12 @@ def me_de_train_dataset(sink_mode=False):
if sink_mode: if sink_mode:
sink_size = 100 sink_size = 100
new_repeat_count = 3 new_repeat_count = 3
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations # apply batch operations
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)
logger.info("data size: {}".format(ds.get_dataset_size())) logger.info("data size: {}".format(ds.get_dataset_size()))


@ -41,12 +41,12 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", d
print('origin dataset size: ', ori_dataset_size) print('origin dataset size: ', ori_dataset_size)
new_repeat_count = int(repeat_count * ori_dataset_size // ds.get_dataset_size()) new_repeat_count = int(repeat_count * ori_dataset_size // ds.get_dataset_size())
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations # apply batch operations
ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True) ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
ds = ds.repeat(max(new_repeat_count, repeat_count)) ds = ds.repeat(max(new_repeat_count, repeat_count))


@ -92,12 +92,12 @@ def me_de_train_dataset():
"next_sentence_labels", "masked_lm_positions", "next_sentence_labels", "masked_lm_positions",
"masked_lm_ids", "masked_lm_weights"], shuffle=False) "masked_lm_ids", "masked_lm_weights"], shuffle=False)
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(input_columns="segment_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(input_columns="input_ids", operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations # apply batch operations
batch_size = int(os.getenv('BATCH_SIZE', '16')) batch_size = int(os.getenv('BATCH_SIZE', '16'))
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)


@ -97,10 +97,10 @@ def create_dataset(args, data_url, epoch_num=1, batch_size=1, usage="train", shu
# wrapped with GeneratorDataset # wrapped with GeneratorDataset
dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=None) dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=None)
dataset = dataset.map(input_columns=["image", "label"], operations=DataTransform(args, usage=usage)) dataset = dataset.map(operations=DataTransform(args, usage=usage), input_columns=["image", "label"])
channelswap_op = C.HWC2CHW() channelswap_op = C.HWC2CHW()
dataset = dataset.map(input_columns="image", operations=channelswap_op) dataset = dataset.map(operations=channelswap_op, input_columns="image")
# 1464 samples / batch_size 8 = 183 batches # 1464 samples / batch_size 8 = 183 batches
# epoch_num is num of steps # epoch_num is num of steps
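
DataTransform above is a callable object that receives one argument per input column, which is all map requires of a Python operation. A toy stand-in (not the real DataTransform, and with synthetic data) keeps the same keyword-first call:

import numpy as np
import mindspore.dataset as de

class ToyTransform:
    """Stand-in for DataTransform: takes (image, label), returns the transformed pair."""
    def __call__(self, image, label):
        return image.astype(np.float32) / 255.0, label.astype(np.int32)

def toy_rows():
    for i in range(4):
        yield (np.full((8, 8, 3), i, dtype=np.uint8), np.array([i], dtype=np.int64))

dataset = de.GeneratorDataset(toy_rows, ["image", "label"])
dataset = dataset.map(operations=ToyTransform(), input_columns=["image", "label"])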


@ -68,8 +68,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
type_cast_op = C2.TypeCast(mstype.int32) type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations # apply batch operations
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)


@ -71,8 +71,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
type_cast_op = C2.TypeCast(mstype.int32) type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations # apply batch operations
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)


@ -171,11 +171,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images # apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps # apply DatasetOps
buffer_size = 10000 buffer_size = 10000
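
The MNIST pipelines in these hunks apply each vision op in its own map call, with num_parallel_workers passed after the column name. A compact sketch of that shape on synthetic 28x28 images; the CV/C aliases, the toy resize size and the omission of the normalization rescale are assumptions.

import numpy as np
import mindspore.dataset as de
import mindspore.dataset.vision.c_transforms as CV
import mindspore.dataset.transforms.c_transforms as C
import mindspore.common.dtype as mstype

num_parallel_workers = 1

def toy_mnist():
    for i in range(4):
        yield (np.zeros((28, 28, 1), dtype=np.uint8), np.array([i], dtype=np.int64))

mnist_ds = de.GeneratorDataset(toy_mnist, ["image", "label"])
resize_op = CV.Resize((32, 32))
rescale_op = CV.Rescale(1.0 / 255.0, 0.0)
hwc2chw_op = CV.HWC2CHW()
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.batch(2, drop_remainder=True)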


@ -47,11 +47,11 @@ def test_me_de_train_dataset():
rescale_op = vision.Rescale(rescale, shift) rescale_op = vision.Rescale(rescale, shift)
# apply map operations on images # apply map operations on images
data_set_new = data_set_new.map(input_columns="image/encoded", operations=decode_op) data_set_new = data_set_new.map(operations=decode_op, input_columns="image/encoded")
data_set_new = data_set_new.map(input_columns="image/encoded", operations=resize_op) data_set_new = data_set_new.map(operations=resize_op, input_columns="image/encoded")
data_set_new = data_set_new.map(input_columns="image/encoded", operations=rescale_op) data_set_new = data_set_new.map(operations=rescale_op, input_columns="image/encoded")
hwc2chw_op = vision.HWC2CHW() hwc2chw_op = vision.HWC2CHW()
data_set_new = data_set_new.map(input_columns="image/encoded", operations=hwc2chw_op) data_set_new = data_set_new.map(operations=hwc2chw_op, input_columns="image/encoded")
data_set_new = data_set_new.repeat(1) data_set_new = data_set_new.repeat(1)
# apply batch operations # apply batch operations
batch_size_new = 32 batch_size_new = 32


@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images # apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps # apply DatasetOps
buffer_size = 10000 buffer_size = 10000


@ -87,9 +87,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
hwc2chw_op = CV.HWC2CHW() hwc2chw_op = CV.HWC2CHW()
# apply map operations on images # apply map operations on images
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps # apply DatasetOps
mnist_ds = mnist_ds.batch(batch_size) mnist_ds = mnist_ds.batch(batch_size)


@ -77,9 +77,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
hwc2chw_op = CV.HWC2CHW() hwc2chw_op = CV.HWC2CHW()
# apply map operations on images # apply map operations on images
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps # apply DatasetOps
mnist_ds = mnist_ds.batch(batch_size) mnist_ds = mnist_ds.batch(batch_size)


@ -145,9 +145,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
hwc2chw_op = CV.HWC2CHW() hwc2chw_op = CV.HWC2CHW()
# apply map operations on images # apply map operations on images
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps # apply DatasetOps
mnist_ds = mnist_ds.batch(batch_size) mnist_ds = mnist_ds.batch(batch_size)


@ -98,11 +98,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images # apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps # apply DatasetOps
buffer_size = 10000 buffer_size = 10000


@ -107,11 +107,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images # apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps # apply DatasetOps
buffer_size = 10000 buffer_size = 10000


@ -351,8 +351,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32):
changeswap_op] changeswap_op]
# apply map operations on images # apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op) data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(input_columns="image", operations=c_trans) data_set = data_set.map(operations=c_trans, input_columns="image")
# apply shuffle operations # apply shuffle operations
data_set = data_set.shuffle(buffer_size=1000) data_set = data_set.shuffle(buffer_size=1000)


@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images # apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps # apply DatasetOps
buffer_size = 10000 buffer_size = 10000


@ -114,11 +114,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers
type_cast_op = C.TypeCast(mstype.int32) type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images # apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps # apply DatasetOps
mnist_ds = mnist_ds.shuffle(buffer_size=10000) # 10000 as in LeNet train script mnist_ds = mnist_ds.shuffle(buffer_size=10000) # 10000 as in LeNet train script


@ -90,8 +90,8 @@ def create_dataset(repeat_num=1, training=True):
changeswap_op] changeswap_op]
# apply map operations on images # apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op) data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(input_columns="image", operations=c_trans) data_set = data_set.map(operations=c_trans, input_columns="image")
# apply repeat operations # apply repeat operations
data_set = data_set.repeat(repeat_num) data_set = data_set.repeat(repeat_num)


@ -68,8 +68,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32):
changeswap_op] changeswap_op]
# apply map operations on images # apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op) data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(input_columns="image", operations=c_trans) data_set = data_set.map(operations=c_trans, input_columns="image")
# apply shuffle operations # apply shuffle operations
data_set = data_set.shuffle(buffer_size=1000) data_set = data_set.shuffle(buffer_size=1000)


@ -79,8 +79,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32, rank_id=0, rank_s
changeswap_op] changeswap_op]
# apply map operations on images # apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op) data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(input_columns="image", operations=c_trans) data_set = data_set.map(operations=c_trans, input_columns="image")
# apply shuffle operations # apply shuffle operations
data_set = data_set.shuffle(buffer_size=1000) data_set = data_set.shuffle(buffer_size=1000)


@ -29,7 +29,7 @@ def test_case_0():
# apply dataset operations # apply dataset operations
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
ds1 = ds1.map(input_columns=col, output_columns="out", operations=(lambda x: x + x)) ds1 = ds1.map(operations=(lambda x: x + x), input_columns=col, output_columns="out")
print("************** Output Tensor *****************") print("************** Output Tensor *****************")
for data in ds1.create_dict_iterator(): # each data is a dictionary for data in ds1.create_dict_iterator(): # each data is a dictionary
@ -49,7 +49,7 @@ def test_case_1():
# apply dataset operations # apply dataset operations
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1"], operations=(lambda x: (x, x + x))) ds1 = ds1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"])
print("************** Output Tensor *****************") print("************** Output Tensor *****************")
for data in ds1.create_dict_iterator(): # each data is a dictionary for data in ds1.create_dict_iterator(): # each data is a dictionary
@ -72,7 +72,7 @@ def test_case_2():
# apply dataset operations # apply dataset operations
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
ds1 = ds1.map(input_columns=col, output_columns="out", operations=(lambda x, y: x + y)) ds1 = ds1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out")
print("************** Output Tensor *****************") print("************** Output Tensor *****************")
for data in ds1.create_dict_iterator(): # each data is a dictionary for data in ds1.create_dict_iterator(): # each data is a dictionary
@ -93,8 +93,8 @@ def test_case_3():
# apply dataset operations # apply dataset operations
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1", "out2"], ds1 = ds1.map(operations=(lambda x, y: (x, x + y, x + x + y)), input_columns=col,
operations=(lambda x, y: (x, x + y, x + x + y))) output_columns=["out0", "out1", "out2"])
print("************** Output Tensor *****************") print("************** Output Tensor *****************")
for data in ds1.create_dict_iterator(): # each data is a dictionary for data in ds1.create_dict_iterator(): # each data is a dictionary
@ -119,8 +119,8 @@ def test_case_4():
# apply dataset operations # apply dataset operations
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1", "out2"], num_parallel_workers=4, ds1 = ds1.map(operations=(lambda x, y: (x, x + y, x + x + y)), input_columns=col,
operations=(lambda x, y: (x, x + y, x + x + y))) output_columns=["out0", "out1", "out2"], num_parallel_workers=4)
print("************** Output Tensor *****************") print("************** Output Tensor *****************")
for data in ds1.create_dict_iterator(): # each data is a dictionary for data in ds1.create_dict_iterator(): # each data is a dictionary
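
These test cases pass plain lambdas as operations; when the lambda consumes every input column and returns a new tuple, output_columns simply names the results. A standalone sketch of the two-in, three-out case from test_case_4, with a toy generator in place of the TFRecord files:

import numpy as np
import mindspore.dataset as ds

def toy_cols():
    for i in range(4):
        yield (np.array([i], dtype=np.int32), np.array([i * 10], dtype=np.int32))

ds1 = ds.GeneratorDataset(toy_cols, ["col0", "col1"])
ds1 = ds1.map(operations=(lambda x, y: (x, x + y, x + x + y)), input_columns=["col0", "col1"],
              output_columns=["out0", "out1", "out2"], num_parallel_workers=4)
for data in ds1.create_dict_iterator():  # each data is a dictionary
    print(data["out0"], data["out1"], data["out2"])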


@ -39,12 +39,12 @@ def test_HWC2CHW(plot=False):
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
hwc2chw_op = c_vision.HWC2CHW() hwc2chw_op = c_vision.HWC2CHW()
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=hwc2chw_op) data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op) data2 = data2.map(operations=decode_op, input_columns=["image"])
image_transposed = [] image_transposed = []
image = [] image = []
@ -72,8 +72,8 @@ def test_HWC2CHW_md5():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
hwc2chw_op = c_vision.HWC2CHW() hwc2chw_op = c_vision.HWC2CHW()
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=hwc2chw_op) data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "HWC2CHW_01_result.npz" filename = "HWC2CHW_01_result.npz"
@ -90,8 +90,8 @@ def test_HWC2CHW_comp(plot=False):
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
hwc2chw_op = c_vision.HWC2CHW() hwc2chw_op = c_vision.HWC2CHW()
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=hwc2chw_op) data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -101,7 +101,7 @@ def test_HWC2CHW_comp(plot=False):
py_vision.HWC2CHW() py_vision.HWC2CHW()
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data2 = data2.map(input_columns=["image"], operations=transform) data2 = data2.map(operations=transform, input_columns=["image"])
image_c_transposed = [] image_c_transposed = []
image_py_transposed = [] image_py_transposed = []
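The three HWC2CHW tests differ only in how they verify the result (direct comparison, md5, and a py_transforms pipeline); the map() calls follow one pattern. A condensed runnable sketch of that pattern on synthetic arrays (an illustration; the real tests decode JPEGs from TFRecord files first, and the import below is an assumed match for the c_vision alias used above):

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision  # assumed import behind the c_vision alias

images = [np.random.randint(0, 255, size=(32, 48, 3), dtype=np.uint8) for _ in range(3)]
data = ds.NumpySlicesDataset(images, column_names=["image"], shuffle=False)
# operations leads; HWC2CHW turns each (32, 48, 3) image into (3, 32, 48).
data = data.map(operations=c_vision.HWC2CHW(), input_columns=["image"])
for row in data.create_dict_iterator(num_epochs=1):
    print(row["image"].shape)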
View File
@ -42,8 +42,7 @@ def test_auto_contrast_py(plot=False):
F.Resize((224, 224)), F.Resize((224, 224)),
F.ToTensor()]) F.ToTensor()])
ds_original = ds.map(input_columns="image", ds_original = ds.map(operations=transforms_original, input_columns="image")
operations=transforms_original)
ds_original = ds_original.batch(512) ds_original = ds_original.batch(512)
@ -64,8 +63,7 @@ def test_auto_contrast_py(plot=False):
F.AutoContrast(cutoff=10.0, ignore=[10, 20]), F.AutoContrast(cutoff=10.0, ignore=[10, 20]),
F.ToTensor()]) F.ToTensor()])
ds_auto_contrast = ds.map(input_columns="image", ds_auto_contrast = ds.map(operations=transforms_auto_contrast, input_columns="image")
operations=transforms_auto_contrast)
ds_auto_contrast = ds_auto_contrast.batch(512) ds_auto_contrast = ds_auto_contrast.batch(512)
@ -99,17 +97,14 @@ def test_auto_contrast_c(plot=False):
# AutoContrast Images # AutoContrast Images
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
operations=[C.Decode(),
C.Resize((224, 224))])
python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20]) python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20])
c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20]) c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20])
transforms_op = mindspore.dataset.transforms.py_transforms.Compose([lambda img: F.ToPIL()(img.astype(np.uint8)), transforms_op = mindspore.dataset.transforms.py_transforms.Compose([lambda img: F.ToPIL()(img.astype(np.uint8)),
python_op, python_op,
np.array]) np.array])
ds_auto_contrast_py = ds.map(input_columns="image", ds_auto_contrast_py = ds.map(operations=transforms_op, input_columns="image")
operations=transforms_op)
ds_auto_contrast_py = ds_auto_contrast_py.batch(512) ds_auto_contrast_py = ds_auto_contrast_py.batch(512)
@ -122,12 +117,9 @@ def test_auto_contrast_c(plot=False):
axis=0) axis=0)
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
operations=[C.Decode(),
C.Resize((224, 224))])
ds_auto_contrast_c = ds.map(input_columns="image", ds_auto_contrast_c = ds.map(operations=c_op, input_columns="image")
operations=c_op)
ds_auto_contrast_c = ds_auto_contrast_c.batch(512) ds_auto_contrast_c = ds_auto_contrast_c.batch(512)
@ -162,9 +154,7 @@ def test_auto_contrast_one_channel_c(plot=False):
# AutoContrast Images # AutoContrast Images
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
operations=[C.Decode(),
C.Resize((224, 224))])
python_op = F.AutoContrast() python_op = F.AutoContrast()
c_op = C.AutoContrast() c_op = C.AutoContrast()
# not using F.ToTensor() since it converts to floats # not using F.ToTensor() since it converts to floats
@ -174,8 +164,7 @@ def test_auto_contrast_one_channel_c(plot=False):
python_op, python_op,
np.array]) np.array])
ds_auto_contrast_py = ds.map(input_columns="image", ds_auto_contrast_py = ds.map(operations=transforms_op, input_columns="image")
operations=transforms_op)
ds_auto_contrast_py = ds_auto_contrast_py.batch(512) ds_auto_contrast_py = ds_auto_contrast_py.batch(512)
@ -188,13 +177,10 @@ def test_auto_contrast_one_channel_c(plot=False):
axis=0) axis=0)
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])],
operations=[C.Decode(), input_columns=["image"])
C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])])
ds_auto_contrast_c = ds.map(input_columns="image", ds_auto_contrast_c = ds.map(operations=c_op, input_columns="image")
operations=c_op)
ds_auto_contrast_c = ds_auto_contrast_c.batch(512) ds_auto_contrast_c = ds_auto_contrast_c.batch(512)
@ -223,8 +209,7 @@ def test_auto_contrast_mnist_c(plot=False):
""" """
logger.info("Test AutoContrast C Op With MNIST Images") logger.info("Test AutoContrast C Op With MNIST Images")
ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
ds_auto_contrast_c = ds.map(input_columns="image", ds_auto_contrast_c = ds.map(operations=C.AutoContrast(cutoff=1, ignore=(0, 255)), input_columns="image")
operations=C.AutoContrast(cutoff=1, ignore=(0, 255)))
ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
images = [] images = []
@ -252,25 +237,20 @@ def test_auto_contrast_invalid_ignore_param_c():
logger.info("Test AutoContrast C Op with invalid ignore parameter") logger.info("Test AutoContrast C Op with invalid ignore parameter")
try: try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(),
operations=[C.Decode(),
C.Resize((224, 224)), C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])]) lambda img: np.array(img[:, :, 0])], input_columns=["image"])
# invalid ignore # invalid ignore
ds = ds.map(input_columns="image", ds = ds.map(operations=C.AutoContrast(ignore=255.5), input_columns="image")
operations=C.AutoContrast(ignore=255.5))
except TypeError as error: except TypeError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Argument ignore with value 255.5 is not of type" in str(error) assert "Argument ignore with value 255.5 is not of type" in str(error)
try: try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)),
operations=[C.Decode(), lambda img: np.array(img[:, :, 0])], input_columns=["image"])
C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])])
# invalid ignore # invalid ignore
ds = ds.map(input_columns="image", ds = ds.map(operations=C.AutoContrast(ignore=(10, 100)), input_columns="image")
operations=C.AutoContrast(ignore=(10, 100)))
except TypeError as error: except TypeError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Argument ignore with value (10,100) is not of type" in str(error) assert "Argument ignore with value (10,100) is not of type" in str(error)
@ -283,25 +263,21 @@ def test_auto_contrast_invalid_cutoff_param_c():
logger.info("Test AutoContrast C Op with invalid cutoff parameter") logger.info("Test AutoContrast C Op with invalid cutoff parameter")
try: try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(),
operations=[C.Decode(),
C.Resize((224, 224)), C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])]) lambda img: np.array(img[:, :, 0])], input_columns=["image"])
# invalid ignore # invalid ignore
ds = ds.map(input_columns="image", ds = ds.map(operations=C.AutoContrast(cutoff=-10.0), input_columns="image")
operations=C.AutoContrast(cutoff=-10.0))
except ValueError as error: except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
try: try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(),
operations=[C.Decode(),
C.Resize((224, 224)), C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])]) lambda img: np.array(img[:, :, 0])], input_columns=["image"])
# invalid ignore # invalid ignore
ds = ds.map(input_columns="image", ds = ds.map(operations=C.AutoContrast(cutoff=120.0), input_columns="image")
operations=C.AutoContrast(cutoff=120.0))
except ValueError as error: except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
@ -314,21 +290,21 @@ def test_auto_contrast_invalid_ignore_param_py():
logger.info("Test AutoContrast python Op with invalid ignore parameter") logger.info("Test AutoContrast python Op with invalid ignore parameter")
try: try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.AutoContrast(ignore=255.5), F.AutoContrast(ignore=255.5),
F.ToTensor()])]) F.ToTensor()])],
input_columns=["image"])
except TypeError as error: except TypeError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Argument ignore with value 255.5 is not of type" in str(error) assert "Argument ignore with value 255.5 is not of type" in str(error)
try: try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.AutoContrast(ignore=(10, 100)), F.AutoContrast(ignore=(10, 100)),
F.ToTensor()])]) F.ToTensor()])],
input_columns=["image"])
except TypeError as error: except TypeError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Argument ignore with value (10,100) is not of type" in str(error) assert "Argument ignore with value (10,100) is not of type" in str(error)
@ -341,21 +317,22 @@ def test_auto_contrast_invalid_cutoff_param_py():
logger.info("Test AutoContrast python Op with invalid cutoff parameter") logger.info("Test AutoContrast python Op with invalid cutoff parameter")
try: try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.AutoContrast(cutoff=-10.0), F.AutoContrast(cutoff=-10.0),
F.ToTensor()])]) F.ToTensor()])],
input_columns=["image"])
except ValueError as error: except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
try: try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.AutoContrast(cutoff=120.0), F.AutoContrast(cutoff=120.0),
F.ToTensor()])]) F.ToTensor()])],
input_columns=["image"])
except ValueError as error: except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
View File
@ -49,10 +49,9 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False):
test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1) test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1)
# map to apply ops # map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
column_order=["image", "bbox"], column_order=["image", "bbox"])
operations=[test_op])
filename = "bounding_box_augment_rotation_c_result.npz" filename = "bounding_box_augment_rotation_c_result.npz"
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@ -88,10 +87,9 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False):
test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9) test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9)
# map to apply ops # map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
column_order=["image", "bbox"], column_order=["image", "bbox"])
operations=[test_op])
filename = "bounding_box_augment_crop_c_result.npz" filename = "bounding_box_augment_crop_c_result.npz"
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@ -126,10 +124,9 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False):
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9) test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9)
# map to apply ops # map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
column_order=["image", "bbox"], column_order=["image", "bbox"]) # Add column for "bbox"
operations=[test_op]) # Add column for "bbox"
filename = "bounding_box_augment_valid_ratio_c_result.npz" filename = "bounding_box_augment_valid_ratio_c_result.npz"
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@ -163,10 +160,9 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False):
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1) test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1)
dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
column_order=["image", "bbox"], column_order=["image", "bbox"])
operations=[test_op])
unaugSamp, augSamp = [], [] unaugSamp, augSamp = [], []
@ -195,20 +191,19 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False):
# map to apply ops # map to apply ops
# Add column for "bbox" # Add column for "bbox"
dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"],
                        output_columns=["image", "bbox"],
                        column_order=["image", "bbox"],
                        operations=lambda img, bbox:
                        (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)))
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
                        output_columns=["image", "bbox"],
                        column_order=["image", "bbox"],
                        operations=lambda img, bbox:
                        (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)))
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
                        output_columns=["image", "bbox"],
                        column_order=["image", "bbox"],
                        operations=[test_op])
dataVoc1 = dataVoc1.map(
    operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)),
    input_columns=["image", "bbox"],
    output_columns=["image", "bbox"],
    column_order=["image", "bbox"])
dataVoc2 = dataVoc2.map(
    operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)),
    input_columns=["image", "bbox"],
    output_columns=["image", "bbox"],
    column_order=["image", "bbox"])
dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
                        output_columns=["image", "bbox"],
                        column_order=["image", "bbox"])
filename = "bounding_box_augment_valid_edge_c_result.npz" filename = "bounding_box_augment_valid_edge_c_result.npz"
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@ -238,10 +233,9 @@ def test_bounding_box_augment_invalid_ratio_c():
# ratio range is from 0 - 1 # ratio range is from 0 - 1
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1.5) test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1.5)
# map to apply ops # map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
column_order=["image", "bbox"], column_order=["image", "bbox"]) # Add column for "bbox"
operations=[test_op]) # Add column for "bbox"
except ValueError as error: except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input ratio is not within the required interval of (0.0 to 1.0)." in str(error) assert "Input ratio is not within the required interval of (0.0 to 1.0)." in str(error)
View File
@ -25,7 +25,7 @@ def test_compose():
def test_config(arr, op_list): def test_config(arr, op_list):
try: try:
data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
data = data.map(input_columns=["col"], operations=ops.Compose(op_list)) data = data.map(operations=ops.Compose(op_list), input_columns=["col"])
res = [] res = []
for i in data.create_dict_iterator(num_epochs=1): for i in data.create_dict_iterator(num_epochs=1):
res.append(i["col"].tolist()) res.append(i["col"].tolist())
View File
@ -24,7 +24,7 @@ def test_random_apply():
def test_config(arr, op_list, prob=0.5): def test_config(arr, op_list, prob=0.5):
try: try:
data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
data = data.map(input_columns=["col"], operations=ops.RandomApply(op_list, prob)) data = data.map(operations=ops.RandomApply(op_list, prob), input_columns=["col"])
res = [] res = []
for i in data.create_dict_iterator(num_epochs=1): for i in data.create_dict_iterator(num_epochs=1):
res.append(i["col"].tolist()) res.append(i["col"].tolist())
View File
@ -48,7 +48,7 @@ def test_cache_map_basic1():
# This DATA_DIR only has 2 images in it # This DATA_DIR only has 2 images in it
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache) ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op) ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
filename = "cache_map_01_result.npz" filename = "cache_map_01_result.npz"
@ -77,7 +77,7 @@ def test_cache_map_basic2():
# This DATA_DIR only has 2 images in it # This DATA_DIR only has 2 images in it
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR) ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
filename = "cache_map_02_result.npz" filename = "cache_map_02_result.npz"
@ -107,7 +107,7 @@ def test_cache_map_basic3():
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR) ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
logger.info("ds1.dataset_size is ", ds1.get_dataset_size()) logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
num_iter = 0 num_iter = 0
@ -131,7 +131,7 @@ def test_cache_map_basic4():
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache) ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
ds1 = ds1.map(input_columns=["image"], operations=decode_op) ds1 = ds1.map(operations=decode_op, input_columns=["image"])
logger.info("ds1.dataset_size is ", ds1.get_dataset_size()) logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
shape = ds1.output_shapes() shape = ds1.output_shapes()
logger.info(shape) logger.info(shape)
@ -167,7 +167,7 @@ def test_cache_map_failure1():
# This DATA_DIR only has 2 images in it # This DATA_DIR only has 2 images in it
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache) ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
try: try:
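In the cache variants only the position of the cache keyword changes relative to operations. A sketch of the map-level cache (heavily assumption-laden: DatasetCache needs a running cache server with an existing session, and the dataset path below is a placeholder):

import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision  # assumed import behind the c_vision alias

some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)  # session 1 must already exist on the server
data = ds.ImageFolderDataset(dataset_dir="path/to/images")  # hypothetical folder
# Decoded images are cached and reused across the repeated passes below.
data = data.map(operations=c_vision.Decode(), input_columns=["image"], cache=some_cache)
data = data.repeat(4)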
View File
@ -108,7 +108,7 @@ def test_cache_nomap_basic3():
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False, cache=some_cache) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False, cache=some_cache)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op) ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
num_iter = 0 num_iter = 0
@ -160,7 +160,7 @@ def test_cache_nomap_basic4():
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
num_iter = 0 num_iter = 0
@ -197,7 +197,7 @@ def test_cache_nomap_basic5():
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], cache=some_cache) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], cache=some_cache)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op) ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
num_iter = 0 num_iter = 0
@ -237,7 +237,7 @@ def test_cache_nomap_basic6():
# there was not any cache. # there was not any cache.
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_shards=3, shard_id=1, cache=some_cache) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_shards=3, shard_id=1, cache=some_cache)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op) ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
num_iter = 0 num_iter = 0
@ -273,7 +273,7 @@ def test_cache_nomap_basic7():
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL, cache=some_cache) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL, cache=some_cache)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op) ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
num_iter = 0 num_iter = 0
@ -343,11 +343,11 @@ def test_cache_nomap_allowed_share2():
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache) ds2 = ds2.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds2 = ds2.shuffle(buffer_size=2) ds2 = ds2.shuffle(buffer_size=2)
num_iter = 0 num_iter = 0
@ -418,10 +418,10 @@ def test_cache_nomap_allowed_share4():
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache, num_parallel_workers=1) ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache, num_parallel_workers=1)
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache, num_parallel_workers=2) ds2 = ds2.map(operations=decode_op, input_columns=["image"], cache=some_cache, num_parallel_workers=2)
num_iter = 0 num_iter = 0
for _ in ds1.create_dict_iterator(num_epochs=1): for _ in ds1.create_dict_iterator(num_epochs=1):
@ -458,10 +458,10 @@ def test_cache_nomap_disallowed_share1():
rescale_op = c_vision.Rescale(1.0 / 255.0, -1.0) rescale_op = c_vision.Rescale(1.0 / 255.0, -1.0)
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds2 = ds2.map(input_columns=["image"], operations=rescale_op, cache=some_cache) ds2 = ds2.map(operations=rescale_op, input_columns=["image"], cache=some_cache)
num_iter = 0 num_iter = 0
for _ in ds1.create_dict_iterator(num_epochs=1): for _ in ds1.create_dict_iterator(num_epochs=1):
View File
@ -40,12 +40,12 @@ def test_center_crop_op(height=375, width=375, plot=False):
decode_op = vision.Decode() decode_op = vision.Decode()
# 3 images [375, 500] [600, 500] [512, 512] # 3 images [375, 500] [600, 500] [512, 512]
center_crop_op = vision.CenterCrop([height, width]) center_crop_op = vision.CenterCrop([height, width])
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=center_crop_op) data1 = data1.map(operations=center_crop_op, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"]) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
data2 = data2.map(input_columns=["image"], operations=decode_op) data2 = data2.map(operations=decode_op, input_columns=["image"])
image_cropped = [] image_cropped = []
image = [] image = []
@ -67,8 +67,8 @@ def test_center_crop_md5(height=375, width=375):
decode_op = vision.Decode() decode_op = vision.Decode()
# 3 images [375, 500] [600, 500] [512, 512] # 3 images [375, 500] [600, 500] [512, 512]
center_crop_op = vision.CenterCrop([height, width]) center_crop_op = vision.CenterCrop([height, width])
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=center_crop_op) data1 = data1.map(operations=center_crop_op, input_columns=["image"])
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "center_crop_01_result.npz" filename = "center_crop_01_result.npz"
save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
@ -84,8 +84,8 @@ def test_center_crop_comp(height=375, width=375, plot=False):
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = vision.Decode() decode_op = vision.Decode()
center_crop_op = vision.CenterCrop([height, width]) center_crop_op = vision.CenterCrop([height, width])
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=center_crop_op) data1 = data1.map(operations=center_crop_op, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -95,7 +95,7 @@ def test_center_crop_comp(height=375, width=375, plot=False):
py_vision.ToTensor() py_vision.ToTensor()
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data2 = data2.map(input_columns=["image"], operations=transform) data2 = data2.map(operations=transform, input_columns=["image"])
image_c_cropped = [] image_c_cropped = []
image_py_cropped = [] image_py_cropped = []
@ -126,11 +126,11 @@ def test_crop_grayscale(height=375, width=375):
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=transform) data1 = data1.map(operations=transform, input_columns=["image"])
# If input is grayscale, the output dimensions should be single channel # If input is grayscale, the output dimensions should be single channel
crop_gray = vision.CenterCrop([height, width]) crop_gray = vision.CenterCrop([height, width])
data1 = data1.map(input_columns=["image"], operations=crop_gray) data1 = data1.map(operations=crop_gray, input_columns=["image"])
for item1 in data1.create_dict_iterator(num_epochs=1): for item1 in data1.create_dict_iterator(num_epochs=1):
c_image = item1["image"] c_image = item1["image"]
View File
@ -121,7 +121,7 @@ def test_concat_05():
data2 = ds.GeneratorDataset(generator_10, ["col1"]) data2 = ds.GeneratorDataset(generator_10, ["col1"])
type_cast_op = C.TypeCast(mstype.float32) type_cast_op = C.TypeCast(mstype.float32)
data1 = data1.map(input_columns=["col1"], operations=type_cast_op) data1 = data1.map(operations=type_cast_op, input_columns=["col1"])
data3 = data1 + data2 data3 = data1 + data2
@ -319,8 +319,8 @@ def test_concat_14():
F.Resize((224, 224)), F.Resize((224, 224)),
F.ToTensor()]) F.ToTensor()])
data1 = data1.map(input_columns=["image"], operations=transforms1) data1 = data1.map(operations=transforms1, input_columns=["image"])
data2 = data2.map(input_columns=["image"], operations=transforms1) data2 = data2.map(operations=transforms1, input_columns=["image"])
data3 = data1 + data2 data3 = data1 + data2
expected, output = [], [] expected, output = [], []
View File
@ -31,7 +31,7 @@ def test_concatenate_op_all():
append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float) append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float)
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor) concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)
data = data.map(input_columns=["col"], operations=concatenate_op) data = data.map(operations=concatenate_op, input_columns=["col"])
expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3, expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3,
11., 12.]) 11., 12.])
for data_row in data: for data_row in data:
@ -45,7 +45,7 @@ def test_concatenate_op_none():
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate() concatenate_op = data_trans.Concatenate()
data = data.map(input_columns=["col"], operations=concatenate_op) data = data.map(operations=concatenate_op, input_columns=["col"])
for data_row in data: for data_row in data:
np.testing.assert_array_equal(data_row[0], np.array([5., 6., 7., 8.], dtype=np.float)) np.testing.assert_array_equal(data_row[0], np.array([5., 6., 7., 8.], dtype=np.float))
@ -59,7 +59,7 @@ def test_concatenate_op_string():
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor) concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)
data = data.map(input_columns=["col"], operations=concatenate_op) data = data.map(operations=concatenate_op, input_columns=["col"])
expected = np.array(["dw", "df", "ss", "ad", "dwsdf", "df"], dtype='S') expected = np.array(["dw", "df", "ss", "ad", "dwsdf", "df"], dtype='S')
for data_row in data: for data_row in data:
np.testing.assert_array_equal(data_row[0], expected) np.testing.assert_array_equal(data_row[0], expected)
@ -74,8 +74,8 @@ def test_concatenate_op_multi_input_string():
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor, append=append_tensor) concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor, append=append_tensor)
data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"], data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
operations=concatenate_op) output_columns=["out1"])
expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S') expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S')
for data_row in data: for data_row in data:
np.testing.assert_array_equal(data_row[0], expected) np.testing.assert_array_equal(data_row[0], expected)
@ -89,8 +89,8 @@ def test_concatenate_op_multi_input_numeric():
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor) concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor)
data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"], data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
operations=concatenate_op) output_columns=["out1"])
expected = np.array([3, 5, 1, 2, 3, 4]) expected = np.array([3, 5, 1, 2, 3, 4])
for data_row in data: for data_row in data:
np.testing.assert_array_equal(data_row[0], expected) np.testing.assert_array_equal(data_row[0], expected)
@ -104,7 +104,7 @@ def test_concatenate_op_type_mismatch():
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate(0, prepend_tensor) concatenate_op = data_trans.Concatenate(0, prepend_tensor)
data = data.map(input_columns=["col"], operations=concatenate_op) data = data.map(operations=concatenate_op, input_columns=["col"])
with pytest.raises(RuntimeError) as error_info: with pytest.raises(RuntimeError) as error_info:
for _ in data: for _ in data:
pass pass
@ -119,7 +119,7 @@ def test_concatenate_op_type_mismatch2():
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate(0, prepend_tensor) concatenate_op = data_trans.Concatenate(0, prepend_tensor)
data = data.map(input_columns=["col"], operations=concatenate_op) data = data.map(operations=concatenate_op, input_columns=["col"])
with pytest.raises(RuntimeError) as error_info: with pytest.raises(RuntimeError) as error_info:
for _ in data: for _ in data:
pass pass
@ -134,7 +134,7 @@ def test_concatenate_op_incorrect_dim():
concatenate_op = data_trans.Concatenate(0, prepend_tensor) concatenate_op = data_trans.Concatenate(0, prepend_tensor)
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
data = data.map(input_columns=["col"], operations=concatenate_op) data = data.map(operations=concatenate_op, input_columns=["col"])
with pytest.raises(RuntimeError) as error_info: with pytest.raises(RuntimeError) as error_info:
for _ in data: for _ in data:
pass pass
@ -155,7 +155,7 @@ def test_concatenate_op_negative_axis():
append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float) append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float)
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate(-1, prepend_tensor, append_tensor) concatenate_op = data_trans.Concatenate(-1, prepend_tensor, append_tensor)
data = data.map(input_columns=["col"], operations=concatenate_op) data = data.map(operations=concatenate_op, input_columns=["col"])
expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3, expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3,
11., 12.]) 11., 12.])
for data_row in data: for data_row in data:
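Concatenate is a per-row tensor op, so these hunks only touch keyword order. A runnable sketch mirroring the first case, with an assumed one-row generator standing in for the gen fixture defined elsewhere in that file:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as data_trans  # assumed alias

def gen():
    yield (np.array([5., 6., 7., 8.], dtype=np.float64),)

prepend_tensor = np.array([1.4, 2., 3., 4., 4.5], dtype=np.float64)
append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float64)
data = ds.GeneratorDataset(gen, column_names=["col"])
data = data.map(operations=data_trans.Concatenate(0, prepend_tensor, append_tensor),
                input_columns=["col"])
for row in data:
    print(row[0])  # prepend values, then the original row, then append values, joined along axis 0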
View File
@ -86,12 +86,12 @@ def test_pipeline():
num_parallel_workers_original = ds.config.get_num_parallel_workers() num_parallel_workers_original = ds.config.get_num_parallel_workers()
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
data1 = data1.map(input_columns=["image"], operations=[c_vision.Decode(True)]) data1 = data1.map(operations=[c_vision.Decode(True)], input_columns=["image"])
ds.serialize(data1, "testpipeline.json") ds.serialize(data1, "testpipeline.json")
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, num_parallel_workers=num_parallel_workers_original, data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, num_parallel_workers=num_parallel_workers_original,
shuffle=False) shuffle=False)
data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode(True)]) data2 = data2.map(operations=[c_vision.Decode(True)], input_columns=["image"])
ds.serialize(data2, "testpipeline2.json") ds.serialize(data2, "testpipeline2.json")
# check that the generated output is different # check that the generated output is different
@ -131,14 +131,14 @@ def test_deterministic_run_fail():
# outputs a deterministic series of numbers, e,g "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random # outputs a deterministic series of numbers, e,g "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200]) random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=random_crop_op) data1 = data1.map(operations=random_crop_op, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op) data2 = data2.map(operations=decode_op, input_columns=["image"])
# If seed is set up on constructor # If seed is set up on constructor
data2 = data2.map(input_columns=["image"], operations=random_crop_op) data2 = data2.map(operations=random_crop_op, input_columns=["image"])
try: try:
dataset_equal(data1, data2, 0) dataset_equal(data1, data2, 0)
@ -171,16 +171,16 @@ def test_seed_undeterministic():
# We get the seed when constructor is called # We get the seed when constructor is called
random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200]) random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=random_crop_op) data1 = data1.map(operations=random_crop_op, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op) data2 = data2.map(operations=decode_op, input_columns=["image"])
# Since seed is set up on constructor, so the two ops output deterministic sequence. # Since seed is set up on constructor, so the two ops output deterministic sequence.
# Assume the generated random sequence "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random # Assume the generated random sequence "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200]) random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
data2 = data2.map(input_columns=["image"], operations=random_crop_op2) data2 = data2.map(operations=random_crop_op2, input_columns=["image"])
try: try:
dataset_equal(data1, data2, 0) dataset_equal(data1, data2, 0)
except Exception as e: except Exception as e:
@ -211,15 +211,15 @@ def test_seed_deterministic():
# seed will be read in during constructor call # seed will be read in during constructor call
random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200]) random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=random_crop_op) data1 = data1.map(operations=random_crop_op, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op) data2 = data2.map(operations=decode_op, input_columns=["image"])
# If seed is set up on constructor, so the two ops output deterministic sequence # If seed is set up on constructor, so the two ops output deterministic sequence
random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200]) random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
data2 = data2.map(input_columns=["image"], operations=random_crop_op2) data2 = data2.map(operations=random_crop_op2, input_columns=["image"])
dataset_equal(data1, data2, 0) dataset_equal(data1, data2, 0)
@ -246,15 +246,15 @@ def test_deterministic_run_distribution():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
random_horizontal_flip_op = c_vision.RandomHorizontalFlip(0.1) random_horizontal_flip_op = c_vision.RandomHorizontalFlip(0.1)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=random_horizontal_flip_op) data1 = data1.map(operations=random_horizontal_flip_op, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op) data2 = data2.map(operations=decode_op, input_columns=["image"])
# If seed is set up on constructor, so the two ops output deterministic sequence # If seed is set up on constructor, so the two ops output deterministic sequence
random_horizontal_flip_op2 = c_vision.RandomHorizontalFlip(0.1) random_horizontal_flip_op2 = c_vision.RandomHorizontalFlip(0.1)
data2 = data2.map(input_columns=["image"], operations=random_horizontal_flip_op2) data2 = data2.map(operations=random_horizontal_flip_op2, input_columns=["image"])
dataset_equal(data1, data2, 0) dataset_equal(data1, data2, 0)
@ -285,7 +285,7 @@ def test_deterministic_python_seed():
py_vision.ToTensor(), py_vision.ToTensor(),
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform) data1 = data1.map(operations=transform, input_columns=["image"])
data1_output = [] data1_output = []
# config.set_seed() calls random.seed() # config.set_seed() calls random.seed()
for data_one in data1.create_dict_iterator(num_epochs=1): for data_one in data1.create_dict_iterator(num_epochs=1):
@ -293,7 +293,7 @@ def test_deterministic_python_seed():
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=transform) data2 = data2.map(operations=transform, input_columns=["image"])
# config.set_seed() calls random.seed(), resets seed for next dataset iterator # config.set_seed() calls random.seed(), resets seed for next dataset iterator
ds.config.set_seed(0) ds.config.set_seed(0)
@ -328,7 +328,7 @@ def test_deterministic_python_seed_multi_thread():
py_vision.ToTensor(), py_vision.ToTensor(),
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform, python_multiprocessing=True) data1 = data1.map(operations=transform, input_columns=["image"], python_multiprocessing=True)
data1_output = [] data1_output = []
# config.set_seed() calls random.seed() # config.set_seed() calls random.seed()
for data_one in data1.create_dict_iterator(num_epochs=1): for data_one in data1.create_dict_iterator(num_epochs=1):
@ -337,7 +337,7 @@ def test_deterministic_python_seed_multi_thread():
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
# If seed is set up on constructor # If seed is set up on constructor
data2 = data2.map(input_columns=["image"], operations=transform, python_multiprocessing=True) data2 = data2.map(operations=transform, input_columns=["image"], python_multiprocessing=True)
# config.set_seed() calls random.seed() # config.set_seed() calls random.seed()
ds.config.set_seed(0) ds.config.set_seed(0)
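These cases rely on one contract: random C ops read their seed from ds.config when they are constructed, so resetting the seed and rebuilding an identical pipeline reproduces the output. A rough self-contained sketch of that behaviour on synthetic data (an illustration, not part of the patch):

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision  # assumed import behind the c_vision alias

image = np.random.randint(0, 255, size=(600, 600, 3), dtype=np.uint8)

def cropped_rows():
    data = ds.NumpySlicesDataset([image], column_names=["image"], shuffle=False)
    data = data.map(operations=c_vision.RandomCrop([512, 512], [200, 200, 200, 200]),
                    input_columns=["image"])
    return [row["image"] for row in data.create_dict_iterator(num_epochs=1)]

ds.config.set_seed(0)
first = cropped_rows()
ds.config.set_seed(0)   # same seed, same freshly built pipeline
second = cropped_rows()
np.testing.assert_array_equal(first[0], second[0])  # identical crops expected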
View File
@ -30,6 +30,7 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
GENERATE_GOLDEN = False GENERATE_GOLDEN = False
def test_cut_out_op(plot=False): def test_cut_out_op(plot=False):
""" """
Test Cutout Test Cutout
@ -45,7 +46,7 @@ def test_cut_out_op(plot=False):
f.RandomErasing(value='random') f.RandomErasing(value='random')
] ]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1) data1 = data1.map(operations=transform_1, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -57,7 +58,7 @@ def test_cut_out_op(plot=False):
cut_out_op cut_out_op
] ]
data2 = data2.map(input_columns=["image"], operations=transforms_2) data2 = data2.map(operations=transforms_2, input_columns=["image"])
num_iter = 0 num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
@ -91,7 +92,7 @@ def test_cut_out_op_multicut(plot=False):
f.ToTensor(), f.ToTensor(),
] ]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1) data1 = data1.map(operations=transform_1, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -103,7 +104,7 @@ def test_cut_out_op_multicut(plot=False):
cut_out_op cut_out_op
] ]
data2 = data2.map(input_columns=["image"], operations=transforms_2) data2 = data2.map(operations=transforms_2, input_columns=["image"])
num_iter = 0 num_iter = 0
image_list_1, image_list_2 = [], [] image_list_1, image_list_2 = [], []
@ -136,8 +137,8 @@ def test_cut_out_md5():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = c.Decode() decode_op = c.Decode()
cut_out_op = c.CutOut(100) cut_out_op = c.CutOut(100)
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=cut_out_op) data1 = data1.map(operations=cut_out_op, input_columns=["image"])
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
transforms = [ transforms = [
@ -146,7 +147,7 @@ def test_cut_out_md5():
f.Cutout(100) f.Cutout(100)
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data2 = data2.map(input_columns=["image"], operations=transform) data2 = data2.map(operations=transform, input_columns=["image"])
# Compare with expected md5 from images # Compare with expected md5 from images
filename1 = "cut_out_01_c_result.npz" filename1 = "cut_out_01_c_result.npz"
@ -174,7 +175,7 @@ def test_cut_out_comp(plot=False):
f.Cutout(200) f.Cutout(200)
] ]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1) data1 = data1.map(operations=transform_1, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -184,7 +185,7 @@ def test_cut_out_comp(plot=False):
c.CutOut(200) c.CutOut(200)
] ]
data2 = data2.map(input_columns=["image"], operations=transforms_2) data2 = data2.map(operations=transforms_2, input_columns=["image"])
num_iter = 0 num_iter = 0
image_list_1, image_list_2 = [], [] image_list_1, image_list_2 = [], []
View File
@ -51,12 +51,12 @@ def test_cutmix_batch_success1(plot=False):
# CutMix Images # CutMix Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
hwc2chw_op = vision.HWC2CHW() hwc2chw_op = vision.HWC2CHW()
data1 = data1.map(input_columns=["image"], operations=hwc2chw_op) data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW, 2.0, 0.5) cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW, 2.0, 0.5)
data1 = data1.batch(5, drop_remainder=True) data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
images_cutmix = None images_cutmix = None
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
@ -94,12 +94,12 @@ def test_cutmix_batch_success2(plot=False):
# CutMix Images # CutMix Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
rescale_op = vision.Rescale((1.0/255.0), 0.0) rescale_op = vision.Rescale((1.0 / 255.0), 0.0)
data1 = data1.map(input_columns=["image"], operations=rescale_op) data1 = data1.map(operations=rescale_op, input_columns=["image"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(5, drop_remainder=True) data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
images_cutmix = None images_cutmix = None
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
@ -125,7 +125,7 @@ def test_cutmix_batch_success3(plot=False):
ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
decode_op = vision.Decode() decode_op = vision.Decode()
ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True) ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)
images_original = None images_original = None
@ -139,14 +139,14 @@ def test_cutmix_batch_success3(plot=False):
data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
decode_op = vision.Decode() decode_op = vision.Decode()
data1 = data1.map(input_columns=["image"], operations=[decode_op]) data1 = data1.map(operations=[decode_op], input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(4, pad_info={}, drop_remainder=True) data1 = data1.batch(4, pad_info={}, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
images_cutmix = None images_cutmix = None
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
@ -172,7 +172,7 @@ def test_cutmix_batch_success4(plot=False):
ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False) ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
decode_op = vision.Decode() decode_op = vision.Decode()
ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
ds_original = ds_original.batch(2, drop_remainder=True) ds_original = ds_original.batch(2, drop_remainder=True)
images_original = None images_original = None
@ -186,14 +186,14 @@ def test_cutmix_batch_success4(plot=False):
data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False) data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False)
decode_op = vision.Decode() decode_op = vision.Decode()
data1 = data1.map(input_columns=["image"], operations=[decode_op]) data1 = data1.map(operations=[decode_op], input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=100) one_hot_op = data_trans.OneHot(num_classes=100)
data1 = data1.map(input_columns=["attr"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["attr"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.5, 0.9) cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.5, 0.9)
data1 = data1.batch(2, drop_remainder=True) data1 = data1.batch(2, drop_remainder=True)
data1 = data1.map(input_columns=["image", "attr"], operations=cutmix_batch_op) data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "attr"])
images_cutmix = None images_cutmix = None
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
@ -223,10 +223,10 @@ def test_cutmix_batch_nhwc_md5():
data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data = data.map(input_columns=["label"], operations=one_hot_op) data = data.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data = data.batch(5, drop_remainder=True) data = data.batch(5, drop_remainder=True)
data = data.map(input_columns=["image", "label"], operations=cutmix_batch_op) data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"])
filename = "cutmix_batch_c_nhwc_result.npz" filename = "cutmix_batch_c_nhwc_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@ -247,12 +247,12 @@ def test_cutmix_batch_nchw_md5():
# CutMixBatch Images # CutMixBatch Images
data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
hwc2chw_op = vision.HWC2CHW() hwc2chw_op = vision.HWC2CHW()
data = data.map(input_columns=["image"], operations=hwc2chw_op) data = data.map(operations=hwc2chw_op, input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data = data.map(input_columns=["label"], operations=one_hot_op) data = data.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW) cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
data = data.batch(5, drop_remainder=True) data = data.batch(5, drop_remainder=True)
data = data.map(input_columns=["image", "label"], operations=cutmix_batch_op) data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"])
filename = "cutmix_batch_c_nchw_result.npz" filename = "cutmix_batch_c_nchw_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@ -273,10 +273,10 @@ def test_cutmix_batch_fail1():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
with pytest.raises(RuntimeError) as error: with pytest.raises(RuntimeError) as error:
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
if idx == 0: if idx == 0:
images_cutmix = image images_cutmix = image
@ -297,7 +297,7 @@ def test_cutmix_batch_fail2():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error: with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, -1) vision.CutMixBatch(mode.ImageBatchFormat.NHWC, -1)
error_message = "Input is not within the required interval" error_message = "Input is not within the required interval"
@ -315,7 +315,7 @@ def test_cutmix_batch_fail3():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error: with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, 2) vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, 2)
error_message = "Input is not within the required interval" error_message = "Input is not within the required interval"
@ -333,7 +333,7 @@ def test_cutmix_batch_fail4():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error: with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, -1) vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, -1)
error_message = "Input is not within the required interval" error_message = "Input is not within the required interval"
@ -351,10 +351,10 @@ def test_cutmix_batch_fail5():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(5, drop_remainder=True) data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image"], operations=cutmix_batch_op) data1 = data1.map(operations=cutmix_batch_op, input_columns=["image"])
with pytest.raises(RuntimeError) as error: with pytest.raises(RuntimeError) as error:
images_cutmix = np.array([]) images_cutmix = np.array([])
@ -378,10 +378,10 @@ def test_cutmix_batch_fail6():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW) cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
data1 = data1.batch(5, drop_remainder=True) data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
with pytest.raises(RuntimeError) as error: with pytest.raises(RuntimeError) as error:
images_cutmix = np.array([]) images_cutmix = np.array([])
@ -406,7 +406,7 @@ def test_cutmix_batch_fail7():
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(5, drop_remainder=True) data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
with pytest.raises(RuntimeError) as error: with pytest.raises(RuntimeError) as error:
images_cutmix = np.array([]) images_cutmix = np.array([])
@ -430,7 +430,7 @@ def test_cutmix_batch_fail8():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error: with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.0) vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.0)
error_message = "Input is not within the required interval" error_message = "Input is not within the required interval"
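
The CutMixBatch tests above all share one pipeline shape under the new keyword order: one-hot the labels, batch, then map CutMixBatch over both columns at once. The sketch below reproduces that shape on random images; the synthetic generator and the import path used for ImageBatchFormat are assumptions made so the example runs on its own.

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision
import mindspore.dataset.transforms.c_transforms as data_trans
from mindspore.dataset.vision.utils import ImageBatchFormat  # assumed location of the enum

def gen_pairs():
    # Random NHWC images and integer labels in place of the CIFAR-10 data.
    for _ in range(8):
        image = np.random.randint(0, 255, (32, 32, 3), dtype=np.uint8)
        label = np.array(np.random.randint(0, 10), dtype=np.int32)
        yield (image, label)

data = ds.GeneratorDataset(gen_pairs, ["image", "label"])
data = data.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
data = data.batch(4, drop_remainder=True)
data = data.map(operations=c_vision.CutMixBatch(ImageBatchFormat.NHWC),
                input_columns=["image", "label"])
for row in data.create_dict_iterator(num_epochs=1):
    print(row["image"].shape, row["label"].shape)  # (4, 32, 32, 3) and (4, 10)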


@ -59,7 +59,7 @@ def test_numpy_slices_list_append():
data1 = de.TFRecordDataset(DATA_DIR) data1 = de.TFRecordDataset(DATA_DIR)
resize_op = vision.Resize((resize_height, resize_width)) resize_op = vision.Resize((resize_height, resize_width))
data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True), resize_op]) data1 = data1.map(operations=[vision.Decode(True), resize_op], input_columns=["image"])
res = [] res = []
for data in data1.create_dict_iterator(num_epochs=1): for data in data1.create_dict_iterator(num_epochs=1):


@ -46,8 +46,8 @@ def test_celeba_dataset_op():
data = data.repeat(2) data = data.repeat(2)
center_crop = vision.CenterCrop(crop_size) center_crop = vision.CenterCrop(crop_size)
resize_op = vision.Resize(resize_size, Inter.LINEAR) # Bilinear mode resize_op = vision.Resize(resize_size, Inter.LINEAR) # Bilinear mode
data = data.map(input_columns=["image"], operations=center_crop) data = data.map(operations=center_crop, input_columns=["image"])
data = data.map(input_columns=["image"], operations=resize_op) data = data.map(operations=resize_op, input_columns=["image"])
count = 0 count = 0
for item in data.create_dict_iterator(num_epochs=1): for item in data.create_dict_iterator(num_epochs=1):
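
The CelebA hunk chains two vision maps, each now passing operations ahead of input_columns. A rough standalone equivalent (random images and arbitrary crop/resize sizes instead of the CelebA data):

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision

def gen_images():
    for _ in range(3):
        yield (np.random.randint(0, 255, (80, 80, 3), dtype=np.uint8),)

data = ds.GeneratorDataset(gen_images, ["image"])
data = data.map(operations=c_vision.CenterCrop(64), input_columns=["image"])
data = data.map(operations=c_vision.Resize((32, 32)), input_columns=["image"])
for item in data.create_dict_iterator(num_epochs=1):
    print(item["image"].shape)  # (32, 32, 3)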


@ -25,6 +25,7 @@ INVALID_FILE = "../data/dataset/testCOCO/annotations/invalid.json"
LACKOFIMAGE_FILE = "../data/dataset/testCOCO/annotations/lack_of_images.json" LACKOFIMAGE_FILE = "../data/dataset/testCOCO/annotations/lack_of_images.json"
INVALID_CATEGORY_ID_FILE = "../data/dataset/testCOCO/annotations/invalid_category_id.json" INVALID_CATEGORY_ID_FILE = "../data/dataset/testCOCO/annotations/invalid_category_id.json"
def test_coco_detection(): def test_coco_detection():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection",
decode=True, shuffle=False) decode=True, shuffle=False)
@ -57,6 +58,7 @@ def test_coco_detection():
np.testing.assert_array_equal(np.array([[5]]), category_id[4]) np.testing.assert_array_equal(np.array([[5]]), category_id[4])
np.testing.assert_array_equal(np.array([[6]]), category_id[5]) np.testing.assert_array_equal(np.array([[6]]), category_id[5])
def test_coco_stuff(): def test_coco_stuff():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff", data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff",
decode=True, shuffle=False) decode=True, shuffle=False)
@ -97,6 +99,7 @@ def test_coco_stuff():
segmentation[5]) segmentation[5])
np.testing.assert_array_equal(np.array([[0]]), iscrowd[5]) np.testing.assert_array_equal(np.array([[0]]), iscrowd[5])
def test_coco_keypoint(): def test_coco_keypoint():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint", data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint",
decode=True, shuffle=False) decode=True, shuffle=False)
@ -124,6 +127,7 @@ def test_coco_keypoint():
keypoints[1]) keypoints[1])
np.testing.assert_array_equal(np.array([[10]]), num_keypoints[1]) np.testing.assert_array_equal(np.array([[10]]), num_keypoints[1])
def test_coco_panoptic(): def test_coco_panoptic():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True, shuffle=False) data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True, shuffle=False)
num_iter = 0 num_iter = 0
@ -151,6 +155,7 @@ def test_coco_panoptic():
np.testing.assert_array_equal(np.array([[0], [0]]), iscrowd[1]) np.testing.assert_array_equal(np.array([[0], [0]]), iscrowd[1])
np.testing.assert_array_equal(np.array([[43102], [6079]]), area[1]) np.testing.assert_array_equal(np.array([[43102], [6079]]), area[1])
def test_coco_detection_classindex(): def test_coco_detection_classindex():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
class_index = data1.get_class_indexing() class_index = data1.get_class_indexing()
@ -161,6 +166,7 @@ def test_coco_detection_classindex():
num_iter += 1 num_iter += 1
assert num_iter == 6 assert num_iter == 6
def test_coco_panootic_classindex(): def test_coco_panootic_classindex():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True) data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True)
class_index = data1.get_class_indexing() class_index = data1.get_class_indexing()
@ -170,6 +176,7 @@ def test_coco_panootic_classindex():
num_iter += 1 num_iter += 1
assert num_iter == 2 assert num_iter == 2
def test_coco_case_0(): def test_coco_case_0():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
data1 = data1.shuffle(10) data1 = data1.shuffle(10)
@ -179,6 +186,7 @@ def test_coco_case_0():
num_iter += 1 num_iter += 1
assert num_iter == 2 assert num_iter == 2
def test_coco_case_1(): def test_coco_case_1():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
sizes = [0.5, 0.5] sizes = [0.5, 0.5]
@ -194,28 +202,31 @@ def test_coco_case_1():
num_iter += 1 num_iter += 1
assert num_iter == 3 assert num_iter == 3
def test_coco_case_2(): def test_coco_case_2():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
resize_op = vision.Resize((224, 224)) resize_op = vision.Resize((224, 224))
data1 = data1.map(input_columns=["image"], operations=resize_op) data1 = data1.map(operations=resize_op, input_columns=["image"])
data1 = data1.repeat(4) data1 = data1.repeat(4)
num_iter = 0 num_iter = 0
for _ in data1.__iter__(): for _ in data1.__iter__():
num_iter += 1 num_iter += 1
assert num_iter == 24 assert num_iter == 24
def test_coco_case_3(): def test_coco_case_3():
data1 = ds.CocoDataset(DATA_DIR_2, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) data1 = ds.CocoDataset(DATA_DIR_2, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
resize_op = vision.Resize((224, 224)) resize_op = vision.Resize((224, 224))
data1 = data1.map(input_columns=["image"], operations=resize_op) data1 = data1.map(operations=resize_op, input_columns=["image"])
data1 = data1.repeat(4) data1 = data1.repeat(4)
num_iter = 0 num_iter = 0
for _ in data1.__iter__(): for _ in data1.__iter__():
num_iter += 1 num_iter += 1
assert num_iter == 24 assert num_iter == 24
def test_coco_case_exception(): def test_coco_case_exception():
try: try:
data1 = ds.CocoDataset("path_not_exist/", annotation_file=ANNOTATION_FILE, task="Detection") data1 = ds.CocoDataset("path_not_exist/", annotation_file=ANNOTATION_FILE, task="Detection")


@ -25,6 +25,7 @@ def generator_1d():
for i in range(64): for i in range(64):
yield (np.array([i]),) yield (np.array([i]),)
class DatasetGenerator: class DatasetGenerator:
def __init__(self): def __init__(self):
pass pass
@ -241,11 +242,11 @@ def test_generator_8():
# apply dataset operations # apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(input_columns="col0", output_columns="out0", operations=(lambda x: x * 3), data1 = data1.map(operations=(lambda x: x * 3), input_columns="col0", output_columns="out0",
num_parallel_workers=2) num_parallel_workers=2)
data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x * 7, x)), data1 = data1.map(operations=(lambda x: (x * 7, x)), input_columns="col1", output_columns=["out1", "out2"],
num_parallel_workers=2, column_order=["out0", "out1", "out2"]) num_parallel_workers=2, column_order=["out0", "out1", "out2"])
data1 = data1.map(input_columns="out2", output_columns="out2", operations=(lambda x: x + 1), data1 = data1.map(operations=(lambda x: x + 1), input_columns="out2", output_columns="out2",
num_parallel_workers=2) num_parallel_workers=2)
i = 0 i = 0
@ -268,9 +269,9 @@ def test_generator_9():
# apply dataset operations # apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["image", "label"]) data1 = ds.GeneratorDataset(generator_mc(2048), ["image", "label"])
data2 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"]) data2 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
data1 = data1.map(input_columns="label", operations=(lambda x: x * 3), data1 = data1.map(operations=(lambda x: x * 3), input_columns="label",
num_parallel_workers=4) num_parallel_workers=4)
data2 = data2.map(input_columns="label", operations=(lambda x: x * 3), data2 = data2.map(operations=(lambda x: x * 3), input_columns="label",
num_parallel_workers=4) num_parallel_workers=4)
# Expected column order is not changed. # Expected column order is not changed.
@ -298,7 +299,7 @@ def test_generator_10():
# apply dataset operations # apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)), data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
column_order=['col0', 'out1', 'out2'], num_parallel_workers=2) column_order=['col0', 'out1', 'out2'], num_parallel_workers=2)
# Expected column order is |col0|out1|out2| # Expected column order is |col0|out1|out2|
@ -322,7 +323,7 @@ def test_generator_11():
# apply dataset operations # apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)), data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
column_order=['out1', 'out2'], num_parallel_workers=2) column_order=['out1', 'out2'], num_parallel_workers=2)
# Expected column order is |out1|out2| # Expected column order is |out1|out2|
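
test_generator_8 through test_generator_11 exercise the renaming path of map: a callable that returns several outputs, output_columns to name them, and column_order to fix the final layout, always with operations leading the call. A condensed sketch of that pattern; the generator contents and the x * 5 transform are stand-ins:

import numpy as np
import mindspore.dataset as ds

def gen_two_cols():
    for i in range(4):
        yield (np.array([i]), np.array([i + 10]))

data = ds.GeneratorDataset(gen_two_cols, ["col0", "col1"])
# One input column fans out into two output columns; column_order fixes the final layout.
data = data.map(operations=(lambda x: (x, x * 5)), input_columns="col1",
                output_columns=["out1", "out2"], column_order=["col0", "out1", "out2"],
                num_parallel_workers=2)
for row in data.create_dict_iterator(num_epochs=1):
    print(row["col0"], row["out1"], row["out2"])  # out2 is always out1 * 5
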
@ -503,7 +504,7 @@ def test_generator_error_3():
with pytest.raises(ValueError) as info: with pytest.raises(ValueError) as info:
# apply dataset operations # apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"]) data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
data1 = data1.map(input_columns=["label"], output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)), data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"], output_columns=["out1", "out2"],
num_parallel_workers=2) num_parallel_workers=2)
for _ in data1: for _ in data1:
@ -515,7 +516,7 @@ def test_generator_error_4():
with pytest.raises(RuntimeError) as info: with pytest.raises(RuntimeError) as info:
# apply dataset operations # apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"]) data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
data1 = data1.map(input_columns=["label"], operations=(lambda x: (x, x * 5)), data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"],
num_parallel_workers=2) num_parallel_workers=2)
for _ in data1: for _ in data1:
@ -706,6 +707,7 @@ def test_generator_dataset_size_4():
num_rows = num_rows + 1 num_rows = num_rows + 1
assert data_size == num_rows assert data_size == num_rows
def test_generator_dataset_size_5(): def test_generator_dataset_size_5():
""" """
Test get_dataset_size after create_dict_iterator Test get_dataset_size after create_dict_iterator


@ -103,8 +103,8 @@ def test_manifest_dataset_multi_label_onehot():
data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False) data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False)
expect_label = [[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 1]]] expect_label = [[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 1]]]
one_hot_encode = data_trans.OneHot(3) one_hot_encode = data_trans.OneHot(3)
data = data.map(input_columns=["label"], operations=one_hot_encode) data = data.map(operations=one_hot_encode, input_columns=["label"])
data = data.map(input_columns=["label"], operations=multi_label_hot) data = data.map(operations=multi_label_hot, input_columns=["label"])
data = data.batch(2) data = data.batch(2)
count = 0 count = 0
for item in data.create_dict_iterator(num_epochs=1): for item in data.create_dict_iterator(num_epochs=1):


@ -85,8 +85,8 @@ def test_case_0():
resize_op = vision.Resize((224, 224)) resize_op = vision.Resize((224, 224))
data1 = data1.map(input_columns=["image"], operations=resize_op) data1 = data1.map(operations=resize_op, input_columns=["image"])
data1 = data1.map(input_columns=["target"], operations=resize_op) data1 = data1.map(operations=resize_op, input_columns=["target"])
repeat_num = 4 repeat_num = 4
data1 = data1.repeat(repeat_num) data1 = data1.repeat(repeat_num)
batch_size = 2 batch_size = 2
@ -103,7 +103,7 @@ def test_case_1():
resize_op = vision.Resize((224, 224)) resize_op = vision.Resize((224, 224))
data1 = data1.map(input_columns=["image"], operations=resize_op) data1 = data1.map(operations=resize_op, input_columns=["image"])
repeat_num = 4 repeat_num = 4
data1 = data1.repeat(repeat_num) data1 = data1.repeat(repeat_num)
batch_size = 2 batch_size = 2


@ -36,7 +36,7 @@ def test_decode_op():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
# Serialize and Load dataset requires using vision.Decode instead of vision.Decode(). # Serialize and Load dataset requires using vision.Decode instead of vision.Decode().
data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)]) data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -57,7 +57,7 @@ def test_decode_op_tf_file_dataset():
# Decode with rgb format set to True # Decode with rgb format set to True
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.FILES) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.FILES)
data1 = data1.map(input_columns=["image"], operations=vision.Decode(True)) data1 = data1.map(operations=vision.Decode(True), input_columns=["image"])
for item in data1.create_dict_iterator(num_epochs=1): for item in data1.create_dict_iterator(num_epochs=1):
logger.info('decode == {}'.format(item['image'])) logger.info('decode == {}'.format(item['image']))


@ -54,8 +54,8 @@ def test_case_1():
resize_op = vision.Resize((resize_height, resize_width)) resize_op = vision.Resize((resize_height, resize_width))
# apply map operations on images # apply map operations on images
data = data.map(input_columns=["image"], operations=decode_op) data = data.map(operations=decode_op, input_columns=["image"])
data = data.map(input_columns=["image"], operations=resize_op) data = data.map(operations=resize_op, input_columns=["image"])
batch_size = 3 batch_size = 3
data = data.batch(batch_size, drop_remainder=True) data = data.batch(batch_size, drop_remainder=True)
@ -79,8 +79,8 @@ def test_case_2():
resize_op = vision.Resize((resize_height, resize_width)) resize_op = vision.Resize((resize_height, resize_width))
# apply map operations on images # apply map operations on images
data = data.map(input_columns=["image"], operations=decode_op) data = data.map(operations=decode_op, input_columns=["image"])
data = data.map(input_columns=["image"], operations=resize_op) data = data.map(operations=resize_op, input_columns=["image"])
batch_size = 2 batch_size = 2
data = data.batch(batch_size, drop_remainder=True) data = data.batch(batch_size, drop_remainder=True)
@ -107,8 +107,8 @@ def test_case_3():
resize_op = vision.Resize((resize_height, resize_width)) resize_op = vision.Resize((resize_height, resize_width))
# apply map operations on images # apply map operations on images
data = data.map(input_columns=["image"], operations=decode_op) data = data.map(operations=decode_op, input_columns=["image"])
data = data.map(input_columns=["image"], operations=resize_op) data = data.map(operations=resize_op, input_columns=["image"])
data = data.repeat(2) data = data.repeat(2)


@ -24,8 +24,8 @@ import mindspore.dataset.transforms.c_transforms as ops
def compare(array): def compare(array):
data = ds.NumpySlicesDataset([array], column_names="x") data = ds.NumpySlicesDataset([array], column_names="x")
array = np.array(array) array = np.array(array)
data = data.map(input_columns=["x"], output_columns=["x", "y"], column_order=["x", "y"], data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"],
operations=ops.Duplicate()) column_order=["x", "y"])
for d in data.create_dict_iterator(num_epochs=1): for d in data.create_dict_iterator(num_epochs=1):
np.testing.assert_array_equal(array, d["x"]) np.testing.assert_array_equal(array, d["x"])
np.testing.assert_array_equal(array, d["y"]) np.testing.assert_array_equal(array, d["y"])
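
The compare() helper above is the one place in these hunks where all four map keywords appear together under the new order, so a standalone copy of that pattern may be useful; the sample list is arbitrary:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as ops

array = [1, 2, 3, 4, 5]
data = ds.NumpySlicesDataset([array], column_names="x")
data = data.map(operations=ops.Duplicate(), input_columns=["x"],
                output_columns=["x", "y"], column_order=["x", "y"])
for d in data.create_dict_iterator(num_epochs=1):
    np.testing.assert_array_equal(np.array(array), d["x"])  # original column
    np.testing.assert_array_equal(np.array(array), d["y"])  # duplicated column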


@ -79,7 +79,7 @@ def test_decode_op():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
# Serialize and Load dataset requires using vision.Decode instead of vision.Decode(). # Serialize and Load dataset requires using vision.Decode instead of vision.Decode().
data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)]) data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)


@ -43,8 +43,7 @@ def test_equalize_py(plot=False):
F.Resize((224, 224)), F.Resize((224, 224)),
F.ToTensor()]) F.ToTensor()])
ds_original = ds.map(input_columns="image", ds_original = ds.map(operations=transforms_original, input_columns="image")
operations=transforms_original)
ds_original = ds_original.batch(512) ds_original = ds_original.batch(512)
@ -64,8 +63,7 @@ def test_equalize_py(plot=False):
F.Equalize(), F.Equalize(),
F.ToTensor()]) F.ToTensor()])
ds_equalize = ds.map(input_columns="image", ds_equalize = ds.map(operations=transforms_equalize, input_columns="image")
operations=transforms_equalize)
ds_equalize = ds_equalize.batch(512) ds_equalize = ds_equalize.batch(512)
@ -98,8 +96,7 @@ def test_equalize_c(plot=False):
transforms_original = [C.Decode(), C.Resize(size=[224, 224])] transforms_original = [C.Decode(), C.Resize(size=[224, 224])]
ds_original = ds.map(input_columns="image", ds_original = ds.map(operations=transforms_original, input_columns="image")
operations=transforms_original)
ds_original = ds_original.batch(512) ds_original = ds_original.batch(512)
@ -117,8 +114,7 @@ def test_equalize_c(plot=False):
transform_equalize = [C.Decode(), C.Resize(size=[224, 224]), transform_equalize = [C.Decode(), C.Resize(size=[224, 224]),
C.Equalize()] C.Equalize()]
ds_equalize = ds.map(input_columns="image", ds_equalize = ds.map(operations=transform_equalize, input_columns="image")
operations=transform_equalize)
ds_equalize = ds_equalize.batch(512) ds_equalize = ds_equalize.batch(512)
@ -147,11 +143,9 @@ def test_equalize_py_c(plot=False):
# equalize Images in cpp # equalize Images in cpp
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
operations=[C.Decode(), C.Resize((224, 224))])
ds_c_equalize = ds.map(input_columns="image", ds_c_equalize = ds.map(operations=C.Equalize(), input_columns="image")
operations=C.Equalize())
ds_c_equalize = ds_c_equalize.batch(512) ds_c_equalize = ds_c_equalize.batch(512)
@ -165,16 +159,14 @@ def test_equalize_py_c(plot=False):
# Equalize images in python # Equalize images in python
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
operations=[C.Decode(), C.Resize((224, 224))])
transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8), transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8),
F.ToPIL(), F.ToPIL(),
F.Equalize(), F.Equalize(),
np.array]) np.array])
ds_p_equalize = ds.map(input_columns="image", ds_p_equalize = ds.map(operations=transforms_p_equalize, input_columns="image")
operations=transforms_p_equalize)
ds_p_equalize = ds_p_equalize.batch(512) ds_p_equalize = ds_p_equalize.batch(512)
@ -206,13 +198,10 @@ def test_equalize_one_channel():
try: try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)),
operations=[C.Decode(), lambda img: np.array(img[:, :, 0])], input_columns=["image"])
C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])])
ds.map(input_columns="image", ds.map(operations=c_op, input_columns="image")
operations=c_op)
except RuntimeError as e: except RuntimeError as e:
logger.info("Got an exception in DE: {}".format(str(e))) logger.info("Got an exception in DE: {}".format(str(e)))
@ -225,8 +214,7 @@ def test_equalize_mnist_c(plot=False):
""" """
logger.info("Test Equalize C Op With MNIST Images") logger.info("Test Equalize C Op With MNIST Images")
ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
ds_equalize_c = ds.map(input_columns="image", ds_equalize_c = ds.map(operations=C.Equalize(), input_columns="image")
operations=C.Equalize())
ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
images = [] images = []
@ -259,7 +247,7 @@ def test_equalize_md5_py():
F.Equalize(), F.Equalize(),
F.ToTensor()]) F.ToTensor()])
data1 = data1.map(input_columns="image", operations=transforms) data1 = data1.map(operations=transforms, input_columns="image")
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "equalize_01_result.npz" filename = "equalize_01_result.npz"
save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
@ -279,7 +267,7 @@ def test_equalize_md5_c():
C.Equalize(), C.Equalize(),
F.ToTensor()] F.ToTensor()]
data = ds.map(input_columns="image", operations=transforms_equalize) data = ds.map(operations=transforms_equalize, input_columns="image")
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "equalize_01_result_c.npz" filename = "equalize_01_result_c.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
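
Several hunks in this file pass a plain Python list that mixes c_transforms ops and a lambda as the operations argument. A reduced sketch of that pattern on synthetic images; dropping the Decode step (the input here is already a decoded array) is an assumption of this example:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as C

def gen_images():
    for _ in range(2):
        yield (np.random.randint(0, 255, (50, 60, 3), dtype=np.uint8),)

data = ds.GeneratorDataset(gen_images, ["image"])
# A list passed to operations runs left to right inside a single map call.
data = data.map(operations=[C.Resize((224, 224)), C.Equalize(),
                            lambda img: np.array(img[:, :, 0])],
                input_columns=["image"])
for row in data.create_dict_iterator(num_epochs=1):
    print(row["image"].shape)  # (224, 224): the lambda kept a single channel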


@ -29,7 +29,7 @@ def test_exception_01():
logger.info("test_exception_01") logger.info("test_exception_01")
data = ds.TFRecordDataset(DATA_DIR, columns_list=["image"]) data = ds.TFRecordDataset(DATA_DIR, columns_list=["image"])
with pytest.raises(TypeError) as info: with pytest.raises(TypeError) as info:
data.map(input_columns=["image"], operations=vision.Resize(100, 100)) data.map(operations=vision.Resize(100, 100), input_columns=["image"])
assert "Argument interpolation with value 100 is not of type (<enum 'Inter'>,)" in str(info.value) assert "Argument interpolation with value 100 is not of type (<enum 'Inter'>,)" in str(info.value)
@ -45,8 +45,8 @@ def test_exception_02():
num_samples = 1 num_samples = 1
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples) data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
data = data.map(input_columns=["image"], operations=vision.Decode()) data = data.map(operations=vision.Decode(), input_columns=["image"])
data = data.map(input_columns=["image"], operations=vision.Resize((100, 100))) data = data.map(operations=vision.Resize((100, 100)), input_columns=["image"])
# Confirm 1 sample in dataset # Confirm 1 sample in dataset
assert sum([1 for _ in data]) == 1 assert sum([1 for _ in data]) == 1
num_iters = 0 num_iters = 0


@ -28,7 +28,7 @@ def test_fillop_basic():
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill(3) fill_op = data_trans.Fill(3)
data = data.map(input_columns=["col"], operations=fill_op) data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array([3, 3, 3, 3], dtype=np.uint8) expected = np.array([3, 3, 3, 3], dtype=np.uint8)
for data_row in data: for data_row in data:
np.testing.assert_array_equal(data_row[0], expected) np.testing.assert_array_equal(data_row[0], expected)
@ -41,7 +41,7 @@ def test_fillop_down_type_cast():
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill(-3) fill_op = data_trans.Fill(-3)
data = data.map(input_columns=["col"], operations=fill_op) data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array([253, 253, 253, 253], dtype=np.uint8) expected = np.array([253, 253, 253, 253], dtype=np.uint8)
for data_row in data: for data_row in data:
np.testing.assert_array_equal(data_row[0], expected) np.testing.assert_array_equal(data_row[0], expected)
@ -54,7 +54,7 @@ def test_fillop_up_type_cast():
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill(3) fill_op = data_trans.Fill(3)
data = data.map(input_columns=["col"], operations=fill_op) data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array([3., 3., 3., 3.], dtype=np.float) expected = np.array([3., 3., 3., 3.], dtype=np.float)
for data_row in data: for data_row in data:
np.testing.assert_array_equal(data_row[0], expected) np.testing.assert_array_equal(data_row[0], expected)
@ -67,7 +67,7 @@ def test_fillop_string():
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill("error") fill_op = data_trans.Fill("error")
data = data.map(input_columns=["col"], operations=fill_op) data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array(['error', 'error'], dtype='S') expected = np.array(['error', 'error'], dtype='S')
for data_row in data: for data_row in data:
np.testing.assert_array_equal(data_row[0], expected) np.testing.assert_array_equal(data_row[0], expected)
@ -79,7 +79,7 @@ def test_fillop_error_handling():
data = ds.GeneratorDataset(gen, column_names=["col"]) data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill("words") fill_op = data_trans.Fill("words")
data = data.map(input_columns=["col"], operations=fill_op) data = data.map(operations=fill_op, input_columns=["col"])
with pytest.raises(RuntimeError) as error_info: with pytest.raises(RuntimeError) as error_info:
for _ in data: for _ in data:
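
The Fill hunks are all one-line keyword swaps; a compact runnable version of the basic case, with the generator contents chosen arbitrarily:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as data_trans

def gen():
    yield (np.array([4, 5, 6, 7], dtype=np.uint8),)

data = ds.GeneratorDataset(gen, column_names=["col"])
data = data.map(operations=data_trans.Fill(3), input_columns=["col"])
for data_row in data:
    # Every element has been overwritten with the fill value.
    np.testing.assert_array_equal(data_row[0], np.array([3, 3, 3, 3], dtype=np.uint8))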


@ -30,7 +30,7 @@ def test_diff_predicate_func():
cde.Resize([64, 64]) cde.Resize([64, 64])
] ]
dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image", "label"], shuffle=False) dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image", "label"], shuffle=False)
dataset = dataset.map(input_columns=["image"], operations=transforms, num_parallel_workers=1) dataset = dataset.map(operations=transforms, input_columns=["image"], num_parallel_workers=1)
dataset = dataset.filter(input_columns=["image", "label"], predicate=predicate_func, num_parallel_workers=4) dataset = dataset.filter(input_columns=["image", "label"], predicate=predicate_func, num_parallel_workers=4)
num_iter = 0 num_iter = 0
@ -261,8 +261,8 @@ def func_map_part(data_col1):
# test with map # test with map
def test_filter_by_generator_with_map_all_col(): def test_filter_by_generator_with_map_all_col():
dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"]) dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"])
dataset_map = dataset.map(input_columns=["col1"], output_columns=["col1"], operations=func_map_part) dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["col1"])
# dataset_map = dataset.map( operations=func_map_part) # dataset_map = dataset.map(operations=func_map_part)
dataset_f = dataset_map.filter(input_columns=["col1"], predicate=filter_func_map_part, num_parallel_workers=1) dataset_f = dataset_map.filter(input_columns=["col1"], predicate=filter_func_map_part, num_parallel_workers=1)
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
@ -277,7 +277,7 @@ def test_filter_by_generator_with_map_all_col():
# test with map # test with map
def test_filter_by_generator_with_map_part_col(): def test_filter_by_generator_with_map_part_col():
dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"]) dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"])
dataset_map = dataset.map(input_columns=["col1"], output_columns=["out1"], operations=func_map_part) dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"])
dataset_f = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_map, num_parallel_workers=4) dataset_f = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_map, num_parallel_workers=4)
num_iter = 0 num_iter = 0
@ -328,7 +328,7 @@ def filter_func_input_column3(col1):
# test with input_columns # test with input_columns
def test_filter_by_generator_with_input_column(): def test_filter_by_generator_with_input_column():
dataset = ds.GeneratorDataset(generator_mc(64), ["col1", "col2"]) dataset = ds.GeneratorDataset(generator_mc(64), ["col1", "col2"])
dataset_map = dataset.map(input_columns=["col1"], output_columns=["out1"], operations=func_map_part) dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"])
dataset_f1 = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_input_column1, dataset_f1 = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_input_column1,
num_parallel_workers=4) num_parallel_workers=4)
dataset_f2 = dataset_f1.filter(input_columns=["out1"], predicate=filter_func_input_column2, num_parallel_workers=4) dataset_f2 = dataset_f1.filter(input_columns=["out1"], predicate=filter_func_input_column2, num_parallel_workers=4)
@ -382,7 +382,7 @@ def test_filter_by_generator_Partial1():
dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"]) dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"])
dataset_zip = ds.zip((dataset1, dataset2)) dataset_zip = ds.zip((dataset1, dataset2))
dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2) dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2)
dataset_map = dataset_f1.map(input_columns=["col1"], output_columns=["out1"], operations=lambda x1: x1 + 400) dataset_map = dataset_f1.map(operations=lambda x1: x1 + 400, input_columns=["col1"], output_columns=["out1"])
ret = [] ret = []
for item in dataset_map.create_dict_iterator(num_epochs=1): for item in dataset_map.create_dict_iterator(num_epochs=1):
ret.append(item["out1"]) ret.append(item["out1"])
@ -399,8 +399,8 @@ def test_filter_by_generator_Partial2():
dataset2f = dataset2.filter(input_columns=["col3"], predicate=lambda x: x not in [203, 207, 209], dataset2f = dataset2.filter(input_columns=["col3"], predicate=lambda x: x not in [203, 207, 209],
num_parallel_workers=2) num_parallel_workers=2)
dataset_zip = ds.zip((dataset1f, dataset2f)) dataset_zip = ds.zip((dataset1f, dataset2f))
dataset_map = dataset_zip.map(input_columns=["col1", "col3"], output_columns=["out1", "out3"], dataset_map = dataset_zip.map(operations=lambda x1, x3: (x1 + 400, x3 + 500), input_columns=["col1", "col3"],
operations=lambda x1, x3: (x1 + 400, x3 + 500)) output_columns=["out1", "out3"])
ret1 = [] ret1 = []
ret3 = [] ret3 = []
for item in dataset_map.create_dict_iterator(num_epochs=1): for item in dataset_map.create_dict_iterator(num_epochs=1):
@ -484,6 +484,7 @@ def test_filter_by_generator_with_map_all_sort():
assert ret_data[0]["col1"] == 0 assert ret_data[0]["col1"] == 0
assert ret_data[9]["col6"] == 509 assert ret_data[9]["col6"] == 509
def test_filter_by_generator_get_dataset_size(): def test_filter_by_generator_get_dataset_size():
dataset = ds.GeneratorDataset(generator_1d, ["data"]) dataset = ds.GeneratorDataset(generator_1d, ["data"])
dataset = dataset.filter(predicate=filter_func_shuffle_after, num_parallel_workers=4) dataset = dataset.filter(predicate=filter_func_shuffle_after, num_parallel_workers=4)
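
These filter tests combine the reordered map with filter, whose own keywords (input_columns, predicate, num_parallel_workers) are unchanged. A small sketch of a map-then-filter chain in that style; the +100 transform and the threshold are invented for illustration:

import numpy as np
import mindspore.dataset as ds

def generator_mc(maxid=8):
    for i in range(maxid):
        yield (np.array([i]), np.array([i * 2]))

dataset = ds.GeneratorDataset(generator_mc(8), ["col1", "col2"])
# map renames col1 to out1 and shifts its values; filter then keeps only the small ones.
dataset = dataset.map(operations=(lambda x: x + 100), input_columns=["col1"], output_columns=["out1"])
dataset = dataset.filter(input_columns=["out1"], predicate=lambda x: x[0] < 104, num_parallel_workers=1)
for row in dataset.create_dict_iterator(num_epochs=1):
    print(row["out1"], row["col2"])  # out1 runs 100..103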


@ -41,7 +41,7 @@ def test_five_crop_op(plot=False):
vision.ToTensor(), vision.ToTensor(),
] ]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1) data1 = data1.map(operations=transform_1, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -51,7 +51,7 @@ def test_five_crop_op(plot=False):
lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images
] ]
transform_2 = mindspore.dataset.transforms.py_transforms.Compose(transforms_2) transform_2 = mindspore.dataset.transforms.py_transforms.Compose(transforms_2)
data2 = data2.map(input_columns=["image"], operations=transform_2) data2 = data2.map(operations=transform_2, input_columns=["image"])
num_iter = 0 num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
@ -85,7 +85,7 @@ def test_five_crop_error_msg():
vision.ToTensor() vision.ToTensor()
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data = data.map(input_columns=["image"], operations=transform) data = data.map(operations=transform, input_columns=["image"])
with pytest.raises(RuntimeError) as info: with pytest.raises(RuntimeError) as info:
for _ in data: for _ in data:
@ -110,7 +110,7 @@ def test_five_crop_md5():
lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data = data.map(input_columns=["image"], operations=transform) data = data.map(operations=transform, input_columns=["image"])
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "five_crop_01_result.npz" filename = "five_crop_01_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
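
The FiveCrop pipeline also shows a py_transforms Compose going through the new map signature. Below is a self-contained approximation that starts from in-memory arrays; replacing the Decode step with ToPIL is an assumption of this sketch, not part of the original test:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.vision.py_transforms as py_vision
from mindspore.dataset.transforms.py_transforms import Compose

def gen_images():
    for _ in range(2):
        yield (np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8),)

transform = Compose([
    py_vision.ToPIL(),       # ndarray -> PIL image (stands in for the Decode step)
    py_vision.FiveCrop(32),  # tuple of five 32x32 crops
    lambda crops: np.stack([py_vision.ToTensor()(crop) for crop in crops])  # 4D stack of 5 images
])
data = ds.GeneratorDataset(gen_images, ["image"])
data = data.map(operations=transform, input_columns=["image"])
for row in data.create_dict_iterator(num_epochs=1):
    print(row["image"].shape)  # (5, 3, 32, 32)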


@ -26,7 +26,7 @@ def test_demo_basic_from_dataset():
vocab = text.Vocab.from_dataset(data, "text", freq_range=None, top_k=None, vocab = text.Vocab.from_dataset(data, "text", freq_range=None, top_k=None,
special_tokens=["<pad>", "<unk>"], special_tokens=["<pad>", "<unk>"],
special_first=True) special_first=True)
data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "<unk>")) data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
res = [] res = []
for d in data.create_dict_iterator(num_epochs=1): for d in data.create_dict_iterator(num_epochs=1):
res.append(d["text"].item()) res.append(d["text"].item())
@ -36,10 +36,10 @@ def test_demo_basic_from_dataset():
def test_demo_basic_from_dataset_with_tokenizer(): def test_demo_basic_from_dataset_with_tokenizer():
""" this is a tutorial on how from_dataset should be used in a normal use case with tokenizer""" """ this is a tutorial on how from_dataset should be used in a normal use case with tokenizer"""
data = ds.TextFileDataset("../data/dataset/testTokenizerData/1.txt", shuffle=False) data = ds.TextFileDataset("../data/dataset/testTokenizerData/1.txt", shuffle=False)
data = data.map(input_columns=["text"], operations=text.UnicodeCharTokenizer()) data = data.map(operations=text.UnicodeCharTokenizer(), input_columns=["text"])
vocab = text.Vocab.from_dataset(data, None, freq_range=None, top_k=None, special_tokens=["<pad>", "<unk>"], vocab = text.Vocab.from_dataset(data, None, freq_range=None, top_k=None, special_tokens=["<pad>", "<unk>"],
special_first=True) special_first=True)
data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "<unk>")) data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
res = [] res = []
for d in data.create_dict_iterator(num_epochs=1): for d in data.create_dict_iterator(num_epochs=1):
res.append(list(d["text"])) res.append(list(d["text"]))
@ -60,7 +60,7 @@ def test_from_dataset():
corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"]) corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"])
vocab = text.Vocab.from_dataset(corpus_dataset, None, freq_range, top_k, special_tokens=["<pad>", "<unk>"], vocab = text.Vocab.from_dataset(corpus_dataset, None, freq_range, top_k, special_tokens=["<pad>", "<unk>"],
special_first=True) special_first=True)
corpus_dataset = corpus_dataset.map(input_columns="text", operations=text.Lookup(vocab, "<unk>")) corpus_dataset = corpus_dataset.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text")
res = [] res = []
for d in corpus_dataset.create_dict_iterator(num_epochs=1): for d in corpus_dataset.create_dict_iterator(num_epochs=1):
res.append(list(d["text"])) res.append(list(d["text"]))
@ -108,7 +108,7 @@ def test_from_dataset_special_token():
corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"]) corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"])
vocab = text.Vocab.from_dataset(corpus_dataset, None, None, top_k, special_tokens, special_first) vocab = text.Vocab.from_dataset(corpus_dataset, None, None, top_k, special_tokens, special_first)
data = ds.GeneratorDataset(gen_input(texts), column_names=["text"]) data = ds.GeneratorDataset(gen_input(texts), column_names=["text"])
data = data.map(input_columns="text", operations=text.Lookup(vocab, "<unk>")) data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text")
res = [] res = []
for d in data.create_dict_iterator(num_epochs=1): for d in data.create_dict_iterator(num_epochs=1):
res.append(d["text"].item()) res.append(d["text"].item())
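
The vocab tests build a Vocab directly from a dataset and then map a Lookup over the same text column, again with operations first. A miniature version of that flow; the corpus lines and the byte-string generator are stand-ins for the test data:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.text as text

corpus = ["where is the text", "the text is here"]

def gen_corpus():
    for line in corpus:
        yield (np.array(line.split(), dtype='S'),)

data = ds.GeneratorDataset(gen_corpus, column_names=["text"])
vocab = text.Vocab.from_dataset(data, "text", freq_range=None, top_k=None,
                                special_tokens=["<pad>", "<unk>"], special_first=True)
data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
for d in data.create_dict_iterator(num_epochs=1):
    print(list(d["text"]))  # token ids for each line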


@ -95,16 +95,16 @@ def test_get_column_name_manifest():
def test_get_column_name_map(): def test_get_column_name_map():
data = ds.Cifar10Dataset(CIFAR10_DIR) data = ds.Cifar10Dataset(CIFAR10_DIR)
center_crop_op = vision.CenterCrop(10) center_crop_op = vision.CenterCrop(10)
data = data.map(input_columns=["image"], operations=center_crop_op) data = data.map(operations=center_crop_op, input_columns=["image"])
assert data.get_col_names() == ["image", "label"] assert data.get_col_names() == ["image", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR) data = ds.Cifar10Dataset(CIFAR10_DIR)
data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["image"]) data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["image"])
assert data.get_col_names() == ["image", "label"] assert data.get_col_names() == ["image", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR) data = ds.Cifar10Dataset(CIFAR10_DIR)
data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1"]) data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1"])
assert data.get_col_names() == ["col1", "label"] assert data.get_col_names() == ["col1", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR) data = ds.Cifar10Dataset(CIFAR10_DIR)
data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1", "col2"], data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1", "col2"],
column_order=["col2", "col1"]) column_order=["col2", "col1"])
assert data.get_col_names() == ["col2", "col1"] assert data.get_col_names() == ["col2", "col1"]
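
These get_col_names checks pin down how output_columns and column_order behave under the new keyword order. The same behaviour can be reproduced without the CIFAR-10 data using a two-column generator; the identity and fan-out lambdas are illustrative only:

import numpy as np
import mindspore.dataset as ds

def gen_two_cols():
    for i in range(3):
        yield (np.array([i]), np.array([i * 2]))

# Renaming in place: output_columns swaps the name, the position is kept.
data = ds.GeneratorDataset(gen_two_cols, ["col0", "col1"])
data = data.map(operations=(lambda x: x), input_columns=["col1"], output_columns=["renamed"])
print(data.get_col_names())  # ['col0', 'renamed']

# column_order decides which columns survive and in what order.
data2 = ds.GeneratorDataset(gen_two_cols, ["col0", "col1"])
data2 = data2.map(operations=(lambda x: (x, x * 5)), input_columns=["col1"],
                  output_columns=["out1", "out2"], column_order=["out2", "out1"])
print(data2.get_col_names())  # ['out2', 'out1']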


@ -42,8 +42,7 @@ def test_invert_py(plot=False):
F.Resize((224, 224)), F.Resize((224, 224)),
F.ToTensor()]) F.ToTensor()])
ds_original = ds.map(input_columns="image", ds_original = ds.map(operations=transforms_original, input_columns="image")
operations=transforms_original)
ds_original = ds_original.batch(512) ds_original = ds_original.batch(512)
@ -63,8 +62,7 @@ def test_invert_py(plot=False):
F.Invert(), F.Invert(),
F.ToTensor()]) F.ToTensor()])
ds_invert = ds.map(input_columns="image", ds_invert = ds.map(operations=transforms_invert, input_columns="image")
operations=transforms_invert)
ds_invert = ds_invert.batch(512) ds_invert = ds_invert.batch(512)
@ -97,8 +95,7 @@ def test_invert_c(plot=False):
transforms_original = [C.Decode(), C.Resize(size=[224, 224])] transforms_original = [C.Decode(), C.Resize(size=[224, 224])]
ds_original = ds.map(input_columns="image", ds_original = ds.map(operations=transforms_original, input_columns="image")
operations=transforms_original)
ds_original = ds_original.batch(512) ds_original = ds_original.batch(512)
@ -116,8 +113,7 @@ def test_invert_c(plot=False):
transform_invert = [C.Decode(), C.Resize(size=[224, 224]), transform_invert = [C.Decode(), C.Resize(size=[224, 224]),
C.Invert()] C.Invert()]
ds_invert = ds.map(input_columns="image", ds_invert = ds.map(operations=transform_invert, input_columns="image")
operations=transform_invert)
ds_invert = ds_invert.batch(512) ds_invert = ds_invert.batch(512)
@ -146,11 +142,9 @@ def test_invert_py_c(plot=False):
# Invert Images in cpp # Invert Images in cpp
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
operations=[C.Decode(), C.Resize((224, 224))])
ds_c_invert = ds.map(input_columns="image", ds_c_invert = ds.map(operations=C.Invert(), input_columns="image")
operations=C.Invert())
ds_c_invert = ds_c_invert.batch(512) ds_c_invert = ds_c_invert.batch(512)
@ -164,16 +158,14 @@ def test_invert_py_c(plot=False):
# invert images in python # invert images in python
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
operations=[C.Decode(), C.Resize((224, 224))])
transforms_p_invert = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8), transforms_p_invert = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8),
F.ToPIL(), F.ToPIL(),
F.Invert(), F.Invert(),
np.array]) np.array])
ds_p_invert = ds.map(input_columns="image", ds_p_invert = ds.map(operations=transforms_p_invert, input_columns="image")
operations=transforms_p_invert)
ds_p_invert = ds_p_invert.batch(512) ds_p_invert = ds_p_invert.batch(512)
@ -205,13 +197,10 @@ def test_invert_one_channel():
try: try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)),
operations=[C.Decode(), lambda img: np.array(img[:, :, 0])], input_columns=["image"])
C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])])
ds.map(input_columns="image", ds.map(operations=c_op, input_columns="image")
operations=c_op)
except RuntimeError as e: except RuntimeError as e:
logger.info("Got an exception in DE: {}".format(str(e))) logger.info("Got an exception in DE: {}".format(str(e)))
@ -231,7 +220,7 @@ def test_invert_md5_py():
F.Invert(), F.Invert(),
F.ToTensor()]) F.ToTensor()])
data = ds.map(input_columns="image", operations=transforms_invert) data = ds.map(operations=transforms_invert, input_columns="image")
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "invert_01_result_py.npz" filename = "invert_01_result_py.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@ -251,7 +240,7 @@ def test_invert_md5_c():
C.Invert(), C.Invert(),
F.ToTensor()] F.ToTensor()]
data = ds.map(input_columns="image", operations=transforms_invert) data = ds.map(operations=transforms_invert, input_columns="image")
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "invert_01_result_c.npz" filename = "invert_01_result_c.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)


@ -51,15 +51,15 @@ def test_linear_transformation_op(plot=False):
# First dataset # First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=transform) data1 = data1.map(operations=transform, input_columns=["image"])
# Note: if the transformation matrix is a diagonal matrix with all 1s on the diagonal, # Note: if the transformation matrix is a diagonal matrix with all 1s on the diagonal,
# the output matrix is expected to be the same as the input matrix. # the output matrix is expected to be the same as the input matrix.
data1 = data1.map(input_columns=["image"], data1 = data1.map(operations=py_vision.LinearTransformation(transformation_matrix, mean_vector),
operations=py_vision.LinearTransformation(transformation_matrix, mean_vector)) input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=transform) data2 = data2.map(operations=transform, input_columns=["image"])
image_transformed = [] image_transformed = []
image = [] image = []
@ -98,7 +98,7 @@ def test_linear_transformation_md5():
py_vision.LinearTransformation(transformation_matrix, mean_vector) py_vision.LinearTransformation(transformation_matrix, mean_vector)
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform) data1 = data1.map(operations=transform, input_columns=["image"])
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "linear_transformation_01_result.npz" filename = "linear_transformation_01_result.npz"
@ -128,7 +128,7 @@ def test_linear_transformation_exception_01():
py_vision.LinearTransformation(None, mean_vector) py_vision.LinearTransformation(None, mean_vector)
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform) data1 = data1.map(operations=transform, input_columns=["image"])
except TypeError as e: except TypeError as e:
logger.info("Got an exception in DE: {}".format(str(e))) logger.info("Got an exception in DE: {}".format(str(e)))
assert "Argument transformation_matrix with value None is not of type (<class 'numpy.ndarray'>,)" in str(e) assert "Argument transformation_matrix with value None is not of type (<class 'numpy.ndarray'>,)" in str(e)
@ -157,7 +157,7 @@ def test_linear_transformation_exception_02():
py_vision.LinearTransformation(transformation_matrix, None) py_vision.LinearTransformation(transformation_matrix, None)
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform) data1 = data1.map(operations=transform, input_columns=["image"])
except TypeError as e: except TypeError as e:
logger.info("Got an exception in DE: {}".format(str(e))) logger.info("Got an exception in DE: {}".format(str(e)))
assert "Argument mean_vector with value None is not of type (<class 'numpy.ndarray'>,)" in str(e) assert "Argument mean_vector with value None is not of type (<class 'numpy.ndarray'>,)" in str(e)
@ -187,7 +187,7 @@ def test_linear_transformation_exception_03():
py_vision.LinearTransformation(transformation_matrix, mean_vector) py_vision.LinearTransformation(transformation_matrix, mean_vector)
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform) data1 = data1.map(operations=transform, input_columns=["image"])
except ValueError as e: except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e))) logger.info("Got an exception in DE: {}".format(str(e)))
assert "square matrix" in str(e) assert "square matrix" in str(e)
@ -217,7 +217,7 @@ def test_linear_transformation_exception_04():
py_vision.LinearTransformation(transformation_matrix, mean_vector) py_vision.LinearTransformation(transformation_matrix, mean_vector)
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform) data1 = data1.map(operations=transform, input_columns=["image"])
except ValueError as e: except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e))) logger.info("Got an exception in DE: {}".format(str(e)))
assert "should match" in str(e) assert "should match" in str(e)


@ -73,6 +73,7 @@ def add_and_remove_cv_file():
os.remove("{}".format(x)) os.remove("{}".format(x))
os.remove("{}.db".format(x)) os.remove("{}.db".format(x))
@pytest.fixture @pytest.fixture
def add_and_remove_nlp_file(): def add_and_remove_nlp_file():
"""add/remove nlp file""" """add/remove nlp file"""
@ -265,6 +266,7 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file):
assert partitions(5) == 2 assert partitions(5) == 2
assert partitions(9) == 2 assert partitions(9) == 2
def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
@ -287,6 +289,7 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file):
assert partitions(5) == 1 assert partitions(5) == 1
assert partitions(9) == 1 assert partitions(9) == 1
def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
@ -309,6 +312,7 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file):
assert partitions(5) == 2 assert partitions(5) == 2
assert partitions(9) == 2 assert partitions(9) == 2
def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file): def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
@ -354,11 +358,11 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1 num_iter += 1
if num_iter <= 4: if num_iter <= 4:
epoch1.append(item["file_name"]) # save epoch 1 list epoch1.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 8: elif num_iter <= 8:
epoch2.append(item["file_name"]) # save epoch 2 list epoch2.append(item["file_name"]) # save epoch 2 list
else: else:
epoch3.append(item["file_name"]) # save epoch 3 list epoch3.append(item["file_name"]) # save epoch 3 list
assert num_iter == 12 assert num_iter == 12
assert len(epoch1) == 4 assert len(epoch1) == 4
assert len(epoch2) == 4 assert len(epoch2) == 4
@ -376,9 +380,9 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
num_readers = 4 num_readers = 4
num_shards = 3 num_shards = 3
epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result
[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 1 result [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 1 result
[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # svae partition 2 result [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # svae partition 2 result
for partition_id in range(num_shards): for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
@ -392,7 +396,7 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
# total 3 partitions, 4 results per epoch, 12 results in total # total 3 partitions, 4 results per epoch, 12 results in total
epoch_result[partition_id][int(num_iter / 4)][num_iter % 4] = item["file_name"] # save epoch result epoch_result[partition_id][int(num_iter / 4)][num_iter % 4] = item["file_name"] # save epoch result
num_iter += 1 num_iter += 1
assert num_iter == 12 assert num_iter == 12
assert epoch_result[partition_id][0] not in (epoch_result[partition_id][1], epoch_result[partition_id][2]) assert epoch_result[partition_id][0] not in (epoch_result[partition_id][1], epoch_result[partition_id][2])
@ -425,11 +429,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1 num_iter += 1
if num_iter <= 10: if num_iter <= 10:
epoch1.append(item["file_name"]) # save epoch 1 list epoch1.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 20: elif num_iter <= 20:
epoch2.append(item["file_name"]) # save epoch 2 list epoch2.append(item["file_name"]) # save epoch 2 list
else: else:
epoch3.append(item["file_name"]) # save epoch 3 list epoch3.append(item["file_name"]) # save epoch 3 list
assert num_iter == 30 assert num_iter == 30
assert len(epoch1) == 10 assert len(epoch1) == 10
assert len(epoch2) == 10 assert len(epoch2) == 10
@ -451,11 +455,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1 num_iter += 1
if num_iter <= 10: if num_iter <= 10:
epoch1_new_dataset.append(item["file_name"]) # save epoch 1 list epoch1_new_dataset.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 20: elif num_iter <= 20:
epoch2_new_dataset.append(item["file_name"]) # save epoch 2 list epoch2_new_dataset.append(item["file_name"]) # save epoch 2 list
else: else:
epoch3_new_dataset.append(item["file_name"]) # save epoch 3 list epoch3_new_dataset.append(item["file_name"]) # save epoch 3 list
assert num_iter == 30 assert num_iter == 30
assert len(epoch1_new_dataset) == 10 assert len(epoch1_new_dataset) == 10
assert len(epoch2_new_dataset) == 10 assert len(epoch2_new_dataset) == 10
@ -482,11 +486,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1 num_iter += 1
if num_iter <= 10: if num_iter <= 10:
epoch1_new_dataset2.append(item["file_name"]) # save epoch 1 list epoch1_new_dataset2.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 20: elif num_iter <= 20:
epoch2_new_dataset2.append(item["file_name"]) # save epoch 2 list epoch2_new_dataset2.append(item["file_name"]) # save epoch 2 list
else: else:
epoch3_new_dataset2.append(item["file_name"]) # save epoch 3 list epoch3_new_dataset2.append(item["file_name"]) # save epoch 3 list
assert num_iter == 30 assert num_iter == 30
assert len(epoch1_new_dataset2) == 10 assert len(epoch1_new_dataset2) == 10
assert len(epoch2_new_dataset2) == 10 assert len(epoch2_new_dataset2) == 10
@ -532,8 +536,8 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file):
data_set = data_set.map( data_set = data_set.map(
input_columns=["data"], operations=decode_op, num_parallel_workers=2) input_columns=["data"], operations=decode_op, num_parallel_workers=2)
resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR) resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR)
data_set = data_set.map(input_columns="data", data_set = data_set.map(operations=resize_op, input_columns="data",
operations=resize_op, num_parallel_workers=2) num_parallel_workers=2)
data_set = data_set.batch(2) data_set = data_set.batch(2)
data_set = data_set.repeat(2) data_set = data_set.repeat(2)
num_iter = 0 num_iter = 0
@ -563,8 +567,8 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file):
data_set = data_set.map( data_set = data_set.map(
input_columns=["data"], operations=decode_op, num_parallel_workers=2) input_columns=["data"], operations=decode_op, num_parallel_workers=2)
resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR) resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR)
data_set = data_set.map(input_columns="data", data_set = data_set.map(operations=resize_op, input_columns="data",
operations=resize_op, num_parallel_workers=2) num_parallel_workers=2)
data_set = data_set.batch(32, drop_remainder=True) data_set = data_set.batch(32, drop_remainder=True)
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1): for item in data_set.create_dict_iterator(num_epochs=1):
@ -707,6 +711,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
if os.path.exists("{}.db".format(CV2_FILE_NAME)): if os.path.exists("{}.db".format(CV2_FILE_NAME)):
os.remove("{}.db".format(CV2_FILE_NAME)) os.remove("{}.db".format(CV2_FILE_NAME))
def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0')) paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)] for x in range(FILES_NUM)]
@ -757,6 +762,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
os.remove("{}".format(x)) os.remove("{}".format(x))
os.remove("{}.db".format(x)) os.remove("{}.db".format(x))
def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
"""tutorial for cv minderdataset.""" """tutorial for cv minderdataset."""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
@ -778,6 +784,7 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
num_iter += 1 num_iter += 1
assert num_iter == 10 assert num_iter == 10
def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file): def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file):
"""tutorial for nlp minderdataset.""" """tutorial for nlp minderdataset."""
num_readers = 4 num_readers = 4
@ -1522,6 +1529,7 @@ def test_write_with_multi_bytes_and_MindDataset():
os.remove("{}".format(mindrecord_file_name)) os.remove("{}".format(mindrecord_file_name))
os.remove("{}.db".format(mindrecord_file_name)) os.remove("{}.db".format(mindrecord_file_name))
def test_write_with_multi_array_and_MindDataset(): def test_write_with_multi_array_and_MindDataset():
mindrecord_file_name = "test.mindrecord" mindrecord_file_name = "test.mindrecord"
try: try:
@ -1741,9 +1749,9 @@ def test_numpy_generic():
for idx in range(10): for idx in range(10):
row = {} row = {}
row['label1'] = np.int32(idx) row['label1'] = np.int32(idx)
row['label2'] = np.int64(idx*10) row['label2'] = np.int64(idx * 10)
row['label3'] = np.float32(idx+0.12345) row['label3'] = np.float32(idx + 0.12345)
row['label4'] = np.float64(idx+0.12345789) row['label4'] = np.float64(idx + 0.12345789)
data.append(row) data.append(row)
writer.add_schema(cv_schema_json, "img_schema") writer.add_schema(cv_schema_json, "img_schema")
writer.write_raw_data(data) writer.write_raw_data(data)
@ -1923,6 +1931,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset(
os.remove("{}".format(mindrecord_file_name)) os.remove("{}".format(mindrecord_file_name))
os.remove("{}.db".format(mindrecord_file_name)) os.remove("{}.db".format(mindrecord_file_name))
if __name__ == '__main__': if __name__ == '__main__':
test_nlp_compress_data(add_and_remove_nlp_compress_file) test_nlp_compress_data(add_and_remove_nlp_compress_file)
test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file) test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file)


@ -37,9 +37,9 @@ def test_one_hot_op():
num_classes = 2 num_classes = 2
epsilon_para = 0.1 epsilon_para = 0.1
transforms = [f.OneHotOp(num_classes=num_classes, smoothing_rate=epsilon_para),] transforms = [f.OneHotOp(num_classes=num_classes, smoothing_rate=epsilon_para)]
transform_label = f.Compose(transforms) transform_label = f.Compose(transforms)
dataset = dataset.map(input_columns=["label"], operations=transform_label) dataset = dataset.map(operations=transform_label, input_columns=["label"])
golden_label = np.ones(num_classes) * epsilon_para / num_classes golden_label = np.ones(num_classes) * epsilon_para / num_classes
golden_label[1] = 1 - epsilon_para / num_classes golden_label[1] = 1 - epsilon_para / num_classes
@ -69,9 +69,9 @@ def test_mix_up_single():
resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR) resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR)
one_hot_encode = c.OneHot(num_classes) # num_classes is input argument one_hot_encode = c.OneHot(num_classes) # num_classes is input argument
ds1 = ds1.map(input_columns=["image"], operations=decode_op) ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.map(input_columns=["image"], operations=resize_op) ds1 = ds1.map(operations=resize_op, input_columns=["image"])
ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode) ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"])
# apply batch operations # apply batch operations
batch_size = 3 batch_size = 3
@ -81,7 +81,7 @@ def test_mix_up_single():
alpha = 0.2 alpha = 0.2
transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=True) transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=True)
] ]
ds1 = ds1.map(input_columns=["image", "label"], operations=transforms) ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])
for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)):
image1 = data1["image"] image1 = data1["image"]
@ -118,9 +118,9 @@ def test_mix_up_multi():
resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR) resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR)
one_hot_encode = c.OneHot(num_classes) # num_classes is input argument one_hot_encode = c.OneHot(num_classes) # num_classes is input argument
ds1 = ds1.map(input_columns=["image"], operations=decode_op) ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.map(input_columns=["image"], operations=resize_op) ds1 = ds1.map(operations=resize_op, input_columns=["image"])
ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode) ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"])
# apply batch operations # apply batch operations
batch_size = 3 batch_size = 3
@ -130,7 +130,7 @@ def test_mix_up_multi():
alpha = 0.2 alpha = 0.2
transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False) transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False)
] ]
ds1 = ds1.map(input_columns=["image", "label"], operations=transforms) ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])
num_iter = 0 num_iter = 0
batch1_image1 = 0 batch1_image1 = 0
for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)):


@ -30,6 +30,7 @@ DATA_DIR3 = "../data/dataset/testCelebAData/"
GENERATE_GOLDEN = False GENERATE_GOLDEN = False
def test_mixup_batch_success1(plot=False): def test_mixup_batch_success1(plot=False):
""" """
Test MixUpBatch op with specified alpha parameter Test MixUpBatch op with specified alpha parameter
@ -51,10 +52,10 @@ def test_mixup_batch_success1(plot=False):
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch(2) mixup_batch_op = vision.MixUpBatch(2)
data1 = data1.batch(5, drop_remainder=True) data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
images_mixup = None images_mixup = None
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
@ -81,7 +82,7 @@ def test_mixup_batch_success2(plot=False):
# Original Images # Original Images
ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
decode_op = vision.Decode() decode_op = vision.Decode()
ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True) ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)
images_original = None images_original = None
@ -95,14 +96,14 @@ def test_mixup_batch_success2(plot=False):
data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
decode_op = vision.Decode() decode_op = vision.Decode()
data1 = data1.map(input_columns=["image"], operations=[decode_op]) data1 = data1.map(operations=[decode_op], input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch(2.0) mixup_batch_op = vision.MixUpBatch(2.0)
data1 = data1.batch(4, pad_info={}, drop_remainder=True) data1 = data1.batch(4, pad_info={}, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
images_mixup = None images_mixup = None
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
@ -142,10 +143,10 @@ def test_mixup_batch_success3(plot=False):
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch() mixup_batch_op = vision.MixUpBatch()
data1 = data1.batch(5, drop_remainder=True) data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
images_mixup = np.array([]) images_mixup = np.array([])
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
@ -173,7 +174,7 @@ def test_mixup_batch_success4(plot=False):
# Original Images # Original Images
ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False) ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
decode_op = vision.Decode() decode_op = vision.Decode()
ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
ds_original = ds_original.batch(2, drop_remainder=True) ds_original = ds_original.batch(2, drop_remainder=True)
images_original = None images_original = None
@ -187,14 +188,14 @@ def test_mixup_batch_success4(plot=False):
data1 = ds.CelebADataset(DATA_DIR3, shuffle=False) data1 = ds.CelebADataset(DATA_DIR3, shuffle=False)
decode_op = vision.Decode() decode_op = vision.Decode()
data1 = data1.map(input_columns=["image"], operations=[decode_op]) data1 = data1.map(operations=[decode_op], input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=100) one_hot_op = data_trans.OneHot(num_classes=100)
data1 = data1.map(input_columns=["attr"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["attr"])
mixup_batch_op = vision.MixUpBatch() mixup_batch_op = vision.MixUpBatch()
data1 = data1.batch(2, drop_remainder=True) data1 = data1.batch(2, drop_remainder=True)
data1 = data1.map(input_columns=["image", "attr"], operations=mixup_batch_op) data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "attr"])
images_mixup = np.array([]) images_mixup = np.array([])
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
@ -224,10 +225,10 @@ def test_mixup_batch_md5():
data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data = data.map(input_columns=["label"], operations=one_hot_op) data = data.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch() mixup_batch_op = vision.MixUpBatch()
data = data.batch(5, drop_remainder=True) data = data.batch(5, drop_remainder=True)
data = data.map(input_columns=["image", "label"], operations=mixup_batch_op) data = data.map(operations=mixup_batch_op, input_columns=["image", "label"])
filename = "mixup_batch_c_result.npz" filename = "mixup_batch_c_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
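The successful MixUpBatch cases above all share the same pipeline shape. As a sketch (CIFAR-10 test data path and import aliases are assumptions): labels are one-hot encoded first, the data is batched, and only then is MixUpBatch mapped over both columns using the new keyword order.

import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as data_trans
import mindspore.dataset.vision.c_transforms as vision

DATA_DIR = "../data/dataset/testCifar10Data"  # assumed test data location
data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
# MixUpBatch needs one-hot labels and must run on already-batched data.
data = data.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
data = data.batch(5, drop_remainder=True)
data = data.map(operations=vision.MixUpBatch(), input_columns=["image", "label"])
for image, label in data.create_tuple_iterator(num_epochs=1):
    print(image.shape, label.shape)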
@ -259,10 +260,10 @@ def test_mixup_batch_fail1():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch(0.1) mixup_batch_op = vision.MixUpBatch(0.1)
with pytest.raises(RuntimeError) as error: with pytest.raises(RuntimeError) as error:
data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
if idx == 0: if idx == 0:
images_mixup = image images_mixup = image
@ -294,7 +295,7 @@ def test_mixup_batch_fail2():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error: with pytest.raises(ValueError) as error:
vision.MixUpBatch(-1) vision.MixUpBatch(-1)
error_message = "Input is not within the required interval" error_message = "Input is not within the required interval"
@ -322,10 +323,10 @@ def test_mixup_batch_fail3():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch() mixup_batch_op = vision.MixUpBatch()
data1 = data1.batch(5, drop_remainder=True) data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image"], operations=mixup_batch_op) data1 = data1.map(operations=mixup_batch_op, input_columns=["image"])
with pytest.raises(RuntimeError) as error: with pytest.raises(RuntimeError) as error:
images_mixup = np.array([]) images_mixup = np.array([])
@ -337,6 +338,7 @@ def test_mixup_batch_fail3():
error_message = "Both images and labels columns are required" error_message = "Both images and labels columns are required"
assert error_message in str(error.value) assert error_message in str(error.value)
def test_mixup_batch_fail4(): def test_mixup_batch_fail4():
""" """
Test MixUpBatch Fail 2 Test MixUpBatch Fail 2
@ -359,7 +361,7 @@ def test_mixup_batch_fail4():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10) one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op) data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error: with pytest.raises(ValueError) as error:
vision.MixUpBatch(0.0) vision.MixUpBatch(0.0)
error_message = "Input is not within the required interval" error_message = "Input is not within the required interval"
@ -389,7 +391,7 @@ def test_mixup_batch_fail5():
mixup_batch_op = vision.MixUpBatch() mixup_batch_op = vision.MixUpBatch()
data1 = data1.batch(5, drop_remainder=True) data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
with pytest.raises(RuntimeError) as error: with pytest.raises(RuntimeError) as error:
images_mixup = np.array([]) images_mixup = np.array([])


@ -39,7 +39,7 @@ def test_multiple_ngrams():
yield (np.array(line.split(" "), dtype='S'),) yield (np.array(line.split(" "), dtype='S'),)
dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"]) dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"])
dataset = dataset.map(input_columns=["text"], operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " ")) dataset = dataset.map(operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " "), input_columns="text")
i = 0 i = 0
for data in dataset.create_dict_iterator(num_epochs=1): for data in dataset.create_dict_iterator(num_epochs=1):
@ -61,7 +61,7 @@ def test_simple_ngram():
yield (np.array(line.split(" "), dtype='S'),) yield (np.array(line.split(" "), dtype='S'),)
dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"]) dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"])
dataset = dataset.map(input_columns=["text"], operations=text.Ngram(3, separator=" ")) dataset = dataset.map(operations=text.Ngram(3, separator=" "), input_columns="text")
i = 0 i = 0
for data in dataset.create_dict_iterator(num_epochs=1): for data in dataset.create_dict_iterator(num_epochs=1):
@ -78,7 +78,7 @@ def test_corner_cases():
try: try:
dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"]) dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"])
dataset = dataset.map(input_columns=["text"], operations=text.Ngram(n, l_pad, r_pad, separator=sep)) dataset = dataset.map(operations=text.Ngram(n, l_pad, r_pad, separator=sep), input_columns=["text"])
for data in dataset.create_dict_iterator(num_epochs=1): for data in dataset.create_dict_iterator(num_epochs=1):
return [d.decode("utf8") for d in data["text"]] return [d.decode("utf8") for d in data["text"]]
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:


@ -32,10 +32,10 @@ def test_on_tokenized_line():
for line in f: for line in f:
word = line.split(',')[0] word = line.split(',')[0]
jieba_op.add_word(word) jieba_op.add_word(word)
data = data.map(input_columns=["text"], operations=jieba_op) data = data.map(operations=jieba_op, input_columns=["text"])
vocab = text.Vocab.from_file(VOCAB_FILE, ",", special_tokens=["<pad>", "<unk>"]) vocab = text.Vocab.from_file(VOCAB_FILE, ",", special_tokens=["<pad>", "<unk>"])
lookup = text.Lookup(vocab, "<unk>") lookup = text.Lookup(vocab, "<unk>")
data = data.map(input_columns=["text"], operations=lookup) data = data.map(operations=lookup, input_columns=["text"])
res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14], res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14],
[11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32) [11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32)
for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): for i, d in enumerate(data.create_dict_iterator(num_epochs=1)):
@ -50,10 +50,10 @@ def test_on_tokenized_line_with_no_special_tokens():
word = line.split(',')[0] word = line.split(',')[0]
jieba_op.add_word(word) jieba_op.add_word(word)
data = data.map(input_columns=["text"], operations=jieba_op) data = data.map(operations=jieba_op, input_columns=["text"])
vocab = text.Vocab.from_file(VOCAB_FILE, ",") vocab = text.Vocab.from_file(VOCAB_FILE, ",")
lookup = text.Lookup(vocab, "not") lookup = text.Lookup(vocab, "not")
data = data.map(input_columns=["text"], operations=lookup) data = data.map(operations=lookup, input_columns=["text"])
res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12], res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12],
[9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32) [9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32)
for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): for i, d in enumerate(data.create_dict_iterator(num_epochs=1)):


@ -51,8 +51,8 @@ def util_test_normalize(mean, std, op_type):
normalize_op = c_vision.Normalize(mean, std) normalize_op = c_vision.Normalize(mean, std)
# Generate dataset # Generate dataset
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(input_columns=["image"], operations=decode_op) data = data.map(operations=decode_op, input_columns=["image"])
data = data.map(input_columns=["image"], operations=normalize_op) data = data.map(operations=normalize_op, input_columns=["image"])
elif op_type == "python": elif op_type == "python":
# define map operations # define map operations
transforms = [ transforms = [
@ -63,7 +63,7 @@ def util_test_normalize(mean, std, op_type):
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
# Generate dataset # Generate dataset
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(input_columns=["image"], operations=transform) data = data.map(operations=transform, input_columns=["image"])
else: else:
raise ValueError("Wrong parameter value") raise ValueError("Wrong parameter value")
return data return data
@ -82,7 +82,7 @@ def util_test_normalize_grayscale(num_output_channels, mean, std):
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
# Generate dataset # Generate dataset
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(input_columns=["image"], operations=transform) data = data.map(operations=transform, input_columns=["image"])
return data return data
@ -99,12 +99,12 @@ def test_normalize_op_c(plot=False):
# First dataset # First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=normalize_op) data1 = data1.map(operations=normalize_op, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op) data2 = data2.map(operations=decode_op, input_columns=["image"])
num_iter = 0 num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
@ -136,12 +136,12 @@ def test_normalize_op_py(plot=False):
# First dataset # First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=transform) data1 = data1.map(operations=transform, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=normalize_op) data1 = data1.map(operations=normalize_op, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=transform) data2 = data2.map(operations=transform, input_columns=["image"])
num_iter = 0 num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
@ -169,7 +169,7 @@ def test_decode_op():
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
# apply map operations on images # apply map operations on images
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
num_iter = 0 num_iter = 0
for item in data1.create_dict_iterator(num_epochs=1): for item in data1.create_dict_iterator(num_epochs=1):
@ -192,7 +192,7 @@ def test_decode_normalize_op():
normalize_op = c_vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0]) normalize_op = c_vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0])
# apply map operations on images # apply map operations on images
data1 = data1.map(input_columns=["image"], operations=[decode_op, normalize_op]) data1 = data1.map(operations=[decode_op, normalize_op], input_columns=["image"])
num_iter = 0 num_iter = 0
for item in data1.create_dict_iterator(num_epochs=1): for item in data1.create_dict_iterator(num_epochs=1):


@ -47,13 +47,14 @@ def test_one_hot():
# First dataset # First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=depth) one_hot_op = data_trans.OneHot(num_classes=depth)
data1 = data1.map(input_columns=["label"], operations=one_hot_op, column_order=["label"]) data1 = data1.map(operations=one_hot_op, input_columns=["label"], column_order=["label"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["label"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["label"], shuffle=False)
assert dataset_equal_with_function(data1, data2, 0, one_hot, depth) assert dataset_equal_with_function(data1, data2, 0, one_hot, depth)
def test_one_hot_post_aug(): def test_one_hot_post_aug():
""" """
Test One Hot Encoding after Multiple Data Augmentation Operators Test One Hot Encoding after Multiple Data Augmentation Operators
@ -72,14 +73,14 @@ def test_one_hot_post_aug():
resize_op = c_vision.Resize((resize_height, resize_width)) resize_op = c_vision.Resize((resize_height, resize_width))
# Apply map operations on images # Apply map operations on images
data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=rescale_op) data1 = data1.map(operations=rescale_op, input_columns=["image"])
data1 = data1.map(input_columns=["image"], operations=resize_op) data1 = data1.map(operations=resize_op, input_columns=["image"])
# Apply one-hot encoding on labels # Apply one-hot encoding on labels
depth = 4 depth = 4
one_hot_encode = data_trans.OneHot(depth) one_hot_encode = data_trans.OneHot(depth)
data1 = data1.map(input_columns=["label"], operations=one_hot_encode) data1 = data1.map(operations=one_hot_encode, input_columns=["label"])
# Apply datasets ops # Apply datasets ops
buffer_size = 100 buffer_size = 100


@ -16,6 +16,7 @@ import numpy as np
import mindspore.dataset as ds import mindspore.dataset as ds
# tests the construction of multiple ops from a single dataset. # tests the construction of multiple ops from a single dataset.
# map dataset with columns order arguments should produce a ProjectOp over MapOp # map dataset with columns order arguments should produce a ProjectOp over MapOp
# This test does not utilize the compiling passes at this time. # This test does not utilize the compiling passes at this time.
@ -27,12 +28,13 @@ def test_map_reorder0():
# Generator -> Map # Generator -> Map
data0 = ds.GeneratorDataset(generator_mc, ["col0", "col1"]) data0 = ds.GeneratorDataset(generator_mc, ["col0", "col1"])
data0 = data0.map(input_columns="col0", output_columns="out", column_order=["col1", "out"], data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out",
operations=(lambda x: x)) column_order=["col1", "out"])
for item in data0.create_tuple_iterator(num_epochs=1): # each data is a dictionary for item in data0.create_tuple_iterator(num_epochs=1): # each data is a dictionary
assert item == [np.array(1), np.array(0)] assert item == [np.array(1), np.array(0)]
# tests the construction of multiple ops from a single dataset. # tests the construction of multiple ops from a single dataset.
# map dataset with columns order arguments should produce a ProjectOp over MapOp # map dataset with columns order arguments should produce a ProjectOp over MapOp
# This test does not utilize the compiling passes at this time. # This test does not utilize the compiling passes at this time.
@ -43,20 +45,20 @@ def test_map_reorder1():
# Three map and zip # Three map and zip
data0 = ds.GeneratorDataset(generator_mc, ["a0", "a1", "a2"]) data0 = ds.GeneratorDataset(generator_mc, ["a0", "a1", "a2"])
data0 = data0.map(input_columns="a0", column_order=["a2", "a1", "a0"], operations=(lambda x: x)) data0 = data0.map(operations=(lambda x: x), input_columns="a0", column_order=["a2", "a1", "a0"])
data1 = ds.GeneratorDataset(generator_mc, ["b0", "b1", "b2"]) data1 = ds.GeneratorDataset(generator_mc, ["b0", "b1", "b2"])
data1 = data1.map(input_columns="b0", column_order=["b1", "b2", "b0"], operations=(lambda x: x)) data1 = data1.map(operations=(lambda x: x), input_columns="b0", column_order=["b1", "b2", "b0"])
data2 = ds.zip((data0, data1)) data2 = ds.zip((data0, data1))
data2 = data2.map(input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"], operations=(lambda x: x)) data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"])
for item in data2.create_tuple_iterator(num_epochs=1): for item in data2.create_tuple_iterator(num_epochs=1):
assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)] assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)]
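The comment above states that a map with a column_order argument should produce a ProjectOp over a MapOp. The sketch below (with a hypothetical two-column generator, not the generator_mc used in these tests) illustrates that equivalence by comparing map with column_order against map followed by an explicit project.

import numpy as np
import mindspore.dataset as ds

def gen_two_cols():
    # Hypothetical generator: three rows, two integer columns.
    for i in range(3):
        yield (np.array(i), np.array(i * 10))

# map() with column_order: the projection is folded into the map call.
data_a = ds.GeneratorDataset(gen_two_cols, ["col0", "col1"])
data_a = data_a.map(operations=(lambda x: x + 1), input_columns="col0",
                    column_order=["col1", "col0"])

# Equivalent pipeline written as a map followed by project().
data_b = ds.GeneratorDataset(gen_two_cols, ["col0", "col1"])
data_b = data_b.map(operations=(lambda x: x + 1), input_columns="col0")
data_b = data_b.project(columns=["col1", "col0"])

for row_a, row_b in zip(data_a.create_tuple_iterator(num_epochs=1),
                        data_b.create_tuple_iterator(num_epochs=1)):
    assert [int(x) for x in row_a] == [int(x) for x in row_b]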
# tests the construction of multiple ops from a single dataset. # tests the construction of multiple ops from a single dataset.
# TFRecordDataset with global shuffle should produce a ShuffleOp over TfReaderOp. # TFRecordDataset with global shuffle should produce a ShuffleOp over TfReaderOp.
# This test does not utilize the compiling passes at this time. # This test does not utilize the compiling passes at this time.
def test_shuffle(): def test_shuffle():
FILES = ["../data/dataset/testTFTestAllTypes/test.data"] FILES = ["../data/dataset/testTFTestAllTypes/test.data"]
SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json" SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json"


@ -44,7 +44,7 @@ def test_pad_op():
pad_op, pad_op,
] ]
data1 = data1.map(input_columns=["image"], operations=ctrans) data1 = data1.map(operations=ctrans, input_columns=["image"])
# Second dataset # Second dataset
transforms = [ transforms = [
@ -54,7 +54,7 @@ def test_pad_op():
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=transform) data2 = data2.map(operations=transform, input_columns=["image"])
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
c_image = item1["image"] c_image = item1["image"]
@ -88,11 +88,11 @@ def test_pad_grayscale():
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=transform) data1 = data1.map(operations=transform, input_columns=["image"])
# if input is grayscale, the output dimensions should be single channel # if input is grayscale, the output dimensions should be single channel
pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20)) pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20))
data1 = data1.map(input_columns=["image"], operations=pad_gray) data1 = data1.map(operations=pad_gray, input_columns=["image"])
dataset_shape_1 = [] dataset_shape_1 = []
for item1 in data1.create_dict_iterator(num_epochs=1): for item1 in data1.create_dict_iterator(num_epochs=1):
c_image = item1["image"] c_image = item1["image"]
@ -106,7 +106,7 @@ def test_pad_grayscale():
ctrans = [decode_op, pad_gray] ctrans = [decode_op, pad_gray]
dataset_shape_2 = [] dataset_shape_2 = []
data2 = data2.map(input_columns=["image"], operations=ctrans) data2 = data2.map(operations=ctrans, input_columns=["image"])
for item2 in data2.create_dict_iterator(num_epochs=1): for item2 in data2.create_dict_iterator(num_epochs=1):
c_image = item2["image"] c_image = item2["image"]
@ -132,7 +132,7 @@ def test_pad_md5():
pad_op, pad_op,
] ]
data1 = data1.map(input_columns=["image"], operations=ctrans) data1 = data1.map(operations=ctrans, input_columns=["image"])
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -142,7 +142,7 @@ def test_pad_md5():
py_vision.ToTensor(), py_vision.ToTensor(),
] ]
transform = mindspore.dataset.transforms.py_transforms.Compose(pytrans) transform = mindspore.dataset.transforms.py_transforms.Compose(pytrans)
data2 = data2.map(input_columns=["image"], operations=transform) data2 = data2.map(operations=transform, input_columns=["image"])
# Compare with expected md5 from images # Compare with expected md5 from images
filename1 = "pad_01_c_result.npz" filename1 = "pad_01_c_result.npz"
save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)


@ -127,7 +127,7 @@ def batch_padding_performance_1d():
cifar10_dir = "../data/dataset/testCifar10Data" cifar10_dir = "../data/dataset/testCifar10Data"
data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3] data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3]
data1 = data1.repeat(24) data1 = data1.repeat(24)
data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1))) data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image")
pad_info = {"image": ([3888], 0)} # 3888 =36*36*3 pad_info = {"image": ([3888], 0)} # 3888 =36*36*3
# pad_info = None # pad_info = None
data1 = data1.batch(batch_size=24, drop_remainder=True, pad_info=pad_info) data1 = data1.batch(batch_size=24, drop_remainder=True, pad_info=pad_info)
@ -144,7 +144,7 @@ def batch_pyfunc_padding_3d():
data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3] data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3]
data1 = data1.repeat(24) data1 = data1.repeat(24)
# pad_info = {"image": ([36, 36, 3], 0)} # pad_info = {"image": ([36, 36, 3], 0)}
data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, ((0, 4), (0, 4), (0, 0)))), data1 = data1.map(operations=(lambda x: np.pad(x, ((0, 4), (0, 4), (0, 0)))), input_columns="image",
python_multiprocessing=False) python_multiprocessing=False)
data1 = data1.batch(batch_size=24, drop_remainder=True) data1 = data1.batch(batch_size=24, drop_remainder=True)
start_time = time.time() start_time = time.time()
@ -159,8 +159,8 @@ def batch_pyfunc_padding_1d():
cifar10_dir = "../data/dataset/testCifar10Data" cifar10_dir = "../data/dataset/testCifar10Data"
data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3] data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3]
data1 = data1.repeat(24) data1 = data1.repeat(24)
data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1))) data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image")
data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816))), python_multiprocessing=False) data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image", python_multiprocessing=False)
data1 = data1.batch(batch_size=24, drop_remainder=True) data1 = data1.batch(batch_size=24, drop_remainder=True)
start_time = time.time() start_time = time.time()
num_batches = 0 num_batches = 0
@ -176,8 +176,8 @@ def test_pad_via_map():
def pad_map_config(): def pad_map_config():
data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000) # shape = [32,32,3] data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000) # shape = [32,32,3]
data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1))) # reshape to 1d data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d
data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816)))) data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image")
data1 = data1.batch(batch_size=25, drop_remainder=True) data1 = data1.batch(batch_size=25, drop_remainder=True)
res = [] res = []
for data in data1.create_dict_iterator(num_epochs=1): for data in data1.create_dict_iterator(num_epochs=1):
@ -186,7 +186,7 @@ def test_pad_via_map():
def pad_batch_config(): def pad_batch_config():
data2 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000) # shape = [32,32,3] data2 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000) # shape = [32,32,3]
data2 = data2.map(input_columns="image", operations=(lambda x: x.reshape(-1))) # reshape to 1d data2 = data2.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d
data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)}) data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)})
res = [] res = []
for data in data2.create_dict_iterator(num_epochs=1): for data in data2.create_dict_iterator(num_epochs=1):

Some files were not shown because too many files have changed in this diff.