change map calls

nhussain 2020-09-09 13:23:02 -04:00
parent 77e05e32a4
commit 92e99ff224
154 changed files with 1355 additions and 1307 deletions
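
Most hunks below follow the same mechanical pattern: the `operations` keyword is moved to the front of each `dataset.map(...)` call, ahead of `input_columns`, `output_columns`, and `column_order`, with no change in behavior; a few hunks are incidental lint cleanups (operator spacing, comment spacing, blank lines between functions). A minimal before/after sketch of the map-call change (the CIFAR-10 dataset path and the TypeCast op are illustrative placeholders, not lines from any particular file in this commit):

import mindspore.common.dtype as mstype
import mindspore.dataset as de
import mindspore.dataset.transforms.c_transforms as C2

# placeholder input pipeline; only the map keyword order matters here
ds = de.Cifar10Dataset("/path/to/cifar-10-batches-bin")
type_cast_op = C2.TypeCast(mstype.int32)

# before this commit: operations passed last
ds = ds.map(input_columns="label", operations=type_cast_op)

# after this commit: operations passed first, remaining keywords unchanged
ds = ds.map(operations=type_cast_op, input_columns="label")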

View File

@ -282,8 +282,9 @@ class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp):
>>> # If with_offsets=True, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeCharTokenizer(True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_with_offsets
@ -313,8 +314,9 @@ class WordpieceTokenizer(cde.WordpieceTokenizerOp):
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]',
>>> max_bytes_per_token=100, with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op,
>>> input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_wordpiece_tokenizer
@ -378,8 +380,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WhitespaceTokenizer(True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_with_offsets
@ -404,8 +407,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_unicode_script_tokenizer
@ -497,8 +501,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_regex_tokenizer
@ -540,8 +545,9 @@ if platform.system().lower() != 'windows':
>>> normalization_form=NormalizeForm.NONE,
>>> preserve_unused_token=True,
>>> with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_basic_tokenizer
@ -593,8 +599,9 @@ if platform.system().lower() != 'windows':
>>> unknown_token='[UNK]', lower_case=False, keep_whitespace=False,
>>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
>>> with_offsets=True)
>>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
>>> output_columns=["token", "offsets_start", "offsets_limit"],
>>> column_order=["token", "offsets_start", "offsets_limit"])
"""
@check_bert_tokenizer

View File

@ -39,14 +39,14 @@ def create_dataset_cifar10(data_path, batch_size=32, repeat_size=1, status="trai
random_horizontal_op = CV.RandomHorizontalFlip()
channel_swap_op = CV.HWC2CHW()
typecast_op = C.TypeCast(mstype.int32)
cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op)
cifar_ds = cifar_ds.map(operations=typecast_op, input_columns="label")
if status == "train":
cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op)
cifar_ds = cifar_ds.map(operations=random_crop_op, input_columns="image")
cifar_ds = cifar_ds.map(operations=random_horizontal_op, input_columns="image")
cifar_ds = cifar_ds.map(operations=resize_op, input_columns="image")
cifar_ds = cifar_ds.map(operations=rescale_op, input_columns="image")
cifar_ds = cifar_ds.map(operations=normalize_op, input_columns="image")
cifar_ds = cifar_ds.map(operations=channel_swap_op, input_columns="image")
cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)

View File

@ -84,8 +84,9 @@ class SegDataset:
shuffle=True, num_parallel_workers=self.num_readers,
num_shards=self.shard_num, shard_id=self.shard_id)
transforms_list = self.preprocess_
data_set = data_set.map(input_columns=["data", "label"], output_columns=["data", "label"],
operations=transforms_list, num_parallel_workers=self.num_parallel_calls)
data_set = data_set.map(operations=transforms_list, input_columns=["data", "label"],
output_columns=["data", "label"],
num_parallel_workers=self.num_parallel_calls)
data_set = data_set.shuffle(buffer_size=self.batch_size * 10)
data_set = data_set.batch(self.batch_size, drop_remainder=True)
data_set = data_set.repeat(repeat)

View File

@ -74,8 +74,10 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
ious = ious.T
return ious
class PhotoMetricDistortion:
"""Photo Metric Distortion"""
def __init__(self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
@ -134,8 +136,10 @@ class PhotoMetricDistortion:
return img, boxes, labels
class Expand:
"""expand image"""
def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
if to_rgb:
self.mean = mean[::-1]
@ -158,12 +162,13 @@ class Expand:
boxes += np.tile((left, top), 2)
return img, boxes, labels
def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""rescale operation for image"""
img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
if img_data.shape[0] > config.img_height:
img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_width), return_scale=True)
scale_factor = scale_factor*scale_factor2
scale_factor = scale_factor * scale_factor2
img_shape = np.append(img_shape, scale_factor)
img_shape = np.asarray(img_shape, dtype=np.float32)
gt_bboxes = gt_bboxes * scale_factor
@ -171,7 +176,8 @@ def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""resize operation for image"""
@ -188,7 +194,8 @@ def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
"""resize operation for image of eval"""
@ -205,7 +212,8 @@ def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""impad operation for image"""
@ -213,12 +221,14 @@ def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num):
img_data = img_data.astype(np.float32)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""imnormalize operation for image"""
img_data = mmcv.imnormalize(img, [123.675, 116.28, 103.53], [58.395, 57.12, 57.375], True)
img_data = img_data.astype(np.float32)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""flip operation for image"""
img_data = img
@ -229,7 +239,8 @@ def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1
flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1
return (img_data, img_shape, flipped, gt_label, gt_num)
return (img_data, img_shape, flipped, gt_label, gt_num)
def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num):
"""flipped generation"""
@ -240,11 +251,13 @@ def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num):
flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1
flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1
return (img_data, img_shape, flipped, gt_label, gt_num)
return (img_data, img_shape, flipped, gt_label, gt_num)
def image_bgr_rgb(img, img_shape, gt_bboxes, gt_label, gt_num):
img_data = img[:, :, ::-1]
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""transpose operation for image"""
@ -257,6 +270,7 @@ def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""photo crop operation for image"""
random_photo = PhotoMetricDistortion()
@ -264,6 +278,7 @@ def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""expand operation for image"""
expand = Expand()
@ -271,8 +286,10 @@ def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):
return (img, img_shape, gt_bboxes, gt_label, gt_num)
def preprocess_fn(image, box, is_training):
"""Preprocess function for dataset."""
def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert):
image_shape = image_shape[:2]
input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert
@ -325,6 +342,7 @@ def preprocess_fn(image, box, is_training):
return _data_aug(image, box, is_training)
def create_coco_label(is_training):
"""Get image path and annotation from COCO."""
from pycocotools.coco import COCO
@ -334,7 +352,7 @@ def create_coco_label(is_training):
if is_training:
data_type = config.train_data_type
#Classes need to train or test.
# Classes need to train or test.
train_cls = config.coco_classes
train_cls_dict = {}
for i, cls in enumerate(train_cls):
@ -375,6 +393,7 @@ def create_coco_label(is_training):
return image_files, image_anno_dict
def anno_parser(annos_str):
"""Parse annotation from string to list."""
annos = []
@ -383,6 +402,7 @@ def anno_parser(annos_str):
annos.append(anno)
return annos
def filter_valid_data(image_dir, anno_path):
"""Filter valid image file, which both in image_dir and anno_path."""
image_files = []
@ -404,6 +424,7 @@ def filter_valid_data(image_dir, anno_path):
image_files.append(image_path)
return image_files, image_anno_dict
def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8):
"""Create MindRecord file."""
mindrecord_dir = config.mindrecord_dir
@ -435,7 +456,7 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank_id,
num_parallel_workers=1, shuffle=is_training)
decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode, num_parallel_workers=1)
ds = ds.map(operations=decode, input_columns=["image"], num_parallel_workers=1)
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))
hwc_to_chw = C.HWC2CHW()
@ -447,38 +468,39 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
type_cast3 = CC.TypeCast(mstype.bool_)
if is_training:
ds = ds.map(input_columns=["image", "annotation"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers)
num_parallel_workers=num_parallel_workers)
flip = (np.random.rand() < config.flip_ratio)
if flip:
ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0, horizontally_op],
ds = ds.map(operations=[normalize_op, type_cast0, horizontally_op], input_columns=["image"],
num_parallel_workers=12)
ds = ds.map(input_columns=["image", "image_shape", "box", "label", "valid_num"],
operations=flipped_generation, num_parallel_workers=num_parallel_workers)
ds = ds.map(operations=flipped_generation,
input_columns=["image", "image_shape", "box", "label", "valid_num"],
num_parallel_workers=num_parallel_workers)
else:
ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0],
ds = ds.map(operations=[normalize_op, type_cast0], input_columns=["image"],
num_parallel_workers=12)
ds = ds.map(input_columns=["image"], operations=[hwc_to_chw, type_cast1],
ds = ds.map(operations=[hwc_to_chw, type_cast1], input_columns=["image"],
num_parallel_workers=12)
else:
ds = ds.map(input_columns=["image", "annotation"],
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
operations=compose_map_func,
num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=[normalize_op, hwc_to_chw, type_cast1],
ds = ds.map(operations=[normalize_op, hwc_to_chw, type_cast1], input_columns=["image"],
num_parallel_workers=24)
# transpose_column from python to c
ds = ds.map(input_columns=["image_shape"], operations=[type_cast1])
ds = ds.map(input_columns=["box"], operations=[type_cast1])
ds = ds.map(input_columns=["label"], operations=[type_cast2])
ds = ds.map(input_columns=["valid_num"], operations=[type_cast3])
ds = ds.map(operations=[type_cast1], input_columns=["image_shape"])
ds = ds.map(operations=[type_cast1], input_columns=["box"])
ds = ds.map(operations=[type_cast2], input_columns=["label"])
ds = ds.map(operations=[type_cast3], input_columns=["valid_num"])
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)

View File

@ -55,8 +55,8 @@ def create_dataset_cifar10(data_home, repeat_num=1, training=True):
c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]
# apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op)
data_set = data_set.map(input_columns="image", operations=c_trans)
data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(operations=c_trans, input_columns="image")
# apply batch operations
data_set = data_set.batch(batch_size=cifar_cfg.batch_size, drop_remainder=True)

View File

@ -60,8 +60,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
C.HWC2CHW()
]
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums)
# apply batch operations
ds = ds.batch(cfg.batch_size, drop_remainder=True)
# apply dataset repeat operation

View File

@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
buffer_size = 10000

View File

@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
buffer_size = 10000

View File

@ -498,24 +498,24 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id
num_parallel_workers=4, shuffle=is_training)
decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode)
ds = ds.map(operations=decode, input_columns=["image"])
compose_map_func = (lambda image, annotation, mask, mask_shape:
preprocess_fn(image, annotation, mask, mask_shape, is_training))
if is_training:
ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
operations=compose_map_func,
python_multiprocessing=False,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
else:
ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
operations=compose_map_func,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)

View File

@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, config, repeat_num=1):
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply shuffle operations
ds = ds.shuffle(buffer_size=buffer_size)

View File

@ -89,8 +89,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=16)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=16)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@ -152,7 +152,7 @@ def create_dataset_py(dataset_path, do_train, config, device_target, repeat_num=
compose = P2.Compose(trans)
ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

View File

@ -70,8 +70,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply shuffle operations
ds = ds.shuffle(buffer_size=buffer_size)

View File

@ -61,8 +61,8 @@ def create_dataset(dataset_path, config, do_train, repeat_num=1):
C.HWC2CHW()
]
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=config.work_nums)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=config.work_nums)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=config.work_nums)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=config.work_nums)
# apply batch operations
ds = ds.batch(config.batch_size, drop_remainder=True)
# apply dataset repeat operation

View File

@ -22,6 +22,7 @@ import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2
from mindspore.communication.management import init, get_rank, get_group_size
def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
"""
create a train or evaluate cifar10 dataset for resnet50
@ -65,8 +66,8 @@ def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target=
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@ -126,8 +127,8 @@ def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target=
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@ -165,7 +166,7 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=
if do_train:
trans = [
C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
C.RandomHorizontalFlip(rank_id/ (rank_id +1)),
C.RandomHorizontalFlip(rank_id / (rank_id + 1)),
C.Normalize(mean=mean, std=std),
C.HWC2CHW()
]
@ -180,8 +181,8 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@ -190,6 +191,7 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=
return ds
def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
"""
create a train or eval imagenet2012 dataset for se-resnet50
@ -233,8 +235,8 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target=
]
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=12, operations=trans)
ds = ds.map(input_columns="label", num_parallel_workers=12, operations=type_cast_op)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=12)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@ -244,6 +246,7 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target=
return ds
def _get_rank_info():
"""
get rank size and rank id

View File

@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@ -147,7 +147,7 @@ def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, targe
trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]
compose = P2.Compose(trans)
ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

View File

@ -75,8 +75,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=num_parallels, operations=trans)
ds = ds.map(input_columns="label", num_parallel_workers=num_parallels, operations=type_cast_op)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=num_parallels)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallels)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

View File

@ -25,6 +25,7 @@ from src.utils.sampler import DistributedSampler
ImageFile.LOAD_TRUNCATED_IMAGES = True
class TxtDataset():
"""
create txt dataset.
@ -33,6 +34,7 @@ class TxtDataset():
Returns:
de_dataset.
"""
def __init__(self, root, txt_name):
super(TxtDataset, self).__init__()
self.imgs = []
@ -142,10 +144,10 @@ def classification_dataset(data_dir, image_size, per_batch_size, max_epoch, rank
sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)
de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=num_parallel_workers,
operations=transform_img)
de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=num_parallel_workers,
operations=transform_label)
de_dataset = de_dataset.map(operations=transform_img, input_columns="image",
num_parallel_workers=num_parallel_workers)
de_dataset = de_dataset.map(operations=transform_label, input_columns="label",
num_parallel_workers=num_parallel_workers)
columns_to_project = ["image", "label"]
de_dataset = de_dataset.project(columns=columns_to_project)

View File

@ -72,8 +72,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
]
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums)
# apply batch operations
ds = ds.batch(cfg.batch_size, drop_remainder=True)

View File

@ -34,13 +34,15 @@ def _rand(a=0., b=1.):
"""Generate random."""
return np.random.rand() * (b - a) + a
def get_imageId_from_fileName(filename):
"""Get imageID from fileName"""
try:
filename = os.path.splitext(filename)[0]
return int(filename)
except:
raise NotImplementedError('Filename %s is supposed to be an integer.'%(filename))
raise NotImplementedError('Filename %s is supposed to be an integer.' % (filename))
def random_sample_crop(image, boxes):
"""Random Crop the image and boxes"""
@ -64,7 +66,7 @@ def random_sample_crop(image, boxes):
left = _rand() * (width - w)
top = _rand() * (height - h)
rect = np.array([int(top), int(left), int(top+h), int(left+w)])
rect = np.array([int(top), int(left), int(top + h), int(left + w)])
overlap = jaccard_numpy(boxes, rect)
# dropout some boxes
@ -103,13 +105,14 @@ def random_sample_crop(image, boxes):
def preprocess_fn(img_id, image, box, is_training):
"""Preprocess function for dataset."""
def _infer_data(image, input_shape):
img_h, img_w, _ = image.shape
input_h, input_w = input_shape
image = cv2.resize(image, (input_w, input_h))
#When the channels of image is 1
# When the channels of image is 1
if len(image.shape) == 2:
image = np.expand_dims(image, axis=-1)
image = np.concatenate([image, image, image], axis=-1)
@ -150,6 +153,7 @@ def preprocess_fn(img_id, image, box, is_training):
box, label, num_match = ssd_bboxes_encode(box)
return image, box, label, num_match
return _data_aug(image, box, is_training, image_size=config.img_shape)
@ -158,7 +162,7 @@ def create_voc_label(is_training):
voc_dir = config.voc_dir
cls_map = {name: i for i, name in enumerate(config.coco_classes)}
sub_dir = 'train' if is_training else 'eval'
#sub_dir = 'train'
# sub_dir = 'train'
voc_dir = os.path.join(voc_dir, sub_dir)
if not os.path.isdir(voc_dir):
raise ValueError(f'Cannot find {sub_dir} dataset path.')
@ -244,6 +248,7 @@ def create_voc_label(is_training):
return images, image_files_dict, image_anno_dict
def create_coco_label(is_training):
"""Get image path and annotation from COCO."""
from pycocotools.coco import COCO
@ -253,7 +258,7 @@ def create_coco_label(is_training):
if is_training:
data_type = config.train_data_type
#Classes need to train or test.
# Classes need to train or test.
train_cls = config.coco_classes
train_cls_dict = {}
for i, cls in enumerate(train_cls):
@ -391,9 +396,10 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num
ds = de.MindDataset(mindrecord_file, columns_list=["img_id", "image", "annotation"], num_shards=device_num,
shard_id=rank, num_parallel_workers=num_parallel_workers, shuffle=is_training)
decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode)
ds = ds.map(operations=decode, input_columns=["image"])
change_swap_op = C.HWC2CHW()
normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255])
normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
color_adjust_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
compose_map_func = (lambda img_id, image, annotation: preprocess_fn(img_id, image, annotation, is_training))
if is_training:
@ -402,11 +408,11 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num
else:
output_columns = ["img_id", "image", "image_shape"]
trans = [normalize_op, change_swap_op]
ds = ds.map(input_columns=["img_id", "image", "annotation"],
ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"],
output_columns=output_columns, column_order=output_columns,
operations=compose_map_func, python_multiprocessing=is_training,
python_multiprocessing=is_training,
num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training,
ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=is_training,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)

View File

@ -54,8 +54,8 @@ def vgg_create_dataset(data_home, image_size, batch_size, rank_id=0, rank_size=1
changeswap_op]
# apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op)
data_set = data_set.map(input_columns="image", operations=c_trans)
data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(operations=c_trans, input_columns="image")
# apply repeat operations
data_set = data_set.repeat(repeat_num)
@ -157,8 +157,8 @@ def classification_dataset(data_dir, image_size, per_batch_size, rank=0, group_s
sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)
de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=8, operations=transform_img)
de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=8, operations=transform_label)
de_dataset = de_dataset.map(operations=transform_img, input_columns="image", num_parallel_workers=8)
de_dataset = de_dataset.map(operations=transform_label, input_columns="label", num_parallel_workers=8)
columns_to_project = ["image", "label"]
de_dataset = de_dataset.project(columns=columns_to_project)

View File

@ -90,8 +90,8 @@ def create_dataset(dataset_path, batch_size=1, num_shards=1, shard_id=0, device_
label_trans = [
c.TypeCast(mstype.int32)
]
ds = ds.map(input_columns=["image"], num_parallel_workers=8, operations=image_trans)
ds = ds.map(input_columns=["label"], num_parallel_workers=8, operations=label_trans)
ds = ds.map(operations=image_trans, input_columns=["image"], num_parallel_workers=8)
ds = ds.map(operations=label_trans, input_columns=["label"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)
return ds

View File

@ -176,11 +176,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"],
sampler=distributed_sampler)
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
ds = ds.map(input_columns=["image", "img_id"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
operations=compose_map_func, num_parallel_workers=8)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
num_parallel_workers=8)
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(max_epoch)

View File

@ -173,11 +173,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"],
sampler=distributed_sampler)
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
ds = ds.map(input_columns=["image", "img_id"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
operations=compose_map_func, num_parallel_workers=8)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
num_parallel_workers=8)
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(max_epoch)

View File

@ -296,21 +296,21 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=1, device_num=
ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank,
num_parallel_workers=num_parallel_workers, shuffle=is_training)
decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode)
ds = ds.map(operations=decode, input_columns=["image"])
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))
if is_training:
hwc_to_chw = C.HWC2CHW()
ds = ds.map(input_columns=["image", "annotation"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers)
num_parallel_workers=num_parallel_workers)
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
else:
ds = ds.map(input_columns=["image", "annotation"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "annotation"],
column_order=["image", "image_shape", "annotation"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers)
num_parallel_workers=num_parallel_workers)
return ds

View File

@ -112,12 +112,12 @@ def get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''):
"masked_lm_weights",
"next_sentence_labels"])
type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.repeat(repeat_count)
# apply batch operations

View File

@ -42,30 +42,31 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
usage=data_usage, shuffle=shuffle_dataset)
### Processing label
if data_usage == 'test':
dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"],
column_order=["id", "label_id", "sentence"], operations=ops.Duplicate())
dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0))
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"],
column_order=["id", "label_id", "sentence"])
dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"])
else:
label_vocab = text.Vocab.from_list(label_list)
label_lookup = text.Lookup(label_vocab)
dataset = dataset.map(input_columns="label_desc", output_columns="label_id", operations=label_lookup)
dataset = dataset.map(operations=label_lookup, input_columns="label_desc", output_columns="label_id")
### Processing sentence
vocab = text.Vocab.from_file(bert_vocab_path)
tokenizer = text.BertTokenizer(vocab, lower_case=True)
lookup = text.Lookup(vocab, unknown_token='[UNK]')
dataset = dataset.map(input_columns=["sentence"], operations=tokenizer)
dataset = dataset.map(input_columns=["sentence"], operations=ops.Slice(slice(0, max_seq_len)))
dataset = dataset.map(input_columns=["sentence"],
operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
append=np.array(["[SEP]"], dtype='S')))
dataset = dataset.map(input_columns=["sentence"], output_columns=["text_ids"], operations=lookup)
dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0))
dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"],
column_order=["text_ids", "mask_ids", "label_id"], operations=ops.Duplicate())
dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32))
dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "segment_ids"],
column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate())
dataset = dataset.map(input_columns=["segment_ids"], operations=ops.Fill(0))
dataset = dataset.map(operations=tokenizer, input_columns=["sentence"])
dataset = dataset.map(operations=ops.Slice(slice(0, max_seq_len)), input_columns=["sentence"])
dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
append=np.array(["[SEP]"], dtype='S')), input_columns=["sentence"])
dataset = dataset.map(operations=lookup, input_columns=["sentence"], output_columns=["text_ids"])
dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"])
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
output_columns=["text_ids", "mask_ids"],
column_order=["text_ids", "mask_ids", "label_id"])
dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"])
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
output_columns=["text_ids", "segment_ids"],
column_order=["text_ids", "mask_ids", "segment_ids", "label_id"])
dataset = dataset.map(operations=ops.Fill(0), input_columns=["segment_ids"])
dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
return dataset
@ -86,50 +87,51 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
usage=data_usage, shuffle=shuffle_dataset)
### Processing label
if data_usage == 'test':
dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"],
column_order=["id", "label_id", "sentence1", "sentence2"], operations=ops.Duplicate())
dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0))
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"],
column_order=["id", "label_id", "sentence1", "sentence2"])
dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"])
else:
label_vocab = text.Vocab.from_list(label_list)
label_lookup = text.Lookup(label_vocab)
dataset = dataset.map(input_columns="label", output_columns="label_id", operations=label_lookup)
dataset = dataset.map(operations=label_lookup, input_columns="label", output_columns="label_id")
### Processing sentence pairs
vocab = text.Vocab.from_file(bert_vocab_path)
tokenizer = text.BertTokenizer(vocab, lower_case=True)
lookup = text.Lookup(vocab, unknown_token='[UNK]')
### Tokenizing sentences and truncate sequence pair
dataset = dataset.map(input_columns=["sentence1"], operations=tokenizer)
dataset = dataset.map(input_columns=["sentence2"], operations=tokenizer)
dataset = dataset.map(input_columns=["sentence1", "sentence2"],
operations=text.TruncateSequencePair(max_seq_len-3))
dataset = dataset.map(operations=tokenizer, input_columns=["sentence1"])
dataset = dataset.map(operations=tokenizer, input_columns=["sentence2"])
dataset = dataset.map(operations=text.TruncateSequencePair(max_seq_len - 3),
input_columns=["sentence1", "sentence2"])
### Adding special tokens
dataset = dataset.map(input_columns=["sentence1"],
operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
append=np.array(["[SEP]"], dtype='S')))
dataset = dataset.map(input_columns=["sentence2"],
operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S')))
dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
append=np.array(["[SEP]"], dtype='S')),
input_columns=["sentence1"])
dataset = dataset.map(operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S')),
input_columns=["sentence2"])
### Generating segment_ids
dataset = dataset.map(input_columns=["sentence1"], output_columns=["sentence1", "type_sentence1"],
column_order=["sentence1", "type_sentence1", "sentence2", "label_id"],
operations=ops.Duplicate())
dataset = dataset.map(input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"],
column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"],
operations=ops.Duplicate())
dataset = dataset.map(input_columns=["type_sentence1"], operations=[lookup, ops.Fill(0)])
dataset = dataset.map(input_columns=["type_sentence2"], operations=[lookup, ops.Fill(1)])
dataset = dataset.map(input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"],
column_order=["sentence1", "sentence2", "segment_ids", "label_id"],
operations=ops.Concatenate())
dataset = dataset.map(input_columns=["segment_ids"], operations=ops.PadEnd([max_seq_len], 0))
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["sentence1"],
output_columns=["sentence1", "type_sentence1"],
column_order=["sentence1", "type_sentence1", "sentence2", "label_id"])
dataset = dataset.map(operations=ops.Duplicate(),
input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"],
column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"])
dataset = dataset.map(operations=[lookup, ops.Fill(0)], input_columns=["type_sentence1"])
dataset = dataset.map(operations=[lookup, ops.Fill(1)], input_columns=["type_sentence2"])
dataset = dataset.map(operations=ops.Concatenate(),
input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"],
column_order=["sentence1", "sentence2", "segment_ids", "label_id"])
dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["segment_ids"])
### Generating text_ids
dataset = dataset.map(input_columns=["sentence1", "sentence2"], output_columns=["text_ids"],
column_order=["text_ids", "segment_ids", "label_id"],
operations=ops.Concatenate())
dataset = dataset.map(input_columns=["text_ids"], operations=lookup)
dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0))
dataset = dataset.map(operations=ops.Concatenate(),
input_columns=["sentence1", "sentence2"], output_columns=["text_ids"],
column_order=["text_ids", "segment_ids", "label_id"])
dataset = dataset.map(operations=lookup, input_columns=["text_ids"])
dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"])
### Generating mask_ids
dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"],
column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate())
dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32))
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
output_columns=["text_ids", "mask_ids"],
column_order=["text_ids", "mask_ids", "segment_ids", "label_id"])
dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"])
dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
return dataset

View File

@ -39,12 +39,12 @@ def create_bert_dataset(device_num=1, rank=0, do_shuffle="true", data_dir=None,
ori_dataset_size = ds.get_dataset_size()
print('origin dataset size: ', ori_dataset_size)
type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations
ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
logger.info("data size: {}".format(ds.get_dataset_size()))
@ -60,12 +60,12 @@ def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy
columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle)
if assessment_method == "Spearman_correlation":
type_cast_op_float = C.TypeCast(mstype.float32)
ds = ds.map(input_columns="label_ids", operations=type_cast_op_float)
ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
else:
ds = ds.map(input_columns="label_ids", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="label_ids")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@ -80,12 +80,12 @@ def create_classification_dataset(batch_size=1, repeat_count=1, assessment_metho
columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle)
if assessment_method == "Spearman_correlation":
type_cast_op_float = C.TypeCast(mstype.float32)
ds = ds.map(input_columns="label_ids", operations=type_cast_op_float)
ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
else:
ds = ds.map(input_columns="label_ids", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="label_ids")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@ -101,14 +101,14 @@ def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, sche
columns_list=["input_ids", "input_mask", "segment_ids", "start_positions",
"end_positions", "unique_ids", "is_impossible"],
shuffle=do_shuffle)
ds = ds.map(input_columns="start_positions", operations=type_cast_op)
ds = ds.map(input_columns="end_positions", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="start_positions")
ds = ds.map(operations=type_cast_op, input_columns="end_positions")
else:
ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None,
columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"])
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

View File

@ -117,12 +117,12 @@ def get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''):
"masked_lm_weights",
"next_sentence_labels"])
type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.repeat(repeat_count)
# apply batch operations

View File

@ -40,12 +40,12 @@ def create_bert_dataset(device_num=1, rank=0, do_shuffle="true", data_dir=None,
ori_dataset_size = ds.get_dataset_size()
print('origin dataset size: ', ori_dataset_size)
type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations
ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
logger.info("data size: {}".format(ds.get_dataset_size()))
@ -61,12 +61,12 @@ def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy
columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"])
if assessment_method == "Spearman_correlation":
type_cast_op_float = C.TypeCast(mstype.float32)
ds = ds.map(input_columns="label_ids", operations=type_cast_op_float)
ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
else:
ds = ds.map(input_columns="label_ids", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="label_ids")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count)
# apply shuffle operation
buffer_size = 960
@ -84,12 +84,12 @@ def create_classification_dataset(batch_size=1, repeat_count=1, assessment_metho
columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"])
if assessment_method == "Spearman_correlation":
type_cast_op_float = C.TypeCast(mstype.float32)
ds = ds.map(input_columns="label_ids", operations=type_cast_op_float)
ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
else:
ds = ds.map(input_columns="label_ids", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="label_ids")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count)
# apply shuffle operation
buffer_size = 960
@ -107,17 +107,17 @@ def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, sche
columns_list=["input_ids", "input_mask", "segment_ids",
"start_positions", "end_positions",
"unique_ids", "is_impossible"])
ds = ds.map(input_columns="start_positions", operations=type_cast_op)
ds = ds.map(input_columns="end_positions", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="start_positions")
ds = ds.map(operations=type_cast_op, input_columns="end_positions")
else:
ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None,
columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"])
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
ds = ds.repeat(repeat_count)
# apply shuffle operation
buffer_size = 960

View File

@ -60,12 +60,12 @@ def _load_dataset(input_files, batch_size, epoch_count=1,
repeat_count = epoch_count
type_cast_op = deC.TypeCast(mstype.int32)
ds = ds.map(input_columns="src", operations=type_cast_op)
ds = ds.map(input_columns="src_padding", operations=type_cast_op)
ds = ds.map(input_columns="prev_opt", operations=type_cast_op)
ds = ds.map(input_columns="prev_padding", operations=type_cast_op)
ds = ds.map(input_columns="target", operations=type_cast_op)
ds = ds.map(input_columns="tgt_padding", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="src")
ds = ds.map(operations=type_cast_op, input_columns="src_padding")
ds = ds.map(operations=type_cast_op, input_columns="prev_opt")
ds = ds.map(operations=type_cast_op, input_columns="prev_padding")
ds = ds.map(operations=type_cast_op, input_columns="target")
ds = ds.map(operations=type_cast_op, input_columns="tgt_padding")
ds = ds.rename(
input_columns=["src",

View File

@ -49,11 +49,11 @@ def create_tinybert_dataset(task='td', batch_size=32, device_num=1, rank=0,
shard_equal_rows=True)
type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
if task == "td":
ds = ds.map(input_columns="label_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="label_ids")
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

View File

@ -40,12 +40,12 @@ def load_test_data(batch_size=1, data_file=None):
"target_eos_ids", "target_eos_mask"],
shuffle=False)
type_cast_op = deC.TypeCast(mstype.int32)
ds = ds.map(input_columns="source_eos_ids", operations=type_cast_op)
ds = ds.map(input_columns="source_eos_mask", operations=type_cast_op)
ds = ds.map(input_columns="target_sos_ids", operations=type_cast_op)
ds = ds.map(input_columns="target_sos_mask", operations=type_cast_op)
ds = ds.map(input_columns="target_eos_ids", operations=type_cast_op)
ds = ds.map(input_columns="target_eos_mask", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="source_eos_ids")
ds = ds.map(operations=type_cast_op, input_columns="source_eos_mask")
ds = ds.map(operations=type_cast_op, input_columns="target_sos_ids")
ds = ds.map(operations=type_cast_op, input_columns="target_sos_mask")
ds = ds.map(operations=type_cast_op, input_columns="target_eos_ids")
ds = ds.map(operations=type_cast_op, input_columns="target_eos_mask")
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
ds.channel_name = 'transformer'

View File

@ -30,12 +30,12 @@ def create_transformer_dataset(epoch_count=1, rank_size=1, rank_id=0, do_shuffle
shuffle=(do_shuffle == "true"), num_shards=rank_size, shard_id=rank_id)
type_cast_op = deC.TypeCast(mstype.int32)
ds = ds.map(input_columns="source_eos_ids", operations=type_cast_op)
ds = ds.map(input_columns="source_eos_mask", operations=type_cast_op)
ds = ds.map(input_columns="target_sos_ids", operations=type_cast_op)
ds = ds.map(input_columns="target_sos_mask", operations=type_cast_op)
ds = ds.map(input_columns="target_eos_ids", operations=type_cast_op)
ds = ds.map(input_columns="target_eos_mask", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="source_eos_ids")
ds = ds.map(operations=type_cast_op, input_columns="source_eos_mask")
ds = ds.map(operations=type_cast_op, input_columns="target_sos_ids")
ds = ds.map(operations=type_cast_op, input_columns="target_sos_mask")
ds = ds.map(operations=type_cast_op, input_columns="target_eos_ids")
ds = ds.map(operations=type_cast_op, input_columns="target_eos_mask")
# apply batch operations
ds = ds.batch(transformer_net_cfg.batch_size, drop_remainder=True)

View File

@ -89,8 +89,8 @@ def create_dataset(repeat_num=1, training=True):
changeswap_op]
# apply map operations on images
ds = ds.map(input_columns="label", operations=type_cast_op)
ds = ds.map(input_columns="image", operations=c_trans)
ds = ds.map(operations=type_cast_op, input_columns="label")
ds = ds.map(operations=c_trans, input_columns="image")
# apply repeat operations
ds = ds.repeat(repeat_num)

View File

@ -89,8 +89,8 @@ def create_dataset(repeat_num=1, training=True):
changeswap_op]
# apply map operations on images
ds = ds.map(input_columns="label", operations=type_cast_op)
ds = ds.map(input_columns="image", operations=c_trans)
ds = ds.map(operations=type_cast_op, input_columns="label")
ds = ds.map(operations=c_trans, input_columns="image")
# apply repeat operations
ds = ds.repeat(repeat_num)

View File

@ -298,21 +298,21 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num
ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank,
num_parallel_workers=num_parallel_workers, shuffle=False)
decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode)
ds = ds.map(operations=decode, input_columns=["image"])
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))
if is_training:
hwc_to_chw = C.HWC2CHW()
ds = ds.map(input_columns=["image", "annotation"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers)
num_parallel_workers=num_parallel_workers)
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
else:
ds = ds.map(input_columns=["image", "annotation"],
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "annotation"],
column_order=["image", "image_shape", "annotation"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers)
num_parallel_workers=num_parallel_workers)
return ds
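Note: the same reordering for a multi-output map that also passes output_columns and column_order; the preprocessing function, column names, and shapes below are placeholders, a sketch only.

    import numpy as np
    import mindspore.dataset as ds

    def with_shape(image):
        # hypothetical helper: emit the image plus an extra "image_shape" column
        return image, np.array(image.shape, dtype=np.int64)

    data = ds.NumpySlicesDataset({"image": np.ones((4, 8, 8, 3), dtype=np.float32)}, shuffle=False)
    data = data.map(operations=with_shape, input_columns=["image"],
                    output_columns=["image", "image_shape"],
                    column_order=["image", "image_shape"],
                    num_parallel_workers=2)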

View File

@ -102,12 +102,12 @@ def me_de_train_dataset(sink_mode=False):
if sink_mode:
sink_size = 100
new_repeat_count = 3
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
logger.info("data size: {}".format(ds.get_dataset_size()))

View File

@ -102,12 +102,12 @@ def me_de_train_dataset(sink_mode=False):
if sink_mode:
sink_size = 100
new_repeat_count = 3
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
logger.info("data size: {}".format(ds.get_dataset_size()))

View File

@ -41,12 +41,12 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", d
print('origin dataset size: ', ori_dataset_size)
new_repeat_count = int(repeat_count * ori_dataset_size // ds.get_dataset_size())
type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations
ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
ds = ds.repeat(max(new_repeat_count, repeat_count))

View File

@ -92,12 +92,12 @@ def me_de_train_dataset():
"next_sentence_labels", "masked_lm_positions",
"masked_lm_ids", "masked_lm_weights"], shuffle=False)
type_cast_op = C.TypeCast(mstype.int32)
ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
ds = ds.map(input_columns="input_mask", operations=type_cast_op)
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
ds = ds.map(operations=type_cast_op, input_columns="input_mask")
ds = ds.map(operations=type_cast_op, input_columns="input_ids")
# apply batch operations
batch_size = int(os.getenv('BATCH_SIZE', '16'))
ds = ds.batch(batch_size, drop_remainder=True)

View File

@ -97,10 +97,10 @@ def create_dataset(args, data_url, epoch_num=1, batch_size=1, usage="train", shu
# wrapped with GeneratorDataset
dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=None)
dataset = dataset.map(input_columns=["image", "label"], operations=DataTransform(args, usage=usage))
dataset = dataset.map(operations=DataTransform(args, usage=usage), input_columns=["image", "label"])
channelswap_op = C.HWC2CHW()
dataset = dataset.map(input_columns="image", operations=channelswap_op)
dataset = dataset.map(operations=channelswap_op, input_columns="image")
# 1464 samples / batch_size 8 = 183 batches
# epoch_num is num of steps

View File

@ -68,8 +68,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

View File

@ -71,8 +71,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)

View File

@ -171,11 +171,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
buffer_size = 10000
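Note: a sketch of the rewritten style when several vision ops feed one column; a list can be passed to operations in a single map call. Module aliases follow the convention of these files, and the dataset path is a placeholder.

    import mindspore.common.dtype as mstype
    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as C
    import mindspore.dataset.vision.c_transforms as CV

    mnist_ds = ds.MnistDataset("/path/to/MNIST_Data/train")  # placeholder path
    type_cast_op = C.TypeCast(mstype.int32)
    resize_op = CV.Resize((32, 32))
    rescale_op = CV.Rescale(1.0 / 255.0, 0.0)
    hwc2chw_op = CV.HWC2CHW()
    mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=4)
    mnist_ds = mnist_ds.map(operations=[resize_op, rescale_op, hwc2chw_op], input_columns="image",
                            num_parallel_workers=4)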

View File

@ -47,11 +47,11 @@ def test_me_de_train_dataset():
rescale_op = vision.Rescale(rescale, shift)
# apply map operations on images
data_set_new = data_set_new.map(input_columns="image/encoded", operations=decode_op)
data_set_new = data_set_new.map(input_columns="image/encoded", operations=resize_op)
data_set_new = data_set_new.map(input_columns="image/encoded", operations=rescale_op)
data_set_new = data_set_new.map(operations=decode_op, input_columns="image/encoded")
data_set_new = data_set_new.map(operations=resize_op, input_columns="image/encoded")
data_set_new = data_set_new.map(operations=rescale_op, input_columns="image/encoded")
hwc2chw_op = vision.HWC2CHW()
data_set_new = data_set_new.map(input_columns="image/encoded", operations=hwc2chw_op)
data_set_new = data_set_new.map(operations=hwc2chw_op, input_columns="image/encoded")
data_set_new = data_set_new.repeat(1)
# apply batch operations
batch_size_new = 32

View File

@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
buffer_size = 10000

View File

@ -87,9 +87,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
hwc2chw_op = CV.HWC2CHW()
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
mnist_ds = mnist_ds.batch(batch_size)

View File

@ -77,9 +77,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
hwc2chw_op = CV.HWC2CHW()
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
mnist_ds = mnist_ds.batch(batch_size)

View File

@ -145,9 +145,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
hwc2chw_op = CV.HWC2CHW()
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
mnist_ds = mnist_ds.batch(batch_size)

View File

@ -98,11 +98,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
buffer_size = 10000

View File

@ -107,11 +107,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
buffer_size = 10000

View File

@ -351,8 +351,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32):
changeswap_op]
# apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op)
data_set = data_set.map(input_columns="image", operations=c_trans)
data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(operations=c_trans, input_columns="image")
# apply shuffle operations
data_set = data_set.shuffle(buffer_size=1000)

View File

@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
buffer_size = 10000

View File

@ -114,11 +114,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
# apply DatasetOps
mnist_ds = mnist_ds.shuffle(buffer_size=10000) # 10000 as in LeNet train script

View File

@ -90,8 +90,8 @@ def create_dataset(repeat_num=1, training=True):
changeswap_op]
# apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op)
data_set = data_set.map(input_columns="image", operations=c_trans)
data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(operations=c_trans, input_columns="image")
# apply repeat operations
data_set = data_set.repeat(repeat_num)

View File

@ -68,8 +68,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32):
changeswap_op]
# apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op)
data_set = data_set.map(input_columns="image", operations=c_trans)
data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(operations=c_trans, input_columns="image")
# apply shuffle operations
data_set = data_set.shuffle(buffer_size=1000)

View File

@ -79,8 +79,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32, rank_id=0, rank_s
changeswap_op]
# apply map operations on images
data_set = data_set.map(input_columns="label", operations=type_cast_op)
data_set = data_set.map(input_columns="image", operations=c_trans)
data_set = data_set.map(operations=type_cast_op, input_columns="label")
data_set = data_set.map(operations=c_trans, input_columns="image")
# apply shuffle operations
data_set = data_set.shuffle(buffer_size=1000)

View File

@ -29,7 +29,7 @@ def test_case_0():
# apply dataset operations
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
ds1 = ds1.map(input_columns=col, output_columns="out", operations=(lambda x: x + x))
ds1 = ds1.map(operations=(lambda x: x + x), input_columns=col, output_columns="out")
print("************** Output Tensor *****************")
for data in ds1.create_dict_iterator(): # each data is a dictionary
@ -49,7 +49,7 @@ def test_case_1():
# apply dataset operations
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1"], operations=(lambda x: (x, x + x)))
ds1 = ds1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"])
print("************** Output Tensor *****************")
for data in ds1.create_dict_iterator(): # each data is a dictionary
@ -72,7 +72,7 @@ def test_case_2():
# apply dataset operations
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
ds1 = ds1.map(input_columns=col, output_columns="out", operations=(lambda x, y: x + y))
ds1 = ds1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out")
print("************** Output Tensor *****************")
for data in ds1.create_dict_iterator(): # each data is a dictionary
@ -93,8 +93,8 @@ def test_case_3():
# apply dataset operations
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1", "out2"],
operations=(lambda x, y: (x, x + y, x + x + y)))
ds1 = ds1.map(operations=(lambda x, y: (x, x + y, x + x + y)), input_columns=col,
output_columns=["out0", "out1", "out2"])
print("************** Output Tensor *****************")
for data in ds1.create_dict_iterator(): # each data is a dictionary
@ -119,8 +119,8 @@ def test_case_4():
# apply dataset operations
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
operations=(lambda x, y: (x, x + y, x + x + y)))
ds1 = ds1.map(operations=(lambda x, y: (x, x + y, x + x + y)), input_columns=col,
output_columns=["out0", "out1", "out2"], num_parallel_workers=4)
print("************** Output Tensor *****************")
for data in ds1.create_dict_iterator(): # each data is a dictionary

View File

@ -39,12 +39,12 @@ def test_HWC2CHW(plot=False):
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = c_vision.Decode()
hwc2chw_op = c_vision.HWC2CHW()
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=hwc2chw_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op)
data2 = data2.map(operations=decode_op, input_columns=["image"])
image_transposed = []
image = []
@ -72,8 +72,8 @@ def test_HWC2CHW_md5():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = c_vision.Decode()
hwc2chw_op = c_vision.HWC2CHW()
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=hwc2chw_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
# Compare with expected md5 from images
filename = "HWC2CHW_01_result.npz"
@ -90,8 +90,8 @@ def test_HWC2CHW_comp(plot=False):
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = c_vision.Decode()
hwc2chw_op = c_vision.HWC2CHW()
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=hwc2chw_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -101,7 +101,7 @@ def test_HWC2CHW_comp(plot=False):
py_vision.HWC2CHW()
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data2 = data2.map(input_columns=["image"], operations=transform)
data2 = data2.map(operations=transform, input_columns=["image"])
image_c_transposed = []
image_py_transposed = []

View File

@ -42,8 +42,7 @@ def test_auto_contrast_py(plot=False):
F.Resize((224, 224)),
F.ToTensor()])
ds_original = ds.map(input_columns="image",
operations=transforms_original)
ds_original = ds.map(operations=transforms_original, input_columns="image")
ds_original = ds_original.batch(512)
@ -64,8 +63,7 @@ def test_auto_contrast_py(plot=False):
F.AutoContrast(cutoff=10.0, ignore=[10, 20]),
F.ToTensor()])
ds_auto_contrast = ds.map(input_columns="image",
operations=transforms_auto_contrast)
ds_auto_contrast = ds.map(operations=transforms_auto_contrast, input_columns="image")
ds_auto_contrast = ds_auto_contrast.batch(512)
@ -99,17 +97,14 @@ def test_auto_contrast_c(plot=False):
# AutoContrast Images
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(),
C.Resize((224, 224))])
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20])
c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20])
transforms_op = mindspore.dataset.transforms.py_transforms.Compose([lambda img: F.ToPIL()(img.astype(np.uint8)),
python_op,
np.array])
ds_auto_contrast_py = ds.map(input_columns="image",
operations=transforms_op)
ds_auto_contrast_py = ds.map(operations=transforms_op, input_columns="image")
ds_auto_contrast_py = ds_auto_contrast_py.batch(512)
@ -122,12 +117,9 @@ def test_auto_contrast_c(plot=False):
axis=0)
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(),
C.Resize((224, 224))])
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
ds_auto_contrast_c = ds.map(input_columns="image",
operations=c_op)
ds_auto_contrast_c = ds.map(operations=c_op, input_columns="image")
ds_auto_contrast_c = ds_auto_contrast_c.batch(512)
@ -162,9 +154,7 @@ def test_auto_contrast_one_channel_c(plot=False):
# AutoContrast Images
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(),
C.Resize((224, 224))])
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
python_op = F.AutoContrast()
c_op = C.AutoContrast()
# not using F.ToTensor() since it converts to floats
@ -174,8 +164,7 @@ def test_auto_contrast_one_channel_c(plot=False):
python_op,
np.array])
ds_auto_contrast_py = ds.map(input_columns="image",
operations=transforms_op)
ds_auto_contrast_py = ds.map(operations=transforms_op, input_columns="image")
ds_auto_contrast_py = ds_auto_contrast_py.batch(512)
@ -188,13 +177,10 @@ def test_auto_contrast_one_channel_c(plot=False):
axis=0)
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(),
C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])])
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])],
input_columns=["image"])
ds_auto_contrast_c = ds.map(input_columns="image",
operations=c_op)
ds_auto_contrast_c = ds.map(operations=c_op, input_columns="image")
ds_auto_contrast_c = ds_auto_contrast_c.batch(512)
@ -223,8 +209,7 @@ def test_auto_contrast_mnist_c(plot=False):
"""
logger.info("Test AutoContrast C Op With MNIST Images")
ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
ds_auto_contrast_c = ds.map(input_columns="image",
operations=C.AutoContrast(cutoff=1, ignore=(0, 255)))
ds_auto_contrast_c = ds.map(operations=C.AutoContrast(cutoff=1, ignore=(0, 255)), input_columns="image")
ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
images = []
@ -252,25 +237,20 @@ def test_auto_contrast_invalid_ignore_param_c():
logger.info("Test AutoContrast C Op with invalid ignore parameter")
try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(),
ds = ds.map(operations=[C.Decode(),
C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])])
lambda img: np.array(img[:, :, 0])], input_columns=["image"])
# invalid ignore
ds = ds.map(input_columns="image",
operations=C.AutoContrast(ignore=255.5))
ds = ds.map(operations=C.AutoContrast(ignore=255.5), input_columns="image")
except TypeError as error:
logger.info("Got an exception in DE: {}".format(str(error)))
assert "Argument ignore with value 255.5 is not of type" in str(error)
try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(),
C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])])
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])], input_columns=["image"])
# invalid ignore
ds = ds.map(input_columns="image",
operations=C.AutoContrast(ignore=(10, 100)))
ds = ds.map(operations=C.AutoContrast(ignore=(10, 100)), input_columns="image")
except TypeError as error:
logger.info("Got an exception in DE: {}".format(str(error)))
assert "Argument ignore with value (10,100) is not of type" in str(error)
@ -283,25 +263,21 @@ def test_auto_contrast_invalid_cutoff_param_c():
logger.info("Test AutoContrast C Op with invalid cutoff parameter")
try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(),
ds = ds.map(operations=[C.Decode(),
C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])])
lambda img: np.array(img[:, :, 0])], input_columns=["image"])
# invalid ignore
ds = ds.map(input_columns="image",
operations=C.AutoContrast(cutoff=-10.0))
ds = ds.map(operations=C.AutoContrast(cutoff=-10.0), input_columns="image")
except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(),
ds = ds.map(operations=[C.Decode(),
C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])])
lambda img: np.array(img[:, :, 0])], input_columns=["image"])
# invalid ignore
ds = ds.map(input_columns="image",
operations=C.AutoContrast(cutoff=120.0))
ds = ds.map(operations=C.AutoContrast(cutoff=120.0), input_columns="image")
except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
@ -314,21 +290,21 @@ def test_auto_contrast_invalid_ignore_param_py():
logger.info("Test AutoContrast python Op with invalid ignore parameter")
try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)),
F.AutoContrast(ignore=255.5),
F.ToTensor()])])
F.ToTensor()])],
input_columns=["image"])
except TypeError as error:
logger.info("Got an exception in DE: {}".format(str(error)))
assert "Argument ignore with value 255.5 is not of type" in str(error)
try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)),
F.AutoContrast(ignore=(10, 100)),
F.ToTensor()])])
F.ToTensor()])],
input_columns=["image"])
except TypeError as error:
logger.info("Got an exception in DE: {}".format(str(error)))
assert "Argument ignore with value (10,100) is not of type" in str(error)
@ -341,21 +317,22 @@ def test_auto_contrast_invalid_cutoff_param_py():
logger.info("Test AutoContrast python Op with invalid cutoff parameter")
try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)),
F.AutoContrast(cutoff=-10.0),
F.ToTensor()])])
F.ToTensor()])],
input_columns=["image"])
except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)),
F.AutoContrast(cutoff=120.0),
F.ToTensor()])])
ds = ds.map(
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)),
F.AutoContrast(cutoff=120.0),
F.ToTensor()])],
input_columns=["image"])
except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)

View File

@ -49,10 +49,9 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False):
test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1)
# map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"],
operations=[test_op])
column_order=["image", "bbox"])
filename = "bounding_box_augment_rotation_c_result.npz"
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@ -88,10 +87,9 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False):
test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9)
# map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"],
operations=[test_op])
column_order=["image", "bbox"])
filename = "bounding_box_augment_crop_c_result.npz"
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@ -126,10 +124,9 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False):
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9)
# map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"],
operations=[test_op]) # Add column for "bbox"
column_order=["image", "bbox"]) # Add column for "bbox"
filename = "bounding_box_augment_valid_ratio_c_result.npz"
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@ -163,10 +160,9 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False):
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1)
dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"],
dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"],
operations=[test_op])
column_order=["image", "bbox"])
unaugSamp, augSamp = [], []
@ -195,20 +191,19 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False):
# map to apply ops
# Add column for "bbox"
dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"],
dataVoc1 = dataVoc1.map(
operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)),
input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"])
dataVoc2 = dataVoc2.map(
operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)),
input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"])
dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"],
operations=lambda img, bbox:
(img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)))
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"],
operations=lambda img, bbox:
(img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)))
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"],
operations=[test_op])
column_order=["image", "bbox"])
filename = "bounding_box_augment_valid_edge_c_result.npz"
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
@ -238,10 +233,9 @@ def test_bounding_box_augment_invalid_ratio_c():
# ratio range is from 0 - 1
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1.5)
# map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"],
operations=[test_op]) # Add column for "bbox"
column_order=["image", "bbox"]) # Add column for "bbox"
except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input ratio is not within the required interval of (0.0 to 1.0)." in str(error)

View File

@ -25,7 +25,7 @@ def test_compose():
def test_config(arr, op_list):
try:
data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
data = data.map(input_columns=["col"], operations=ops.Compose(op_list))
data = data.map(operations=ops.Compose(op_list), input_columns=["col"])
res = []
for i in data.create_dict_iterator(num_epochs=1):
res.append(i["col"].tolist())
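Note: the same keyword swap for a composed transform, sketched with c_transforms ops; the operation list here is illustrative and not taken from the test above.

    import mindspore.common.dtype as mstype
    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as ops

    data = ds.NumpySlicesDataset([[1, 0], [3, 4]], column_names="col", shuffle=False)
    compose_op = ops.Compose([ops.TypeCast(mstype.float32), ops.Fill(0)])
    data = data.map(operations=compose_op, input_columns=["col"])
    for row in data.create_dict_iterator():
        print(row["col"])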

View File

@ -24,7 +24,7 @@ def test_random_apply():
def test_config(arr, op_list, prob=0.5):
try:
data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
data = data.map(input_columns=["col"], operations=ops.RandomApply(op_list, prob))
data = data.map(operations=ops.RandomApply(op_list, prob), input_columns=["col"])
res = []
for i in data.create_dict_iterator(num_epochs=1):
res.append(i["col"].tolist())

View File

@ -48,7 +48,7 @@ def test_cache_map_basic1():
# This DATA_DIR only has 2 images in it
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.repeat(4)
filename = "cache_map_01_result.npz"
@ -77,7 +77,7 @@ def test_cache_map_basic2():
# This DATA_DIR only has 2 images in it
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds1 = ds1.repeat(4)
filename = "cache_map_02_result.npz"
@ -107,7 +107,7 @@ def test_cache_map_basic3():
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
decode_op = c_vision.Decode()
ds1 = ds1.repeat(4)
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
num_iter = 0
@ -131,7 +131,7 @@ def test_cache_map_basic4():
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
decode_op = c_vision.Decode()
ds1 = ds1.repeat(4)
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
shape = ds1.output_shapes()
logger.info(shape)
@ -167,7 +167,7 @@ def test_cache_map_failure1():
# This DATA_DIR only has 2 images in it
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds1 = ds1.repeat(4)
try:

View File

@ -108,7 +108,7 @@ def test_cache_nomap_basic3():
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False, cache=some_cache)
decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.repeat(4)
num_iter = 0
@ -160,7 +160,7 @@ def test_cache_nomap_basic4():
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL)
decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds1 = ds1.repeat(4)
num_iter = 0
@ -197,7 +197,7 @@ def test_cache_nomap_basic5():
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], cache=some_cache)
decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.repeat(4)
num_iter = 0
@ -237,7 +237,7 @@ def test_cache_nomap_basic6():
# there was not any cache.
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_shards=3, shard_id=1, cache=some_cache)
decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.repeat(4)
num_iter = 0
@ -273,7 +273,7 @@ def test_cache_nomap_basic7():
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL, cache=some_cache)
decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.repeat(4)
num_iter = 0
@ -343,11 +343,11 @@ def test_cache_nomap_allowed_share2():
decode_op = c_vision.Decode()
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds1 = ds1.repeat(4)
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache)
ds2 = ds2.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds2 = ds2.shuffle(buffer_size=2)
num_iter = 0
@ -418,10 +418,10 @@ def test_cache_nomap_allowed_share4():
decode_op = c_vision.Decode()
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache, num_parallel_workers=1)
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache, num_parallel_workers=1)
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache, num_parallel_workers=2)
ds2 = ds2.map(operations=decode_op, input_columns=["image"], cache=some_cache, num_parallel_workers=2)
num_iter = 0
for _ in ds1.create_dict_iterator(num_epochs=1):
@ -458,10 +458,10 @@ def test_cache_nomap_disallowed_share1():
rescale_op = c_vision.Rescale(1.0 / 255.0, -1.0)
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
ds2 = ds2.map(input_columns=["image"], operations=rescale_op, cache=some_cache)
ds2 = ds2.map(operations=rescale_op, input_columns=["image"], cache=some_cache)
num_iter = 0
for _ in ds1.create_dict_iterator(num_epochs=1):
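Note: for the cache-enabled pipelines above, the swap leaves cache as a trailing keyword; a sketch with placeholder file paths (DatasetCache assumes a cache server is already running).

    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as c_vision

    some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
    data = ds.TFRecordDataset(["/path/to/data-0.tfrecord"], "/path/to/schema.json",
                              columns_list=["image"], shuffle=False)
    data = data.map(operations=c_vision.Decode(), input_columns=["image"], cache=some_cache)
    data = data.repeat(4)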

View File

@ -40,12 +40,12 @@ def test_center_crop_op(height=375, width=375, plot=False):
decode_op = vision.Decode()
# 3 images [375, 500] [600, 500] [512, 512]
center_crop_op = vision.CenterCrop([height, width])
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=center_crop_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=center_crop_op, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
data2 = data2.map(input_columns=["image"], operations=decode_op)
data2 = data2.map(operations=decode_op, input_columns=["image"])
image_cropped = []
image = []
@ -67,8 +67,8 @@ def test_center_crop_md5(height=375, width=375):
decode_op = vision.Decode()
# 3 images [375, 500] [600, 500] [512, 512]
center_crop_op = vision.CenterCrop([height, width])
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=center_crop_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=center_crop_op, input_columns=["image"])
# Compare with expected md5 from images
filename = "center_crop_01_result.npz"
save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
@ -84,8 +84,8 @@ def test_center_crop_comp(height=375, width=375, plot=False):
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = vision.Decode()
center_crop_op = vision.CenterCrop([height, width])
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=center_crop_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=center_crop_op, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -95,7 +95,7 @@ def test_center_crop_comp(height=375, width=375, plot=False):
py_vision.ToTensor()
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data2 = data2.map(input_columns=["image"], operations=transform)
data2 = data2.map(operations=transform, input_columns=["image"])
image_c_cropped = []
image_py_cropped = []
@ -126,11 +126,11 @@ def test_crop_grayscale(height=375, width=375):
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=transform)
data1 = data1.map(operations=transform, input_columns=["image"])
# If input is grayscale, the output dimensions should be single channel
crop_gray = vision.CenterCrop([height, width])
data1 = data1.map(input_columns=["image"], operations=crop_gray)
data1 = data1.map(operations=crop_gray, input_columns=["image"])
for item1 in data1.create_dict_iterator(num_epochs=1):
c_image = item1["image"]


@ -121,7 +121,7 @@ def test_concat_05():
data2 = ds.GeneratorDataset(generator_10, ["col1"])
type_cast_op = C.TypeCast(mstype.float32)
data1 = data1.map(input_columns=["col1"], operations=type_cast_op)
data1 = data1.map(operations=type_cast_op, input_columns=["col1"])
data3 = data1 + data2
@ -319,8 +319,8 @@ def test_concat_14():
F.Resize((224, 224)),
F.ToTensor()])
data1 = data1.map(input_columns=["image"], operations=transforms1)
data2 = data2.map(input_columns=["image"], operations=transforms1)
data1 = data1.map(operations=transforms1, input_columns=["image"])
data2 = data2.map(operations=transforms1, input_columns=["image"])
data3 = data1 + data2
expected, output = [], []


@ -31,7 +31,7 @@ def test_concatenate_op_all():
append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float)
data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)
data = data.map(input_columns=["col"], operations=concatenate_op)
data = data.map(operations=concatenate_op, input_columns=["col"])
expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3,
11., 12.])
for data_row in data:
@ -45,7 +45,7 @@ def test_concatenate_op_none():
data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate()
data = data.map(input_columns=["col"], operations=concatenate_op)
data = data.map(operations=concatenate_op, input_columns=["col"])
for data_row in data:
np.testing.assert_array_equal(data_row[0], np.array([5., 6., 7., 8.], dtype=np.float))
@ -59,7 +59,7 @@ def test_concatenate_op_string():
data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)
data = data.map(input_columns=["col"], operations=concatenate_op)
data = data.map(operations=concatenate_op, input_columns=["col"])
expected = np.array(["dw", "df", "ss", "ad", "dwsdf", "df"], dtype='S')
for data_row in data:
np.testing.assert_array_equal(data_row[0], expected)
@ -74,8 +74,8 @@ def test_concatenate_op_multi_input_string():
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor, append=append_tensor)
data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"],
operations=concatenate_op)
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
output_columns=["out1"])
expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S')
for data_row in data:
np.testing.assert_array_equal(data_row[0], expected)
@ -89,8 +89,8 @@ def test_concatenate_op_multi_input_numeric():
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor)
data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"],
operations=concatenate_op)
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
output_columns=["out1"])
expected = np.array([3, 5, 1, 2, 3, 4])
for data_row in data:
np.testing.assert_array_equal(data_row[0], expected)
@ -104,7 +104,7 @@ def test_concatenate_op_type_mismatch():
data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate(0, prepend_tensor)
data = data.map(input_columns=["col"], operations=concatenate_op)
data = data.map(operations=concatenate_op, input_columns=["col"])
with pytest.raises(RuntimeError) as error_info:
for _ in data:
pass
@ -119,7 +119,7 @@ def test_concatenate_op_type_mismatch2():
data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate(0, prepend_tensor)
data = data.map(input_columns=["col"], operations=concatenate_op)
data = data.map(operations=concatenate_op, input_columns=["col"])
with pytest.raises(RuntimeError) as error_info:
for _ in data:
pass
@ -134,7 +134,7 @@ def test_concatenate_op_incorrect_dim():
concatenate_op = data_trans.Concatenate(0, prepend_tensor)
data = ds.GeneratorDataset(gen, column_names=["col"])
data = data.map(input_columns=["col"], operations=concatenate_op)
data = data.map(operations=concatenate_op, input_columns=["col"])
with pytest.raises(RuntimeError) as error_info:
for _ in data:
pass
@ -155,7 +155,7 @@ def test_concatenate_op_negative_axis():
append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float)
data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate(-1, prepend_tensor, append_tensor)
data = data.map(input_columns=["col"], operations=concatenate_op)
data = data.map(operations=concatenate_op, input_columns=["col"])
expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3,
11., 12.])
for data_row in data:
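
The Concatenate hunks follow the same rule; the op's own arguments are untouched. A small sketch under that assumption, mirroring test_concatenate_op_all with illustrative values:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as data_trans

def gen():
    yield (np.array([5., 6., 7., 8.], dtype=np.float64),)

prepend_tensor = np.array([1.4, 2., 3., 4., 4.5], dtype=np.float64)
append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float64)
concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)

data = ds.GeneratorDataset(gen, column_names=["col"])
# operations now leads; input_columns follows.
data = data.map(operations=concatenate_op, input_columns=["col"])
for row in data.create_dict_iterator(num_epochs=1):
    # prepend + original + append along axis 0
    print(row["col"])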


@ -86,12 +86,12 @@ def test_pipeline():
num_parallel_workers_original = ds.config.get_num_parallel_workers()
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
data1 = data1.map(input_columns=["image"], operations=[c_vision.Decode(True)])
data1 = data1.map(operations=[c_vision.Decode(True)], input_columns=["image"])
ds.serialize(data1, "testpipeline.json")
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, num_parallel_workers=num_parallel_workers_original,
shuffle=False)
data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode(True)])
data2 = data2.map(operations=[c_vision.Decode(True)], input_columns=["image"])
ds.serialize(data2, "testpipeline2.json")
# check that the generated output is different
@ -131,14 +131,14 @@ def test_deterministic_run_fail():
# outputs a deterministic series of numbers, e,g "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
decode_op = c_vision.Decode()
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=random_crop_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=random_crop_op, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op)
data2 = data2.map(operations=decode_op, input_columns=["image"])
# If seed is set up on constructor
data2 = data2.map(input_columns=["image"], operations=random_crop_op)
data2 = data2.map(operations=random_crop_op, input_columns=["image"])
try:
dataset_equal(data1, data2, 0)
@ -171,16 +171,16 @@ def test_seed_undeterministic():
# We get the seed when constructor is called
random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
decode_op = c_vision.Decode()
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=random_crop_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=random_crop_op, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op)
data2 = data2.map(operations=decode_op, input_columns=["image"])
# Since seed is set up on constructor, so the two ops output deterministic sequence.
# Assume the generated random sequence "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
data2 = data2.map(input_columns=["image"], operations=random_crop_op2)
data2 = data2.map(operations=random_crop_op2, input_columns=["image"])
try:
dataset_equal(data1, data2, 0)
except Exception as e:
@ -211,15 +211,15 @@ def test_seed_deterministic():
# seed will be read in during constructor call
random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
decode_op = c_vision.Decode()
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=random_crop_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=random_crop_op, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op)
data2 = data2.map(operations=decode_op, input_columns=["image"])
# If seed is set up on constructor, so the two ops output deterministic sequence
random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
data2 = data2.map(input_columns=["image"], operations=random_crop_op2)
data2 = data2.map(operations=random_crop_op2, input_columns=["image"])
dataset_equal(data1, data2, 0)
@ -246,15 +246,15 @@ def test_deterministic_run_distribution():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
random_horizontal_flip_op = c_vision.RandomHorizontalFlip(0.1)
decode_op = c_vision.Decode()
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=random_horizontal_flip_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=random_horizontal_flip_op, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op)
data2 = data2.map(operations=decode_op, input_columns=["image"])
# If seed is set up on constructor, so the two ops output deterministic sequence
random_horizontal_flip_op2 = c_vision.RandomHorizontalFlip(0.1)
data2 = data2.map(input_columns=["image"], operations=random_horizontal_flip_op2)
data2 = data2.map(operations=random_horizontal_flip_op2, input_columns=["image"])
dataset_equal(data1, data2, 0)
@ -285,7 +285,7 @@ def test_deterministic_python_seed():
py_vision.ToTensor(),
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform)
data1 = data1.map(operations=transform, input_columns=["image"])
data1_output = []
# config.set_seed() calls random.seed()
for data_one in data1.create_dict_iterator(num_epochs=1):
@ -293,7 +293,7 @@ def test_deterministic_python_seed():
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=transform)
data2 = data2.map(operations=transform, input_columns=["image"])
# config.set_seed() calls random.seed(), resets seed for next dataset iterator
ds.config.set_seed(0)
@ -328,7 +328,7 @@ def test_deterministic_python_seed_multi_thread():
py_vision.ToTensor(),
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform, python_multiprocessing=True)
data1 = data1.map(operations=transform, input_columns=["image"], python_multiprocessing=True)
data1_output = []
# config.set_seed() calls random.seed()
for data_one in data1.create_dict_iterator(num_epochs=1):
@ -337,7 +337,7 @@ def test_deterministic_python_seed_multi_thread():
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
# If seed is set up on constructor
data2 = data2.map(input_columns=["image"], operations=transform, python_multiprocessing=True)
data2 = data2.map(operations=transform, input_columns=["image"], python_multiprocessing=True)
# config.set_seed() calls random.seed()
ds.config.set_seed(0)
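
Seeding is orthogonal to the reordering: ds.config.set_seed() still drives the random ops wherever operations sits in the call. A sketch of the new order with one random op, again on synthetic images rather than the TFRecord test data:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision

ds.config.set_seed(0)

def image_gen():
    for _ in range(2):
        yield (np.random.randint(0, 255, (128, 128, 3), dtype=np.uint8),)

data1 = ds.GeneratorDataset(image_gen, column_names=["image"], shuffle=False)
random_crop_op = c_vision.RandomCrop([64, 64])
# The op comes first, the column it applies to second.
data1 = data1.map(operations=random_crop_op, input_columns=["image"])
for item in data1.create_dict_iterator(num_epochs=1):
    print(item["image"].shape)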


@ -30,6 +30,7 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
GENERATE_GOLDEN = False
def test_cut_out_op(plot=False):
"""
Test Cutout
@ -45,7 +46,7 @@ def test_cut_out_op(plot=False):
f.RandomErasing(value='random')
]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1)
data1 = data1.map(operations=transform_1, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -57,7 +58,7 @@ def test_cut_out_op(plot=False):
cut_out_op
]
data2 = data2.map(input_columns=["image"], operations=transforms_2)
data2 = data2.map(operations=transforms_2, input_columns=["image"])
num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
@ -91,7 +92,7 @@ def test_cut_out_op_multicut(plot=False):
f.ToTensor(),
]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1)
data1 = data1.map(operations=transform_1, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -103,7 +104,7 @@ def test_cut_out_op_multicut(plot=False):
cut_out_op
]
data2 = data2.map(input_columns=["image"], operations=transforms_2)
data2 = data2.map(operations=transforms_2, input_columns=["image"])
num_iter = 0
image_list_1, image_list_2 = [], []
@ -136,8 +137,8 @@ def test_cut_out_md5():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = c.Decode()
cut_out_op = c.CutOut(100)
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=cut_out_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=cut_out_op, input_columns=["image"])
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
transforms = [
@ -146,7 +147,7 @@ def test_cut_out_md5():
f.Cutout(100)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data2 = data2.map(input_columns=["image"], operations=transform)
data2 = data2.map(operations=transform, input_columns=["image"])
# Compare with expected md5 from images
filename1 = "cut_out_01_c_result.npz"
@ -174,7 +175,7 @@ def test_cut_out_comp(plot=False):
f.Cutout(200)
]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1)
data1 = data1.map(operations=transform_1, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -184,7 +185,7 @@ def test_cut_out_comp(plot=False):
c.CutOut(200)
]
data2 = data2.map(input_columns=["image"], operations=transforms_2)
data2 = data2.map(operations=transforms_2, input_columns=["image"])
num_iter = 0
image_list_1, image_list_2 = [], []


@ -51,12 +51,12 @@ def test_cutmix_batch_success1(plot=False):
# CutMix Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
hwc2chw_op = vision.HWC2CHW()
data1 = data1.map(input_columns=["image"], operations=hwc2chw_op)
data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW, 2.0, 0.5)
data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
images_cutmix = None
for idx, (image, _) in enumerate(data1):
@ -94,12 +94,12 @@ def test_cutmix_batch_success2(plot=False):
# CutMix Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
rescale_op = vision.Rescale((1.0/255.0), 0.0)
data1 = data1.map(input_columns=["image"], operations=rescale_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
rescale_op = vision.Rescale((1.0 / 255.0), 0.0)
data1 = data1.map(operations=rescale_op, input_columns=["image"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
images_cutmix = None
for idx, (image, _) in enumerate(data1):
@ -125,7 +125,7 @@ def test_cutmix_batch_success3(plot=False):
ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
decode_op = vision.Decode()
ds_original = ds_original.map(input_columns=["image"], operations=[decode_op])
ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)
images_original = None
@ -139,14 +139,14 @@ def test_cutmix_batch_success3(plot=False):
data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
decode_op = vision.Decode()
data1 = data1.map(input_columns=["image"], operations=[decode_op])
data1 = data1.map(operations=[decode_op], input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(4, pad_info={}, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
images_cutmix = None
for idx, (image, _) in enumerate(data1):
@ -172,7 +172,7 @@ def test_cutmix_batch_success4(plot=False):
ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
decode_op = vision.Decode()
ds_original = ds_original.map(input_columns=["image"], operations=[decode_op])
ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
ds_original = ds_original.batch(2, drop_remainder=True)
images_original = None
@ -186,14 +186,14 @@ def test_cutmix_batch_success4(plot=False):
data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False)
decode_op = vision.Decode()
data1 = data1.map(input_columns=["image"], operations=[decode_op])
data1 = data1.map(operations=[decode_op], input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=100)
data1 = data1.map(input_columns=["attr"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["attr"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.5, 0.9)
data1 = data1.batch(2, drop_remainder=True)
data1 = data1.map(input_columns=["image", "attr"], operations=cutmix_batch_op)
data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "attr"])
images_cutmix = None
for idx, (image, _) in enumerate(data1):
@ -223,10 +223,10 @@ def test_cutmix_batch_nhwc_md5():
data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data = data.map(input_columns=["label"], operations=one_hot_op)
data = data.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data = data.batch(5, drop_remainder=True)
data = data.map(input_columns=["image", "label"], operations=cutmix_batch_op)
data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"])
filename = "cutmix_batch_c_nhwc_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@ -247,12 +247,12 @@ def test_cutmix_batch_nchw_md5():
# CutMixBatch Images
data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
hwc2chw_op = vision.HWC2CHW()
data = data.map(input_columns=["image"], operations=hwc2chw_op)
data = data.map(operations=hwc2chw_op, input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=10)
data = data.map(input_columns=["label"], operations=one_hot_op)
data = data.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
data = data.batch(5, drop_remainder=True)
data = data.map(input_columns=["image", "label"], operations=cutmix_batch_op)
data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"])
filename = "cutmix_batch_c_nchw_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@ -273,10 +273,10 @@ def test_cutmix_batch_fail1():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
with pytest.raises(RuntimeError) as error:
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
for idx, (image, _) in enumerate(data1):
if idx == 0:
images_cutmix = image
@ -297,7 +297,7 @@ def test_cutmix_batch_fail2():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, -1)
error_message = "Input is not within the required interval"
@ -315,7 +315,7 @@ def test_cutmix_batch_fail3():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, 2)
error_message = "Input is not within the required interval"
@ -333,7 +333,7 @@ def test_cutmix_batch_fail4():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, -1)
error_message = "Input is not within the required interval"
@ -351,10 +351,10 @@ def test_cutmix_batch_fail5():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image"], operations=cutmix_batch_op)
data1 = data1.map(operations=cutmix_batch_op, input_columns=["image"])
with pytest.raises(RuntimeError) as error:
images_cutmix = np.array([])
@ -378,10 +378,10 @@ def test_cutmix_batch_fail6():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
with pytest.raises(RuntimeError) as error:
images_cutmix = np.array([])
@ -406,7 +406,7 @@ def test_cutmix_batch_fail7():
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
with pytest.raises(RuntimeError) as error:
images_cutmix = np.array([])
@ -430,7 +430,7 @@ def test_cutmix_batch_fail8():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.0)
error_message = "Input is not within the required interval"
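
Every CutMixBatch test shares the same preamble: one-hot the label, batch, then map the batch-level op, now with operations first. A reduced sketch of that preamble on synthetic CIFAR-like data; the mode alias for mindspore.dataset.vision.utils and the generator below are assumptions made for this example, not part of the diff:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as data_trans
import mindspore.dataset.vision.c_transforms as vision
import mindspore.dataset.vision.utils as mode

def cifar_like_gen():
    # Synthetic 32x32x3 images with integer labels.
    for i in range(10):
        image = np.random.randint(0, 255, (32, 32, 3), dtype=np.uint8)
        label = np.array(i % 10, dtype=np.int32)
        yield (image, label)

data = ds.GeneratorDataset(cifar_like_gen, column_names=["image", "label"], shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data = data.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data = data.batch(5, drop_remainder=True)
data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"])
for image, label in data:
    print(image.shape, label.shape)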


@ -59,7 +59,7 @@ def test_numpy_slices_list_append():
data1 = de.TFRecordDataset(DATA_DIR)
resize_op = vision.Resize((resize_height, resize_width))
data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True), resize_op])
data1 = data1.map(operations=[vision.Decode(True), resize_op], input_columns=["image"])
res = []
for data in data1.create_dict_iterator(num_epochs=1):
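
A list of transforms can still be passed to a single map() call; only the keyword position changes. A sketch of that form on synthetic images, with Decode left out because it would need real encoded bytes:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as vision

resize_height, resize_width = 32, 32

def image_gen():
    for _ in range(3):
        yield (np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8),)

data1 = ds.GeneratorDataset(image_gen, column_names=["image"])
resize_op = vision.Resize((resize_height, resize_width))
# Several ops in one call: the list goes to operations, which now comes first.
data1 = data1.map(operations=[vision.CenterCrop([48, 48]), resize_op], input_columns=["image"])
for row in data1.create_dict_iterator(num_epochs=1):
    print(row["image"].shape)   # (32, 32, 3)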

View File

@ -46,8 +46,8 @@ def test_celeba_dataset_op():
data = data.repeat(2)
center_crop = vision.CenterCrop(crop_size)
resize_op = vision.Resize(resize_size, Inter.LINEAR) # Bilinear mode
data = data.map(input_columns=["image"], operations=center_crop)
data = data.map(input_columns=["image"], operations=resize_op)
data = data.map(operations=center_crop, input_columns=["image"])
data = data.map(operations=resize_op, input_columns=["image"])
count = 0
for item in data.create_dict_iterator(num_epochs=1):


@ -25,6 +25,7 @@ INVALID_FILE = "../data/dataset/testCOCO/annotations/invalid.json"
LACKOFIMAGE_FILE = "../data/dataset/testCOCO/annotations/lack_of_images.json"
INVALID_CATEGORY_ID_FILE = "../data/dataset/testCOCO/annotations/invalid_category_id.json"
def test_coco_detection():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection",
decode=True, shuffle=False)
@ -57,6 +58,7 @@ def test_coco_detection():
np.testing.assert_array_equal(np.array([[5]]), category_id[4])
np.testing.assert_array_equal(np.array([[6]]), category_id[5])
def test_coco_stuff():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff",
decode=True, shuffle=False)
@ -97,6 +99,7 @@ def test_coco_stuff():
segmentation[5])
np.testing.assert_array_equal(np.array([[0]]), iscrowd[5])
def test_coco_keypoint():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint",
decode=True, shuffle=False)
@ -124,6 +127,7 @@ def test_coco_keypoint():
keypoints[1])
np.testing.assert_array_equal(np.array([[10]]), num_keypoints[1])
def test_coco_panoptic():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True, shuffle=False)
num_iter = 0
@ -151,6 +155,7 @@ def test_coco_panoptic():
np.testing.assert_array_equal(np.array([[0], [0]]), iscrowd[1])
np.testing.assert_array_equal(np.array([[43102], [6079]]), area[1])
def test_coco_detection_classindex():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
class_index = data1.get_class_indexing()
@ -161,6 +166,7 @@ def test_coco_detection_classindex():
num_iter += 1
assert num_iter == 6
def test_coco_panootic_classindex():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True)
class_index = data1.get_class_indexing()
@ -170,6 +176,7 @@ def test_coco_panootic_classindex():
num_iter += 1
assert num_iter == 2
def test_coco_case_0():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
data1 = data1.shuffle(10)
@ -179,6 +186,7 @@ def test_coco_case_0():
num_iter += 1
assert num_iter == 2
def test_coco_case_1():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
sizes = [0.5, 0.5]
@ -194,28 +202,31 @@ def test_coco_case_1():
num_iter += 1
assert num_iter == 3
def test_coco_case_2():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
resize_op = vision.Resize((224, 224))
data1 = data1.map(input_columns=["image"], operations=resize_op)
data1 = data1.map(operations=resize_op, input_columns=["image"])
data1 = data1.repeat(4)
num_iter = 0
for _ in data1.__iter__():
num_iter += 1
assert num_iter == 24
def test_coco_case_3():
data1 = ds.CocoDataset(DATA_DIR_2, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
resize_op = vision.Resize((224, 224))
data1 = data1.map(input_columns=["image"], operations=resize_op)
data1 = data1.map(operations=resize_op, input_columns=["image"])
data1 = data1.repeat(4)
num_iter = 0
for _ in data1.__iter__():
num_iter += 1
assert num_iter == 24
def test_coco_case_exception():
try:
data1 = ds.CocoDataset("path_not_exist/", annotation_file=ANNOTATION_FILE, task="Detection")


@ -25,6 +25,7 @@ def generator_1d():
for i in range(64):
yield (np.array([i]),)
class DatasetGenerator:
def __init__(self):
pass
@ -241,11 +242,11 @@ def test_generator_8():
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(input_columns="col0", output_columns="out0", operations=(lambda x: x * 3),
data1 = data1.map(operations=(lambda x: x * 3), input_columns="col0", output_columns="out0",
num_parallel_workers=2)
data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x * 7, x)),
data1 = data1.map(operations=(lambda x: (x * 7, x)), input_columns="col1", output_columns=["out1", "out2"],
num_parallel_workers=2, column_order=["out0", "out1", "out2"])
data1 = data1.map(input_columns="out2", output_columns="out2", operations=(lambda x: x + 1),
data1 = data1.map(operations=(lambda x: x + 1), input_columns="out2", output_columns="out2",
num_parallel_workers=2)
i = 0
@ -268,9 +269,9 @@ def test_generator_9():
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["image", "label"])
data2 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
data1 = data1.map(input_columns="label", operations=(lambda x: x * 3),
data1 = data1.map(operations=(lambda x: x * 3), input_columns="label",
num_parallel_workers=4)
data2 = data2.map(input_columns="label", operations=(lambda x: x * 3),
data2 = data2.map(operations=(lambda x: x * 3), input_columns="label",
num_parallel_workers=4)
# Expected column order is not changed.
@ -298,7 +299,7 @@ def test_generator_10():
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)),
data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
column_order=['col0', 'out1', 'out2'], num_parallel_workers=2)
# Expected column order is |col0|out1|out2|
@ -322,7 +323,7 @@ def test_generator_11():
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)),
data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
column_order=['out1', 'out2'], num_parallel_workers=2)
# Expected column order is |out1|out2|
@ -503,7 +504,7 @@ def test_generator_error_3():
with pytest.raises(ValueError) as info:
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
data1 = data1.map(input_columns=["label"], output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)),
data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"], output_columns=["out1", "out2"],
num_parallel_workers=2)
for _ in data1:
@ -515,7 +516,7 @@ def test_generator_error_4():
with pytest.raises(RuntimeError) as info:
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
data1 = data1.map(input_columns=["label"], operations=(lambda x: (x, x * 5)),
data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"],
num_parallel_workers=2)
for _ in data1:
@ -706,6 +707,7 @@ def test_generator_dataset_size_4():
num_rows = num_rows + 1
assert data_size == num_rows
def test_generator_dataset_size_5():
"""
Test get_dataset_size after create_dict_iterator


@ -103,8 +103,8 @@ def test_manifest_dataset_multi_label_onehot():
data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False)
expect_label = [[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 1]]]
one_hot_encode = data_trans.OneHot(3)
data = data.map(input_columns=["label"], operations=one_hot_encode)
data = data.map(input_columns=["label"], operations=multi_label_hot)
data = data.map(operations=one_hot_encode, input_columns=["label"])
data = data.map(operations=multi_label_hot, input_columns=["label"])
data = data.batch(2)
count = 0
for item in data.create_dict_iterator(num_epochs=1):


@ -85,8 +85,8 @@ def test_case_0():
resize_op = vision.Resize((224, 224))
data1 = data1.map(input_columns=["image"], operations=resize_op)
data1 = data1.map(input_columns=["target"], operations=resize_op)
data1 = data1.map(operations=resize_op, input_columns=["image"])
data1 = data1.map(operations=resize_op, input_columns=["target"])
repeat_num = 4
data1 = data1.repeat(repeat_num)
batch_size = 2
@ -103,7 +103,7 @@ def test_case_1():
resize_op = vision.Resize((224, 224))
data1 = data1.map(input_columns=["image"], operations=resize_op)
data1 = data1.map(operations=resize_op, input_columns=["image"])
repeat_num = 4
data1 = data1.repeat(repeat_num)
batch_size = 2


@ -36,7 +36,7 @@ def test_decode_op():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
# Serialize and Load dataset requires using vision.Decode instead of vision.Decode().
data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)])
data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -57,7 +57,7 @@ def test_decode_op_tf_file_dataset():
# Decode with rgb format set to True
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.FILES)
data1 = data1.map(input_columns=["image"], operations=vision.Decode(True))
data1 = data1.map(operations=vision.Decode(True), input_columns=["image"])
for item in data1.create_dict_iterator(num_epochs=1):
logger.info('decode == {}'.format(item['image']))


@ -54,8 +54,8 @@ def test_case_1():
resize_op = vision.Resize((resize_height, resize_width))
# apply map operations on images
data = data.map(input_columns=["image"], operations=decode_op)
data = data.map(input_columns=["image"], operations=resize_op)
data = data.map(operations=decode_op, input_columns=["image"])
data = data.map(operations=resize_op, input_columns=["image"])
batch_size = 3
data = data.batch(batch_size, drop_remainder=True)
@ -79,8 +79,8 @@ def test_case_2():
resize_op = vision.Resize((resize_height, resize_width))
# apply map operations on images
data = data.map(input_columns=["image"], operations=decode_op)
data = data.map(input_columns=["image"], operations=resize_op)
data = data.map(operations=decode_op, input_columns=["image"])
data = data.map(operations=resize_op, input_columns=["image"])
batch_size = 2
data = data.batch(batch_size, drop_remainder=True)
@ -107,8 +107,8 @@ def test_case_3():
resize_op = vision.Resize((resize_height, resize_width))
# apply map operations on images
data = data.map(input_columns=["image"], operations=decode_op)
data = data.map(input_columns=["image"], operations=resize_op)
data = data.map(operations=decode_op, input_columns=["image"])
data = data.map(operations=resize_op, input_columns=["image"])
data = data.repeat(2)


@ -24,8 +24,8 @@ import mindspore.dataset.transforms.c_transforms as ops
def compare(array):
data = ds.NumpySlicesDataset([array], column_names="x")
array = np.array(array)
data = data.map(input_columns=["x"], output_columns=["x", "y"], column_order=["x", "y"],
operations=ops.Duplicate())
data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"],
column_order=["x", "y"])
for d in data.create_dict_iterator(num_epochs=1):
np.testing.assert_array_equal(array, d["x"])
np.testing.assert_array_equal(array, d["y"])
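
When a map renames or adds columns, output_columns and column_order keep their keyword form; only operations changes position. A sketch along the lines of the Duplicate test above, with illustrative data:

import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as ops

data = ds.NumpySlicesDataset([[1, 2, 3]], column_names="x")
# Duplicate emits two tensors, so two output columns are declared;
# column_order fixes the final column layout of the pipeline.
data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"],
                column_order=["x", "y"])
for d in data.create_dict_iterator(num_epochs=1):
    print(d["x"], d["y"])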


@ -79,7 +79,7 @@ def test_decode_op():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
# Serialize and Load dataset requires using vision.Decode instead of vision.Decode().
data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)])
data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)


@ -43,8 +43,7 @@ def test_equalize_py(plot=False):
F.Resize((224, 224)),
F.ToTensor()])
ds_original = ds.map(input_columns="image",
operations=transforms_original)
ds_original = ds.map(operations=transforms_original, input_columns="image")
ds_original = ds_original.batch(512)
@ -64,8 +63,7 @@ def test_equalize_py(plot=False):
F.Equalize(),
F.ToTensor()])
ds_equalize = ds.map(input_columns="image",
operations=transforms_equalize)
ds_equalize = ds.map(operations=transforms_equalize, input_columns="image")
ds_equalize = ds_equalize.batch(512)
@ -98,8 +96,7 @@ def test_equalize_c(plot=False):
transforms_original = [C.Decode(), C.Resize(size=[224, 224])]
ds_original = ds.map(input_columns="image",
operations=transforms_original)
ds_original = ds.map(operations=transforms_original, input_columns="image")
ds_original = ds_original.batch(512)
@ -117,8 +114,7 @@ def test_equalize_c(plot=False):
transform_equalize = [C.Decode(), C.Resize(size=[224, 224]),
C.Equalize()]
ds_equalize = ds.map(input_columns="image",
operations=transform_equalize)
ds_equalize = ds.map(operations=transform_equalize, input_columns="image")
ds_equalize = ds_equalize.batch(512)
@ -147,11 +143,9 @@ def test_equalize_py_c(plot=False):
# equalize Images in cpp
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(), C.Resize((224, 224))])
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
ds_c_equalize = ds.map(input_columns="image",
operations=C.Equalize())
ds_c_equalize = ds.map(operations=C.Equalize(), input_columns="image")
ds_c_equalize = ds_c_equalize.batch(512)
@ -165,16 +159,14 @@ def test_equalize_py_c(plot=False):
# Equalize images in python
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(), C.Resize((224, 224))])
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8),
F.ToPIL(),
F.Equalize(),
np.array])
ds_p_equalize = ds.map(input_columns="image",
operations=transforms_p_equalize)
ds_p_equalize = ds.map(operations=transforms_p_equalize, input_columns="image")
ds_p_equalize = ds_p_equalize.batch(512)
@ -206,13 +198,10 @@ def test_equalize_one_channel():
try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(),
C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])])
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])], input_columns=["image"])
ds.map(input_columns="image",
operations=c_op)
ds.map(operations=c_op, input_columns="image")
except RuntimeError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
@ -225,8 +214,7 @@ def test_equalize_mnist_c(plot=False):
"""
logger.info("Test Equalize C Op With MNIST Images")
ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
ds_equalize_c = ds.map(input_columns="image",
operations=C.Equalize())
ds_equalize_c = ds.map(operations=C.Equalize(), input_columns="image")
ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
images = []
@ -259,7 +247,7 @@ def test_equalize_md5_py():
F.Equalize(),
F.ToTensor()])
data1 = data1.map(input_columns="image", operations=transforms)
data1 = data1.map(operations=transforms, input_columns="image")
# Compare with expected md5 from images
filename = "equalize_01_result.npz"
save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
@ -279,7 +267,7 @@ def test_equalize_md5_c():
C.Equalize(),
F.ToTensor()]
data = ds.map(input_columns="image", operations=transforms_equalize)
data = ds.map(operations=transforms_equalize, input_columns="image")
# Compare with expected md5 from images
filename = "equalize_01_result_c.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)


@ -29,7 +29,7 @@ def test_exception_01():
logger.info("test_exception_01")
data = ds.TFRecordDataset(DATA_DIR, columns_list=["image"])
with pytest.raises(TypeError) as info:
data.map(input_columns=["image"], operations=vision.Resize(100, 100))
data.map(operations=vision.Resize(100, 100), input_columns=["image"])
assert "Argument interpolation with value 100 is not of type (<enum 'Inter'>,)" in str(info.value)
@ -45,8 +45,8 @@ def test_exception_02():
num_samples = 1
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
data = data.map(input_columns=["image"], operations=vision.Decode())
data = data.map(input_columns=["image"], operations=vision.Resize((100, 100)))
data = data.map(operations=vision.Decode(), input_columns=["image"])
data = data.map(operations=vision.Resize((100, 100)), input_columns=["image"])
# Confirm 1 sample in dataset
assert sum([1 for _ in data]) == 1
num_iters = 0


@ -28,7 +28,7 @@ def test_fillop_basic():
data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill(3)
data = data.map(input_columns=["col"], operations=fill_op)
data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array([3, 3, 3, 3], dtype=np.uint8)
for data_row in data:
np.testing.assert_array_equal(data_row[0], expected)
@ -41,7 +41,7 @@ def test_fillop_down_type_cast():
data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill(-3)
data = data.map(input_columns=["col"], operations=fill_op)
data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array([253, 253, 253, 253], dtype=np.uint8)
for data_row in data:
np.testing.assert_array_equal(data_row[0], expected)
@ -54,7 +54,7 @@ def test_fillop_up_type_cast():
data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill(3)
data = data.map(input_columns=["col"], operations=fill_op)
data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array([3., 3., 3., 3.], dtype=np.float)
for data_row in data:
np.testing.assert_array_equal(data_row[0], expected)
@ -67,7 +67,7 @@ def test_fillop_string():
data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill("error")
data = data.map(input_columns=["col"], operations=fill_op)
data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array(['error', 'error'], dtype='S')
for data_row in data:
np.testing.assert_array_equal(data_row[0], expected)
@ -79,7 +79,7 @@ def test_fillop_error_handling():
data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill("words")
data = data.map(input_columns=["col"], operations=fill_op)
data = data.map(operations=fill_op, input_columns=["col"])
with pytest.raises(RuntimeError) as error_info:
for _ in data:


@ -30,7 +30,7 @@ def test_diff_predicate_func():
cde.Resize([64, 64])
]
dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image", "label"], shuffle=False)
dataset = dataset.map(input_columns=["image"], operations=transforms, num_parallel_workers=1)
dataset = dataset.map(operations=transforms, input_columns=["image"], num_parallel_workers=1)
dataset = dataset.filter(input_columns=["image", "label"], predicate=predicate_func, num_parallel_workers=4)
num_iter = 0
@ -261,8 +261,8 @@ def func_map_part(data_col1):
# test with map
def test_filter_by_generator_with_map_all_col():
dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"])
dataset_map = dataset.map(input_columns=["col1"], output_columns=["col1"], operations=func_map_part)
# dataset_map = dataset.map( operations=func_map_part)
dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["col1"])
# dataset_map = dataset.map(operations=func_map_part)
dataset_f = dataset_map.filter(input_columns=["col1"], predicate=filter_func_map_part, num_parallel_workers=1)
num_iter = 0
ret_data = []
@ -277,7 +277,7 @@ def test_filter_by_generator_with_map_all_col():
# test with map
def test_filter_by_generator_with_map_part_col():
dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"])
dataset_map = dataset.map(input_columns=["col1"], output_columns=["out1"], operations=func_map_part)
dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"])
dataset_f = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_map, num_parallel_workers=4)
num_iter = 0
@ -328,7 +328,7 @@ def filter_func_input_column3(col1):
# test with input_columns
def test_filter_by_generator_with_input_column():
dataset = ds.GeneratorDataset(generator_mc(64), ["col1", "col2"])
dataset_map = dataset.map(input_columns=["col1"], output_columns=["out1"], operations=func_map_part)
dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"])
dataset_f1 = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_input_column1,
num_parallel_workers=4)
dataset_f2 = dataset_f1.filter(input_columns=["out1"], predicate=filter_func_input_column2, num_parallel_workers=4)
@ -382,7 +382,7 @@ def test_filter_by_generator_Partial1():
dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"])
dataset_zip = ds.zip((dataset1, dataset2))
dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2)
dataset_map = dataset_f1.map(input_columns=["col1"], output_columns=["out1"], operations=lambda x1: x1 + 400)
dataset_map = dataset_f1.map(operations=lambda x1: x1 + 400, input_columns=["col1"], output_columns=["out1"])
ret = []
for item in dataset_map.create_dict_iterator(num_epochs=1):
ret.append(item["out1"])
@ -399,8 +399,8 @@ def test_filter_by_generator_Partial2():
dataset2f = dataset2.filter(input_columns=["col3"], predicate=lambda x: x not in [203, 207, 209],
num_parallel_workers=2)
dataset_zip = ds.zip((dataset1f, dataset2f))
dataset_map = dataset_zip.map(input_columns=["col1", "col3"], output_columns=["out1", "out3"],
operations=lambda x1, x3: (x1 + 400, x3 + 500))
dataset_map = dataset_zip.map(operations=lambda x1, x3: (x1 + 400, x3 + 500), input_columns=["col1", "col3"],
output_columns=["out1", "out3"])
ret1 = []
ret3 = []
for item in dataset_map.create_dict_iterator(num_epochs=1):
@ -484,6 +484,7 @@ def test_filter_by_generator_with_map_all_sort():
assert ret_data[0]["col1"] == 0
assert ret_data[9]["col6"] == 509
def test_filter_by_generator_get_dataset_size():
dataset = ds.GeneratorDataset(generator_1d, ["data"])
dataset = dataset.filter(predicate=filter_func_shuffle_after, num_parallel_workers=4)
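
filter() keeps its predicate/input_columns keywords; only the neighbouring map() calls are reordered. A sketch of a map-then-filter chain in the new order, with illustrative names:

import numpy as np
import mindspore.dataset as ds

def int_gen():
    for i in range(10):
        yield (np.array([i], dtype=np.int64),)

dataset = ds.GeneratorDataset(int_gen, ["data"])
# operations first, then the column it reads; the lambda is illustrative.
dataset = dataset.map(operations=(lambda x: x * 3), input_columns=["data"])
# filter's keyword order is unchanged by this commit.
dataset = dataset.filter(input_columns=["data"], predicate=lambda x: bool(x[0] % 2 == 0),
                         num_parallel_workers=1)
for item in dataset.create_dict_iterator(num_epochs=1):
    print(item["data"])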


@ -41,7 +41,7 @@ def test_five_crop_op(plot=False):
vision.ToTensor(),
]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1)
data1 = data1.map(operations=transform_1, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -51,7 +51,7 @@ def test_five_crop_op(plot=False):
lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images
]
transform_2 = mindspore.dataset.transforms.py_transforms.Compose(transforms_2)
data2 = data2.map(input_columns=["image"], operations=transform_2)
data2 = data2.map(operations=transform_2, input_columns=["image"])
num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
@ -85,7 +85,7 @@ def test_five_crop_error_msg():
vision.ToTensor()
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data = data.map(input_columns=["image"], operations=transform)
data = data.map(operations=transform, input_columns=["image"])
with pytest.raises(RuntimeError) as info:
for _ in data:
@ -110,7 +110,7 @@ def test_five_crop_md5():
lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data = data.map(input_columns=["image"], operations=transform)
data = data.map(operations=transform, input_columns=["image"])
# Compare with expected md5 from images
filename = "five_crop_01_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)


@ -26,7 +26,7 @@ def test_demo_basic_from_dataset():
vocab = text.Vocab.from_dataset(data, "text", freq_range=None, top_k=None,
special_tokens=["<pad>", "<unk>"],
special_first=True)
data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "<unk>"))
data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
res = []
for d in data.create_dict_iterator(num_epochs=1):
res.append(d["text"].item())
@ -36,10 +36,10 @@ def test_demo_basic_from_dataset():
def test_demo_basic_from_dataset_with_tokenizer():
""" this is a tutorial on how from_dataset should be used in a normal use case with tokenizer"""
data = ds.TextFileDataset("../data/dataset/testTokenizerData/1.txt", shuffle=False)
data = data.map(input_columns=["text"], operations=text.UnicodeCharTokenizer())
data = data.map(operations=text.UnicodeCharTokenizer(), input_columns=["text"])
vocab = text.Vocab.from_dataset(data, None, freq_range=None, top_k=None, special_tokens=["<pad>", "<unk>"],
special_first=True)
data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "<unk>"))
data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
res = []
for d in data.create_dict_iterator(num_epochs=1):
res.append(list(d["text"]))
@ -60,7 +60,7 @@ def test_from_dataset():
corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"])
vocab = text.Vocab.from_dataset(corpus_dataset, None, freq_range, top_k, special_tokens=["<pad>", "<unk>"],
special_first=True)
corpus_dataset = corpus_dataset.map(input_columns="text", operations=text.Lookup(vocab, "<unk>"))
corpus_dataset = corpus_dataset.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text")
res = []
for d in corpus_dataset.create_dict_iterator(num_epochs=1):
res.append(list(d["text"]))
@ -108,7 +108,7 @@ def test_from_dataset_special_token():
corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"])
vocab = text.Vocab.from_dataset(corpus_dataset, None, None, top_k, special_tokens, special_first)
data = ds.GeneratorDataset(gen_input(texts), column_names=["text"])
data = data.map(input_columns="text", operations=text.Lookup(vocab, "<unk>"))
data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text")
res = []
for d in data.create_dict_iterator(num_epochs=1):
res.append(d["text"].item())


@ -95,16 +95,16 @@ def test_get_column_name_manifest():
def test_get_column_name_map():
data = ds.Cifar10Dataset(CIFAR10_DIR)
center_crop_op = vision.CenterCrop(10)
data = data.map(input_columns=["image"], operations=center_crop_op)
data = data.map(operations=center_crop_op, input_columns=["image"])
assert data.get_col_names() == ["image", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR)
data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["image"])
data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["image"])
assert data.get_col_names() == ["image", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR)
data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1"])
data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1"])
assert data.get_col_names() == ["col1", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR)
data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1", "col2"],
data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1", "col2"],
column_order=["col2", "col1"])
assert data.get_col_names() == ["col2", "col1"]


@ -42,8 +42,7 @@ def test_invert_py(plot=False):
F.Resize((224, 224)),
F.ToTensor()])
ds_original = ds.map(input_columns="image",
operations=transforms_original)
ds_original = ds.map(operations=transforms_original, input_columns="image")
ds_original = ds_original.batch(512)
@ -63,8 +62,7 @@ def test_invert_py(plot=False):
F.Invert(),
F.ToTensor()])
ds_invert = ds.map(input_columns="image",
operations=transforms_invert)
ds_invert = ds.map(operations=transforms_invert, input_columns="image")
ds_invert = ds_invert.batch(512)
@ -97,8 +95,7 @@ def test_invert_c(plot=False):
transforms_original = [C.Decode(), C.Resize(size=[224, 224])]
ds_original = ds.map(input_columns="image",
operations=transforms_original)
ds_original = ds.map(operations=transforms_original, input_columns="image")
ds_original = ds_original.batch(512)
@ -116,8 +113,7 @@ def test_invert_c(plot=False):
transform_invert = [C.Decode(), C.Resize(size=[224, 224]),
C.Invert()]
ds_invert = ds.map(input_columns="image",
operations=transform_invert)
ds_invert = ds.map(operations=transform_invert, input_columns="image")
ds_invert = ds_invert.batch(512)
@ -146,11 +142,9 @@ def test_invert_py_c(plot=False):
# Invert Images in cpp
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(), C.Resize((224, 224))])
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
ds_c_invert = ds.map(input_columns="image",
operations=C.Invert())
ds_c_invert = ds.map(operations=C.Invert(), input_columns="image")
ds_c_invert = ds_c_invert.batch(512)
@ -164,16 +158,14 @@ def test_invert_py_c(plot=False):
# invert images in python
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(), C.Resize((224, 224))])
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
transforms_p_invert = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8),
F.ToPIL(),
F.Invert(),
np.array])
ds_p_invert = ds.map(input_columns="image",
operations=transforms_p_invert)
ds_p_invert = ds.map(operations=transforms_p_invert, input_columns="image")
ds_p_invert = ds_p_invert.batch(512)
@ -205,13 +197,10 @@ def test_invert_one_channel():
try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"],
operations=[C.Decode(),
C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])])
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)),
lambda img: np.array(img[:, :, 0])], input_columns=["image"])
ds.map(input_columns="image",
operations=c_op)
ds.map(operations=c_op, input_columns="image")
except RuntimeError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
@ -231,7 +220,7 @@ def test_invert_md5_py():
F.Invert(),
F.ToTensor()])
data = ds.map(input_columns="image", operations=transforms_invert)
data = ds.map(operations=transforms_invert, input_columns="image")
# Compare with expected md5 from images
filename = "invert_01_result_py.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@ -251,7 +240,7 @@ def test_invert_md5_c():
C.Invert(),
F.ToTensor()]
data = ds.map(input_columns="image", operations=transforms_invert)
data = ds.map(operations=transforms_invert, input_columns="image")
# Compare with expected md5 from images
filename = "invert_01_result_c.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)


@ -51,15 +51,15 @@ def test_linear_transformation_op(plot=False):
# First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=transform)
data1 = data1.map(operations=transform, input_columns=["image"])
# Note: if transformation matrix is diagonal matrix with all 1 in diagonal,
# the output matrix in expected to be the same as the input matrix.
data1 = data1.map(input_columns=["image"],
operations=py_vision.LinearTransformation(transformation_matrix, mean_vector))
data1 = data1.map(operations=py_vision.LinearTransformation(transformation_matrix, mean_vector),
input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=transform)
data2 = data2.map(operations=transform, input_columns=["image"])
image_transformed = []
image = []
@ -98,7 +98,7 @@ def test_linear_transformation_md5():
py_vision.LinearTransformation(transformation_matrix, mean_vector)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform)
data1 = data1.map(operations=transform, input_columns=["image"])
# Compare with expected md5 from images
filename = "linear_transformation_01_result.npz"
@ -128,7 +128,7 @@ def test_linear_transformation_exception_01():
py_vision.LinearTransformation(None, mean_vector)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform)
data1 = data1.map(operations=transform, input_columns=["image"])
except TypeError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "Argument transformation_matrix with value None is not of type (<class 'numpy.ndarray'>,)" in str(e)
@ -157,7 +157,7 @@ def test_linear_transformation_exception_02():
py_vision.LinearTransformation(transformation_matrix, None)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform)
data1 = data1.map(operations=transform, input_columns=["image"])
except TypeError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "Argument mean_vector with value None is not of type (<class 'numpy.ndarray'>,)" in str(e)
@ -187,7 +187,7 @@ def test_linear_transformation_exception_03():
py_vision.LinearTransformation(transformation_matrix, mean_vector)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform)
data1 = data1.map(operations=transform, input_columns=["image"])
except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "square matrix" in str(e)
@ -217,7 +217,7 @@ def test_linear_transformation_exception_04():
py_vision.LinearTransformation(transformation_matrix, mean_vector)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform)
data1 = data1.map(operations=transform, input_columns=["image"])
except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "should match" in str(e)

View File

@ -73,6 +73,7 @@ def add_and_remove_cv_file():
os.remove("{}".format(x))
os.remove("{}.db".format(x))
@pytest.fixture
def add_and_remove_nlp_file():
"""add/remove nlp file"""
@ -265,6 +266,7 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file):
assert partitions(5) == 2
assert partitions(9) == 2
def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]
@ -287,6 +289,7 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file):
assert partitions(5) == 1
assert partitions(9) == 1
def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]
@ -309,6 +312,7 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file):
assert partitions(5) == 2
assert partitions(9) == 2
def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]
@ -354,11 +358,11 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c
logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1
if num_iter <= 4:
epoch1.append(item["file_name"]) # save epoch 1 list
epoch1.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 8:
epoch2.append(item["file_name"]) # save epoch 2 list
epoch2.append(item["file_name"]) # save epoch 2 list
else:
epoch3.append(item["file_name"]) # save epoch 3 list
epoch3.append(item["file_name"]) # save epoch 3 list
assert num_iter == 12
assert len(epoch1) == 4
assert len(epoch2) == 4
@ -376,9 +380,9 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc
columns_list = ["data", "file_name", "label"]
num_readers = 4
num_shards = 3
epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result
[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 1 result
[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # svae partition 2 result
epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result
[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 1 result
[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # svae partition 2 result
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
@ -392,7 +396,7 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
# 3 partitions in total, 4 results per epoch, 12 results in total
epoch_result[partition_id][int(num_iter / 4)][num_iter % 4] = item["file_name"] # save epoch result
epoch_result[partition_id][int(num_iter / 4)][num_iter % 4] = item["file_name"] # save epoch result
num_iter += 1
assert num_iter == 12
assert epoch_result[partition_id][0] not in (epoch_result[partition_id][1], epoch_result[partition_id][2])
@ -425,11 +429,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1
if num_iter <= 10:
epoch1.append(item["file_name"]) # save epoch 1 list
epoch1.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 20:
epoch2.append(item["file_name"]) # save epoch 2 list
epoch2.append(item["file_name"]) # save epoch 2 list
else:
epoch3.append(item["file_name"]) # save epoch 3 list
epoch3.append(item["file_name"]) # save epoch 3 list
assert num_iter == 30
assert len(epoch1) == 10
assert len(epoch2) == 10
@ -451,11 +455,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1
if num_iter <= 10:
epoch1_new_dataset.append(item["file_name"]) # save epoch 1 list
epoch1_new_dataset.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 20:
epoch2_new_dataset.append(item["file_name"]) # save epoch 2 list
epoch2_new_dataset.append(item["file_name"]) # save epoch 2 list
else:
epoch3_new_dataset.append(item["file_name"]) # save epoch 3 list
epoch3_new_dataset.append(item["file_name"]) # save epoch 3 list
assert num_iter == 30
assert len(epoch1_new_dataset) == 10
assert len(epoch2_new_dataset) == 10
@ -482,11 +486,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1
if num_iter <= 10:
epoch1_new_dataset2.append(item["file_name"]) # save epoch 1 list
epoch1_new_dataset2.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 20:
epoch2_new_dataset2.append(item["file_name"]) # save epoch 2 list
epoch2_new_dataset2.append(item["file_name"]) # save epoch 2 list
else:
epoch3_new_dataset2.append(item["file_name"]) # save epoch 3 list
epoch3_new_dataset2.append(item["file_name"]) # save epoch 3 list
assert num_iter == 30
assert len(epoch1_new_dataset2) == 10
assert len(epoch2_new_dataset2) == 10
@ -532,8 +536,8 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file):
data_set = data_set.map(
input_columns=["data"], operations=decode_op, num_parallel_workers=2)
resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR)
data_set = data_set.map(input_columns="data",
operations=resize_op, num_parallel_workers=2)
data_set = data_set.map(operations=resize_op, input_columns="data",
num_parallel_workers=2)
data_set = data_set.batch(2)
data_set = data_set.repeat(2)
num_iter = 0
@ -563,8 +567,8 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file):
data_set = data_set.map(
input_columns=["data"], operations=decode_op, num_parallel_workers=2)
resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR)
data_set = data_set.map(input_columns="data",
operations=resize_op, num_parallel_workers=2)
data_set = data_set.map(operations=resize_op, input_columns="data",
num_parallel_workers=2)
data_set = data_set.batch(32, drop_remainder=True)
num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
@ -707,6 +711,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
if os.path.exists("{}.db".format(CV2_FILE_NAME)):
os.remove("{}.db".format(CV2_FILE_NAME))
def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
@ -757,6 +762,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
os.remove("{}".format(x))
os.remove("{}.db".format(x))
def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
columns_list = ["data", "file_name", "label"]
@ -778,6 +784,7 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
num_iter += 1
assert num_iter == 10
def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file):
"""tutorial for nlp minderdataset."""
num_readers = 4
@ -1522,6 +1529,7 @@ def test_write_with_multi_bytes_and_MindDataset():
os.remove("{}".format(mindrecord_file_name))
os.remove("{}.db".format(mindrecord_file_name))
def test_write_with_multi_array_and_MindDataset():
mindrecord_file_name = "test.mindrecord"
try:
@ -1741,9 +1749,9 @@ def test_numpy_generic():
for idx in range(10):
row = {}
row['label1'] = np.int32(idx)
row['label2'] = np.int64(idx*10)
row['label3'] = np.float32(idx+0.12345)
row['label4'] = np.float64(idx+0.12345789)
row['label2'] = np.int64(idx * 10)
row['label3'] = np.float32(idx + 0.12345)
row['label4'] = np.float64(idx + 0.12345789)
data.append(row)
writer.add_schema(cv_schema_json, "img_schema")
writer.write_raw_data(data)
@ -1923,6 +1931,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset(
os.remove("{}".format(mindrecord_file_name))
os.remove("{}.db".format(mindrecord_file_name))
if __name__ == '__main__':
test_nlp_compress_data(add_and_remove_nlp_compress_file)
test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file)
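The MindDataset hunks reorder map the same way. A sketch of the decode/resize portion, under the assumption that a sharded mindrecord file (its name is assumed here) has already been produced by the add_and_remove_cv_file fixture.
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as vision

CV_FILE_NAME = "./imagenet.mindrecord"          # assumed fixture output
columns_list = ["data", "file_name", "label"]
num_readers = 4

data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
data_set = data_set.map(operations=vision.Decode(), input_columns=["data"],
                        num_parallel_workers=2)
data_set = data_set.map(operations=vision.Resize((32, 32)), input_columns="data",
                        num_parallel_workers=2)
data_set = data_set.batch(2)
data_set = data_set.repeat(2)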

View File

@ -37,9 +37,9 @@ def test_one_hot_op():
num_classes = 2
epsilon_para = 0.1
transforms = [f.OneHotOp(num_classes=num_classes, smoothing_rate=epsilon_para),]
transforms = [f.OneHotOp(num_classes=num_classes, smoothing_rate=epsilon_para)]
transform_label = f.Compose(transforms)
dataset = dataset.map(input_columns=["label"], operations=transform_label)
dataset = dataset.map(operations=transform_label, input_columns=["label"])
golden_label = np.ones(num_classes) * epsilon_para / num_classes
golden_label[1] = 1 - epsilon_para / num_classes
@ -69,9 +69,9 @@ def test_mix_up_single():
resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR)
one_hot_encode = c.OneHot(num_classes) # num_classes is input argument
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
ds1 = ds1.map(input_columns=["image"], operations=resize_op)
ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode)
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.map(operations=resize_op, input_columns=["image"])
ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"])
# apply batch operations
batch_size = 3
@ -81,7 +81,7 @@ def test_mix_up_single():
alpha = 0.2
transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=True)
]
ds1 = ds1.map(input_columns=["image", "label"], operations=transforms)
ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])
for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)):
image1 = data1["image"]
@ -118,9 +118,9 @@ def test_mix_up_multi():
resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR)
one_hot_encode = c.OneHot(num_classes) # num_classes is input argument
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
ds1 = ds1.map(input_columns=["image"], operations=resize_op)
ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode)
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
ds1 = ds1.map(operations=resize_op, input_columns=["image"])
ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"])
# apply batch operations
batch_size = 3
@ -130,7 +130,7 @@ def test_mix_up_multi():
alpha = 0.2
transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False)
]
ds1 = ds1.map(input_columns=["image", "label"], operations=transforms)
ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])
num_iter = 0
batch1_image1 = 0
for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)):
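For the per-sample MixUp hunks, a sketch of the reordered pipeline; the dataset directory, image size, and class count are assumptions, while the op calls mirror the hunks above.
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as c
import mindspore.dataset.vision.c_transforms as c_vision
import mindspore.dataset.vision.py_transforms as py_vision

DATA_DIR = "../data/dataset/testImageNetData/train/"    # placeholder
num_classes = 10                                        # assumption

ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds1 = ds1.map(operations=c_vision.Decode(), input_columns=["image"])
ds1 = ds1.map(operations=c_vision.Resize((224, 224)), input_columns=["image"])
ds1 = ds1.map(operations=c.OneHot(num_classes), input_columns=["label"])

batch_size = 3
ds1 = ds1.batch(batch_size, drop_remainder=True)
# MixUp is a python op applied to the batched image/label pair
ds1 = ds1.map(operations=[py_vision.MixUp(batch_size=batch_size, alpha=0.2, is_single=True)],
              input_columns=["image", "label"])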

View File

@ -30,6 +30,7 @@ DATA_DIR3 = "../data/dataset/testCelebAData/"
GENERATE_GOLDEN = False
def test_mixup_batch_success1(plot=False):
"""
Test MixUpBatch op with specified alpha parameter
@ -51,10 +52,10 @@ def test_mixup_batch_success1(plot=False):
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch(2)
data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
images_mixup = None
for idx, (image, _) in enumerate(data1):
@ -81,7 +82,7 @@ def test_mixup_batch_success2(plot=False):
# Original Images
ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
decode_op = vision.Decode()
ds_original = ds_original.map(input_columns=["image"], operations=[decode_op])
ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)
images_original = None
@ -95,14 +96,14 @@ def test_mixup_batch_success2(plot=False):
data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
decode_op = vision.Decode()
data1 = data1.map(input_columns=["image"], operations=[decode_op])
data1 = data1.map(operations=[decode_op], input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch(2.0)
data1 = data1.batch(4, pad_info={}, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
images_mixup = None
for idx, (image, _) in enumerate(data1):
@ -142,10 +143,10 @@ def test_mixup_batch_success3(plot=False):
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch()
data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
images_mixup = np.array([])
for idx, (image, _) in enumerate(data1):
@ -173,7 +174,7 @@ def test_mixup_batch_success4(plot=False):
# Original Images
ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
decode_op = vision.Decode()
ds_original = ds_original.map(input_columns=["image"], operations=[decode_op])
ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
ds_original = ds_original.batch(2, drop_remainder=True)
images_original = None
@ -187,14 +188,14 @@ def test_mixup_batch_success4(plot=False):
data1 = ds.CelebADataset(DATA_DIR3, shuffle=False)
decode_op = vision.Decode()
data1 = data1.map(input_columns=["image"], operations=[decode_op])
data1 = data1.map(operations=[decode_op], input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=100)
data1 = data1.map(input_columns=["attr"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["attr"])
mixup_batch_op = vision.MixUpBatch()
data1 = data1.batch(2, drop_remainder=True)
data1 = data1.map(input_columns=["image", "attr"], operations=mixup_batch_op)
data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "attr"])
images_mixup = np.array([])
for idx, (image, _) in enumerate(data1):
@ -224,10 +225,10 @@ def test_mixup_batch_md5():
data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data = data.map(input_columns=["label"], operations=one_hot_op)
data = data.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch()
data = data.batch(5, drop_remainder=True)
data = data.map(input_columns=["image", "label"], operations=mixup_batch_op)
data = data.map(operations=mixup_batch_op, input_columns=["image", "label"])
filename = "mixup_batch_c_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@ -259,10 +260,10 @@ def test_mixup_batch_fail1():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch(0.1)
with pytest.raises(RuntimeError) as error:
data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
for idx, (image, _) in enumerate(data1):
if idx == 0:
images_mixup = image
@ -294,7 +295,7 @@ def test_mixup_batch_fail2():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error:
vision.MixUpBatch(-1)
error_message = "Input is not within the required interval"
@ -322,10 +323,10 @@ def test_mixup_batch_fail3():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
mixup_batch_op = vision.MixUpBatch()
data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image"], operations=mixup_batch_op)
data1 = data1.map(operations=mixup_batch_op, input_columns=["image"])
with pytest.raises(RuntimeError) as error:
images_mixup = np.array([])
@ -337,6 +338,7 @@ def test_mixup_batch_fail3():
error_message = "Both images and labels columns are required"
assert error_message in str(error.value)
def test_mixup_batch_fail4():
"""
Test MixUpBatch Fail 2
@ -359,7 +361,7 @@ def test_mixup_batch_fail4():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
data1 = data1.map(input_columns=["label"], operations=one_hot_op)
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error:
vision.MixUpBatch(0.0)
error_message = "Input is not within the required interval"
@ -389,7 +391,7 @@ def test_mixup_batch_fail5():
mixup_batch_op = vision.MixUpBatch()
data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
with pytest.raises(RuntimeError) as error:
images_mixup = np.array([])
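The MixUpBatch hunks follow the same pattern; a compact sketch grounded in the calls above (the Cifar-10 test-data path is a placeholder). MixUpBatch runs after batch() and needs both the image and label columns.
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as data_trans
import mindspore.dataset.vision.c_transforms as vision

DATA_DIR = "../data/dataset/testCifar10Data"     # placeholder

data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
data1 = data1.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
data1 = data1.batch(5, drop_remainder=True)
data1 = data1.map(operations=vision.MixUpBatch(2), input_columns=["image", "label"])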

View File

@ -39,7 +39,7 @@ def test_multiple_ngrams():
yield (np.array(line.split(" "), dtype='S'),)
dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"])
dataset = dataset.map(input_columns=["text"], operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " "))
dataset = dataset.map(operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " "), input_columns="text")
i = 0
for data in dataset.create_dict_iterator(num_epochs=1):
@ -61,7 +61,7 @@ def test_simple_ngram():
yield (np.array(line.split(" "), dtype='S'),)
dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"])
dataset = dataset.map(input_columns=["text"], operations=text.Ngram(3, separator=" "))
dataset = dataset.map(operations=text.Ngram(3, separator=" "), input_columns="text")
i = 0
for data in dataset.create_dict_iterator(num_epochs=1):
@ -78,7 +78,7 @@ def test_corner_cases():
try:
dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"])
dataset = dataset.map(input_columns=["text"], operations=text.Ngram(n, l_pad, r_pad, separator=sep))
dataset = dataset.map(operations=text.Ngram(n, l_pad, r_pad, separator=sep), input_columns=["text"])
for data in dataset.create_dict_iterator(num_epochs=1):
return [d.decode("utf8") for d in data["text"]]
except (ValueError, TypeError) as e:
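The Ngram hunks reorder the same call; a self-contained sketch with an in-memory generator (the sample strings are made up).
import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.text as text

def gen(lines):
    for line in lines:
        yield (np.array(line.split(" "), dtype='S'),)

lines = ["this is a test", "another short line"]       # made-up sample input
dataset = ds.GeneratorDataset(gen(lines), column_names=["text"])
dataset = dataset.map(operations=text.Ngram(3, separator=" "), input_columns="text")
for data in dataset.create_dict_iterator(num_epochs=1):
    print([d.decode("utf8") for d in data["text"]])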

View File

@ -32,10 +32,10 @@ def test_on_tokenized_line():
for line in f:
word = line.split(',')[0]
jieba_op.add_word(word)
data = data.map(input_columns=["text"], operations=jieba_op)
data = data.map(operations=jieba_op, input_columns=["text"])
vocab = text.Vocab.from_file(VOCAB_FILE, ",", special_tokens=["<pad>", "<unk>"])
lookup = text.Lookup(vocab, "<unk>")
data = data.map(input_columns=["text"], operations=lookup)
data = data.map(operations=lookup, input_columns=["text"])
res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14],
[11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32)
for i, d in enumerate(data.create_dict_iterator(num_epochs=1)):
@ -50,10 +50,10 @@ def test_on_tokenized_line_with_no_special_tokens():
word = line.split(',')[0]
jieba_op.add_word(word)
data = data.map(input_columns=["text"], operations=jieba_op)
data = data.map(operations=jieba_op, input_columns=["text"])
vocab = text.Vocab.from_file(VOCAB_FILE, ",")
lookup = text.Lookup(vocab, "not")
data = data.map(input_columns=["text"], operations=lookup)
data = data.map(operations=lookup, input_columns=["text"])
res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12],
[9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32)
for i, d in enumerate(data.create_dict_iterator(num_epochs=1)):
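For the Lookup hunks, a sketch that swaps the file-based vocab and the Jieba tokenizer for an in-memory vocab and a pre-tokenized generator so it stays self-contained; the token list is made up.
import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.text as text

def gen():
    yield (np.array(["deep", "learning", "dataset"], dtype='S'),)

vocab = text.Vocab.from_list(["deep", "learning", "dataset"], special_tokens=["<pad>", "<unk>"])
lookup = text.Lookup(vocab, "<unk>")

data = ds.GeneratorDataset(gen, column_names=["text"])
data = data.map(operations=lookup, input_columns=["text"])
for d in data.create_dict_iterator(num_epochs=1):
    print(d["text"])          # ids for each token; unknown tokens map to <unk>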

View File

@ -51,8 +51,8 @@ def util_test_normalize(mean, std, op_type):
normalize_op = c_vision.Normalize(mean, std)
# Generate dataset
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(input_columns=["image"], operations=decode_op)
data = data.map(input_columns=["image"], operations=normalize_op)
data = data.map(operations=decode_op, input_columns=["image"])
data = data.map(operations=normalize_op, input_columns=["image"])
elif op_type == "python":
# define map operations
transforms = [
@ -63,7 +63,7 @@ def util_test_normalize(mean, std, op_type):
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
# Generate dataset
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(input_columns=["image"], operations=transform)
data = data.map(operations=transform, input_columns=["image"])
else:
raise ValueError("Wrong parameter value")
return data
@ -82,7 +82,7 @@ def util_test_normalize_grayscale(num_output_channels, mean, std):
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
# Generate dataset
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(input_columns=["image"], operations=transform)
data = data.map(operations=transform, input_columns=["image"])
return data
@ -99,12 +99,12 @@ def test_normalize_op_c(plot=False):
# First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=normalize_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=normalize_op, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=decode_op)
data2 = data2.map(operations=decode_op, input_columns=["image"])
num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
@ -136,12 +136,12 @@ def test_normalize_op_py(plot=False):
# First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=transform)
data1 = data1.map(input_columns=["image"], operations=normalize_op)
data1 = data1.map(operations=transform, input_columns=["image"])
data1 = data1.map(operations=normalize_op, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=transform)
data2 = data2.map(operations=transform, input_columns=["image"])
num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
@ -169,7 +169,7 @@ def test_decode_op():
decode_op = c_vision.Decode()
# apply map operations on images
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
num_iter = 0
for item in data1.create_dict_iterator(num_epochs=1):
@ -192,7 +192,7 @@ def test_decode_normalize_op():
normalize_op = c_vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0])
# apply map operations on images
data1 = data1.map(input_columns=["image"], operations=[decode_op, normalize_op])
data1 = data1.map(operations=[decode_op, normalize_op], input_columns=["image"])
num_iter = 0
for item in data1.create_dict_iterator(num_epochs=1):
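A sketch of the reordered Decode plus Normalize chain from the hunks above; the TFRecord paths are placeholders, and the mean/std values are the ones used in the test.
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision

DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]  # placeholder
SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"       # placeholder

decode_op = c_vision.Decode()
normalize_op = c_vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0])

data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
# both ops can be chained in a single map call, with operations listed first
data1 = data1.map(operations=[decode_op, normalize_op], input_columns=["image"])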

View File

@ -47,13 +47,14 @@ def test_one_hot():
# First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=depth)
data1 = data1.map(input_columns=["label"], operations=one_hot_op, column_order=["label"])
data1 = data1.map(operations=one_hot_op, input_columns=["label"], column_order=["label"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["label"], shuffle=False)
assert dataset_equal_with_function(data1, data2, 0, one_hot, depth)
def test_one_hot_post_aug():
"""
Test One Hot Encoding after Multiple Data Augmentation Operators
@ -72,14 +73,14 @@ def test_one_hot_post_aug():
resize_op = c_vision.Resize((resize_height, resize_width))
# Apply map operations on images
data1 = data1.map(input_columns=["image"], operations=decode_op)
data1 = data1.map(input_columns=["image"], operations=rescale_op)
data1 = data1.map(input_columns=["image"], operations=resize_op)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=rescale_op, input_columns=["image"])
data1 = data1.map(operations=resize_op, input_columns=["image"])
# Apply one-hot encoding on labels
depth = 4
one_hot_encode = data_trans.OneHot(depth)
data1 = data1.map(input_columns=["label"], operations=one_hot_encode)
data1 = data1.map(operations=one_hot_encode, input_columns=["label"])
# Apply datasets ops
buffer_size = 100

View File

@ -16,6 +16,7 @@ import numpy as np
import mindspore.dataset as ds
# tests the construction of multiple ops from a single dataset.
# map dataset with columns order arguments should produce a ProjectOp over MapOp
# This test does not utilize the compiling passes at this time.
@ -27,12 +28,13 @@ def test_map_reorder0():
# Generator -> Map
data0 = ds.GeneratorDataset(generator_mc, ["col0", "col1"])
data0 = data0.map(input_columns="col0", output_columns="out", column_order=["col1", "out"],
operations=(lambda x: x))
data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out",
column_order=["col1", "out"])
for item in data0.create_tuple_iterator(num_epochs=1): # each data is a dictionary
assert item == [np.array(1), np.array(0)]
# tests the construction of multiple ops from a single dataset.
# map dataset with columns order arguments should produce a ProjectOp over MapOp
# This test does not utilize the compiling passes at this time.
@ -43,20 +45,20 @@ def test_map_reorder1():
# Three map and zip
data0 = ds.GeneratorDataset(generator_mc, ["a0", "a1", "a2"])
data0 = data0.map(input_columns="a0", column_order=["a2", "a1", "a0"], operations=(lambda x: x))
data0 = data0.map(operations=(lambda x: x), input_columns="a0", column_order=["a2", "a1", "a0"])
data1 = ds.GeneratorDataset(generator_mc, ["b0", "b1", "b2"])
data1 = data1.map(input_columns="b0", column_order=["b1", "b2", "b0"], operations=(lambda x: x))
data1 = data1.map(operations=(lambda x: x), input_columns="b0", column_order=["b1", "b2", "b0"])
data2 = ds.zip((data0, data1))
data2 = data2.map(input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"], operations=(lambda x: x))
data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"])
for item in data2.create_tuple_iterator(num_epochs=1):
assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)]
# tests the construction of multiple ops from a single dataset.
# TFRecordDataset with global shuffle should produce a ShuffleOp over TfReaderOp.
# This test does not utilize the compiling passes at this time.
def test_shuffle():
FILES = ["../data/dataset/testTFTestAllTypes/test.data"]
SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json"

View File

@ -44,7 +44,7 @@ def test_pad_op():
pad_op,
]
data1 = data1.map(input_columns=["image"], operations=ctrans)
data1 = data1.map(operations=ctrans, input_columns=["image"])
# Second dataset
transforms = [
@ -54,7 +54,7 @@ def test_pad_op():
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=transform)
data2 = data2.map(operations=transform, input_columns=["image"])
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
c_image = item1["image"]
@ -88,11 +88,11 @@ def test_pad_grayscale():
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=transform)
data1 = data1.map(operations=transform, input_columns=["image"])
# if input is grayscale, the output dimensions should be single channel
pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20))
data1 = data1.map(input_columns=["image"], operations=pad_gray)
data1 = data1.map(operations=pad_gray, input_columns=["image"])
dataset_shape_1 = []
for item1 in data1.create_dict_iterator(num_epochs=1):
c_image = item1["image"]
@ -106,7 +106,7 @@ def test_pad_grayscale():
ctrans = [decode_op, pad_gray]
dataset_shape_2 = []
data2 = data2.map(input_columns=["image"], operations=ctrans)
data2 = data2.map(operations=ctrans, input_columns=["image"])
for item2 in data2.create_dict_iterator(num_epochs=1):
c_image = item2["image"]
@ -132,7 +132,7 @@ def test_pad_md5():
pad_op,
]
data1 = data1.map(input_columns=["image"], operations=ctrans)
data1 = data1.map(operations=ctrans, input_columns=["image"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@ -142,7 +142,7 @@ def test_pad_md5():
py_vision.ToTensor(),
]
transform = mindspore.dataset.transforms.py_transforms.Compose(pytrans)
data2 = data2.map(input_columns=["image"], operations=transform)
data2 = data2.map(operations=transform, input_columns=["image"])
# Compare with expected md5 from images
filename1 = "pad_01_c_result.npz"
save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)
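For the Pad hunks, a sketch of the C-ops chain in the new keyword order; the paths are placeholders and the fill_value matches the grayscale test above.
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision

DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]  # placeholder
SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"       # placeholder

decode_op = c_vision.Decode()
pad_op = c_vision.Pad(100, fill_value=(20, 20, 20))

data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(operations=[decode_op, pad_op], input_columns=["image"])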

View File

@ -127,7 +127,7 @@ def batch_padding_performance_1d():
cifar10_dir = "../data/dataset/testCifar10Data"
data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3]
data1 = data1.repeat(24)
data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1)))
data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image")
pad_info = {"image": ([3888], 0)} # 3888 =36*36*3
# pad_info = None
data1 = data1.batch(batch_size=24, drop_remainder=True, pad_info=pad_info)
@ -144,7 +144,7 @@ def batch_pyfunc_padding_3d():
data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3]
data1 = data1.repeat(24)
# pad_info = {"image": ([36, 36, 3], 0)}
data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, ((0, 4), (0, 4), (0, 0)))),
data1 = data1.map(operations=(lambda x: np.pad(x, ((0, 4), (0, 4), (0, 0)))), input_columns="image",
python_multiprocessing=False)
data1 = data1.batch(batch_size=24, drop_remainder=True)
start_time = time.time()
@ -159,8 +159,8 @@ def batch_pyfunc_padding_1d():
cifar10_dir = "../data/dataset/testCifar10Data"
data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3]
data1 = data1.repeat(24)
data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1)))
data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816))), python_multiprocessing=False)
data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image")
data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image", python_multiprocessing=False)
data1 = data1.batch(batch_size=24, drop_remainder=True)
start_time = time.time()
num_batches = 0
@ -176,8 +176,8 @@ def test_pad_via_map():
def pad_map_config():
data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000) # shape = [32,32,3]
data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1))) # reshape to 1d
data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816))))
data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d
data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image")
data1 = data1.batch(batch_size=25, drop_remainder=True)
res = []
for data in data1.create_dict_iterator(num_epochs=1):
@ -186,7 +186,7 @@ def test_pad_via_map():
def pad_batch_config():
data2 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000) # shape = [32,32,3]
data2 = data2.map(input_columns="image", operations=(lambda x: x.reshape(-1))) # reshape to 1d
data2 = data2.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d
data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)})
res = []
for data in data2.create_dict_iterator(num_epochs=1):
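The last file contrasts padding through map with padding through batch(pad_info=...). A sketch of both variants, grounded in the calls above; the Cifar-10 path is a placeholder, and 816 = 3888 - 32*32*3 pads each flattened 32x32x3 image up to 3888 elements.
import numpy as np
import mindspore.dataset as ds

cifar10_dir = "../data/dataset/testCifar10Data"     # placeholder

# pad in a pyfunc after flattening each 32x32x3 image to 1-D
data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000)
data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image")
data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image")
data1 = data1.batch(batch_size=25, drop_remainder=True)

# or let batch() do the padding via pad_info
data2 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000)
data2 = data2.map(operations=(lambda x: x.reshape(-1)), input_columns="image")
data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)})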

Some files were not shown because too many files have changed in this diff.