forked from mindspore-Ecosystem/mindspore
!5968 [MD] Update map calls to use correct order for its kwargs
Merge pull request !5968 from nhussain/api_changes2
commit 12b50bdcc4
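Every hunk below applies the same mechanical change: in each dataset.map(...) call the operations keyword is moved to the front, followed by input_columns, output_columns, column_order, and any remaining options such as num_parallel_workers. A minimal sketch of the before/after shape of such a call (the input file name, tokenizer op, and column names here are illustrative only, not taken from any single file in this commit):

import mindspore.dataset as ds
import mindspore.dataset.text as text

data = ds.TextFileDataset("corpus.txt")         # hypothetical text source
tokenizer_op = text.UnicodeCharTokenizer(True)  # with_offsets=True, as in the docstrings below

# Old keyword order (before this commit):
#   data = data.map(input_columns=["text"],
#                   output_columns=["token", "offsets_start", "offsets_limit"],
#                   column_order=["token", "offsets_start", "offsets_limit"],
#                   operations=tokenizer_op)

# New keyword order (after this commit): operations leads the call.
data = data.map(operations=tokenizer_op,
                input_columns=["text"],
                output_columns=["token", "offsets_start", "offsets_limit"],
                column_order=["token", "offsets_start", "offsets_limit"])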
@@ -282,8 +282,9 @@ class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp):
>>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeCharTokenizer(True)
->>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
->>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+>>> output_columns=["token", "offsets_start", "offsets_limit"],
+>>> column_order=["token", "offsets_start", "offsets_limit"])
"""

@check_with_offsets
@@ -313,8 +314,9 @@ class WordpieceTokenizer(cde.WordpieceTokenizerOp):
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token=['UNK'],
>>> max_bytes_per_token=100, with_offsets=True)
->>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
->>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+>>> data = data.map(operations=tokenizer_op,
+>>> input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
+>>> column_order=["token", "offsets_start", "offsets_limit"])
"""

@check_wordpiece_tokenizer
@@ -378,8 +380,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WhitespaceTokenizer(True)
->>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
->>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+>>> output_columns=["token", "offsets_start", "offsets_limit"],
+>>> column_order=["token", "offsets_start", "offsets_limit"])
"""

@check_with_offsets
@@ -404,8 +407,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=True)
->>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
->>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+>>> output_columns=["token", "offsets_start", "offsets_limit"],
+>>> column_order=["token", "offsets_start", "offsets_limit"])
"""

@check_unicode_script_tokenizer
@@ -497,8 +501,9 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True)
->>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
->>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+>>> output_columns=["token", "offsets_start", "offsets_limit"],
+>>> column_order=["token", "offsets_start", "offsets_limit"])
"""

@check_regex_tokenizer
@@ -540,8 +545,9 @@ if platform.system().lower() != 'windows':
>>> normalization_form=NormalizeForm.NONE,
>>> preserve_unused_token=True,
>>> with_offsets=True)
->>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
->>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+>>> output_columns=["token", "offsets_start", "offsets_limit"],
+>>> column_order=["token", "offsets_start", "offsets_limit"])
"""

@check_basic_tokenizer
@@ -593,8 +599,9 @@ if platform.system().lower() != 'windows':
>>> unknown_token=100, lower_case=False, keep_whitespace=False,
>>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
>>> with_offsets=True)
->>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
->>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+>>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+>>> output_columns=["token", "offsets_start", "offsets_limit"],
+>>> column_order=["token", "offsets_start", "offsets_limit"])
"""

@check_bert_tokenizer
@@ -39,14 +39,14 @@ def create_dataset_cifar10(data_path, batch_size=32, repeat_size=1, status="trai
random_horizontal_op = CV.RandomHorizontalFlip()
channel_swap_op = CV.HWC2CHW()
typecast_op = C.TypeCast(mstype.int32)
-cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op)
+cifar_ds = cifar_ds.map(operations=typecast_op, input_columns="label")
if status == "train":
-cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op)
-cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op)
-cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op)
-cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op)
-cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op)
-cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op)
+cifar_ds = cifar_ds.map(operations=random_crop_op, input_columns="image")
+cifar_ds = cifar_ds.map(operations=random_horizontal_op, input_columns="image")
+cifar_ds = cifar_ds.map(operations=resize_op, input_columns="image")
+cifar_ds = cifar_ds.map(operations=rescale_op, input_columns="image")
+cifar_ds = cifar_ds.map(operations=normalize_op, input_columns="image")
+cifar_ds = cifar_ds.map(operations=channel_swap_op, input_columns="image")

cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
@@ -84,8 +84,9 @@ class SegDataset:
shuffle=True, num_parallel_workers=self.num_readers,
num_shards=self.shard_num, shard_id=self.shard_id)
transforms_list = self.preprocess_
-data_set = data_set.map(input_columns=["data", "label"], output_columns=["data", "label"],
-operations=transforms_list, num_parallel_workers=self.num_parallel_calls)
+data_set = data_set.map(operations=transforms_list, input_columns=["data", "label"],
+output_columns=["data", "label"],
+num_parallel_workers=self.num_parallel_calls)
data_set = data_set.shuffle(buffer_size=self.batch_size * 10)
data_set = data_set.batch(self.batch_size, drop_remainder=True)
data_set = data_set.repeat(repeat)
@@ -74,8 +74,10 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
ious = ious.T
return ious

+
class PhotoMetricDistortion:
"""Photo Metric Distortion"""
+
def __init__(self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
@@ -134,8 +136,10 @@ class PhotoMetricDistortion:

return img, boxes, labels

+
class Expand:
"""expand image"""
+
def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
if to_rgb:
self.mean = mean[::-1]
@@ -158,12 +162,13 @@ class Expand:
boxes += np.tile((left, top), 2)
return img, boxes, labels

+
def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""rescale operation for image"""
img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
if img_data.shape[0] > config.img_height:
img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_width), return_scale=True)
-scale_factor = scale_factor*scale_factor2
+scale_factor = scale_factor * scale_factor2
img_shape = np.append(img_shape, scale_factor)
img_shape = np.asarray(img_shape, dtype=np.float32)
gt_bboxes = gt_bboxes * scale_factor
@@ -171,7 +176,8 @@ def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)

return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""resize operation for image"""
@@ -188,7 +194,8 @@ def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)

return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
"""resize operation for image of eval"""
@@ -205,7 +212,8 @@ def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)

return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""impad operation for image"""
@@ -213,12 +221,14 @@ def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num):
img_data = img_data.astype(np.float32)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""imnormalize operation for image"""
img_data = mmcv.imnormalize(img, [123.675, 116.28, 103.53], [58.395, 57.12, 57.375], True)
img_data = img_data.astype(np.float32)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""flip operation for image"""
img_data = img
@@ -229,7 +239,8 @@ def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1
flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1

return (img_data, img_shape, flipped, gt_label, gt_num)

+
def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num):
"""flipped generation"""
@@ -240,11 +251,13 @@ def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num):
flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1
flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1

return (img_data, img_shape, flipped, gt_label, gt_num)

+
def image_bgr_rgb(img, img_shape, gt_bboxes, gt_label, gt_num):
img_data = img[:, :, ::-1]
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""transpose operation for image"""
@@ -257,6 +270,7 @@ def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):

return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""photo crop operation for image"""
random_photo = PhotoMetricDistortion()
@@ -264,6 +278,7 @@ def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):

return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

+
def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""expand operation for image"""
expand = Expand()
@@ -271,8 +286,10 @@ def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):

return (img, img_shape, gt_bboxes, gt_label, gt_num)

+
def preprocess_fn(image, box, is_training):
"""Preprocess function for dataset."""
+
def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert):
image_shape = image_shape[:2]
input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert
@@ -325,6 +342,7 @@ def preprocess_fn(image, box, is_training):

return _data_aug(image, box, is_training)

+
def create_coco_label(is_training):
"""Get image path and annotation from COCO."""
from pycocotools.coco import COCO
@@ -334,7 +352,7 @@ def create_coco_label(is_training):
if is_training:
data_type = config.train_data_type

-#Classes need to train or test.
+# Classes need to train or test.
train_cls = config.coco_classes
train_cls_dict = {}
for i, cls in enumerate(train_cls):
@@ -375,6 +393,7 @@ def create_coco_label(is_training):

return image_files, image_anno_dict

+
def anno_parser(annos_str):
"""Parse annotation from string to list."""
annos = []
@@ -383,6 +402,7 @@ def anno_parser(annos_str):
annos.append(anno)
return annos

+
def filter_valid_data(image_dir, anno_path):
"""Filter valid image file, which both in image_dir and anno_path."""
image_files = []
@@ -404,6 +424,7 @@ def filter_valid_data(image_dir, anno_path):
image_files.append(image_path)
return image_files, image_anno_dict

+
def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8):
"""Create MindRecord file."""
mindrecord_dir = config.mindrecord_dir
@@ -435,7 +456,7 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank_id,
num_parallel_workers=1, shuffle=is_training)
decode = C.Decode()
-ds = ds.map(input_columns=["image"], operations=decode, num_parallel_workers=1)
+ds = ds.map(operations=decode, input_columns=["image"], num_parallel_workers=1)
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))

hwc_to_chw = C.HWC2CHW()
@@ -447,38 +468,39 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
type_cast3 = CC.TypeCast(mstype.bool_)

if is_training:
-ds = ds.map(input_columns=["image", "annotation"],
+ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
-operations=compose_map_func, num_parallel_workers=num_parallel_workers)
+num_parallel_workers=num_parallel_workers)

flip = (np.random.rand() < config.flip_ratio)
if flip:
-ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0, horizontally_op],
+ds = ds.map(operations=[normalize_op, type_cast0, horizontally_op], input_columns=["image"],
num_parallel_workers=12)
-ds = ds.map(input_columns=["image", "image_shape", "box", "label", "valid_num"],
-operations=flipped_generation, num_parallel_workers=num_parallel_workers)
+ds = ds.map(operations=flipped_generation,
+input_columns=["image", "image_shape", "box", "label", "valid_num"],
+num_parallel_workers=num_parallel_workers)
else:
-ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0],
+ds = ds.map(operations=[normalize_op, type_cast0], input_columns=["image"],
num_parallel_workers=12)
-ds = ds.map(input_columns=["image"], operations=[hwc_to_chw, type_cast1],
+ds = ds.map(operations=[hwc_to_chw, type_cast1], input_columns=["image"],
num_parallel_workers=12)

else:
-ds = ds.map(input_columns=["image", "annotation"],
+ds = ds.map(operations=compose_map_func,
+input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
-operations=compose_map_func,
num_parallel_workers=num_parallel_workers)

-ds = ds.map(input_columns=["image"], operations=[normalize_op, hwc_to_chw, type_cast1],
+ds = ds.map(operations=[normalize_op, hwc_to_chw, type_cast1], input_columns=["image"],
num_parallel_workers=24)

# transpose_column from python to c
-ds = ds.map(input_columns=["image_shape"], operations=[type_cast1])
-ds = ds.map(input_columns=["box"], operations=[type_cast1])
-ds = ds.map(input_columns=["label"], operations=[type_cast2])
-ds = ds.map(input_columns=["valid_num"], operations=[type_cast3])
+ds = ds.map(operations=[type_cast1], input_columns=["image_shape"])
+ds = ds.map(operations=[type_cast1], input_columns=["box"])
+ds = ds.map(operations=[type_cast2], input_columns=["label"])
+ds = ds.map(operations=[type_cast3], input_columns=["valid_num"])
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)

@@ -55,8 +55,8 @@ def create_dataset_cifar10(data_home, repeat_num=1, training=True):
c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

# apply map operations on images
-data_set = data_set.map(input_columns="label", operations=type_cast_op)
-data_set = data_set.map(input_columns="image", operations=c_trans)
+data_set = data_set.map(operations=type_cast_op, input_columns="label")
+data_set = data_set.map(operations=c_trans, input_columns="image")

# apply batch operations
data_set = data_set.batch(batch_size=cifar_cfg.batch_size, drop_remainder=True)
@@ -60,8 +60,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
C.HWC2CHW()
]
type_cast_op = C2.TypeCast(mstype.int32)
-ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums)
-ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums)
# apply batch operations
ds = ds.batch(cfg.batch_size, drop_remainder=True)
# apply dataset repeat operation
@@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)

# apply map operations on images
-mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
-mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
-mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
-mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
-mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
+mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)

# apply DatasetOps
buffer_size = 10000
@@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
type_cast_op = C.TypeCast(mstype.int32)

# apply map operations on images
-mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
-mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
-mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
-mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
-mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
+mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
+mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)

# apply DatasetOps
buffer_size = 10000
@@ -498,24 +498,24 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id
num_parallel_workers=4, shuffle=is_training)

decode = C.Decode()
-ds = ds.map(input_columns=["image"], operations=decode)
+ds = ds.map(operations=decode, input_columns=["image"])
compose_map_func = (lambda image, annotation, mask, mask_shape:
preprocess_fn(image, annotation, mask, mask_shape, is_training))

if is_training:
-ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
+ds = ds.map(operations=compose_map_func,
+input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
-operations=compose_map_func,
python_multiprocessing=False,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)

else:
-ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
+ds = ds.map(operations=compose_map_func,
+input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
-operations=compose_map_func,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)

@@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, config, repeat_num=1):

type_cast_op = C2.TypeCast(mstype.int32)

-ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
-ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

# apply shuffle operations
ds = ds.shuffle(buffer_size=buffer_size)
@@ -89,8 +89,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,

type_cast_op = C2.TypeCast(mstype.int32)

-ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=16)
-ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=16)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -152,7 +152,7 @@ def create_dataset_py(dataset_path, do_train, config, device_target, repeat_num=

compose = P2.Compose(trans)

-ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
+ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True)

# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -70,8 +70,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,

type_cast_op = C2.TypeCast(mstype.int32)

-ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
-ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

# apply shuffle operations
ds = ds.shuffle(buffer_size=buffer_size)
@@ -61,8 +61,8 @@ def create_dataset(dataset_path, config, do_train, repeat_num=1):
C.HWC2CHW()
]
type_cast_op = C2.TypeCast(mstype.int32)
-ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=config.work_nums)
-ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=config.work_nums)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=config.work_nums)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=config.work_nums)
# apply batch operations
ds = ds.batch(config.batch_size, drop_remainder=True)
# apply dataset repeat operation
@@ -22,6 +22,7 @@ import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2
from mindspore.communication.management import init, get_rank, get_group_size

+
def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
"""
create a train or evaluate cifar10 dataset for resnet50
@@ -65,8 +66,8 @@ def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target=

type_cast_op = C2.TypeCast(mstype.int32)

-ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
-ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)

# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -126,8 +127,8 @@ def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target=

type_cast_op = C2.TypeCast(mstype.int32)

-ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
-ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -165,7 +166,7 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=
if do_train:
trans = [
C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
-C.RandomHorizontalFlip(rank_id/ (rank_id +1)),
+C.RandomHorizontalFlip(rank_id / (rank_id + 1)),
C.Normalize(mean=mean, std=std),
C.HWC2CHW()
]
@@ -180,8 +181,8 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=

type_cast_op = C2.TypeCast(mstype.int32)

-ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
-ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -190,6 +191,7 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=

return ds

+
def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
"""
create a train or eval imagenet2012 dataset for se-resnet50
@@ -233,8 +235,8 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target=
]

type_cast_op = C2.TypeCast(mstype.int32)
-ds = ds.map(input_columns="image", num_parallel_workers=12, operations=trans)
-ds = ds.map(input_columns="label", num_parallel_workers=12, operations=type_cast_op)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=12)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)

# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -244,6 +246,7 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target=

return ds

+
def _get_rank_info():
"""
get rank size and rank id
@@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="

type_cast_op = C2.TypeCast(mstype.int32)

-ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
-ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -147,7 +147,7 @@ def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, targe
trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]

compose = P2.Compose(trans)
-ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
+ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True)

# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -75,8 +75,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="

type_cast_op = C2.TypeCast(mstype.int32)

-ds = ds.map(input_columns="image", num_parallel_workers=num_parallels, operations=trans)
-ds = ds.map(input_columns="label", num_parallel_workers=num_parallels, operations=type_cast_op)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=num_parallels)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallels)

# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)
@@ -25,6 +25,7 @@ from src.utils.sampler import DistributedSampler

ImageFile.LOAD_TRUNCATED_IMAGES = True

+
class TxtDataset():
"""
create txt dataset.
@@ -33,6 +34,7 @@ class TxtDataset():
Returns:
de_dataset.
"""
+
def __init__(self, root, txt_name):
super(TxtDataset, self).__init__()
self.imgs = []
@@ -142,10 +144,10 @@ def classification_dataset(data_dir, image_size, per_batch_size, max_epoch, rank
sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)

-de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=num_parallel_workers,
-operations=transform_img)
-de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=num_parallel_workers,
-operations=transform_label)
+de_dataset = de_dataset.map(operations=transform_img, input_columns="image",
+num_parallel_workers=num_parallel_workers)
+de_dataset = de_dataset.map(operations=transform_label, input_columns="label",
+num_parallel_workers=num_parallel_workers)

columns_to_project = ["image", "label"]
de_dataset = de_dataset.project(columns=columns_to_project)
@@ -72,8 +72,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
]

type_cast_op = C2.TypeCast(mstype.int32)
-ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums)
-ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
+ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums)
+ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums)
# apply batch operations
ds = ds.batch(cfg.batch_size, drop_remainder=True)

@@ -34,13 +34,15 @@ def _rand(a=0., b=1.):
"""Generate random."""
return np.random.rand() * (b - a) + a

+
def get_imageId_from_fileName(filename):
"""Get imageID from fileName"""
try:
filename = os.path.splitext(filename)[0]
return int(filename)
except:
-raise NotImplementedError('Filename %s is supposed to be an integer.'%(filename))
+raise NotImplementedError('Filename %s is supposed to be an integer.' % (filename))

+
def random_sample_crop(image, boxes):
"""Random Crop the image and boxes"""
@@ -64,7 +66,7 @@ def random_sample_crop(image, boxes):
left = _rand() * (width - w)
top = _rand() * (height - h)

-rect = np.array([int(top), int(left), int(top+h), int(left+w)])
+rect = np.array([int(top), int(left), int(top + h), int(left + w)])
overlap = jaccard_numpy(boxes, rect)

# dropout some boxes
@@ -103,13 +105,14 @@ def random_sample_crop(image, boxes):

def preprocess_fn(img_id, image, box, is_training):
"""Preprocess function for dataset."""
+
def _infer_data(image, input_shape):
img_h, img_w, _ = image.shape
input_h, input_w = input_shape

image = cv2.resize(image, (input_w, input_h))

-#When the channels of image is 1
+# When the channels of image is 1
if len(image.shape) == 2:
image = np.expand_dims(image, axis=-1)
image = np.concatenate([image, image, image], axis=-1)
@@ -150,6 +153,7 @@ def preprocess_fn(img_id, image, box, is_training):

box, label, num_match = ssd_bboxes_encode(box)
return image, box, label, num_match

return _data_aug(image, box, is_training, image_size=config.img_shape)

+
@@ -158,7 +162,7 @@ def create_voc_label(is_training):
voc_dir = config.voc_dir
cls_map = {name: i for i, name in enumerate(config.coco_classes)}
sub_dir = 'train' if is_training else 'eval'
-#sub_dir = 'train'
+# sub_dir = 'train'
voc_dir = os.path.join(voc_dir, sub_dir)
if not os.path.isdir(voc_dir):
raise ValueError(f'Cannot find {sub_dir} dataset path.')
@@ -244,6 +248,7 @@ def create_voc_label(is_training):

return images, image_files_dict, image_anno_dict

+
def create_coco_label(is_training):
"""Get image path and annotation from COCO."""
from pycocotools.coco import COCO
@@ -253,7 +258,7 @@ def create_coco_label(is_training):
if is_training:
data_type = config.train_data_type

-#Classes need to train or test.
+# Classes need to train or test.
train_cls = config.coco_classes
train_cls_dict = {}
for i, cls in enumerate(train_cls):
@@ -391,9 +396,10 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num
ds = de.MindDataset(mindrecord_file, columns_list=["img_id", "image", "annotation"], num_shards=device_num,
shard_id=rank, num_parallel_workers=num_parallel_workers, shuffle=is_training)
decode = C.Decode()
-ds = ds.map(input_columns=["image"], operations=decode)
+ds = ds.map(operations=decode, input_columns=["image"])
change_swap_op = C.HWC2CHW()
-normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255])
+normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
+std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
color_adjust_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
compose_map_func = (lambda img_id, image, annotation: preprocess_fn(img_id, image, annotation, is_training))
if is_training:
@@ -402,11 +408,11 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num
else:
output_columns = ["img_id", "image", "image_shape"]
trans = [normalize_op, change_swap_op]
-ds = ds.map(input_columns=["img_id", "image", "annotation"],
+ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"],
output_columns=output_columns, column_order=output_columns,
-operations=compose_map_func, python_multiprocessing=is_training,
+python_multiprocessing=is_training,
num_parallel_workers=num_parallel_workers)
-ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training,
+ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=is_training,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
@@ -54,8 +54,8 @@ def vgg_create_dataset(data_home, image_size, batch_size, rank_id=0, rank_size=1
changeswap_op]

# apply map operations on images
-data_set = data_set.map(input_columns="label", operations=type_cast_op)
-data_set = data_set.map(input_columns="image", operations=c_trans)
+data_set = data_set.map(operations=type_cast_op, input_columns="label")
+data_set = data_set.map(operations=c_trans, input_columns="image")

# apply repeat operations
data_set = data_set.repeat(repeat_num)
@@ -157,8 +157,8 @@ def classification_dataset(data_dir, image_size, per_batch_size, rank=0, group_s
sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)

-de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=8, operations=transform_img)
-de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=8, operations=transform_label)
+de_dataset = de_dataset.map(operations=transform_img, input_columns="image", num_parallel_workers=8)
+de_dataset = de_dataset.map(operations=transform_label, input_columns="label", num_parallel_workers=8)

columns_to_project = ["image", "label"]
de_dataset = de_dataset.project(columns=columns_to_project)
@@ -90,8 +90,8 @@ def create_dataset(dataset_path, batch_size=1, num_shards=1, shard_id=0, device_
label_trans = [
c.TypeCast(mstype.int32)
]
-ds = ds.map(input_columns=["image"], num_parallel_workers=8, operations=image_trans)
-ds = ds.map(input_columns=["label"], num_parallel_workers=8, operations=label_trans)
+ds = ds.map(operations=image_trans, input_columns=["image"], num_parallel_workers=8)
+ds = ds.map(operations=label_trans, input_columns=["label"], num_parallel_workers=8)

ds = ds.batch(batch_size, drop_remainder=True)
return ds
@@ -176,11 +176,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
     ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"],
                              sampler=distributed_sampler)
     compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
-    ds = ds.map(input_columns=["image", "img_id"],
+    ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
                 output_columns=["image", "image_shape", "img_id"],
                 column_order=["image", "image_shape", "img_id"],
-                operations=compose_map_func, num_parallel_workers=8)
+                num_parallel_workers=8)
-    ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
+    ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
     ds = ds.batch(batch_size, drop_remainder=True)
     ds = ds.repeat(max_epoch)
@@ -173,11 +173,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
     ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"],
                              sampler=distributed_sampler)
    compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
-    ds = ds.map(input_columns=["image", "img_id"],
+    ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
                 output_columns=["image", "image_shape", "img_id"],
                 column_order=["image", "image_shape", "img_id"],
-                operations=compose_map_func, num_parallel_workers=8)
+                num_parallel_workers=8)
-    ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
+    ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
     ds = ds.batch(batch_size, drop_remainder=True)
     ds = ds.repeat(max_epoch)
@@ -296,21 +296,21 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=1, device_num=
     ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank,
                         num_parallel_workers=num_parallel_workers, shuffle=is_training)
     decode = C.Decode()
-    ds = ds.map(input_columns=["image"], operations=decode)
+    ds = ds.map(operations=decode, input_columns=["image"])
     compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))

     if is_training:
         hwc_to_chw = C.HWC2CHW()
-        ds = ds.map(input_columns=["image", "annotation"],
+        ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
                     output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
                     column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
-                    operations=compose_map_func, num_parallel_workers=num_parallel_workers)
+                    num_parallel_workers=num_parallel_workers)
-        ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers)
+        ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers)
         ds = ds.batch(batch_size, drop_remainder=True)
         ds = ds.repeat(repeat_num)
     else:
-        ds = ds.map(input_columns=["image", "annotation"],
+        ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
                     output_columns=["image", "image_shape", "annotation"],
                     column_order=["image", "image_shape", "annotation"],
-                    operations=compose_map_func, num_parallel_workers=num_parallel_workers)
+                    num_parallel_workers=num_parallel_workers)
     return ds
@@ -112,12 +112,12 @@ def get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''):
                                         "masked_lm_weights",
                                         "next_sentence_labels"])
     type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
     ds = ds.repeat(repeat_count)

     # apply batch operations
@@ -42,30 +42,31 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
                               usage=data_usage, shuffle=shuffle_dataset)
     ### Processing label
     if data_usage == 'test':
-        dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"],
-                              column_order=["id", "label_id", "sentence"], operations=ops.Duplicate())
+        dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"],
+                              column_order=["id", "label_id", "sentence"])
-        dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0))
+        dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"])
     else:
         label_vocab = text.Vocab.from_list(label_list)
         label_lookup = text.Lookup(label_vocab)
-        dataset = dataset.map(input_columns="label_desc", output_columns="label_id", operations=label_lookup)
+        dataset = dataset.map(operations=label_lookup, input_columns="label_desc", output_columns="label_id")
     ### Processing sentence
     vocab = text.Vocab.from_file(bert_vocab_path)
     tokenizer = text.BertTokenizer(vocab, lower_case=True)
     lookup = text.Lookup(vocab, unknown_token='[UNK]')
-    dataset = dataset.map(input_columns=["sentence"], operations=tokenizer)
+    dataset = dataset.map(operations=tokenizer, input_columns=["sentence"])
-    dataset = dataset.map(input_columns=["sentence"], operations=ops.Slice(slice(0, max_seq_len)))
+    dataset = dataset.map(operations=ops.Slice(slice(0, max_seq_len)), input_columns=["sentence"])
-    dataset = dataset.map(input_columns=["sentence"],
-                          operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
-                                                     append=np.array(["[SEP]"], dtype='S')))
+    dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
+                                                     append=np.array(["[SEP]"], dtype='S')), input_columns=["sentence"])
-    dataset = dataset.map(input_columns=["sentence"], output_columns=["text_ids"], operations=lookup)
+    dataset = dataset.map(operations=lookup, input_columns=["sentence"], output_columns=["text_ids"])
-    dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0))
+    dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"])
-    dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"],
-                          column_order=["text_ids", "mask_ids", "label_id"], operations=ops.Duplicate())
+    dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
+                          output_columns=["text_ids", "mask_ids"],
+                          column_order=["text_ids", "mask_ids", "label_id"])
-    dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32))
+    dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"])
-    dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "segment_ids"],
-                          column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate())
+    dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
+                          output_columns=["text_ids", "segment_ids"],
+                          column_order=["text_ids", "mask_ids", "segment_ids", "label_id"])
-    dataset = dataset.map(input_columns=["segment_ids"], operations=ops.Fill(0))
+    dataset = dataset.map(operations=ops.Fill(0), input_columns=["segment_ids"])
     dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
     return dataset
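The Duplicate/Mask steps above keep the same semantics; only the keyword order changes. A small self-contained sketch of that pattern under the new order (the toy generator, column names and values are assumptions for illustration):

# Sketch only: duplicate a column, then derive a mask from it, with operations= first.
import numpy as np
import mindspore.dataset as de
import mindspore.dataset.transforms.c_transforms as ops
import mindspore.common.dtype as mstype

def gen():
    for row in ([1, 2, 0, 0], [3, 0, 0, 0]):
        yield (np.array(row, dtype=np.int32),)

dataset = de.GeneratorDataset(gen, column_names=["text_ids"])
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
                      output_columns=["text_ids", "mask_ids"],
                      column_order=["text_ids", "mask_ids"])
dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32),
                      input_columns=["mask_ids"])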
@@ -86,50 +87,51 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
                               usage=data_usage, shuffle=shuffle_dataset)
     ### Processing label
     if data_usage == 'test':
-        dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"],
-                              column_order=["id", "label_id", "sentence1", "sentence2"], operations=ops.Duplicate())
+        dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"],
+                              column_order=["id", "label_id", "sentence1", "sentence2"])
-        dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0))
+        dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"])
     else:
         label_vocab = text.Vocab.from_list(label_list)
         label_lookup = text.Lookup(label_vocab)
-        dataset = dataset.map(input_columns="label", output_columns="label_id", operations=label_lookup)
+        dataset = dataset.map(operations=label_lookup, input_columns="label", output_columns="label_id")
     ### Processing sentence pairs
     vocab = text.Vocab.from_file(bert_vocab_path)
     tokenizer = text.BertTokenizer(vocab, lower_case=True)
     lookup = text.Lookup(vocab, unknown_token='[UNK]')
     ### Tokenizing sentences and truncate sequence pair
-    dataset = dataset.map(input_columns=["sentence1"], operations=tokenizer)
+    dataset = dataset.map(operations=tokenizer, input_columns=["sentence1"])
-    dataset = dataset.map(input_columns=["sentence2"], operations=tokenizer)
+    dataset = dataset.map(operations=tokenizer, input_columns=["sentence2"])
-    dataset = dataset.map(input_columns=["sentence1", "sentence2"],
-                          operations=text.TruncateSequencePair(max_seq_len-3))
+    dataset = dataset.map(operations=text.TruncateSequencePair(max_seq_len - 3),
+                          input_columns=["sentence1", "sentence2"])
     ### Adding special tokens
-    dataset = dataset.map(input_columns=["sentence1"],
-                          operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
-                                                     append=np.array(["[SEP]"], dtype='S')))
+    dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
+                                                     append=np.array(["[SEP]"], dtype='S')),
+                          input_columns=["sentence1"])
-    dataset = dataset.map(input_columns=["sentence2"],
-                          operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S')))
+    dataset = dataset.map(operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S')),
+                          input_columns=["sentence2"])
     ### Generating segment_ids
-    dataset = dataset.map(input_columns=["sentence1"], output_columns=["sentence1", "type_sentence1"],
-                          column_order=["sentence1", "type_sentence1", "sentence2", "label_id"],
-                          operations=ops.Duplicate())
+    dataset = dataset.map(operations=ops.Duplicate(), input_columns=["sentence1"],
+                          output_columns=["sentence1", "type_sentence1"],
+                          column_order=["sentence1", "type_sentence1", "sentence2", "label_id"])
-    dataset = dataset.map(input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"],
-                          column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"],
-                          operations=ops.Duplicate())
+    dataset = dataset.map(operations=ops.Duplicate(),
+                          input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"],
+                          column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"])
-    dataset = dataset.map(input_columns=["type_sentence1"], operations=[lookup, ops.Fill(0)])
+    dataset = dataset.map(operations=[lookup, ops.Fill(0)], input_columns=["type_sentence1"])
-    dataset = dataset.map(input_columns=["type_sentence2"], operations=[lookup, ops.Fill(1)])
+    dataset = dataset.map(operations=[lookup, ops.Fill(1)], input_columns=["type_sentence2"])
-    dataset = dataset.map(input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"],
-                          column_order=["sentence1", "sentence2", "segment_ids", "label_id"],
-                          operations=ops.Concatenate())
+    dataset = dataset.map(operations=ops.Concatenate(),
+                          input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"],
+                          column_order=["sentence1", "sentence2", "segment_ids", "label_id"])
-    dataset = dataset.map(input_columns=["segment_ids"], operations=ops.PadEnd([max_seq_len], 0))
+    dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["segment_ids"])
     ### Generating text_ids
-    dataset = dataset.map(input_columns=["sentence1", "sentence2"], output_columns=["text_ids"],
-                          column_order=["text_ids", "segment_ids", "label_id"],
-                          operations=ops.Concatenate())
+    dataset = dataset.map(operations=ops.Concatenate(),
+                          input_columns=["sentence1", "sentence2"], output_columns=["text_ids"],
+                          column_order=["text_ids", "segment_ids", "label_id"])
-    dataset = dataset.map(input_columns=["text_ids"], operations=lookup)
+    dataset = dataset.map(operations=lookup, input_columns=["text_ids"])
-    dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0))
+    dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"])
     ### Generating mask_ids
-    dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"],
-                          column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate())
+    dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
+                          output_columns=["text_ids", "mask_ids"],
+                          column_order=["text_ids", "mask_ids", "segment_ids", "label_id"])
-    dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32))
+    dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"])
     dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
     return dataset
@@ -39,12 +39,12 @@ def create_bert_dataset(device_num=1, rank=0, do_shuffle="true", data_dir=None,
     ori_dataset_size = ds.get_dataset_size()
     print('origin dataset size: ', ori_dataset_size)
     type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     # apply batch operations
     ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
     logger.info("data size: {}".format(ds.get_dataset_size()))
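These BERT readers cast each integer column with the same TypeCast op, one map call per column. With `operations` leading, the chain can equally be written as a loop over the column names; the sketch below is an illustrative alternative built on a dummy generator, not the model-zoo code itself:

# Sketch only: apply one TypeCast to several columns with operations= first.
# The dummy generator and the loop form are assumptions for illustration.
import numpy as np
import mindspore.dataset as de
import mindspore.dataset.transforms.c_transforms as C
import mindspore.common.dtype as mstype

int32_columns = ["masked_lm_ids", "masked_lm_positions", "next_sentence_labels",
                 "segment_ids", "input_mask", "input_ids"]

def gen():
    # Two dummy rows; int64 on purpose so the cast to int32 is observable.
    for _ in range(2):
        yield tuple(np.zeros(4, dtype=np.int64) for _ in int32_columns)

ds = de.GeneratorDataset(gen, column_names=int32_columns)
type_cast_op = C.TypeCast(mstype.int32)
for column in int32_columns:
    ds = ds.map(operations=type_cast_op, input_columns=column)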
@@ -60,12 +60,12 @@ def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy
                            columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle)
     if assessment_method == "Spearman_correlation":
         type_cast_op_float = C.TypeCast(mstype.float32)
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op_float)
+        ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
     else:
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="label_ids")
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     ds = ds.repeat(repeat_count)
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -80,12 +80,12 @@ def create_classification_dataset(batch_size=1, repeat_count=1, assessment_metho
                            columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle)
     if assessment_method == "Spearman_correlation":
         type_cast_op_float = C.TypeCast(mstype.float32)
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op_float)
+        ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
     else:
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="label_ids")
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     ds = ds.repeat(repeat_count)
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -101,14 +101,14 @@ def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, sche
                            columns_list=["input_ids", "input_mask", "segment_ids", "start_positions",
                                          "end_positions", "unique_ids", "is_impossible"],
                            shuffle=do_shuffle)
-        ds = ds.map(input_columns="start_positions", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="start_positions")
-        ds = ds.map(input_columns="end_positions", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="end_positions")
     else:
         ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None,
                                 columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"])
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     ds = ds.repeat(repeat_count)
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -117,12 +117,12 @@ def get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''):
                                         "masked_lm_weights",
                                         "next_sentence_labels"])
     type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
     ds = ds.repeat(repeat_count)

     # apply batch operations
@@ -40,12 +40,12 @@ def create_bert_dataset(device_num=1, rank=0, do_shuffle="true", data_dir=None,
     ori_dataset_size = ds.get_dataset_size()
     print('origin dataset size: ', ori_dataset_size)
     type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     # apply batch operations
     ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
     logger.info("data size: {}".format(ds.get_dataset_size()))
@@ -61,12 +61,12 @@ def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy
                            columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"])
     if assessment_method == "Spearman_correlation":
         type_cast_op_float = C.TypeCast(mstype.float32)
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op_float)
+        ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
     else:
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="label_ids")
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     ds = ds.repeat(repeat_count)
     # apply shuffle operation
     buffer_size = 960
@@ -84,12 +84,12 @@ def create_classification_dataset(batch_size=1, repeat_count=1, assessment_metho
                            columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"])
     if assessment_method == "Spearman_correlation":
         type_cast_op_float = C.TypeCast(mstype.float32)
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op_float)
+        ds = ds.map(operations=type_cast_op_float, input_columns="label_ids")
     else:
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="label_ids")
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     ds = ds.repeat(repeat_count)
     # apply shuffle operation
     buffer_size = 960
@@ -107,17 +107,17 @@ def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, sche
                            columns_list=["input_ids", "input_mask", "segment_ids",
                                          "start_positions", "end_positions",
                                          "unique_ids", "is_impossible"])
-        ds = ds.map(input_columns="start_positions", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="start_positions")
-        ds = ds.map(input_columns="end_positions", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="end_positions")
     else:
         ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None,
                                 columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"])
-        ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="input_ids")
-        ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-        ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     ds = ds.repeat(repeat_count)
     # apply shuffle operation
     buffer_size = 960
@@ -60,12 +60,12 @@ def _load_dataset(input_files, batch_size, epoch_count=1,
     repeat_count = epoch_count

     type_cast_op = deC.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="src", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="src")
-    ds = ds.map(input_columns="src_padding", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="src_padding")
-    ds = ds.map(input_columns="prev_opt", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="prev_opt")
-    ds = ds.map(input_columns="prev_padding", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="prev_padding")
-    ds = ds.map(input_columns="target", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="target")
-    ds = ds.map(input_columns="tgt_padding", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="tgt_padding")

     ds = ds.rename(
         input_columns=["src",
@@ -49,11 +49,11 @@ def create_tinybert_dataset(task='td', batch_size=32, device_num=1, rank=0,
                             shard_equal_rows=True)

     type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     if task == "td":
-        ds = ds.map(input_columns="label_ids", operations=type_cast_op)
+        ds = ds.map(operations=type_cast_op, input_columns="label_ids")
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -40,12 +40,12 @@ def load_test_data(batch_size=1, data_file=None):
                             "target_eos_ids", "target_eos_mask"],
                             shuffle=False)
     type_cast_op = deC.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="source_eos_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="source_eos_ids")
-    ds = ds.map(input_columns="source_eos_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="source_eos_mask")
-    ds = ds.map(input_columns="target_sos_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="target_sos_ids")
-    ds = ds.map(input_columns="target_sos_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="target_sos_mask")
-    ds = ds.map(input_columns="target_eos_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="target_eos_ids")
-    ds = ds.map(input_columns="target_eos_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="target_eos_mask")
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
     ds.channel_name = 'transformer'
@@ -30,12 +30,12 @@ def create_transformer_dataset(epoch_count=1, rank_size=1, rank_id=0, do_shuffle
                             shuffle=(do_shuffle == "true"), num_shards=rank_size, shard_id=rank_id)

     type_cast_op = deC.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="source_eos_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="source_eos_ids")
-    ds = ds.map(input_columns="source_eos_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="source_eos_mask")
-    ds = ds.map(input_columns="target_sos_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="target_sos_ids")
-    ds = ds.map(input_columns="target_sos_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="target_sos_mask")
-    ds = ds.map(input_columns="target_eos_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="target_eos_ids")
-    ds = ds.map(input_columns="target_eos_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="target_eos_mask")

     # apply batch operations
     ds = ds.batch(transformer_net_cfg.batch_size, drop_remainder=True)
@@ -89,8 +89,8 @@ def create_dataset(repeat_num=1, training=True):
                  changeswap_op]

     # apply map operations on images
-    ds = ds.map(input_columns="label", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="label")
-    ds = ds.map(input_columns="image", operations=c_trans)
+    ds = ds.map(operations=c_trans, input_columns="image")

     # apply repeat operations
     ds = ds.repeat(repeat_num)
@@ -89,8 +89,8 @@ def create_dataset(repeat_num=1, training=True):
                  changeswap_op]

     # apply map operations on images
-    ds = ds.map(input_columns="label", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="label")
-    ds = ds.map(input_columns="image", operations=c_trans)
+    ds = ds.map(operations=c_trans, input_columns="image")

     # apply repeat operations
     ds = ds.repeat(repeat_num)
@@ -298,21 +298,21 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num
     ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank,
                         num_parallel_workers=num_parallel_workers, shuffle=False)
     decode = C.Decode()
-    ds = ds.map(input_columns=["image"], operations=decode)
+    ds = ds.map(operations=decode, input_columns=["image"])
     compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))

     if is_training:
         hwc_to_chw = C.HWC2CHW()
-        ds = ds.map(input_columns=["image", "annotation"],
+        ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
                     output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
                     column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
-                    operations=compose_map_func, num_parallel_workers=num_parallel_workers)
+                    num_parallel_workers=num_parallel_workers)
-        ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers)
+        ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers)
         ds = ds.batch(batch_size, drop_remainder=True)
         ds = ds.repeat(repeat_num)
     else:
-        ds = ds.map(input_columns=["image", "annotation"],
+        ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
                     output_columns=["image", "image_shape", "annotation"],
                     column_order=["image", "image_shape", "annotation"],
-                    operations=compose_map_func, num_parallel_workers=num_parallel_workers)
+                    num_parallel_workers=num_parallel_workers)
     return ds
@@ -102,12 +102,12 @@ def me_de_train_dataset(sink_mode=False):
     if sink_mode:
         sink_size = 100
         new_repeat_count = 3
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
     logger.info("data size: {}".format(ds.get_dataset_size()))
@@ -102,12 +102,12 @@ def me_de_train_dataset(sink_mode=False):
     if sink_mode:
         sink_size = 100
         new_repeat_count = 3
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
     logger.info("data size: {}".format(ds.get_dataset_size()))
@@ -41,12 +41,12 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", d
     print('origin dataset size: ', ori_dataset_size)
     new_repeat_count = int(repeat_count * ori_dataset_size // ds.get_dataset_size())
     type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     # apply batch operations
     ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
     ds = ds.repeat(max(new_repeat_count, repeat_count))
@@ -92,12 +92,12 @@ def me_de_train_dataset():
                                               "next_sentence_labels", "masked_lm_positions",
                                               "masked_lm_ids", "masked_lm_weights"], shuffle=False)
     type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids")
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions")
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels")
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
     # apply batch operations
     batch_size = int(os.getenv('BATCH_SIZE', '16'))
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -97,10 +97,10 @@ def create_dataset(args, data_url, epoch_num=1, batch_size=1, usage="train", shu

     # wrapped with GeneratorDataset
     dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=None)
-    dataset = dataset.map(input_columns=["image", "label"], operations=DataTransform(args, usage=usage))
+    dataset = dataset.map(operations=DataTransform(args, usage=usage), input_columns=["image", "label"])

     channelswap_op = C.HWC2CHW()
-    dataset = dataset.map(input_columns="image", operations=channelswap_op)
+    dataset = dataset.map(operations=channelswap_op, input_columns="image")

     # 1464 samples / batch_size 8 = 183 batches
     # epoch_num is num of steps
@@ -68,8 +68,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):

     type_cast_op = C2.TypeCast(mstype.int32)

-    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
-    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -71,8 +71,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):

     type_cast_op = C2.TypeCast(mstype.int32)

-    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
-    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
+    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
@@ -171,11 +171,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
     type_cast_op = C.TypeCast(mstype.int32)

     # apply map operations on images
-    mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)

     # apply DatasetOps
     buffer_size = 10000
@@ -47,11 +47,11 @@ def test_me_de_train_dataset():
     rescale_op = vision.Rescale(rescale, shift)

     # apply map operations on images
-    data_set_new = data_set_new.map(input_columns="image/encoded", operations=decode_op)
+    data_set_new = data_set_new.map(operations=decode_op, input_columns="image/encoded")
-    data_set_new = data_set_new.map(input_columns="image/encoded", operations=resize_op)
+    data_set_new = data_set_new.map(operations=resize_op, input_columns="image/encoded")
-    data_set_new = data_set_new.map(input_columns="image/encoded", operations=rescale_op)
+    data_set_new = data_set_new.map(operations=rescale_op, input_columns="image/encoded")
     hwc2chw_op = vision.HWC2CHW()
-    data_set_new = data_set_new.map(input_columns="image/encoded", operations=hwc2chw_op)
+    data_set_new = data_set_new.map(operations=hwc2chw_op, input_columns="image/encoded")
     data_set_new = data_set_new.repeat(1)
     # apply batch operations
     batch_size_new = 32
@@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
     type_cast_op = C.TypeCast(mstype.int32)

     # apply map operations on images
-    mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)

     # apply DatasetOps
     buffer_size = 10000
@@ -87,9 +87,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
     hwc2chw_op = CV.HWC2CHW()

     # apply map operations on images
-    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)

     # apply DatasetOps
     mnist_ds = mnist_ds.batch(batch_size)
@@ -77,9 +77,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
     hwc2chw_op = CV.HWC2CHW()

     # apply map operations on images
-    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)

     # apply DatasetOps
     mnist_ds = mnist_ds.batch(batch_size)
@@ -145,9 +145,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
     hwc2chw_op = CV.HWC2CHW()

     # apply map operations on images
-    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
-    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)

     # apply DatasetOps
     mnist_ds = mnist_ds.batch(batch_size)
@@ -98,11 +98,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
|
||||||
type_cast_op = C.TypeCast(mstype.int32)
|
type_cast_op = C.TypeCast(mstype.int32)
|
||||||
|
|
||||||
# apply map operations on images
|
# apply map operations on images
|
||||||
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
|
|
||||||
# apply DatasetOps
|
# apply DatasetOps
|
||||||
buffer_size = 10000
|
buffer_size = 10000
|
||||||
|
|
|
@@ -107,11 +107,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
|
||||||
type_cast_op = C.TypeCast(mstype.int32)
|
type_cast_op = C.TypeCast(mstype.int32)
|
||||||
|
|
||||||
# apply map operations on images
|
# apply map operations on images
|
||||||
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
|
|
||||||
# apply DatasetOps
|
# apply DatasetOps
|
||||||
buffer_size = 10000
|
buffer_size = 10000
|
||||||
|
|
|
@@ -351,8 +351,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32):
|
||||||
changeswap_op]
|
changeswap_op]
|
||||||
|
|
||||||
# apply map operations on images
|
# apply map operations on images
|
||||||
data_set = data_set.map(input_columns="label", operations=type_cast_op)
|
data_set = data_set.map(operations=type_cast_op, input_columns="label")
|
||||||
data_set = data_set.map(input_columns="image", operations=c_trans)
|
data_set = data_set.map(operations=c_trans, input_columns="image")
|
||||||
|
|
||||||
# apply shuffle operations
|
# apply shuffle operations
|
||||||
data_set = data_set.shuffle(buffer_size=1000)
|
data_set = data_set.shuffle(buffer_size=1000)
|
||||||
|
|
|
@@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
|
||||||
type_cast_op = C.TypeCast(mstype.int32)
|
type_cast_op = C.TypeCast(mstype.int32)
|
||||||
|
|
||||||
# apply map operations on images
|
# apply map operations on images
|
||||||
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
|
|
||||||
# apply DatasetOps
|
# apply DatasetOps
|
||||||
buffer_size = 10000
|
buffer_size = 10000
|
||||||
|
|
|
@@ -114,11 +114,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers
|
||||||
type_cast_op = C.TypeCast(mstype.int32)
|
type_cast_op = C.TypeCast(mstype.int32)
|
||||||
|
|
||||||
# apply map operations on images
|
# apply map operations on images
|
||||||
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
|
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||||
|
|
||||||
# apply DatasetOps
|
# apply DatasetOps
|
||||||
mnist_ds = mnist_ds.shuffle(buffer_size=10000) # 10000 as in LeNet train script
|
mnist_ds = mnist_ds.shuffle(buffer_size=10000) # 10000 as in LeNet train script
|
||||||
|
|
|
@@ -90,8 +90,8 @@ def create_dataset(repeat_num=1, training=True):
|
||||||
changeswap_op]
|
changeswap_op]
|
||||||
|
|
||||||
# apply map operations on images
|
# apply map operations on images
|
||||||
data_set = data_set.map(input_columns="label", operations=type_cast_op)
|
data_set = data_set.map(operations=type_cast_op, input_columns="label")
|
||||||
data_set = data_set.map(input_columns="image", operations=c_trans)
|
data_set = data_set.map(operations=c_trans, input_columns="image")
|
||||||
|
|
||||||
# apply repeat operations
|
# apply repeat operations
|
||||||
data_set = data_set.repeat(repeat_num)
|
data_set = data_set.repeat(repeat_num)
|
||||||
|
|
|
@@ -68,8 +68,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32):
|
||||||
changeswap_op]
|
changeswap_op]
|
||||||
|
|
||||||
# apply map operations on images
|
# apply map operations on images
|
||||||
data_set = data_set.map(input_columns="label", operations=type_cast_op)
|
data_set = data_set.map(operations=type_cast_op, input_columns="label")
|
||||||
data_set = data_set.map(input_columns="image", operations=c_trans)
|
data_set = data_set.map(operations=c_trans, input_columns="image")
|
||||||
|
|
||||||
# apply shuffle operations
|
# apply shuffle operations
|
||||||
data_set = data_set.shuffle(buffer_size=1000)
|
data_set = data_set.shuffle(buffer_size=1000)
|
||||||
|
|
|
@@ -79,8 +79,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32, rank_id=0, rank_s
|
||||||
changeswap_op]
|
changeswap_op]
|
||||||
|
|
||||||
# apply map operations on images
|
# apply map operations on images
|
||||||
data_set = data_set.map(input_columns="label", operations=type_cast_op)
|
data_set = data_set.map(operations=type_cast_op, input_columns="label")
|
||||||
data_set = data_set.map(input_columns="image", operations=c_trans)
|
data_set = data_set.map(operations=c_trans, input_columns="image")
|
||||||
|
|
||||||
# apply shuffle operations
|
# apply shuffle operations
|
||||||
data_set = data_set.shuffle(buffer_size=1000)
|
data_set = data_set.shuffle(buffer_size=1000)
|
||||||
|
|
|
@@ -29,7 +29,7 @@ def test_case_0():
|
||||||
# apply dataset operations
|
# apply dataset operations
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
|
||||||
|
|
||||||
ds1 = ds1.map(input_columns=col, output_columns="out", operations=(lambda x: x + x))
|
ds1 = ds1.map(operations=(lambda x: x + x), input_columns=col, output_columns="out")
|
||||||
|
|
||||||
print("************** Output Tensor *****************")
|
print("************** Output Tensor *****************")
|
||||||
for data in ds1.create_dict_iterator(): # each data is a dictionary
|
for data in ds1.create_dict_iterator(): # each data is a dictionary
|
||||||
|
@@ -49,7 +49,7 @@ def test_case_1():
|
||||||
# apply dataset operations
|
# apply dataset operations
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
|
||||||
|
|
||||||
ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1"], operations=(lambda x: (x, x + x)))
|
ds1 = ds1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"])
|
||||||
|
|
||||||
print("************** Output Tensor *****************")
|
print("************** Output Tensor *****************")
|
||||||
for data in ds1.create_dict_iterator(): # each data is a dictionary
|
for data in ds1.create_dict_iterator(): # each data is a dictionary
|
||||||
|
@@ -72,7 +72,7 @@ def test_case_2():
|
||||||
# apply dataset operations
|
# apply dataset operations
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
|
||||||
|
|
||||||
ds1 = ds1.map(input_columns=col, output_columns="out", operations=(lambda x, y: x + y))
|
ds1 = ds1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out")
|
||||||
|
|
||||||
print("************** Output Tensor *****************")
|
print("************** Output Tensor *****************")
|
||||||
for data in ds1.create_dict_iterator(): # each data is a dictionary
|
for data in ds1.create_dict_iterator(): # each data is a dictionary
|
||||||
|
@@ -93,8 +93,8 @@ def test_case_3():
|
||||||
# apply dataset operations
|
# apply dataset operations
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
|
||||||
|
|
||||||
ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1", "out2"],
|
ds1 = ds1.map(operations=(lambda x, y: (x, x + y, x + x + y)), input_columns=col,
|
||||||
operations=(lambda x, y: (x, x + y, x + x + y)))
|
output_columns=["out0", "out1", "out2"])
|
||||||
|
|
||||||
print("************** Output Tensor *****************")
|
print("************** Output Tensor *****************")
|
||||||
for data in ds1.create_dict_iterator(): # each data is a dictionary
|
for data in ds1.create_dict_iterator(): # each data is a dictionary
|
||||||
|
@@ -119,8 +119,8 @@ def test_case_4():
|
||||||
# apply dataset operations
|
# apply dataset operations
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
|
||||||
|
|
||||||
ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
|
ds1 = ds1.map(operations=(lambda x, y: (x, x + y, x + x + y)), input_columns=col,
|
||||||
operations=(lambda x, y: (x, x + y, x + x + y)))
|
output_columns=["out0", "out1", "out2"], num_parallel_workers=4)
|
||||||
|
|
||||||
print("************** Output Tensor *****************")
|
print("************** Output Tensor *****************")
|
||||||
for data in ds1.create_dict_iterator(): # each data is a dictionary
|
for data in ds1.create_dict_iterator(): # each data is a dictionary
|
||||||
|
|
|
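For the multi-output cases (test_case_3 and test_case_4 above), the lambda now also leads the call, and output_columns follows input_columns. A small self-contained sketch using NumpySlicesDataset instead of the TFRecord files referenced by the tests:

    import numpy as np
    import mindspore.dataset as ds

    cols = {"col0": np.ones((4, 2)), "col1": np.ones((4, 2))}  # hypothetical stand-in data
    data = ds.NumpySlicesDataset(cols, shuffle=False)
    data = data.map(operations=(lambda x, y: (x, x + y, x + x + y)),
                    input_columns=["col0", "col1"],
                    output_columns=["out0", "out1", "out2"],
                    column_order=["out0", "out1", "out2"],
                    num_parallel_workers=4)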
@@ -39,12 +39,12 @@ def test_HWC2CHW(plot=False):
|
||||||
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
hwc2chw_op = c_vision.HWC2CHW()
|
hwc2chw_op = c_vision.HWC2CHW()
|
||||||
data1 = data1.map(input_columns=["image"], operations=decode_op)
|
data1 = data1.map(operations=decode_op, input_columns=["image"])
|
||||||
data1 = data1.map(input_columns=["image"], operations=hwc2chw_op)
|
data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
|
||||||
|
|
||||||
# Second dataset
|
# Second dataset
|
||||||
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
data2 = data2.map(input_columns=["image"], operations=decode_op)
|
data2 = data2.map(operations=decode_op, input_columns=["image"])
|
||||||
|
|
||||||
image_transposed = []
|
image_transposed = []
|
||||||
image = []
|
image = []
|
||||||
|
@@ -72,8 +72,8 @@ def test_HWC2CHW_md5():
|
||||||
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
hwc2chw_op = c_vision.HWC2CHW()
|
hwc2chw_op = c_vision.HWC2CHW()
|
||||||
data1 = data1.map(input_columns=["image"], operations=decode_op)
|
data1 = data1.map(operations=decode_op, input_columns=["image"])
|
||||||
data1 = data1.map(input_columns=["image"], operations=hwc2chw_op)
|
data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
|
||||||
|
|
||||||
# Compare with expected md5 from images
|
# Compare with expected md5 from images
|
||||||
filename = "HWC2CHW_01_result.npz"
|
filename = "HWC2CHW_01_result.npz"
|
||||||
|
@@ -90,8 +90,8 @@ def test_HWC2CHW_comp(plot=False):
|
||||||
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
hwc2chw_op = c_vision.HWC2CHW()
|
hwc2chw_op = c_vision.HWC2CHW()
|
||||||
data1 = data1.map(input_columns=["image"], operations=decode_op)
|
data1 = data1.map(operations=decode_op, input_columns=["image"])
|
||||||
data1 = data1.map(input_columns=["image"], operations=hwc2chw_op)
|
data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
|
||||||
|
|
||||||
# Second dataset
|
# Second dataset
|
||||||
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
|
@@ -101,7 +101,7 @@ def test_HWC2CHW_comp(plot=False):
|
||||||
py_vision.HWC2CHW()
|
py_vision.HWC2CHW()
|
||||||
]
|
]
|
||||||
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
|
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
|
||||||
data2 = data2.map(input_columns=["image"], operations=transform)
|
data2 = data2.map(operations=transform, input_columns=["image"])
|
||||||
|
|
||||||
image_c_transposed = []
|
image_c_transposed = []
|
||||||
image_py_transposed = []
|
image_py_transposed = []
|
||||||
|
|
|
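The HWC2CHW tests follow the same pattern with chained single-column maps. A sketch with hypothetical TFRecord paths, assuming the c_vision alias used by these tests:

    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as c_vision  # assumed import path

    DATA_DIR = ["/path/to/images.tfrecord"]   # hypothetical file list
    SCHEMA_DIR = "/path/to/schema.json"       # hypothetical schema
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data1 = data1.map(operations=c_vision.Decode(), input_columns=["image"])
    data1 = data1.map(operations=c_vision.HWC2CHW(), input_columns=["image"])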
@@ -42,8 +42,7 @@ def test_auto_contrast_py(plot=False):
|
||||||
F.Resize((224, 224)),
|
F.Resize((224, 224)),
|
||||||
F.ToTensor()])
|
F.ToTensor()])
|
||||||
|
|
||||||
ds_original = ds.map(input_columns="image",
|
ds_original = ds.map(operations=transforms_original, input_columns="image")
|
||||||
operations=transforms_original)
|
|
||||||
|
|
||||||
ds_original = ds_original.batch(512)
|
ds_original = ds_original.batch(512)
|
||||||
|
|
||||||
|
@@ -64,8 +63,7 @@ def test_auto_contrast_py(plot=False):
|
||||||
F.AutoContrast(cutoff=10.0, ignore=[10, 20]),
|
F.AutoContrast(cutoff=10.0, ignore=[10, 20]),
|
||||||
F.ToTensor()])
|
F.ToTensor()])
|
||||||
|
|
||||||
ds_auto_contrast = ds.map(input_columns="image",
|
ds_auto_contrast = ds.map(operations=transforms_auto_contrast, input_columns="image")
|
||||||
operations=transforms_auto_contrast)
|
|
||||||
|
|
||||||
ds_auto_contrast = ds_auto_contrast.batch(512)
|
ds_auto_contrast = ds_auto_contrast.batch(512)
|
||||||
|
|
||||||
|
@@ -99,17 +97,14 @@ def test_auto_contrast_c(plot=False):
|
||||||
|
|
||||||
# AutoContrast Images
|
# AutoContrast Images
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
|
||||||
operations=[C.Decode(),
|
|
||||||
C.Resize((224, 224))])
|
|
||||||
python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20])
|
python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20])
|
||||||
c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20])
|
c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20])
|
||||||
transforms_op = mindspore.dataset.transforms.py_transforms.Compose([lambda img: F.ToPIL()(img.astype(np.uint8)),
|
transforms_op = mindspore.dataset.transforms.py_transforms.Compose([lambda img: F.ToPIL()(img.astype(np.uint8)),
|
||||||
python_op,
|
python_op,
|
||||||
np.array])
|
np.array])
|
||||||
|
|
||||||
ds_auto_contrast_py = ds.map(input_columns="image",
|
ds_auto_contrast_py = ds.map(operations=transforms_op, input_columns="image")
|
||||||
operations=transforms_op)
|
|
||||||
|
|
||||||
ds_auto_contrast_py = ds_auto_contrast_py.batch(512)
|
ds_auto_contrast_py = ds_auto_contrast_py.batch(512)
|
||||||
|
|
||||||
|
@@ -122,12 +117,9 @@ def test_auto_contrast_c(plot=False):
|
||||||
axis=0)
|
axis=0)
|
||||||
|
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
|
||||||
operations=[C.Decode(),
|
|
||||||
C.Resize((224, 224))])
|
|
||||||
|
|
||||||
ds_auto_contrast_c = ds.map(input_columns="image",
|
ds_auto_contrast_c = ds.map(operations=c_op, input_columns="image")
|
||||||
operations=c_op)
|
|
||||||
|
|
||||||
ds_auto_contrast_c = ds_auto_contrast_c.batch(512)
|
ds_auto_contrast_c = ds_auto_contrast_c.batch(512)
|
||||||
|
|
||||||
|
@@ -162,9 +154,7 @@ def test_auto_contrast_one_channel_c(plot=False):
|
||||||
|
|
||||||
# AutoContrast Images
|
# AutoContrast Images
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
|
||||||
operations=[C.Decode(),
|
|
||||||
C.Resize((224, 224))])
|
|
||||||
python_op = F.AutoContrast()
|
python_op = F.AutoContrast()
|
||||||
c_op = C.AutoContrast()
|
c_op = C.AutoContrast()
|
||||||
# not using F.ToTensor() since it converts to floats
|
# not using F.ToTensor() since it converts to floats
|
||||||
|
@@ -174,8 +164,7 @@ def test_auto_contrast_one_channel_c(plot=False):
|
||||||
python_op,
|
python_op,
|
||||||
np.array])
|
np.array])
|
||||||
|
|
||||||
ds_auto_contrast_py = ds.map(input_columns="image",
|
ds_auto_contrast_py = ds.map(operations=transforms_op, input_columns="image")
|
||||||
operations=transforms_op)
|
|
||||||
|
|
||||||
ds_auto_contrast_py = ds_auto_contrast_py.batch(512)
|
ds_auto_contrast_py = ds_auto_contrast_py.batch(512)
|
||||||
|
|
||||||
|
@@ -188,13 +177,10 @@ def test_auto_contrast_one_channel_c(plot=False):
|
||||||
axis=0)
|
axis=0)
|
||||||
|
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])],
|
||||||
operations=[C.Decode(),
|
input_columns=["image"])
|
||||||
C.Resize((224, 224)),
|
|
||||||
lambda img: np.array(img[:, :, 0])])
|
|
||||||
|
|
||||||
ds_auto_contrast_c = ds.map(input_columns="image",
|
ds_auto_contrast_c = ds.map(operations=c_op, input_columns="image")
|
||||||
operations=c_op)
|
|
||||||
|
|
||||||
ds_auto_contrast_c = ds_auto_contrast_c.batch(512)
|
ds_auto_contrast_c = ds_auto_contrast_c.batch(512)
|
||||||
|
|
||||||
|
@@ -223,8 +209,7 @@ def test_auto_contrast_mnist_c(plot=False):
|
||||||
"""
|
"""
|
||||||
logger.info("Test AutoContrast C Op With MNIST Images")
|
logger.info("Test AutoContrast C Op With MNIST Images")
|
||||||
ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
|
ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
|
||||||
ds_auto_contrast_c = ds.map(input_columns="image",
|
ds_auto_contrast_c = ds.map(operations=C.AutoContrast(cutoff=1, ignore=(0, 255)), input_columns="image")
|
||||||
operations=C.AutoContrast(cutoff=1, ignore=(0, 255)))
|
|
||||||
ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
|
ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
|
||||||
|
|
||||||
images = []
|
images = []
|
||||||
|
@@ -252,25 +237,20 @@ def test_auto_contrast_invalid_ignore_param_c():
|
||||||
logger.info("Test AutoContrast C Op with invalid ignore parameter")
|
logger.info("Test AutoContrast C Op with invalid ignore parameter")
|
||||||
try:
|
try:
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(operations=[C.Decode(),
|
||||||
operations=[C.Decode(),
|
|
||||||
C.Resize((224, 224)),
|
C.Resize((224, 224)),
|
||||||
lambda img: np.array(img[:, :, 0])])
|
lambda img: np.array(img[:, :, 0])], input_columns=["image"])
|
||||||
# invalid ignore
|
# invalid ignore
|
||||||
ds = ds.map(input_columns="image",
|
ds = ds.map(operations=C.AutoContrast(ignore=255.5), input_columns="image")
|
||||||
operations=C.AutoContrast(ignore=255.5))
|
|
||||||
except TypeError as error:
|
except TypeError as error:
|
||||||
logger.info("Got an exception in DE: {}".format(str(error)))
|
logger.info("Got an exception in DE: {}".format(str(error)))
|
||||||
assert "Argument ignore with value 255.5 is not of type" in str(error)
|
assert "Argument ignore with value 255.5 is not of type" in str(error)
|
||||||
try:
|
try:
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)),
|
||||||
operations=[C.Decode(),
|
lambda img: np.array(img[:, :, 0])], input_columns=["image"])
|
||||||
C.Resize((224, 224)),
|
|
||||||
lambda img: np.array(img[:, :, 0])])
|
|
||||||
# invalid ignore
|
# invalid ignore
|
||||||
ds = ds.map(input_columns="image",
|
ds = ds.map(operations=C.AutoContrast(ignore=(10, 100)), input_columns="image")
|
||||||
operations=C.AutoContrast(ignore=(10, 100)))
|
|
||||||
except TypeError as error:
|
except TypeError as error:
|
||||||
logger.info("Got an exception in DE: {}".format(str(error)))
|
logger.info("Got an exception in DE: {}".format(str(error)))
|
||||||
assert "Argument ignore with value (10,100) is not of type" in str(error)
|
assert "Argument ignore with value (10,100) is not of type" in str(error)
|
||||||
|
@@ -283,25 +263,21 @@ def test_auto_contrast_invalid_cutoff_param_c():
|
||||||
logger.info("Test AutoContrast C Op with invalid cutoff parameter")
|
logger.info("Test AutoContrast C Op with invalid cutoff parameter")
|
||||||
try:
|
try:
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(operations=[C.Decode(),
|
||||||
operations=[C.Decode(),
|
|
||||||
C.Resize((224, 224)),
|
C.Resize((224, 224)),
|
||||||
lambda img: np.array(img[:, :, 0])])
|
lambda img: np.array(img[:, :, 0])], input_columns=["image"])
|
||||||
# invalid ignore
|
# invalid ignore
|
||||||
ds = ds.map(input_columns="image",
|
ds = ds.map(operations=C.AutoContrast(cutoff=-10.0), input_columns="image")
|
||||||
operations=C.AutoContrast(cutoff=-10.0))
|
|
||||||
except ValueError as error:
|
except ValueError as error:
|
||||||
logger.info("Got an exception in DE: {}".format(str(error)))
|
logger.info("Got an exception in DE: {}".format(str(error)))
|
||||||
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
|
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
|
||||||
try:
|
try:
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(operations=[C.Decode(),
|
||||||
operations=[C.Decode(),
|
|
||||||
C.Resize((224, 224)),
|
C.Resize((224, 224)),
|
||||||
lambda img: np.array(img[:, :, 0])])
|
lambda img: np.array(img[:, :, 0])], input_columns=["image"])
|
||||||
# invalid ignore
|
# invalid ignore
|
||||||
ds = ds.map(input_columns="image",
|
ds = ds.map(operations=C.AutoContrast(cutoff=120.0), input_columns="image")
|
||||||
operations=C.AutoContrast(cutoff=120.0))
|
|
||||||
except ValueError as error:
|
except ValueError as error:
|
||||||
logger.info("Got an exception in DE: {}".format(str(error)))
|
logger.info("Got an exception in DE: {}".format(str(error)))
|
||||||
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
|
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
|
||||||
|
@@ -314,21 +290,21 @@ def test_auto_contrast_invalid_ignore_param_py():
|
||||||
logger.info("Test AutoContrast python Op with invalid ignore parameter")
|
logger.info("Test AutoContrast python Op with invalid ignore parameter")
|
||||||
try:
|
try:
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
|
||||||
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
|
|
||||||
F.Resize((224, 224)),
|
F.Resize((224, 224)),
|
||||||
F.AutoContrast(ignore=255.5),
|
F.AutoContrast(ignore=255.5),
|
||||||
F.ToTensor()])])
|
F.ToTensor()])],
|
||||||
|
input_columns=["image"])
|
||||||
except TypeError as error:
|
except TypeError as error:
|
||||||
logger.info("Got an exception in DE: {}".format(str(error)))
|
logger.info("Got an exception in DE: {}".format(str(error)))
|
||||||
assert "Argument ignore with value 255.5 is not of type" in str(error)
|
assert "Argument ignore with value 255.5 is not of type" in str(error)
|
||||||
try:
|
try:
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
|
||||||
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
|
|
||||||
F.Resize((224, 224)),
|
F.Resize((224, 224)),
|
||||||
F.AutoContrast(ignore=(10, 100)),
|
F.AutoContrast(ignore=(10, 100)),
|
||||||
F.ToTensor()])])
|
F.ToTensor()])],
|
||||||
|
input_columns=["image"])
|
||||||
except TypeError as error:
|
except TypeError as error:
|
||||||
logger.info("Got an exception in DE: {}".format(str(error)))
|
logger.info("Got an exception in DE: {}".format(str(error)))
|
||||||
assert "Argument ignore with value (10,100) is not of type" in str(error)
|
assert "Argument ignore with value (10,100) is not of type" in str(error)
|
||||||
|
@@ -341,21 +317,22 @@ def test_auto_contrast_invalid_cutoff_param_py():
|
||||||
logger.info("Test AutoContrast python Op with invalid cutoff parameter")
|
logger.info("Test AutoContrast python Op with invalid cutoff parameter")
|
||||||
try:
|
try:
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
|
||||||
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
|
|
||||||
F.Resize((224, 224)),
|
F.Resize((224, 224)),
|
||||||
F.AutoContrast(cutoff=-10.0),
|
F.AutoContrast(cutoff=-10.0),
|
||||||
F.ToTensor()])])
|
F.ToTensor()])],
|
||||||
|
input_columns=["image"])
|
||||||
except ValueError as error:
|
except ValueError as error:
|
||||||
logger.info("Got an exception in DE: {}".format(str(error)))
|
logger.info("Got an exception in DE: {}".format(str(error)))
|
||||||
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
|
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
|
||||||
try:
|
try:
|
||||||
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
|
||||||
ds = ds.map(input_columns=["image"],
|
ds = ds.map(
|
||||||
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
|
operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
|
||||||
F.Resize((224, 224)),
|
F.Resize((224, 224)),
|
||||||
F.AutoContrast(cutoff=120.0),
|
F.AutoContrast(cutoff=120.0),
|
||||||
F.ToTensor()])])
|
F.ToTensor()])],
|
||||||
|
input_columns=["image"])
|
||||||
except ValueError as error:
|
except ValueError as error:
|
||||||
logger.info("Got an exception in DE: {}".format(str(error)))
|
logger.info("Got an exception in DE: {}".format(str(error)))
|
||||||
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
|
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
|
||||||
|
|
|
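When the operation is a Python transform pipeline, the Compose object itself is what moves to the front of the map() call. A sketch with a hypothetical image-folder path, assuming the F alias for the Python vision transforms used in these tests:

    import mindspore.dataset as ds
    import mindspore.dataset.transforms.py_transforms as py_transforms
    import mindspore.dataset.vision.py_transforms as F  # assumed import path

    DATA_DIR = "/path/to/imagefolder"  # hypothetical path
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transform = py_transforms.Compose([F.Decode(),
                                       F.Resize((224, 224)),
                                       F.AutoContrast(cutoff=10.0, ignore=[10, 20]),
                                       F.ToTensor()])
    data = data.map(operations=transform, input_columns="image")
    data = data.batch(512)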
@@ -49,10 +49,9 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False):
|
||||||
test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1)
|
test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1)
|
||||||
|
|
||||||
# map to apply ops
|
# map to apply ops
|
||||||
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
|
dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
|
||||||
output_columns=["image", "bbox"],
|
output_columns=["image", "bbox"],
|
||||||
column_order=["image", "bbox"],
|
column_order=["image", "bbox"])
|
||||||
operations=[test_op])
|
|
||||||
|
|
||||||
filename = "bounding_box_augment_rotation_c_result.npz"
|
filename = "bounding_box_augment_rotation_c_result.npz"
|
||||||
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
|
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
|
||||||
|
@@ -88,10 +87,9 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False):
|
||||||
test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9)
|
test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9)
|
||||||
|
|
||||||
# map to apply ops
|
# map to apply ops
|
||||||
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
|
dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
|
||||||
output_columns=["image", "bbox"],
|
output_columns=["image", "bbox"],
|
||||||
column_order=["image", "bbox"],
|
column_order=["image", "bbox"])
|
||||||
operations=[test_op])
|
|
||||||
|
|
||||||
filename = "bounding_box_augment_crop_c_result.npz"
|
filename = "bounding_box_augment_crop_c_result.npz"
|
||||||
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
|
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
|
||||||
|
@@ -126,10 +124,9 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False):
|
||||||
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9)
|
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9)
|
||||||
|
|
||||||
# map to apply ops
|
# map to apply ops
|
||||||
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
|
dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
|
||||||
output_columns=["image", "bbox"],
|
output_columns=["image", "bbox"],
|
||||||
column_order=["image", "bbox"],
|
column_order=["image", "bbox"]) # Add column for "bbox"
|
||||||
operations=[test_op]) # Add column for "bbox"
|
|
||||||
|
|
||||||
filename = "bounding_box_augment_valid_ratio_c_result.npz"
|
filename = "bounding_box_augment_valid_ratio_c_result.npz"
|
||||||
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
|
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
|
||||||
|
@@ -163,10 +160,9 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False):
|
||||||
|
|
||||||
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1)
|
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1)
|
||||||
|
|
||||||
dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"],
|
dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"],
|
||||||
output_columns=["image", "bbox"],
|
output_columns=["image", "bbox"],
|
||||||
column_order=["image", "bbox"],
|
column_order=["image", "bbox"])
|
||||||
operations=[test_op])
|
|
||||||
|
|
||||||
unaugSamp, augSamp = [], []
|
unaugSamp, augSamp = [], []
|
||||||
|
|
||||||
|
@@ -195,20 +191,19 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False):
|
||||||
|
|
||||||
# map to apply ops
|
# map to apply ops
|
||||||
# Add column for "bbox"
|
# Add column for "bbox"
|
||||||
dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"],
|
dataVoc1 = dataVoc1.map(
|
||||||
|
operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)),
|
||||||
|
input_columns=["image", "bbox"],
|
||||||
|
output_columns=["image", "bbox"],
|
||||||
|
column_order=["image", "bbox"])
|
||||||
|
dataVoc2 = dataVoc2.map(
|
||||||
|
operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)),
|
||||||
|
input_columns=["image", "bbox"],
|
||||||
|
output_columns=["image", "bbox"],
|
||||||
|
column_order=["image", "bbox"])
|
||||||
|
dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
|
||||||
output_columns=["image", "bbox"],
|
output_columns=["image", "bbox"],
|
||||||
column_order=["image", "bbox"],
|
column_order=["image", "bbox"])
|
||||||
operations=lambda img, bbox:
|
|
||||||
(img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)))
|
|
||||||
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
|
|
||||||
output_columns=["image", "bbox"],
|
|
||||||
column_order=["image", "bbox"],
|
|
||||||
operations=lambda img, bbox:
|
|
||||||
(img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)))
|
|
||||||
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
|
|
||||||
output_columns=["image", "bbox"],
|
|
||||||
column_order=["image", "bbox"],
|
|
||||||
operations=[test_op])
|
|
||||||
filename = "bounding_box_augment_valid_edge_c_result.npz"
|
filename = "bounding_box_augment_valid_edge_c_result.npz"
|
||||||
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
|
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
|
||||||
|
|
||||||
|
@@ -238,10 +233,9 @@ def test_bounding_box_augment_invalid_ratio_c():
|
||||||
# ratio range is from 0 - 1
|
# ratio range is from 0 - 1
|
||||||
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1.5)
|
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1.5)
|
||||||
# map to apply ops
|
# map to apply ops
|
||||||
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
|
dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
|
||||||
output_columns=["image", "bbox"],
|
output_columns=["image", "bbox"],
|
||||||
column_order=["image", "bbox"],
|
column_order=["image", "bbox"]) # Add column for "bbox"
|
||||||
operations=[test_op]) # Add column for "bbox"
|
|
||||||
except ValueError as error:
|
except ValueError as error:
|
||||||
logger.info("Got an exception in DE: {}".format(str(error)))
|
logger.info("Got an exception in DE: {}".format(str(error)))
|
||||||
assert "Input ratio is not within the required interval of (0.0 to 1.0)." in str(error)
|
assert "Input ratio is not within the required interval of (0.0 to 1.0)." in str(error)
|
||||||
|
|
|
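For operations that consume and produce multiple columns, such as BoundingBoxAugment, the three column keywords all stay after operations. A self-contained sketch with one synthetic image and bounding box (the real tests use VOC/COCO data):

    import numpy as np
    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as c_vision  # assumed import path

    images = np.random.randint(0, 255, (1, 100, 100, 3), dtype=np.uint8)  # one fake HWC image
    bboxes = np.array([[[10.0, 10.0, 40.0, 40.0]]], dtype=np.float32)     # one [x, y, w, h] box
    data = ds.NumpySlicesDataset({"image": images, "bbox": bboxes}, shuffle=False)
    test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9)
    data = data.map(operations=[test_op],
                    input_columns=["image", "bbox"],
                    output_columns=["image", "bbox"],
                    column_order=["image", "bbox"])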
@@ -25,7 +25,7 @@ def test_compose():
|
||||||
def test_config(arr, op_list):
|
def test_config(arr, op_list):
|
||||||
try:
|
try:
|
||||||
data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
|
data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
|
||||||
data = data.map(input_columns=["col"], operations=ops.Compose(op_list))
|
data = data.map(operations=ops.Compose(op_list), input_columns=["col"])
|
||||||
res = []
|
res = []
|
||||||
for i in data.create_dict_iterator(num_epochs=1):
|
for i in data.create_dict_iterator(num_epochs=1):
|
||||||
res.append(i["col"].tolist())
|
res.append(i["col"].tolist())
|
||||||
|
|
|
@@ -24,7 +24,7 @@ def test_random_apply():
|
||||||
def test_config(arr, op_list, prob=0.5):
|
def test_config(arr, op_list, prob=0.5):
|
||||||
try:
|
try:
|
||||||
data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
|
data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
|
||||||
data = data.map(input_columns=["col"], operations=ops.RandomApply(op_list, prob))
|
data = data.map(operations=ops.RandomApply(op_list, prob), input_columns=["col"])
|
||||||
res = []
|
res = []
|
||||||
for i in data.create_dict_iterator(num_epochs=1):
|
for i in data.create_dict_iterator(num_epochs=1):
|
||||||
res.append(i["col"].tolist())
|
res.append(i["col"].tolist())
|
||||||
|
|
|
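The Compose and RandomApply wrappers are handled the same way: the wrapper object leads the call. A sketch, assuming ops refers to the C transforms module (mindspore.dataset.transforms.c_transforms), as these tests appear to use:

    import mindspore.common.dtype as mstype
    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as ops  # assumed import path

    data = ds.NumpySlicesDataset([1, 0, 1, 0], column_names="col", shuffle=False)
    data = data.map(operations=ops.Compose([ops.TypeCast(mstype.float32)]), input_columns=["col"])
    data = data.map(operations=ops.RandomApply([ops.TypeCast(mstype.int32)], prob=0.5), input_columns=["col"])
    res = [row["col"] for row in data.create_dict_iterator(num_epochs=1)]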
@@ -48,7 +48,7 @@ def test_cache_map_basic1():
|
||||||
# This DATA_DIR only has 2 images in it
|
# This DATA_DIR only has 2 images in it
|
||||||
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
|
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
|
||||||
ds1 = ds1.repeat(4)
|
ds1 = ds1.repeat(4)
|
||||||
|
|
||||||
filename = "cache_map_01_result.npz"
|
filename = "cache_map_01_result.npz"
|
||||||
|
@@ -77,7 +77,7 @@ def test_cache_map_basic2():
|
||||||
# This DATA_DIR only has 2 images in it
|
# This DATA_DIR only has 2 images in it
|
||||||
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
|
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
|
||||||
ds1 = ds1.repeat(4)
|
ds1 = ds1.repeat(4)
|
||||||
|
|
||||||
filename = "cache_map_02_result.npz"
|
filename = "cache_map_02_result.npz"
|
||||||
|
@@ -107,7 +107,7 @@ def test_cache_map_basic3():
|
||||||
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
|
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
ds1 = ds1.repeat(4)
|
ds1 = ds1.repeat(4)
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
|
||||||
logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
|
logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
|
||||||
|
|
||||||
num_iter = 0
|
num_iter = 0
|
||||||
|
@@ -131,7 +131,7 @@ def test_cache_map_basic4():
|
||||||
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
|
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
ds1 = ds1.repeat(4)
|
ds1 = ds1.repeat(4)
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
|
||||||
logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
|
logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
|
||||||
shape = ds1.output_shapes()
|
shape = ds1.output_shapes()
|
||||||
logger.info(shape)
|
logger.info(shape)
|
||||||
|
@@ -167,7 +167,7 @@ def test_cache_map_failure1():
|
||||||
# This DATA_DIR only has 2 images in it
|
# This DATA_DIR only has 2 images in it
|
||||||
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
|
ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
|
||||||
ds1 = ds1.repeat(4)
|
ds1 = ds1.repeat(4)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
@@ -108,7 +108,7 @@ def test_cache_nomap_basic3():
|
||||||
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
|
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False, cache=some_cache)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False, cache=some_cache)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
|
||||||
ds1 = ds1.repeat(4)
|
ds1 = ds1.repeat(4)
|
||||||
|
|
||||||
num_iter = 0
|
num_iter = 0
|
||||||
|
@@ -160,7 +160,7 @@ def test_cache_nomap_basic4():
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
|
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
|
||||||
ds1 = ds1.repeat(4)
|
ds1 = ds1.repeat(4)
|
||||||
|
|
||||||
num_iter = 0
|
num_iter = 0
|
||||||
|
@@ -197,7 +197,7 @@ def test_cache_nomap_basic5():
|
||||||
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
|
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], cache=some_cache)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], cache=some_cache)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
|
||||||
ds1 = ds1.repeat(4)
|
ds1 = ds1.repeat(4)
|
||||||
|
|
||||||
num_iter = 0
|
num_iter = 0
|
||||||
|
@@ -237,7 +237,7 @@ def test_cache_nomap_basic6():
|
||||||
# there was not any cache.
|
# there was not any cache.
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_shards=3, shard_id=1, cache=some_cache)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_shards=3, shard_id=1, cache=some_cache)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
|
||||||
ds1 = ds1.repeat(4)
|
ds1 = ds1.repeat(4)
|
||||||
|
|
||||||
num_iter = 0
|
num_iter = 0
|
||||||
|
@@ -273,7 +273,7 @@ def test_cache_nomap_basic7():
|
||||||
|
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL, cache=some_cache)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL, cache=some_cache)
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"])
|
||||||
ds1 = ds1.repeat(4)
|
ds1 = ds1.repeat(4)
|
||||||
|
|
||||||
num_iter = 0
|
num_iter = 0
|
||||||
|
@@ -343,11 +343,11 @@ def test_cache_nomap_allowed_share2():
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
|
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
|
||||||
ds1 = ds1.repeat(4)
|
ds1 = ds1.repeat(4)
|
||||||
|
|
||||||
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache)
|
ds2 = ds2.map(operations=decode_op, input_columns=["image"], cache=some_cache)
|
||||||
ds2 = ds2.shuffle(buffer_size=2)
|
ds2 = ds2.shuffle(buffer_size=2)
|
||||||
|
|
||||||
num_iter = 0
|
num_iter = 0
|
||||||
|
@@ -418,10 +418,10 @@ def test_cache_nomap_allowed_share4():
|
||||||
decode_op = c_vision.Decode()
|
decode_op = c_vision.Decode()
|
||||||
|
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache, num_parallel_workers=1)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache, num_parallel_workers=1)
|
||||||
|
|
||||||
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache, num_parallel_workers=2)
|
ds2 = ds2.map(operations=decode_op, input_columns=["image"], cache=some_cache, num_parallel_workers=2)
|
||||||
|
|
||||||
num_iter = 0
|
num_iter = 0
|
||||||
for _ in ds1.create_dict_iterator(num_epochs=1):
|
for _ in ds1.create_dict_iterator(num_epochs=1):
|
||||||
|
@@ -458,10 +458,10 @@ def test_cache_nomap_disallowed_share1():
|
||||||
rescale_op = c_vision.Rescale(1.0 / 255.0, -1.0)
|
rescale_op = c_vision.Rescale(1.0 / 255.0, -1.0)
|
||||||
|
|
||||||
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
|
ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache)
|
||||||
|
|
||||||
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
ds2 = ds2.map(input_columns=["image"], operations=rescale_op, cache=some_cache)
|
ds2 = ds2.map(operations=rescale_op, input_columns=["image"], cache=some_cache)
|
||||||
|
|
||||||
num_iter = 0
|
num_iter = 0
|
||||||
for _ in ds1.create_dict_iterator(num_epochs=1):
|
for _ in ds1.create_dict_iterator(num_epochs=1):
|
||||||
|
|
|
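The cache keyword keeps its position at the end of the map() call; only operations and input_columns swap. A sketch with hypothetical paths, assuming a cache server is already running and that session 1 exists:

    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as c_vision  # assumed import path

    some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)  # hypothetical session id
    DATA_DIR = ["/path/to/images.tfrecord"]  # hypothetical file list
    SCHEMA_DIR = "/path/to/schema.json"      # hypothetical schema
    ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    ds1 = ds1.map(operations=c_vision.Decode(), input_columns=["image"], cache=some_cache)
    ds1 = ds1.repeat(4)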
@@ -40,12 +40,12 @@ def test_center_crop_op(height=375, width=375, plot=False):
|
||||||
decode_op = vision.Decode()
|
decode_op = vision.Decode()
|
||||||
# 3 images [375, 500] [600, 500] [512, 512]
|
# 3 images [375, 500] [600, 500] [512, 512]
|
||||||
center_crop_op = vision.CenterCrop([height, width])
|
center_crop_op = vision.CenterCrop([height, width])
|
||||||
data1 = data1.map(input_columns=["image"], operations=decode_op)
|
data1 = data1.map(operations=decode_op, input_columns=["image"])
|
||||||
data1 = data1.map(input_columns=["image"], operations=center_crop_op)
|
data1 = data1.map(operations=center_crop_op, input_columns=["image"])
|
||||||
|
|
||||||
# Second dataset
|
# Second dataset
|
||||||
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
|
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
|
||||||
data2 = data2.map(input_columns=["image"], operations=decode_op)
|
data2 = data2.map(operations=decode_op, input_columns=["image"])
|
||||||
|
|
||||||
image_cropped = []
|
image_cropped = []
|
||||||
image = []
|
image = []
|
||||||
|
@@ -67,8 +67,8 @@ def test_center_crop_md5(height=375, width=375):
|
||||||
decode_op = vision.Decode()
|
decode_op = vision.Decode()
|
||||||
# 3 images [375, 500] [600, 500] [512, 512]
|
# 3 images [375, 500] [600, 500] [512, 512]
|
||||||
center_crop_op = vision.CenterCrop([height, width])
|
center_crop_op = vision.CenterCrop([height, width])
|
||||||
data1 = data1.map(input_columns=["image"], operations=decode_op)
|
data1 = data1.map(operations=decode_op, input_columns=["image"])
|
||||||
data1 = data1.map(input_columns=["image"], operations=center_crop_op)
|
data1 = data1.map(operations=center_crop_op, input_columns=["image"])
|
||||||
# Compare with expected md5 from images
|
# Compare with expected md5 from images
|
||||||
filename = "center_crop_01_result.npz"
|
filename = "center_crop_01_result.npz"
|
||||||
save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
|
save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
|
||||||
|
@@ -84,8 +84,8 @@ def test_center_crop_comp(height=375, width=375, plot=False):
|
||||||
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
decode_op = vision.Decode()
|
decode_op = vision.Decode()
|
||||||
center_crop_op = vision.CenterCrop([height, width])
|
center_crop_op = vision.CenterCrop([height, width])
|
||||||
data1 = data1.map(input_columns=["image"], operations=decode_op)
|
data1 = data1.map(operations=decode_op, input_columns=["image"])
|
||||||
data1 = data1.map(input_columns=["image"], operations=center_crop_op)
|
data1 = data1.map(operations=center_crop_op, input_columns=["image"])
|
||||||
|
|
||||||
# Second dataset
|
# Second dataset
|
||||||
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
|
@@ -95,7 +95,7 @@ def test_center_crop_comp(height=375, width=375, plot=False):
|
||||||
py_vision.ToTensor()
|
py_vision.ToTensor()
|
||||||
]
|
]
|
||||||
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
|
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
|
||||||
data2 = data2.map(input_columns=["image"], operations=transform)
|
data2 = data2.map(operations=transform, input_columns=["image"])
|
||||||
|
|
||||||
image_c_cropped = []
|
image_c_cropped = []
|
||||||
image_py_cropped = []
|
image_py_cropped = []
|
||||||
|
@@ -126,11 +126,11 @@ def test_crop_grayscale(height=375, width=375):
|
||||||
|
|
||||||
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
|
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
|
||||||
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
|
||||||
data1 = data1.map(input_columns=["image"], operations=transform)
|
data1 = data1.map(operations=transform, input_columns=["image"])
|
||||||
|
|
||||||
# If input is grayscale, the output dimensions should be single channel
|
# If input is grayscale, the output dimensions should be single channel
|
||||||
crop_gray = vision.CenterCrop([height, width])
|
crop_gray = vision.CenterCrop([height, width])
|
||||||
data1 = data1.map(input_columns=["image"], operations=crop_gray)
|
data1 = data1.map(operations=crop_gray, input_columns=["image"])
|
||||||
|
|
||||||
for item1 in data1.create_dict_iterator(num_epochs=1):
|
for item1 in data1.create_dict_iterator(num_epochs=1):
|
||||||
c_image = item1["image"]
|
c_image = item1["image"]
|
||||||
|
|
|
@@ -121,7 +121,7 @@ def test_concat_05():
|
||||||
data2 = ds.GeneratorDataset(generator_10, ["col1"])
|
data2 = ds.GeneratorDataset(generator_10, ["col1"])
|
||||||
|
|
||||||
type_cast_op = C.TypeCast(mstype.float32)
|
type_cast_op = C.TypeCast(mstype.float32)
|
||||||
data1 = data1.map(input_columns=["col1"], operations=type_cast_op)
|
data1 = data1.map(operations=type_cast_op, input_columns=["col1"])
|
||||||
|
|
||||||
data3 = data1 + data2
|
data3 = data1 + data2
|
||||||
|
|
||||||
|
@@ -319,8 +319,8 @@ def test_concat_14():
|
||||||
F.Resize((224, 224)),
|
F.Resize((224, 224)),
|
||||||
F.ToTensor()])
|
F.ToTensor()])
|
||||||
|
|
||||||
data1 = data1.map(input_columns=["image"], operations=transforms1)
|
data1 = data1.map(operations=transforms1, input_columns=["image"])
|
||||||
data2 = data2.map(input_columns=["image"], operations=transforms1)
|
data2 = data2.map(operations=transforms1, input_columns=["image"])
|
||||||
data3 = data1 + data2
|
data3 = data1 + data2
|
||||||
|
|
||||||
expected, output = [], []
|
expected, output = [], []
|
||||||
|
|
|
@@ -31,7 +31,7 @@ def test_concatenate_op_all():
|
||||||
append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float)
|
append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float)
|
||||||
data = ds.GeneratorDataset(gen, column_names=["col"])
|
data = ds.GeneratorDataset(gen, column_names=["col"])
|
||||||
concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)
|
concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)
|
||||||
data = data.map(input_columns=["col"], operations=concatenate_op)
|
data = data.map(operations=concatenate_op, input_columns=["col"])
|
||||||
expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3,
|
expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3,
|
||||||
11., 12.])
|
11., 12.])
|
||||||
for data_row in data:
|
for data_row in data:
|
||||||
|
@@ -45,7 +45,7 @@ def test_concatenate_op_none():
|
||||||
data = ds.GeneratorDataset(gen, column_names=["col"])
|
data = ds.GeneratorDataset(gen, column_names=["col"])
|
||||||
concatenate_op = data_trans.Concatenate()
|
concatenate_op = data_trans.Concatenate()
|
||||||
|
|
||||||
data = data.map(input_columns=["col"], operations=concatenate_op)
|
data = data.map(operations=concatenate_op, input_columns=["col"])
|
||||||
for data_row in data:
|
for data_row in data:
|
||||||
np.testing.assert_array_equal(data_row[0], np.array([5., 6., 7., 8.], dtype=np.float))
|
np.testing.assert_array_equal(data_row[0], np.array([5., 6., 7., 8.], dtype=np.float))
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ def test_concatenate_op_string():
|
||||||
data = ds.GeneratorDataset(gen, column_names=["col"])
|
data = ds.GeneratorDataset(gen, column_names=["col"])
|
||||||
concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)
|
concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)
|
||||||
|
|
||||||
data = data.map(input_columns=["col"], operations=concatenate_op)
|
data = data.map(operations=concatenate_op, input_columns=["col"])
|
||||||
expected = np.array(["dw", "df", "ss", "ad", "dwsdf", "df"], dtype='S')
|
expected = np.array(["dw", "df", "ss", "ad", "dwsdf", "df"], dtype='S')
|
||||||
for data_row in data:
|
for data_row in data:
|
||||||
np.testing.assert_array_equal(data_row[0], expected)
|
np.testing.assert_array_equal(data_row[0], expected)
|
||||||
|
@ -74,8 +74,8 @@ def test_concatenate_op_multi_input_string():
|
||||||
|
|
||||||
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor, append=append_tensor)
|
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor, append=append_tensor)
|
||||||
|
|
||||||
data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"],
|
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
|
||||||
operations=concatenate_op)
|
output_columns=["out1"])
|
||||||
expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S')
|
expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S')
|
||||||
for data_row in data:
|
for data_row in data:
|
||||||
np.testing.assert_array_equal(data_row[0], expected)
|
np.testing.assert_array_equal(data_row[0], expected)
|
||||||
|
@ -89,8 +89,8 @@ def test_concatenate_op_multi_input_numeric():
|
||||||
|
|
||||||
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor)
|
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor)
|
||||||
|
|
||||||
data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"],
|
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
|
||||||
operations=concatenate_op)
|
output_columns=["out1"])
|
||||||
expected = np.array([3, 5, 1, 2, 3, 4])
|
expected = np.array([3, 5, 1, 2, 3, 4])
|
||||||
for data_row in data:
|
for data_row in data:
|
||||||
np.testing.assert_array_equal(data_row[0], expected)
|
np.testing.assert_array_equal(data_row[0], expected)
|
||||||
|
@ -104,7 +104,7 @@ def test_concatenate_op_type_mismatch():
|
||||||
data = ds.GeneratorDataset(gen, column_names=["col"])
|
data = ds.GeneratorDataset(gen, column_names=["col"])
|
||||||
concatenate_op = data_trans.Concatenate(0, prepend_tensor)
|
concatenate_op = data_trans.Concatenate(0, prepend_tensor)
|
||||||
|
|
||||||
data = data.map(input_columns=["col"], operations=concatenate_op)
|
data = data.map(operations=concatenate_op, input_columns=["col"])
|
||||||
with pytest.raises(RuntimeError) as error_info:
|
with pytest.raises(RuntimeError) as error_info:
|
||||||
for _ in data:
|
for _ in data:
|
||||||
pass
|
pass
|
||||||
|
@ -119,7 +119,7 @@ def test_concatenate_op_type_mismatch2():
|
||||||
data = ds.GeneratorDataset(gen, column_names=["col"])
|
data = ds.GeneratorDataset(gen, column_names=["col"])
|
||||||
concatenate_op = data_trans.Concatenate(0, prepend_tensor)
|
concatenate_op = data_trans.Concatenate(0, prepend_tensor)
|
||||||
|
|
||||||
data = data.map(input_columns=["col"], operations=concatenate_op)
|
data = data.map(operations=concatenate_op, input_columns=["col"])
|
||||||
with pytest.raises(RuntimeError) as error_info:
|
with pytest.raises(RuntimeError) as error_info:
|
||||||
for _ in data:
|
for _ in data:
|
||||||
pass
|
pass
|
||||||
|
@ -134,7 +134,7 @@ def test_concatenate_op_incorrect_dim():
|
||||||
concatenate_op = data_trans.Concatenate(0, prepend_tensor)
|
concatenate_op = data_trans.Concatenate(0, prepend_tensor)
|
||||||
data = ds.GeneratorDataset(gen, column_names=["col"])
|
data = ds.GeneratorDataset(gen, column_names=["col"])
|
||||||
|
|
||||||
data = data.map(input_columns=["col"], operations=concatenate_op)
|
data = data.map(operations=concatenate_op, input_columns=["col"])
|
||||||
with pytest.raises(RuntimeError) as error_info:
|
with pytest.raises(RuntimeError) as error_info:
|
||||||
for _ in data:
|
for _ in data:
|
||||||
pass
|
pass
|
||||||
|
@ -155,7 +155,7 @@ def test_concatenate_op_negative_axis():
|
||||||
append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float)
|
append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float)
|
||||||
data = ds.GeneratorDataset(gen, column_names=["col"])
|
data = ds.GeneratorDataset(gen, column_names=["col"])
|
||||||
concatenate_op = data_trans.Concatenate(-1, prepend_tensor, append_tensor)
|
concatenate_op = data_trans.Concatenate(-1, prepend_tensor, append_tensor)
|
||||||
data = data.map(input_columns=["col"], operations=concatenate_op)
|
data = data.map(operations=concatenate_op, input_columns=["col"])
|
||||||
expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3,
|
expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3,
|
||||||
11., 12.])
|
11., 12.])
|
||||||
for data_row in data:
|
for data_row in data:
|
||||||
|
|
|
@@ -86,12 +86,12 @@ def test_pipeline():
num_parallel_workers_original = ds.config.get_num_parallel_workers()

data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
- data1 = data1.map(input_columns=["image"], operations=[c_vision.Decode(True)])
+ data1 = data1.map(operations=[c_vision.Decode(True)], input_columns=["image"])
ds.serialize(data1, "testpipeline.json")

data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, num_parallel_workers=num_parallel_workers_original,
                           shuffle=False)
- data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode(True)])
+ data2 = data2.map(operations=[c_vision.Decode(True)], input_columns=["image"])
ds.serialize(data2, "testpipeline2.json")

# check that the generated output is different

@@ -131,14 +131,14 @@ def test_deterministic_run_fail():
# outputs a deterministic series of numbers, e,g "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
decode_op = c_vision.Decode()
- data1 = data1.map(input_columns=["image"], operations=decode_op)
+ data1 = data1.map(operations=decode_op, input_columns=["image"])
- data1 = data1.map(input_columns=["image"], operations=random_crop_op)
+ data1 = data1.map(operations=random_crop_op, input_columns=["image"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
- data2 = data2.map(input_columns=["image"], operations=decode_op)
+ data2 = data2.map(operations=decode_op, input_columns=["image"])
# If seed is set up on constructor
- data2 = data2.map(input_columns=["image"], operations=random_crop_op)
+ data2 = data2.map(operations=random_crop_op, input_columns=["image"])

try:
dataset_equal(data1, data2, 0)

@@ -171,16 +171,16 @@ def test_seed_undeterministic():
# We get the seed when constructor is called
random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
decode_op = c_vision.Decode()
- data1 = data1.map(input_columns=["image"], operations=decode_op)
+ data1 = data1.map(operations=decode_op, input_columns=["image"])
- data1 = data1.map(input_columns=["image"], operations=random_crop_op)
+ data1 = data1.map(operations=random_crop_op, input_columns=["image"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
- data2 = data2.map(input_columns=["image"], operations=decode_op)
+ data2 = data2.map(operations=decode_op, input_columns=["image"])
# Since seed is set up on constructor, so the two ops output deterministic sequence.
# Assume the generated random sequence "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
- data2 = data2.map(input_columns=["image"], operations=random_crop_op2)
+ data2 = data2.map(operations=random_crop_op2, input_columns=["image"])
try:
dataset_equal(data1, data2, 0)
except Exception as e:

@@ -211,15 +211,15 @@ def test_seed_deterministic():
# seed will be read in during constructor call
random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
decode_op = c_vision.Decode()
- data1 = data1.map(input_columns=["image"], operations=decode_op)
+ data1 = data1.map(operations=decode_op, input_columns=["image"])
- data1 = data1.map(input_columns=["image"], operations=random_crop_op)
+ data1 = data1.map(operations=random_crop_op, input_columns=["image"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
- data2 = data2.map(input_columns=["image"], operations=decode_op)
+ data2 = data2.map(operations=decode_op, input_columns=["image"])
# If seed is set up on constructor, so the two ops output deterministic sequence
random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
- data2 = data2.map(input_columns=["image"], operations=random_crop_op2)
+ data2 = data2.map(operations=random_crop_op2, input_columns=["image"])

dataset_equal(data1, data2, 0)

@@ -246,15 +246,15 @@ def test_deterministic_run_distribution():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
random_horizontal_flip_op = c_vision.RandomHorizontalFlip(0.1)
decode_op = c_vision.Decode()
- data1 = data1.map(input_columns=["image"], operations=decode_op)
+ data1 = data1.map(operations=decode_op, input_columns=["image"])
- data1 = data1.map(input_columns=["image"], operations=random_horizontal_flip_op)
+ data1 = data1.map(operations=random_horizontal_flip_op, input_columns=["image"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
- data2 = data2.map(input_columns=["image"], operations=decode_op)
+ data2 = data2.map(operations=decode_op, input_columns=["image"])
# If seed is set up on constructor, so the two ops output deterministic sequence
random_horizontal_flip_op2 = c_vision.RandomHorizontalFlip(0.1)
- data2 = data2.map(input_columns=["image"], operations=random_horizontal_flip_op2)
+ data2 = data2.map(operations=random_horizontal_flip_op2, input_columns=["image"])

dataset_equal(data1, data2, 0)

@@ -285,7 +285,7 @@ def test_deterministic_python_seed():
py_vision.ToTensor(),
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
- data1 = data1.map(input_columns=["image"], operations=transform)
+ data1 = data1.map(operations=transform, input_columns=["image"])
data1_output = []
# config.set_seed() calls random.seed()
for data_one in data1.create_dict_iterator(num_epochs=1):

@@ -293,7 +293,7 @@ def test_deterministic_python_seed():

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
- data2 = data2.map(input_columns=["image"], operations=transform)
+ data2 = data2.map(operations=transform, input_columns=["image"])
# config.set_seed() calls random.seed(), resets seed for next dataset iterator
ds.config.set_seed(0)

@@ -328,7 +328,7 @@ def test_deterministic_python_seed_multi_thread():
py_vision.ToTensor(),
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
- data1 = data1.map(input_columns=["image"], operations=transform, python_multiprocessing=True)
+ data1 = data1.map(operations=transform, input_columns=["image"], python_multiprocessing=True)
data1_output = []
# config.set_seed() calls random.seed()
for data_one in data1.create_dict_iterator(num_epochs=1):

@@ -337,7 +337,7 @@ def test_deterministic_python_seed_multi_thread():
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
# If seed is set up on constructor
- data2 = data2.map(input_columns=["image"], operations=transform, python_multiprocessing=True)
+ data2 = data2.map(operations=transform, input_columns=["image"], python_multiprocessing=True)
# config.set_seed() calls random.seed()
ds.config.set_seed(0)
@@ -30,6 +30,7 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"

GENERATE_GOLDEN = False


def test_cut_out_op(plot=False):
"""
Test Cutout

@@ -45,7 +46,7 @@ def test_cut_out_op(plot=False):
f.RandomErasing(value='random')
]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
- data1 = data1.map(input_columns=["image"], operations=transform_1)
+ data1 = data1.map(operations=transform_1, input_columns=["image"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

@@ -57,7 +58,7 @@ def test_cut_out_op(plot=False):
cut_out_op
]

- data2 = data2.map(input_columns=["image"], operations=transforms_2)
+ data2 = data2.map(operations=transforms_2, input_columns=["image"])

num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):

@@ -91,7 +92,7 @@ def test_cut_out_op_multicut(plot=False):
f.ToTensor(),
]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
- data1 = data1.map(input_columns=["image"], operations=transform_1)
+ data1 = data1.map(operations=transform_1, input_columns=["image"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

@@ -103,7 +104,7 @@ def test_cut_out_op_multicut(plot=False):
cut_out_op
]

- data2 = data2.map(input_columns=["image"], operations=transforms_2)
+ data2 = data2.map(operations=transforms_2, input_columns=["image"])

num_iter = 0
image_list_1, image_list_2 = [], []

@@ -136,8 +137,8 @@ def test_cut_out_md5():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
decode_op = c.Decode()
cut_out_op = c.CutOut(100)
- data1 = data1.map(input_columns=["image"], operations=decode_op)
+ data1 = data1.map(operations=decode_op, input_columns=["image"])
- data1 = data1.map(input_columns=["image"], operations=cut_out_op)
+ data1 = data1.map(operations=cut_out_op, input_columns=["image"])

data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
transforms = [

@@ -146,7 +147,7 @@ def test_cut_out_md5():
f.Cutout(100)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
- data2 = data2.map(input_columns=["image"], operations=transform)
+ data2 = data2.map(operations=transform, input_columns=["image"])

# Compare with expected md5 from images
filename1 = "cut_out_01_c_result.npz"

@@ -174,7 +175,7 @@ def test_cut_out_comp(plot=False):
f.Cutout(200)
]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
- data1 = data1.map(input_columns=["image"], operations=transform_1)
+ data1 = data1.map(operations=transform_1, input_columns=["image"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

@@ -184,7 +185,7 @@ def test_cut_out_comp(plot=False):
c.CutOut(200)
]

- data2 = data2.map(input_columns=["image"], operations=transforms_2)
+ data2 = data2.map(operations=transforms_2, input_columns=["image"])

num_iter = 0
image_list_1, image_list_2 = [], []
@@ -51,12 +51,12 @@ def test_cutmix_batch_success1(plot=False):
# CutMix Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
hwc2chw_op = vision.HWC2CHW()
- data1 = data1.map(input_columns=["image"], operations=hwc2chw_op)
+ data1 = data1.map(operations=hwc2chw_op, input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=10)
- data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+ data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW, 2.0, 0.5)
data1 = data1.batch(5, drop_remainder=True)
- data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+ data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

images_cutmix = None
for idx, (image, _) in enumerate(data1):

@@ -94,12 +94,12 @@ def test_cutmix_batch_success2(plot=False):
# CutMix Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=10)
- data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+ data1 = data1.map(operations=one_hot_op, input_columns=["label"])
- rescale_op = vision.Rescale((1.0/255.0), 0.0)
+ rescale_op = vision.Rescale((1.0 / 255.0), 0.0)
- data1 = data1.map(input_columns=["image"], operations=rescale_op)
+ data1 = data1.map(operations=rescale_op, input_columns=["image"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(5, drop_remainder=True)
- data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+ data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

images_cutmix = None
for idx, (image, _) in enumerate(data1):

@@ -125,7 +125,7 @@ def test_cutmix_batch_success3(plot=False):

ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
decode_op = vision.Decode()
- ds_original = ds_original.map(input_columns=["image"], operations=[decode_op])
+ ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)

images_original = None

@@ -139,14 +139,14 @@ def test_cutmix_batch_success3(plot=False):
data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)

decode_op = vision.Decode()
- data1 = data1.map(input_columns=["image"], operations=[decode_op])
+ data1 = data1.map(operations=[decode_op], input_columns=["image"])

one_hot_op = data_trans.OneHot(num_classes=10)
- data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+ data1 = data1.map(operations=one_hot_op, input_columns=["label"])

cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(4, pad_info={}, drop_remainder=True)
- data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+ data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

images_cutmix = None
for idx, (image, _) in enumerate(data1):

@@ -172,7 +172,7 @@ def test_cutmix_batch_success4(plot=False):

ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
decode_op = vision.Decode()
- ds_original = ds_original.map(input_columns=["image"], operations=[decode_op])
+ ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
ds_original = ds_original.batch(2, drop_remainder=True)

images_original = None

@@ -186,14 +186,14 @@ def test_cutmix_batch_success4(plot=False):
data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False)

decode_op = vision.Decode()
- data1 = data1.map(input_columns=["image"], operations=[decode_op])
+ data1 = data1.map(operations=[decode_op], input_columns=["image"])

one_hot_op = data_trans.OneHot(num_classes=100)
- data1 = data1.map(input_columns=["attr"], operations=one_hot_op)
+ data1 = data1.map(operations=one_hot_op, input_columns=["attr"])

cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.5, 0.9)
data1 = data1.batch(2, drop_remainder=True)
- data1 = data1.map(input_columns=["image", "attr"], operations=cutmix_batch_op)
+ data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "attr"])

images_cutmix = None
for idx, (image, _) in enumerate(data1):

@@ -223,10 +223,10 @@ def test_cutmix_batch_nhwc_md5():
data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

one_hot_op = data_trans.OneHot(num_classes=10)
- data = data.map(input_columns=["label"], operations=one_hot_op)
+ data = data.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data = data.batch(5, drop_remainder=True)
- data = data.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+ data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"])

filename = "cutmix_batch_c_nhwc_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

@@ -247,12 +247,12 @@ def test_cutmix_batch_nchw_md5():
# CutMixBatch Images
data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
hwc2chw_op = vision.HWC2CHW()
- data = data.map(input_columns=["image"], operations=hwc2chw_op)
+ data = data.map(operations=hwc2chw_op, input_columns=["image"])
one_hot_op = data_trans.OneHot(num_classes=10)
- data = data.map(input_columns=["label"], operations=one_hot_op)
+ data = data.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
data = data.batch(5, drop_remainder=True)
- data = data.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+ data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"])

filename = "cutmix_batch_c_nchw_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

@@ -273,10 +273,10 @@ def test_cutmix_batch_fail1():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

one_hot_op = data_trans.OneHot(num_classes=10)
- data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+ data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
with pytest.raises(RuntimeError) as error:
- data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+ data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
for idx, (image, _) in enumerate(data1):
if idx == 0:
images_cutmix = image

@@ -297,7 +297,7 @@ def test_cutmix_batch_fail2():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

one_hot_op = data_trans.OneHot(num_classes=10)
- data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+ data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, -1)
error_message = "Input is not within the required interval"

@@ -315,7 +315,7 @@ def test_cutmix_batch_fail3():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

one_hot_op = data_trans.OneHot(num_classes=10)
- data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+ data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, 2)
error_message = "Input is not within the required interval"

@@ -333,7 +333,7 @@ def test_cutmix_batch_fail4():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

one_hot_op = data_trans.OneHot(num_classes=10)
- data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+ data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, -1)
error_message = "Input is not within the required interval"

@@ -351,10 +351,10 @@ def test_cutmix_batch_fail5():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

one_hot_op = data_trans.OneHot(num_classes=10)
- data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+ data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(5, drop_remainder=True)
- data1 = data1.map(input_columns=["image"], operations=cutmix_batch_op)
+ data1 = data1.map(operations=cutmix_batch_op, input_columns=["image"])

with pytest.raises(RuntimeError) as error:
images_cutmix = np.array([])

@@ -378,10 +378,10 @@ def test_cutmix_batch_fail6():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

one_hot_op = data_trans.OneHot(num_classes=10)
- data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+ data1 = data1.map(operations=one_hot_op, input_columns=["label"])
cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
data1 = data1.batch(5, drop_remainder=True)
- data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+ data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

with pytest.raises(RuntimeError) as error:
images_cutmix = np.array([])

@@ -406,7 +406,7 @@ def test_cutmix_batch_fail7():

cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
data1 = data1.batch(5, drop_remainder=True)
- data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op)
+ data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

with pytest.raises(RuntimeError) as error:
images_cutmix = np.array([])

@@ -430,7 +430,7 @@ def test_cutmix_batch_fail8():
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

one_hot_op = data_trans.OneHot(num_classes=10)
- data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+ data1 = data1.map(operations=one_hot_op, input_columns=["label"])
with pytest.raises(ValueError) as error:
vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.0)
error_message = "Input is not within the required interval"
@@ -59,7 +59,7 @@ def test_numpy_slices_list_append():

data1 = de.TFRecordDataset(DATA_DIR)
resize_op = vision.Resize((resize_height, resize_width))
- data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True), resize_op])
+ data1 = data1.map(operations=[vision.Decode(True), resize_op], input_columns=["image"])

res = []
for data in data1.create_dict_iterator(num_epochs=1):
@@ -46,8 +46,8 @@ def test_celeba_dataset_op():
data = data.repeat(2)
center_crop = vision.CenterCrop(crop_size)
resize_op = vision.Resize(resize_size, Inter.LINEAR)  # Bilinear mode
- data = data.map(input_columns=["image"], operations=center_crop)
+ data = data.map(operations=center_crop, input_columns=["image"])
- data = data.map(input_columns=["image"], operations=resize_op)
+ data = data.map(operations=resize_op, input_columns=["image"])

count = 0
for item in data.create_dict_iterator(num_epochs=1):
@@ -25,6 +25,7 @@ INVALID_FILE = "../data/dataset/testCOCO/annotations/invalid.json"
LACKOFIMAGE_FILE = "../data/dataset/testCOCO/annotations/lack_of_images.json"
INVALID_CATEGORY_ID_FILE = "../data/dataset/testCOCO/annotations/invalid_category_id.json"


def test_coco_detection():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection",
                       decode=True, shuffle=False)

@@ -57,6 +58,7 @@ def test_coco_detection():
np.testing.assert_array_equal(np.array([[5]]), category_id[4])
np.testing.assert_array_equal(np.array([[6]]), category_id[5])


def test_coco_stuff():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff",
                       decode=True, shuffle=False)

@@ -97,6 +99,7 @@ def test_coco_stuff():
segmentation[5])
np.testing.assert_array_equal(np.array([[0]]), iscrowd[5])


def test_coco_keypoint():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint",
                       decode=True, shuffle=False)

@@ -124,6 +127,7 @@ def test_coco_keypoint():
keypoints[1])
np.testing.assert_array_equal(np.array([[10]]), num_keypoints[1])


def test_coco_panoptic():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True, shuffle=False)
num_iter = 0

@@ -151,6 +155,7 @@ def test_coco_panoptic():
np.testing.assert_array_equal(np.array([[0], [0]]), iscrowd[1])
np.testing.assert_array_equal(np.array([[43102], [6079]]), area[1])


def test_coco_detection_classindex():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
class_index = data1.get_class_indexing()

@@ -161,6 +166,7 @@ def test_coco_detection_classindex():
num_iter += 1
assert num_iter == 6


def test_coco_panootic_classindex():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True)
class_index = data1.get_class_indexing()

@@ -170,6 +176,7 @@ def test_coco_panootic_classindex():
num_iter += 1
assert num_iter == 2


def test_coco_case_0():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
data1 = data1.shuffle(10)

@@ -179,6 +186,7 @@ def test_coco_case_0():
num_iter += 1
assert num_iter == 2


def test_coco_case_1():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
sizes = [0.5, 0.5]

@@ -194,28 +202,31 @@ def test_coco_case_1():
num_iter += 1
assert num_iter == 3


def test_coco_case_2():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
resize_op = vision.Resize((224, 224))

- data1 = data1.map(input_columns=["image"], operations=resize_op)
+ data1 = data1.map(operations=resize_op, input_columns=["image"])
data1 = data1.repeat(4)
num_iter = 0
for _ in data1.__iter__():
num_iter += 1
assert num_iter == 24


def test_coco_case_3():
data1 = ds.CocoDataset(DATA_DIR_2, annotation_file=ANNOTATION_FILE, task="Detection", decode=True)
resize_op = vision.Resize((224, 224))

- data1 = data1.map(input_columns=["image"], operations=resize_op)
+ data1 = data1.map(operations=resize_op, input_columns=["image"])
data1 = data1.repeat(4)
num_iter = 0
for _ in data1.__iter__():
num_iter += 1
assert num_iter == 24


def test_coco_case_exception():
try:
data1 = ds.CocoDataset("path_not_exist/", annotation_file=ANNOTATION_FILE, task="Detection")
@@ -25,6 +25,7 @@ def generator_1d():
for i in range(64):
yield (np.array([i]),)


class DatasetGenerator:
def __init__(self):
pass

@@ -241,11 +242,11 @@ def test_generator_8():

# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
- data1 = data1.map(input_columns="col0", output_columns="out0", operations=(lambda x: x * 3),
+ data1 = data1.map(operations=(lambda x: x * 3), input_columns="col0", output_columns="out0",
                   num_parallel_workers=2)
- data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x * 7, x)),
+ data1 = data1.map(operations=(lambda x: (x * 7, x)), input_columns="col1", output_columns=["out1", "out2"],
                   num_parallel_workers=2, column_order=["out0", "out1", "out2"])
- data1 = data1.map(input_columns="out2", output_columns="out2", operations=(lambda x: x + 1),
+ data1 = data1.map(operations=(lambda x: x + 1), input_columns="out2", output_columns="out2",
                   num_parallel_workers=2)

i = 0

@@ -268,9 +269,9 @@ def test_generator_9():
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["image", "label"])
data2 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
- data1 = data1.map(input_columns="label", operations=(lambda x: x * 3),
+ data1 = data1.map(operations=(lambda x: x * 3), input_columns="label",
                   num_parallel_workers=4)
- data2 = data2.map(input_columns="label", operations=(lambda x: x * 3),
+ data2 = data2.map(operations=(lambda x: x * 3), input_columns="label",
                   num_parallel_workers=4)

# Expected column order is not changed.

@@ -298,7 +299,7 @@ def test_generator_10():

# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
- data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)),
+ data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
                   column_order=['col0', 'out1', 'out2'], num_parallel_workers=2)

# Expected column order is |col0|out1|out2|

@@ -322,7 +323,7 @@ def test_generator_11():

# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
- data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)),
+ data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
                   column_order=['out1', 'out2'], num_parallel_workers=2)

# Expected column order is |out1|out2|

@@ -503,7 +504,7 @@ def test_generator_error_3():
with pytest.raises(ValueError) as info:
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
- data1 = data1.map(input_columns=["label"], output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)),
+ data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"], output_columns=["out1", "out2"],
                   num_parallel_workers=2)

for _ in data1:

@@ -515,7 +516,7 @@ def test_generator_error_4():
with pytest.raises(RuntimeError) as info:
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
- data1 = data1.map(input_columns=["label"], operations=(lambda x: (x, x * 5)),
+ data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"],
                   num_parallel_workers=2)

for _ in data1:

@@ -706,6 +707,7 @@ def test_generator_dataset_size_4():
num_rows = num_rows + 1
assert data_size == num_rows


def test_generator_dataset_size_5():
"""
Test get_dataset_size after create_dict_iterator
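The generator hunks above also cover the multi-output form of map, where output_columns and column_order accompany the reordered operations keyword. A minimal sketch in the same spirit as test_generator_10 (assuming only that MindSpore is installed; the two-column generator below is illustrative and not taken from these tests):

import numpy as np
import mindspore.dataset as ds

def gen():
    # three rows with two integer columns
    for i in range(3):
        yield (np.array([i]), np.array([i * 10]))

data = ds.GeneratorDataset(gen, column_names=["col0", "col1"])
# operations comes first; the split into two output columns and the final
# column order are declared explicitly
data = data.map(operations=(lambda x: (x, x * 5)), input_columns="col1",
                output_columns=["out1", "out2"],
                column_order=["col0", "out1", "out2"], num_parallel_workers=2)
for row in data.create_dict_iterator(num_epochs=1):
    print(row["col0"], row["out1"], row["out2"])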
@@ -103,8 +103,8 @@ def test_manifest_dataset_multi_label_onehot():
data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False)
expect_label = [[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 1]]]
one_hot_encode = data_trans.OneHot(3)
- data = data.map(input_columns=["label"], operations=one_hot_encode)
+ data = data.map(operations=one_hot_encode, input_columns=["label"])
- data = data.map(input_columns=["label"], operations=multi_label_hot)
+ data = data.map(operations=multi_label_hot, input_columns=["label"])
data = data.batch(2)
count = 0
for item in data.create_dict_iterator(num_epochs=1):
@@ -85,8 +85,8 @@ def test_case_0():

resize_op = vision.Resize((224, 224))

- data1 = data1.map(input_columns=["image"], operations=resize_op)
+ data1 = data1.map(operations=resize_op, input_columns=["image"])
- data1 = data1.map(input_columns=["target"], operations=resize_op)
+ data1 = data1.map(operations=resize_op, input_columns=["target"])
repeat_num = 4
data1 = data1.repeat(repeat_num)
batch_size = 2

@@ -103,7 +103,7 @@ def test_case_1():

resize_op = vision.Resize((224, 224))

- data1 = data1.map(input_columns=["image"], operations=resize_op)
+ data1 = data1.map(operations=resize_op, input_columns=["image"])
repeat_num = 4
data1 = data1.repeat(repeat_num)
batch_size = 2
@@ -36,7 +36,7 @@ def test_decode_op():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

# Serialize and Load dataset requires using vision.Decode instead of vision.Decode().
- data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)])
+ data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

@@ -57,7 +57,7 @@ def test_decode_op_tf_file_dataset():

# Decode with rgb format set to True
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.FILES)
- data1 = data1.map(input_columns=["image"], operations=vision.Decode(True))
+ data1 = data1.map(operations=vision.Decode(True), input_columns=["image"])

for item in data1.create_dict_iterator(num_epochs=1):
logger.info('decode == {}'.format(item['image']))
@@ -54,8 +54,8 @@ def test_case_1():
resize_op = vision.Resize((resize_height, resize_width))

# apply map operations on images
- data = data.map(input_columns=["image"], operations=decode_op)
+ data = data.map(operations=decode_op, input_columns=["image"])
- data = data.map(input_columns=["image"], operations=resize_op)
+ data = data.map(operations=resize_op, input_columns=["image"])

batch_size = 3
data = data.batch(batch_size, drop_remainder=True)

@@ -79,8 +79,8 @@ def test_case_2():
resize_op = vision.Resize((resize_height, resize_width))

# apply map operations on images
- data = data.map(input_columns=["image"], operations=decode_op)
+ data = data.map(operations=decode_op, input_columns=["image"])
- data = data.map(input_columns=["image"], operations=resize_op)
+ data = data.map(operations=resize_op, input_columns=["image"])

batch_size = 2
data = data.batch(batch_size, drop_remainder=True)

@@ -107,8 +107,8 @@ def test_case_3():
resize_op = vision.Resize((resize_height, resize_width))

# apply map operations on images
- data = data.map(input_columns=["image"], operations=decode_op)
+ data = data.map(operations=decode_op, input_columns=["image"])
- data = data.map(input_columns=["image"], operations=resize_op)
+ data = data.map(operations=resize_op, input_columns=["image"])

data = data.repeat(2)
@@ -24,8 +24,8 @@ import mindspore.dataset.transforms.c_transforms as ops
def compare(array):
data = ds.NumpySlicesDataset([array], column_names="x")
array = np.array(array)
- data = data.map(input_columns=["x"], output_columns=["x", "y"], column_order=["x", "y"],
- operations=ops.Duplicate())
+ data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"],
+ column_order=["x", "y"])
for d in data.create_dict_iterator(num_epochs=1):
np.testing.assert_array_equal(array, d["x"])
np.testing.assert_array_equal(array, d["y"])
@@ -79,7 +79,7 @@ def test_decode_op():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

# Serialize and Load dataset requires using vision.Decode instead of vision.Decode().
- data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)])
+ data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@@ -43,8 +43,7 @@ def test_equalize_py(plot=False):
F.Resize((224, 224)),
F.ToTensor()])

- ds_original = ds.map(input_columns="image",
- operations=transforms_original)
+ ds_original = ds.map(operations=transforms_original, input_columns="image")

ds_original = ds_original.batch(512)

@@ -64,8 +63,7 @@ def test_equalize_py(plot=False):
F.Equalize(),
F.ToTensor()])

- ds_equalize = ds.map(input_columns="image",
- operations=transforms_equalize)
+ ds_equalize = ds.map(operations=transforms_equalize, input_columns="image")

ds_equalize = ds_equalize.batch(512)

@@ -98,8 +96,7 @@ def test_equalize_c(plot=False):

transforms_original = [C.Decode(), C.Resize(size=[224, 224])]

- ds_original = ds.map(input_columns="image",
- operations=transforms_original)
+ ds_original = ds.map(operations=transforms_original, input_columns="image")

ds_original = ds_original.batch(512)

@@ -117,8 +114,7 @@ def test_equalize_c(plot=False):
transform_equalize = [C.Decode(), C.Resize(size=[224, 224]),
C.Equalize()]

- ds_equalize = ds.map(input_columns="image",
- operations=transform_equalize)
+ ds_equalize = ds.map(operations=transform_equalize, input_columns="image")

ds_equalize = ds_equalize.batch(512)

@@ -147,11 +143,9 @@ def test_equalize_py_c(plot=False):

# equalize Images in cpp
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
- ds = ds.map(input_columns=["image"],
- operations=[C.Decode(), C.Resize((224, 224))])
+ ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])

- ds_c_equalize = ds.map(input_columns="image",
- operations=C.Equalize())
+ ds_c_equalize = ds.map(operations=C.Equalize(), input_columns="image")

ds_c_equalize = ds_c_equalize.batch(512)

@@ -165,16 +159,14 @@ def test_equalize_py_c(plot=False):

# Equalize images in python
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
- ds = ds.map(input_columns=["image"],
- operations=[C.Decode(), C.Resize((224, 224))])
+ ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])

transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8),
F.ToPIL(),
F.Equalize(),
np.array])

- ds_p_equalize = ds.map(input_columns="image",
- operations=transforms_p_equalize)
+ ds_p_equalize = ds.map(operations=transforms_p_equalize, input_columns="image")

ds_p_equalize = ds_p_equalize.batch(512)

@@ -206,13 +198,10 @@ def test_equalize_one_channel():

try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
- ds = ds.map(input_columns=["image"],
- operations=[C.Decode(),
- C.Resize((224, 224)),
- lambda img: np.array(img[:, :, 0])])
+ ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)),
+ lambda img: np.array(img[:, :, 0])], input_columns=["image"])

- ds.map(input_columns="image",
- operations=c_op)
+ ds.map(operations=c_op, input_columns="image")

except RuntimeError as e:
logger.info("Got an exception in DE: {}".format(str(e)))

@@ -225,8 +214,7 @@ def test_equalize_mnist_c(plot=False):
"""
logger.info("Test Equalize C Op With MNIST Images")
ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)
- ds_equalize_c = ds.map(input_columns="image",
- operations=C.Equalize())
+ ds_equalize_c = ds.map(operations=C.Equalize(), input_columns="image")
ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False)

images = []

@@ -259,7 +247,7 @@ def test_equalize_md5_py():
F.Equalize(),
F.ToTensor()])

- data1 = data1.map(input_columns="image", operations=transforms)
+ data1 = data1.map(operations=transforms, input_columns="image")
# Compare with expected md5 from images
filename = "equalize_01_result.npz"
save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

@@ -279,7 +267,7 @@ def test_equalize_md5_c():
C.Equalize(),
F.ToTensor()]

- data = ds.map(input_columns="image", operations=transforms_equalize)
+ data = ds.map(operations=transforms_equalize, input_columns="image")
# Compare with expected md5 from images
filename = "equalize_01_result_c.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@@ -29,7 +29,7 @@ def test_exception_01():
logger.info("test_exception_01")
data = ds.TFRecordDataset(DATA_DIR, columns_list=["image"])
with pytest.raises(TypeError) as info:
- data.map(input_columns=["image"], operations=vision.Resize(100, 100))
+ data.map(operations=vision.Resize(100, 100), input_columns=["image"])
assert "Argument interpolation with value 100 is not of type (<enum 'Inter'>,)" in str(info.value)

@@ -45,8 +45,8 @@ def test_exception_02():

num_samples = 1
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
- data = data.map(input_columns=["image"], operations=vision.Decode())
+ data = data.map(operations=vision.Decode(), input_columns=["image"])
- data = data.map(input_columns=["image"], operations=vision.Resize((100, 100)))
+ data = data.map(operations=vision.Resize((100, 100)), input_columns=["image"])
# Confirm 1 sample in dataset
assert sum([1 for _ in data]) == 1
num_iters = 0
@@ -28,7 +28,7 @@ def test_fillop_basic():
data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill(3)

- data = data.map(input_columns=["col"], operations=fill_op)
+ data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array([3, 3, 3, 3], dtype=np.uint8)
for data_row in data:
np.testing.assert_array_equal(data_row[0], expected)

@@ -41,7 +41,7 @@ def test_fillop_down_type_cast():
data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill(-3)

- data = data.map(input_columns=["col"], operations=fill_op)
+ data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array([253, 253, 253, 253], dtype=np.uint8)
for data_row in data:
np.testing.assert_array_equal(data_row[0], expected)

@@ -54,7 +54,7 @@ def test_fillop_up_type_cast():
data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill(3)

- data = data.map(input_columns=["col"], operations=fill_op)
+ data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array([3., 3., 3., 3.], dtype=np.float)
for data_row in data:
np.testing.assert_array_equal(data_row[0], expected)

@@ -67,7 +67,7 @@ def test_fillop_string():
data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill("error")

- data = data.map(input_columns=["col"], operations=fill_op)
+ data = data.map(operations=fill_op, input_columns=["col"])
expected = np.array(['error', 'error'], dtype='S')
for data_row in data:
np.testing.assert_array_equal(data_row[0], expected)

@@ -79,7 +79,7 @@ def test_fillop_error_handling():

data = ds.GeneratorDataset(gen, column_names=["col"])
fill_op = data_trans.Fill("words")
- data = data.map(input_columns=["col"], operations=fill_op)
+ data = data.map(operations=fill_op, input_columns=["col"])

with pytest.raises(RuntimeError) as error_info:
for _ in data:
@@ -30,7 +30,7 @@ def test_diff_predicate_func():
cde.Resize([64, 64])
]
dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image", "label"], shuffle=False)
- dataset = dataset.map(input_columns=["image"], operations=transforms, num_parallel_workers=1)
+ dataset = dataset.map(operations=transforms, input_columns=["image"], num_parallel_workers=1)
dataset = dataset.filter(input_columns=["image", "label"], predicate=predicate_func, num_parallel_workers=4)

num_iter = 0

@@ -261,8 +261,8 @@ def func_map_part(data_col1):
# test with map
def test_filter_by_generator_with_map_all_col():
dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"])
- dataset_map = dataset.map(input_columns=["col1"], output_columns=["col1"], operations=func_map_part)
+ dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["col1"])
- # dataset_map = dataset.map( operations=func_map_part)
+ # dataset_map = dataset.map(operations=func_map_part)
dataset_f = dataset_map.filter(input_columns=["col1"], predicate=filter_func_map_part, num_parallel_workers=1)
num_iter = 0
ret_data = []

@@ -277,7 +277,7 @@ def test_filter_by_generator_with_map_all_col():
# test with map
def test_filter_by_generator_with_map_part_col():
dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"])
- dataset_map = dataset.map(input_columns=["col1"], output_columns=["out1"], operations=func_map_part)
+ dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"])

dataset_f = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_map, num_parallel_workers=4)
num_iter = 0

@@ -328,7 +328,7 @@ def filter_func_input_column3(col1):
# test with input_columns
def test_filter_by_generator_with_input_column():
dataset = ds.GeneratorDataset(generator_mc(64), ["col1", "col2"])
- dataset_map = dataset.map(input_columns=["col1"], output_columns=["out1"], operations=func_map_part)
+ dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"])
dataset_f1 = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_input_column1,
num_parallel_workers=4)
dataset_f2 = dataset_f1.filter(input_columns=["out1"], predicate=filter_func_input_column2, num_parallel_workers=4)

@@ -382,7 +382,7 @@ def test_filter_by_generator_Partial1():
dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"])
dataset_zip = ds.zip((dataset1, dataset2))
dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2)
- dataset_map = dataset_f1.map(input_columns=["col1"], output_columns=["out1"], operations=lambda x1: x1 + 400)
+ dataset_map = dataset_f1.map(operations=lambda x1: x1 + 400, input_columns=["col1"], output_columns=["out1"])
ret = []
for item in dataset_map.create_dict_iterator(num_epochs=1):
ret.append(item["out1"])

@@ -399,8 +399,8 @@ def test_filter_by_generator_Partial2():
dataset2f = dataset2.filter(input_columns=["col3"], predicate=lambda x: x not in [203, 207, 209],
num_parallel_workers=2)
dataset_zip = ds.zip((dataset1f, dataset2f))
- dataset_map = dataset_zip.map(input_columns=["col1", "col3"], output_columns=["out1", "out3"],
- operations=lambda x1, x3: (x1 + 400, x3 + 500))
+ dataset_map = dataset_zip.map(operations=lambda x1, x3: (x1 + 400, x3 + 500), input_columns=["col1", "col3"],
+ output_columns=["out1", "out3"])
ret1 = []
ret3 = []
for item in dataset_map.create_dict_iterator(num_epochs=1):

@@ -484,6 +484,7 @@ def test_filter_by_generator_with_map_all_sort():
assert ret_data[0]["col1"] == 0
assert ret_data[9]["col6"] == 509


def test_filter_by_generator_get_dataset_size():
dataset = ds.GeneratorDataset(generator_1d, ["data"])
dataset = dataset.filter(predicate=filter_func_shuffle_after, num_parallel_workers=4)
@@ -41,7 +41,7 @@ def test_five_crop_op(plot=False):
vision.ToTensor(),
]
transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
- data1 = data1.map(input_columns=["image"], operations=transform_1)
+ data1 = data1.map(operations=transform_1, input_columns=["image"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

@@ -51,7 +51,7 @@ def test_five_crop_op(plot=False):
lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images
]
transform_2 = mindspore.dataset.transforms.py_transforms.Compose(transforms_2)
- data2 = data2.map(input_columns=["image"], operations=transform_2)
+ data2 = data2.map(operations=transform_2, input_columns=["image"])

num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):

@@ -85,7 +85,7 @@ def test_five_crop_error_msg():
vision.ToTensor()
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
- data = data.map(input_columns=["image"], operations=transform)
+ data = data.map(operations=transform, input_columns=["image"])

with pytest.raises(RuntimeError) as info:
for _ in data:

@@ -110,7 +110,7 @@ def test_five_crop_md5():
lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
- data = data.map(input_columns=["image"], operations=transform)
+ data = data.map(operations=transform, input_columns=["image"])
# Compare with expected md5 from images
filename = "five_crop_01_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@@ -26,7 +26,7 @@ def test_demo_basic_from_dataset():
vocab = text.Vocab.from_dataset(data, "text", freq_range=None, top_k=None,
special_tokens=["<pad>", "<unk>"],
special_first=True)
- data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "<unk>"))
+ data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
res = []
for d in data.create_dict_iterator(num_epochs=1):
res.append(d["text"].item())

@@ -36,10 +36,10 @@ def test_demo_basic_from_dataset():
def test_demo_basic_from_dataset_with_tokenizer():
""" this is a tutorial on how from_dataset should be used in a normal use case with tokenizer"""
data = ds.TextFileDataset("../data/dataset/testTokenizerData/1.txt", shuffle=False)
- data = data.map(input_columns=["text"], operations=text.UnicodeCharTokenizer())
+ data = data.map(operations=text.UnicodeCharTokenizer(), input_columns=["text"])
vocab = text.Vocab.from_dataset(data, None, freq_range=None, top_k=None, special_tokens=["<pad>", "<unk>"],
special_first=True)
- data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "<unk>"))
+ data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
res = []
for d in data.create_dict_iterator(num_epochs=1):
res.append(list(d["text"]))

@@ -60,7 +60,7 @@ def test_from_dataset():
corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"])
vocab = text.Vocab.from_dataset(corpus_dataset, None, freq_range, top_k, special_tokens=["<pad>", "<unk>"],
special_first=True)
- corpus_dataset = corpus_dataset.map(input_columns="text", operations=text.Lookup(vocab, "<unk>"))
+ corpus_dataset = corpus_dataset.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text")
res = []
for d in corpus_dataset.create_dict_iterator(num_epochs=1):
res.append(list(d["text"]))

@@ -108,7 +108,7 @@ def test_from_dataset_special_token():
corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"])
vocab = text.Vocab.from_dataset(corpus_dataset, None, None, top_k, special_tokens, special_first)
data = ds.GeneratorDataset(gen_input(texts), column_names=["text"])
- data = data.map(input_columns="text", operations=text.Lookup(vocab, "<unk>"))
+ data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text")
res = []
for d in data.create_dict_iterator(num_epochs=1):
res.append(d["text"].item())
@@ -95,16 +95,16 @@ def test_get_column_name_manifest():
def test_get_column_name_map():
data = ds.Cifar10Dataset(CIFAR10_DIR)
center_crop_op = vision.CenterCrop(10)
- data = data.map(input_columns=["image"], operations=center_crop_op)
+ data = data.map(operations=center_crop_op, input_columns=["image"])
assert data.get_col_names() == ["image", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR)
- data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["image"])
+ data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["image"])
assert data.get_col_names() == ["image", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR)
- data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1"])
+ data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1"])
assert data.get_col_names() == ["col1", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR)
- data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1", "col2"],
- column_order=["col2", "col1"])
+ data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1", "col2"],
+ column_order=["col2", "col1"])
assert data.get_col_names() == ["col2", "col1"]
@@ -42,8 +42,7 @@ def test_invert_py(plot=False):
F.Resize((224, 224)),
F.ToTensor()])

- ds_original = ds.map(input_columns="image",
- operations=transforms_original)
+ ds_original = ds.map(operations=transforms_original, input_columns="image")

ds_original = ds_original.batch(512)

@@ -63,8 +62,7 @@ def test_invert_py(plot=False):
F.Invert(),
F.ToTensor()])

- ds_invert = ds.map(input_columns="image",
- operations=transforms_invert)
+ ds_invert = ds.map(operations=transforms_invert, input_columns="image")

ds_invert = ds_invert.batch(512)

@@ -97,8 +95,7 @@ def test_invert_c(plot=False):

transforms_original = [C.Decode(), C.Resize(size=[224, 224])]

- ds_original = ds.map(input_columns="image",
- operations=transforms_original)
+ ds_original = ds.map(operations=transforms_original, input_columns="image")

ds_original = ds_original.batch(512)

@@ -116,8 +113,7 @@ def test_invert_c(plot=False):
transform_invert = [C.Decode(), C.Resize(size=[224, 224]),
C.Invert()]

- ds_invert = ds.map(input_columns="image",
- operations=transform_invert)
+ ds_invert = ds.map(operations=transform_invert, input_columns="image")

ds_invert = ds_invert.batch(512)

@@ -146,11 +142,9 @@ def test_invert_py_c(plot=False):

# Invert Images in cpp
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
- ds = ds.map(input_columns=["image"],
- operations=[C.Decode(), C.Resize((224, 224))])
+ ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])

- ds_c_invert = ds.map(input_columns="image",
- operations=C.Invert())
+ ds_c_invert = ds.map(operations=C.Invert(), input_columns="image")

ds_c_invert = ds_c_invert.batch(512)

@@ -164,16 +158,14 @@ def test_invert_py_c(plot=False):

# invert images in python
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
- ds = ds.map(input_columns=["image"],
- operations=[C.Decode(), C.Resize((224, 224))])
+ ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])

transforms_p_invert = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8),
F.ToPIL(),
F.Invert(),
np.array])

- ds_p_invert = ds.map(input_columns="image",
- operations=transforms_p_invert)
+ ds_p_invert = ds.map(operations=transforms_p_invert, input_columns="image")

ds_p_invert = ds_p_invert.batch(512)

@@ -205,13 +197,10 @@ def test_invert_one_channel():

try:
ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
- ds = ds.map(input_columns=["image"],
- operations=[C.Decode(),
- C.Resize((224, 224)),
- lambda img: np.array(img[:, :, 0])])
+ ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)),
+ lambda img: np.array(img[:, :, 0])], input_columns=["image"])

- ds.map(input_columns="image",
- operations=c_op)
+ ds.map(operations=c_op, input_columns="image")

except RuntimeError as e:
logger.info("Got an exception in DE: {}".format(str(e)))

@@ -231,7 +220,7 @@ def test_invert_md5_py():
F.Invert(),
F.ToTensor()])

- data = ds.map(input_columns="image", operations=transforms_invert)
+ data = ds.map(operations=transforms_invert, input_columns="image")
# Compare with expected md5 from images
filename = "invert_01_result_py.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

@@ -251,7 +240,7 @@ def test_invert_md5_c():
C.Invert(),
F.ToTensor()]

- data = ds.map(input_columns="image", operations=transforms_invert)
+ data = ds.map(operations=transforms_invert, input_columns="image")
# Compare with expected md5 from images
filename = "invert_01_result_c.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@@ -51,15 +51,15 @@ def test_linear_transformation_op(plot=False):

# First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
- data1 = data1.map(input_columns=["image"], operations=transform)
+ data1 = data1.map(operations=transform, input_columns=["image"])
# Note: if transformation matrix is diagonal matrix with all 1 in diagonal,
# the output matrix in expected to be the same as the input matrix.
- data1 = data1.map(input_columns=["image"],
- operations=py_vision.LinearTransformation(transformation_matrix, mean_vector))
+ data1 = data1.map(operations=py_vision.LinearTransformation(transformation_matrix, mean_vector),
+ input_columns=["image"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
- data2 = data2.map(input_columns=["image"], operations=transform)
+ data2 = data2.map(operations=transform, input_columns=["image"])

image_transformed = []
image = []

@@ -98,7 +98,7 @@ def test_linear_transformation_md5():
py_vision.LinearTransformation(transformation_matrix, mean_vector)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
- data1 = data1.map(input_columns=["image"], operations=transform)
+ data1 = data1.map(operations=transform, input_columns=["image"])

# Compare with expected md5 from images
filename = "linear_transformation_01_result.npz"

@@ -128,7 +128,7 @@ def test_linear_transformation_exception_01():
py_vision.LinearTransformation(None, mean_vector)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
- data1 = data1.map(input_columns=["image"], operations=transform)
+ data1 = data1.map(operations=transform, input_columns=["image"])
except TypeError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "Argument transformation_matrix with value None is not of type (<class 'numpy.ndarray'>,)" in str(e)

@@ -157,7 +157,7 @@ def test_linear_transformation_exception_02():
py_vision.LinearTransformation(transformation_matrix, None)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
- data1 = data1.map(input_columns=["image"], operations=transform)
+ data1 = data1.map(operations=transform, input_columns=["image"])
except TypeError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "Argument mean_vector with value None is not of type (<class 'numpy.ndarray'>,)" in str(e)

@@ -187,7 +187,7 @@ def test_linear_transformation_exception_03():
py_vision.LinearTransformation(transformation_matrix, mean_vector)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
- data1 = data1.map(input_columns=["image"], operations=transform)
+ data1 = data1.map(operations=transform, input_columns=["image"])
except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "square matrix" in str(e)

@@ -217,7 +217,7 @@ def test_linear_transformation_exception_04():
py_vision.LinearTransformation(transformation_matrix, mean_vector)
]
transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
- data1 = data1.map(input_columns=["image"], operations=transform)
+ data1 = data1.map(operations=transform, input_columns=["image"])
except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "should match" in str(e)
@@ -73,6 +73,7 @@ def add_and_remove_cv_file():
os.remove("{}".format(x))
os.remove("{}.db".format(x))


@pytest.fixture
def add_and_remove_nlp_file():
"""add/remove nlp file"""

@@ -265,6 +266,7 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file):
assert partitions(5) == 2
assert partitions(9) == 2


def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]

@@ -287,6 +289,7 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file):
assert partitions(5) == 1
assert partitions(9) == 1


def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]

@@ -309,6 +312,7 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file):
assert partitions(5) == 2
assert partitions(9) == 2


def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]

@@ -354,11 +358,11 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c
logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1
if num_iter <= 4:
epoch1.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 8:
epoch2.append(item["file_name"]) # save epoch 2 list
else:
epoch3.append(item["file_name"]) # save epoch 3 list
assert num_iter == 12
assert len(epoch1) == 4
assert len(epoch2) == 4

@@ -376,9 +380,9 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc
columns_list = ["data", "file_name", "label"]
num_readers = 4
num_shards = 3
epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result
[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 1 result
[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # svae partition 2 result

for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,

@@ -392,7 +396,7 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
# total 3 partition, 4 result per epoch, total 12 result
epoch_result[partition_id][int(num_iter / 4)][num_iter % 4] = item["file_name"] # save epoch result
num_iter += 1
assert num_iter == 12
assert epoch_result[partition_id][0] not in (epoch_result[partition_id][1], epoch_result[partition_id][2])

@@ -425,11 +429,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1
if num_iter <= 10:
epoch1.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 20:
epoch2.append(item["file_name"]) # save epoch 2 list
else:
epoch3.append(item["file_name"]) # save epoch 3 list
assert num_iter == 30
assert len(epoch1) == 10
assert len(epoch2) == 10

@@ -451,11 +455,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1
if num_iter <= 10:
epoch1_new_dataset.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 20:
epoch2_new_dataset.append(item["file_name"]) # save epoch 2 list
else:
epoch3_new_dataset.append(item["file_name"]) # save epoch 3 list
assert num_iter == 30
assert len(epoch1_new_dataset) == 10
assert len(epoch2_new_dataset) == 10

@@ -482,11 +486,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1
if num_iter <= 10:
epoch1_new_dataset2.append(item["file_name"]) # save epoch 1 list
elif num_iter <= 20:
epoch2_new_dataset2.append(item["file_name"]) # save epoch 2 list
else:
epoch3_new_dataset2.append(item["file_name"]) # save epoch 3 list
assert num_iter == 30
assert len(epoch1_new_dataset2) == 10
assert len(epoch2_new_dataset2) == 10

@@ -532,8 +536,8 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file):
data_set = data_set.map(
input_columns=["data"], operations=decode_op, num_parallel_workers=2)
resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR)
- data_set = data_set.map(input_columns="data",
- operations=resize_op, num_parallel_workers=2)
+ data_set = data_set.map(operations=resize_op, input_columns="data",
+ num_parallel_workers=2)
data_set = data_set.batch(2)
data_set = data_set.repeat(2)
num_iter = 0

@@ -563,8 +567,8 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file):
data_set = data_set.map(
input_columns=["data"], operations=decode_op, num_parallel_workers=2)
resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR)
- data_set = data_set.map(input_columns="data",
- operations=resize_op, num_parallel_workers=2)
+ data_set = data_set.map(operations=resize_op, input_columns="data",
+ num_parallel_workers=2)
data_set = data_set.batch(32, drop_remainder=True)
num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):

@@ -707,6 +711,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
if os.path.exists("{}.db".format(CV2_FILE_NAME)):
os.remove("{}.db".format(CV2_FILE_NAME))


def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]

@@ -757,6 +762,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
os.remove("{}".format(x))
os.remove("{}.db".format(x))


def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
columns_list = ["data", "file_name", "label"]

@@ -778,6 +784,7 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
num_iter += 1
assert num_iter == 10


def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file):
"""tutorial for nlp minderdataset."""
num_readers = 4

@@ -1522,6 +1529,7 @@ def test_write_with_multi_bytes_and_MindDataset():
os.remove("{}".format(mindrecord_file_name))
os.remove("{}.db".format(mindrecord_file_name))


def test_write_with_multi_array_and_MindDataset():
mindrecord_file_name = "test.mindrecord"
try:

@@ -1741,9 +1749,9 @@ def test_numpy_generic():
for idx in range(10):
row = {}
row['label1'] = np.int32(idx)
- row['label2'] = np.int64(idx*10)
+ row['label2'] = np.int64(idx * 10)
- row['label3'] = np.float32(idx+0.12345)
+ row['label3'] = np.float32(idx + 0.12345)
- row['label4'] = np.float64(idx+0.12345789)
+ row['label4'] = np.float64(idx + 0.12345789)
data.append(row)
writer.add_schema(cv_schema_json, "img_schema")
writer.write_raw_data(data)

@@ -1923,6 +1931,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset(
os.remove("{}".format(mindrecord_file_name))
os.remove("{}.db".format(mindrecord_file_name))


if __name__ == '__main__':
test_nlp_compress_data(add_and_remove_nlp_compress_file)
test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file)
@@ -37,9 +37,9 @@ def test_one_hot_op():
num_classes = 2
epsilon_para = 0.1

- transforms = [f.OneHotOp(num_classes=num_classes, smoothing_rate=epsilon_para),]
+ transforms = [f.OneHotOp(num_classes=num_classes, smoothing_rate=epsilon_para)]
transform_label = f.Compose(transforms)
- dataset = dataset.map(input_columns=["label"], operations=transform_label)
+ dataset = dataset.map(operations=transform_label, input_columns=["label"])

golden_label = np.ones(num_classes) * epsilon_para / num_classes
golden_label[1] = 1 - epsilon_para / num_classes

@@ -69,9 +69,9 @@ def test_mix_up_single():
resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR)
one_hot_encode = c.OneHot(num_classes) # num_classes is input argument

- ds1 = ds1.map(input_columns=["image"], operations=decode_op)
+ ds1 = ds1.map(operations=decode_op, input_columns=["image"])
- ds1 = ds1.map(input_columns=["image"], operations=resize_op)
+ ds1 = ds1.map(operations=resize_op, input_columns=["image"])
- ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode)
+ ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"])

# apply batch operations
batch_size = 3

@@ -81,7 +81,7 @@ def test_mix_up_single():
alpha = 0.2
transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=True)
]
- ds1 = ds1.map(input_columns=["image", "label"], operations=transforms)
+ ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])

for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)):
image1 = data1["image"]

@@ -118,9 +118,9 @@ def test_mix_up_multi():
resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR)
one_hot_encode = c.OneHot(num_classes) # num_classes is input argument

- ds1 = ds1.map(input_columns=["image"], operations=decode_op)
+ ds1 = ds1.map(operations=decode_op, input_columns=["image"])
- ds1 = ds1.map(input_columns=["image"], operations=resize_op)
+ ds1 = ds1.map(operations=resize_op, input_columns=["image"])
- ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode)
+ ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"])

# apply batch operations
batch_size = 3

@@ -130,7 +130,7 @@ def test_mix_up_multi():
alpha = 0.2
transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False)
]
- ds1 = ds1.map(input_columns=["image", "label"], operations=transforms)
+ ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])
num_iter = 0
batch1_image1 = 0
for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)):
@@ -30,6 +30,7 @@ DATA_DIR3 = "../data/dataset/testCelebAData/"

 GENERATE_GOLDEN = False

+
 def test_mixup_batch_success1(plot=False):
     """
     Test MixUpBatch op with specified alpha parameter
@@ -51,10 +52,10 @@ def test_mixup_batch_success1(plot=False):
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     mixup_batch_op = vision.MixUpBatch(2)
     data1 = data1.batch(5, drop_remainder=True)
-    data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
+    data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])

     images_mixup = None
     for idx, (image, _) in enumerate(data1):
@@ -81,7 +82,7 @@ def test_mixup_batch_success2(plot=False):
     # Original Images
     ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
     decode_op = vision.Decode()
-    ds_original = ds_original.map(input_columns=["image"], operations=[decode_op])
+    ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
     ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)

     images_original = None
@@ -95,14 +96,14 @@ def test_mixup_batch_success2(plot=False):
     data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)

     decode_op = vision.Decode()
-    data1 = data1.map(input_columns=["image"], operations=[decode_op])
+    data1 = data1.map(operations=[decode_op], input_columns=["image"])

     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])

     mixup_batch_op = vision.MixUpBatch(2.0)
     data1 = data1.batch(4, pad_info={}, drop_remainder=True)
-    data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
+    data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])

     images_mixup = None
     for idx, (image, _) in enumerate(data1):
@@ -142,10 +143,10 @@ def test_mixup_batch_success3(plot=False):
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     mixup_batch_op = vision.MixUpBatch()
     data1 = data1.batch(5, drop_remainder=True)
-    data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
+    data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])

     images_mixup = np.array([])
     for idx, (image, _) in enumerate(data1):
@@ -173,7 +174,7 @@ def test_mixup_batch_success4(plot=False):
     # Original Images
     ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
     decode_op = vision.Decode()
-    ds_original = ds_original.map(input_columns=["image"], operations=[decode_op])
+    ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
     ds_original = ds_original.batch(2, drop_remainder=True)

     images_original = None
@@ -187,14 +188,14 @@ def test_mixup_batch_success4(plot=False):
     data1 = ds.CelebADataset(DATA_DIR3, shuffle=False)

     decode_op = vision.Decode()
-    data1 = data1.map(input_columns=["image"], operations=[decode_op])
+    data1 = data1.map(operations=[decode_op], input_columns=["image"])

     one_hot_op = data_trans.OneHot(num_classes=100)
-    data1 = data1.map(input_columns=["attr"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["attr"])

     mixup_batch_op = vision.MixUpBatch()
     data1 = data1.batch(2, drop_remainder=True)
-    data1 = data1.map(input_columns=["image", "attr"], operations=mixup_batch_op)
+    data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "attr"])

     images_mixup = np.array([])
     for idx, (image, _) in enumerate(data1):
@@ -224,10 +225,10 @@ def test_mixup_batch_md5():
     data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

     one_hot_op = data_trans.OneHot(num_classes=10)
-    data = data.map(input_columns=["label"], operations=one_hot_op)
+    data = data.map(operations=one_hot_op, input_columns=["label"])
     mixup_batch_op = vision.MixUpBatch()
     data = data.batch(5, drop_remainder=True)
-    data = data.map(input_columns=["image", "label"], operations=mixup_batch_op)
+    data = data.map(operations=mixup_batch_op, input_columns=["image", "label"])

     filename = "mixup_batch_c_result.npz"
     save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
@@ -259,10 +260,10 @@ def test_mixup_batch_fail1():
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     mixup_batch_op = vision.MixUpBatch(0.1)
     with pytest.raises(RuntimeError) as error:
-        data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
+        data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
         for idx, (image, _) in enumerate(data1):
             if idx == 0:
                 images_mixup = image
@@ -294,7 +295,7 @@ def test_mixup_batch_fail2():
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     with pytest.raises(ValueError) as error:
         vision.MixUpBatch(-1)
     error_message = "Input is not within the required interval"
@@ -322,10 +323,10 @@ def test_mixup_batch_fail3():
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     mixup_batch_op = vision.MixUpBatch()
     data1 = data1.batch(5, drop_remainder=True)
-    data1 = data1.map(input_columns=["image"], operations=mixup_batch_op)
+    data1 = data1.map(operations=mixup_batch_op, input_columns=["image"])

     with pytest.raises(RuntimeError) as error:
         images_mixup = np.array([])
@@ -337,6 +338,7 @@ def test_mixup_batch_fail3():
     error_message = "Both images and labels columns are required"
     assert error_message in str(error.value)

+
 def test_mixup_batch_fail4():
     """
     Test MixUpBatch Fail 2
@@ -359,7 +361,7 @@ def test_mixup_batch_fail4():
     data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

     one_hot_op = data_trans.OneHot(num_classes=10)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
     with pytest.raises(ValueError) as error:
         vision.MixUpBatch(0.0)
     error_message = "Input is not within the required interval"
@@ -389,7 +391,7 @@ def test_mixup_batch_fail5():

     mixup_batch_op = vision.MixUpBatch()
     data1 = data1.batch(5, drop_remainder=True)
-    data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
+    data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])

     with pytest.raises(RuntimeError) as error:
         images_mixup = np.array([])
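Note: every hunk in the MixUpBatch tests above makes the same mechanical change: the transform is passed through operations= first, then input_columns=. A minimal, self-contained sketch of that pattern follows; the synthetic NumPy source, the NumpySlicesDataset stand-in, and the module aliases are illustrative assumptions, not taken from the PR's test data or imports.

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as data_trans
import mindspore.dataset.vision.c_transforms as vision

# Synthetic stand-in for the CIFAR-10 test records used by these tests.
images = np.random.randint(0, 255, (10, 32, 32, 3), dtype=np.uint8)
labels = np.random.randint(0, 10, (10,)).astype(np.int32)
data = ds.NumpySlicesDataset({"image": images, "label": labels}, shuffle=False)

one_hot_op = data_trans.OneHot(num_classes=10)
data = data.map(operations=one_hot_op, input_columns=["label"])   # operations first, then input_columns
mixup_batch_op = vision.MixUpBatch(2)
data = data.batch(5, drop_remainder=True)                         # MixUpBatch expects batched input
data = data.map(operations=mixup_batch_op, input_columns=["image", "label"])

for row in data.create_dict_iterator(num_epochs=1):
    print(row["image"].shape, row["label"].shape)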
@@ -39,7 +39,7 @@ def test_multiple_ngrams():
             yield (np.array(line.split(" "), dtype='S'),)

     dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"])
-    dataset = dataset.map(input_columns=["text"], operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " "))
+    dataset = dataset.map(operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " "), input_columns="text")

     i = 0
     for data in dataset.create_dict_iterator(num_epochs=1):
@@ -61,7 +61,7 @@ def test_simple_ngram():
             yield (np.array(line.split(" "), dtype='S'),)

     dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"])
-    dataset = dataset.map(input_columns=["text"], operations=text.Ngram(3, separator=" "))
+    dataset = dataset.map(operations=text.Ngram(3, separator=" "), input_columns="text")

     i = 0
     for data in dataset.create_dict_iterator(num_epochs=1):
@@ -78,7 +78,7 @@ def test_corner_cases():

         try:
             dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"])
-            dataset = dataset.map(input_columns=["text"], operations=text.Ngram(n, l_pad, r_pad, separator=sep))
+            dataset = dataset.map(operations=text.Ngram(n, l_pad, r_pad, separator=sep), input_columns=["text"])
             for data in dataset.create_dict_iterator(num_epochs=1):
                 return [d.decode("utf8") for d in data["text"]]
         except (ValueError, TypeError) as e:
@@ -32,10 +32,10 @@ def test_on_tokenized_line():
         for line in f:
             word = line.split(',')[0]
             jieba_op.add_word(word)
-    data = data.map(input_columns=["text"], operations=jieba_op)
+    data = data.map(operations=jieba_op, input_columns=["text"])
     vocab = text.Vocab.from_file(VOCAB_FILE, ",", special_tokens=["<pad>", "<unk>"])
     lookup = text.Lookup(vocab, "<unk>")
-    data = data.map(input_columns=["text"], operations=lookup)
+    data = data.map(operations=lookup, input_columns=["text"])
     res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14],
                     [11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32)
     for i, d in enumerate(data.create_dict_iterator(num_epochs=1)):
@@ -50,10 +50,10 @@ def test_on_tokenized_line_with_no_special_tokens():
             word = line.split(',')[0]
             jieba_op.add_word(word)

-    data = data.map(input_columns=["text"], operations=jieba_op)
+    data = data.map(operations=jieba_op, input_columns=["text"])
     vocab = text.Vocab.from_file(VOCAB_FILE, ",")
     lookup = text.Lookup(vocab, "not")
-    data = data.map(input_columns=["text"], operations=lookup)
+    data = data.map(operations=lookup, input_columns=["text"])
     res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12],
                     [9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32)
     for i, d in enumerate(data.create_dict_iterator(num_epochs=1)):
@@ -51,8 +51,8 @@ def util_test_normalize(mean, std, op_type):
         normalize_op = c_vision.Normalize(mean, std)
         # Generate dataset
         data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-        data = data.map(input_columns=["image"], operations=decode_op)
-        data = data.map(input_columns=["image"], operations=normalize_op)
+        data = data.map(operations=decode_op, input_columns=["image"])
+        data = data.map(operations=normalize_op, input_columns=["image"])
     elif op_type == "python":
         # define map operations
         transforms = [
@@ -63,7 +63,7 @@ def util_test_normalize(mean, std, op_type):
         transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
         # Generate dataset
         data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-        data = data.map(input_columns=["image"], operations=transform)
+        data = data.map(operations=transform, input_columns=["image"])
     else:
         raise ValueError("Wrong parameter value")
     return data
@@ -82,7 +82,7 @@ def util_test_normalize_grayscale(num_output_channels, mean, std):
     transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
     # Generate dataset
     data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data = data.map(input_columns=["image"], operations=transform)
+    data = data.map(operations=transform, input_columns=["image"])
     return data


@@ -99,12 +99,12 @@ def test_normalize_op_c(plot=False):

     # First dataset
     data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data1 = data1.map(input_columns=["image"], operations=decode_op)
-    data1 = data1.map(input_columns=["image"], operations=normalize_op)
+    data1 = data1.map(operations=decode_op, input_columns=["image"])
+    data1 = data1.map(operations=normalize_op, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data2 = data2.map(input_columns=["image"], operations=decode_op)
+    data2 = data2.map(operations=decode_op, input_columns=["image"])

     num_iter = 0
     for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
@@ -136,12 +136,12 @@ def test_normalize_op_py(plot=False):

     # First dataset
     data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data1 = data1.map(input_columns=["image"], operations=transform)
-    data1 = data1.map(input_columns=["image"], operations=normalize_op)
+    data1 = data1.map(operations=transform, input_columns=["image"])
+    data1 = data1.map(operations=normalize_op, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data2 = data2.map(input_columns=["image"], operations=transform)
+    data2 = data2.map(operations=transform, input_columns=["image"])

     num_iter = 0
     for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
@@ -169,7 +169,7 @@ def test_decode_op():
     decode_op = c_vision.Decode()

     # apply map operations on images
-    data1 = data1.map(input_columns=["image"], operations=decode_op)
+    data1 = data1.map(operations=decode_op, input_columns=["image"])

     num_iter = 0
     for item in data1.create_dict_iterator(num_epochs=1):
@@ -192,7 +192,7 @@ def test_decode_normalize_op():
     normalize_op = c_vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0])

     # apply map operations on images
-    data1 = data1.map(input_columns=["image"], operations=[decode_op, normalize_op])
+    data1 = data1.map(operations=[decode_op, normalize_op], input_columns=["image"])

     num_iter = 0
     for item in data1.create_dict_iterator(num_epochs=1):
@@ -47,13 +47,14 @@ def test_one_hot():
     # First dataset
     data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
     one_hot_op = data_trans.OneHot(num_classes=depth)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_op, column_order=["label"])
+    data1 = data1.map(operations=one_hot_op, input_columns=["label"], column_order=["label"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["label"], shuffle=False)

     assert dataset_equal_with_function(data1, data2, 0, one_hot, depth)

+
 def test_one_hot_post_aug():
     """
     Test One Hot Encoding after Multiple Data Augmentation Operators
@@ -72,14 +73,14 @@ def test_one_hot_post_aug():
     resize_op = c_vision.Resize((resize_height, resize_width))

     # Apply map operations on images
-    data1 = data1.map(input_columns=["image"], operations=decode_op)
-    data1 = data1.map(input_columns=["image"], operations=rescale_op)
-    data1 = data1.map(input_columns=["image"], operations=resize_op)
+    data1 = data1.map(operations=decode_op, input_columns=["image"])
+    data1 = data1.map(operations=rescale_op, input_columns=["image"])
+    data1 = data1.map(operations=resize_op, input_columns=["image"])

     # Apply one-hot encoding on labels
     depth = 4
     one_hot_encode = data_trans.OneHot(depth)
-    data1 = data1.map(input_columns=["label"], operations=one_hot_encode)
+    data1 = data1.map(operations=one_hot_encode, input_columns=["label"])

     # Apply datasets ops
     buffer_size = 100
@@ -16,6 +16,7 @@ import numpy as np

 import mindspore.dataset as ds

+
 # tests the construction of multiple ops from a single dataset.
 # map dataset with columns order arguments should produce a ProjectOp over MapOp
 # This test does not utilize the compiling passes at this time.
@@ -27,12 +28,13 @@ def test_map_reorder0():
     # Generator -> Map
     data0 = ds.GeneratorDataset(generator_mc, ["col0", "col1"])

-    data0 = data0.map(input_columns="col0", output_columns="out", column_order=["col1", "out"],
-                      operations=(lambda x: x))
+    data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out",
+                      column_order=["col1", "out"])

     for item in data0.create_tuple_iterator(num_epochs=1):  # each data is a dictionary
         assert item == [np.array(1), np.array(0)]

+
 # tests the construction of multiple ops from a single dataset.
 # map dataset with columns order arguments should produce a ProjectOp over MapOp
 # This test does not utilize the compiling passes at this time.
@@ -43,20 +45,20 @@ def test_map_reorder1():

     # Three map and zip
     data0 = ds.GeneratorDataset(generator_mc, ["a0", "a1", "a2"])
-    data0 = data0.map(input_columns="a0", column_order=["a2", "a1", "a0"], operations=(lambda x: x))
+    data0 = data0.map(operations=(lambda x: x), input_columns="a0", column_order=["a2", "a1", "a0"])
     data1 = ds.GeneratorDataset(generator_mc, ["b0", "b1", "b2"])
-    data1 = data1.map(input_columns="b0", column_order=["b1", "b2", "b0"], operations=(lambda x: x))
+    data1 = data1.map(operations=(lambda x: x), input_columns="b0", column_order=["b1", "b2", "b0"])
     data2 = ds.zip((data0, data1))
-    data2 = data2.map(input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"], operations=(lambda x: x))
+    data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"])

     for item in data2.create_tuple_iterator(num_epochs=1):
         assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)]


 # tests the construction of multiple ops from a single dataset.
 # TFRecordDataset with global shuffle should produce a ShuffleOp over TfReaderOp.
 # This test does not utilize the compiling passes at this time.
 def test_shuffle():

     FILES = ["../data/dataset/testTFTestAllTypes/test.data"]
     SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
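Note: the comments in the hunks above state that a map call carrying a column_order argument is expected to produce a ProjectOp over the MapOp. A short sketch of what that call looks like with the reordered keywords follows; the two-column generator below is a hypothetical stand-in for the test file's generator_mc, which is not shown in this diff.

import numpy as np
import mindspore.dataset as ds

def two_cols():
    # hypothetical stand-in for the generator_mc used by the tests
    for i in range(3):
        yield (np.array(i, dtype=np.int32), np.array(i + 10, dtype=np.int32))

data0 = ds.GeneratorDataset(two_cols, ["col0", "col1"])
# operations= is passed first; column_order= then projects/reorders the output columns,
# which is what yields the ProjectOp over the MapOp mentioned in the comments.
data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out",
                  column_order=["col1", "out"])
for item in data0.create_tuple_iterator(num_epochs=1):
    print(item)  # [col1_value, out_value]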
@@ -44,7 +44,7 @@ def test_pad_op():
         pad_op,
     ]

-    data1 = data1.map(input_columns=["image"], operations=ctrans)
+    data1 = data1.map(operations=ctrans, input_columns=["image"])

     # Second dataset
     transforms = [
@@ -54,7 +54,7 @@ def test_pad_op():
     ]
     transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data2 = data2.map(input_columns=["image"], operations=transform)
+    data2 = data2.map(operations=transform, input_columns=["image"])

     for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
         c_image = item1["image"]
@@ -88,11 +88,11 @@ def test_pad_grayscale():

     transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
     data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    data1 = data1.map(input_columns=["image"], operations=transform)
+    data1 = data1.map(operations=transform, input_columns=["image"])

     # if input is grayscale, the output dimensions should be single channel
     pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20))
-    data1 = data1.map(input_columns=["image"], operations=pad_gray)
+    data1 = data1.map(operations=pad_gray, input_columns=["image"])
     dataset_shape_1 = []
     for item1 in data1.create_dict_iterator(num_epochs=1):
         c_image = item1["image"]
@@ -106,7 +106,7 @@ def test_pad_grayscale():
     ctrans = [decode_op, pad_gray]
     dataset_shape_2 = []

-    data2 = data2.map(input_columns=["image"], operations=ctrans)
+    data2 = data2.map(operations=ctrans, input_columns=["image"])

     for item2 in data2.create_dict_iterator(num_epochs=1):
         c_image = item2["image"]
@@ -132,7 +132,7 @@ def test_pad_md5():
         pad_op,
     ]

-    data1 = data1.map(input_columns=["image"], operations=ctrans)
+    data1 = data1.map(operations=ctrans, input_columns=["image"])

     # Second dataset
     data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
@@ -142,7 +142,7 @@ def test_pad_md5():
         py_vision.ToTensor(),
     ]
     transform = mindspore.dataset.transforms.py_transforms.Compose(pytrans)
-    data2 = data2.map(input_columns=["image"], operations=transform)
+    data2 = data2.map(operations=transform, input_columns=["image"])
     # Compare with expected md5 from images
     filename1 = "pad_01_c_result.npz"
     save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)
@@ -127,7 +127,7 @@ def batch_padding_performance_1d():
     cifar10_dir = "../data/dataset/testCifar10Data"
     data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]
     data1 = data1.repeat(24)
-    data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1)))
+    data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image")
     pad_info = {"image": ([3888], 0)}  # 3888 =36*36*3
     # pad_info = None
     data1 = data1.batch(batch_size=24, drop_remainder=True, pad_info=pad_info)
@@ -144,7 +144,7 @@ def batch_pyfunc_padding_3d():
     data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]
     data1 = data1.repeat(24)
     # pad_info = {"image": ([36, 36, 3], 0)}
-    data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, ((0, 4), (0, 4), (0, 0)))),
+    data1 = data1.map(operations=(lambda x: np.pad(x, ((0, 4), (0, 4), (0, 0)))), input_columns="image",
                       python_multiprocessing=False)
     data1 = data1.batch(batch_size=24, drop_remainder=True)
     start_time = time.time()
@@ -159,8 +159,8 @@ def batch_pyfunc_padding_1d():
     cifar10_dir = "../data/dataset/testCifar10Data"
     data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]
     data1 = data1.repeat(24)
-    data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1)))
-    data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816))), python_multiprocessing=False)
+    data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image")
+    data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image", python_multiprocessing=False)
     data1 = data1.batch(batch_size=24, drop_remainder=True)
     start_time = time.time()
     num_batches = 0
@@ -176,8 +176,8 @@ def test_pad_via_map():

     def pad_map_config():
         data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000)  # shape = [32,32,3]
-        data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1)))  # reshape to 1d
-        data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816))))
+        data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image")  # reshape to 1d
+        data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image")
         data1 = data1.batch(batch_size=25, drop_remainder=True)
         res = []
         for data in data1.create_dict_iterator(num_epochs=1):
@@ -186,7 +186,7 @@ def test_pad_via_map():

     def pad_batch_config():
         data2 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000)  # shape = [32,32,3]
-        data2 = data2.map(input_columns="image", operations=(lambda x: x.reshape(-1)))  # reshape to 1d
+        data2 = data2.map(operations=(lambda x: x.reshape(-1)), input_columns="image")  # reshape to 1d
         data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)})
         res = []
         for data in data2.create_dict_iterator(num_epochs=1):
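Note: the padding benchmarks above pass plain Python lambdas as map operations, and the same keyword reordering applies to them. A compact, self-contained sketch of that flatten-then-pad pipeline follows; the synthetic array source and the NumpySlicesDataset stand-in are assumptions for illustration, while the 3072 -> 3888 sizes come from the 32x32x3 and 36x36x3 shapes noted in the diff.

import numpy as np
import mindspore.dataset as ds

# Synthetic stand-in for the 32x32x3 CIFAR-10 records used by the benchmarks.
images = np.random.randint(0, 255, (8, 32, 32, 3), dtype=np.uint8)
data = ds.NumpySlicesDataset({"image": images}, shuffle=False)

# operations= comes first, then input_columns=, matching the updated calls above.
data = data.map(operations=(lambda x: x.reshape(-1)), input_columns="image")        # flatten to (3072,)
data = data.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image")  # pad 3072 -> 3888
data = data.batch(batch_size=4, drop_remainder=True)

for batch in data.create_dict_iterator(num_epochs=1):
    print(batch["image"].shape)  # (4, 3888)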
Some files were not shown because too many files have changed in this diff.