flat_map first commit

This commit is contained in:
jiangzhiwen 2020-05-11 16:47:54 +08:00
parent 6b4a6e55a0
commit cb2814b498
6 changed files with 127 additions and 0 deletions

View File

@ -268,6 +268,50 @@ class Dataset:
"""
return ShuffleDataset(self, buffer_size)
def flat_map(self, func):
"""
Maps `func` to each row in dataset and flatten the result.
The specified `func` is a function that must take one 'Ndarray' as input
and return a 'Dataset'.
Args:
func (function): A function that must take one 'Ndarray' as an argument and
return a 'Dataset'.
Returns:
Dataset, applied by the function.
Examples:
>>> import mindspore.dataset as ds
>>> import mindspore.dataset.transforms.nlp.utils as nlp
>>> # declare a function which returns a Dataset object
>>> def flat_map_func(x):
>>> data_dir = nlp.as_text(x[0])
>>> d = ds.ImageFolderDatasetV2(data_dir)
>>> return d
>>> # data is a Dataset object
>>> data = ds.TextFileDataset(DATA_FILE)
>>> data = data.flat_map(flat_map_func)
Raises:
TypeError: If `func` is not a function.
TypeError: If `func` doesn't return a Dataset.
"""
dataset = None
if not hasattr(func, '__call__'):
raise TypeError("func must be a function.")
for row_data in self:
if dataset is None:
dataset = func(row_data)
else:
dataset += func(row_data)
if not isinstance(dataset, Dataset):
raise TypeError("flat_map must return a Dataset object.")
return dataset
@check_map
def map(self, input_columns=None, operations=None, output_columns=None, columns_order=None,
num_parallel_workers=None, python_multiprocessing=False):

View File

@ -0,0 +1,2 @@
../data/dataset/test_flat_map/images1.txt
../data/dataset/test_flat_map/images2.txt

View File

@ -0,0 +1,3 @@
../data/dataset/testPK/data
../data/dataset/testImageNetData/train
../data/dataset/testImageNetData2/train

View File

@ -0,0 +1,3 @@
../data/dataset/testPK/data
../data/dataset/testImageNetData/train
../data/dataset/testImageNetData2/train

View File

@ -0,0 +1,3 @@
../data/dataset/testPK/data
../data/dataset/testImageNetData/train
../data/dataset/testImageNetData2/train

View File

@ -0,0 +1,72 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import mindspore.dataset as ds
DATA_FILE = "../data/dataset/test_flat_map/images1.txt"
INDEX_FILE = "../data/dataset/test_flat_map/image_index.txt"
def test_flat_map_1():
'''
DATA_FILE records the path of image folders, load the images from them.
'''
import mindspore.dataset.transforms.nlp.utils as nlp
def flat_map_func(x):
data_dir = nlp.as_text(x[0])
d = ds.ImageFolderDatasetV2(data_dir)
return d
data = ds.TextFileDataset(DATA_FILE)
data = data.flat_map(flat_map_func)
count = 0
for d in data:
assert isinstance(d[0], np.ndarray)
count += 1
assert count == 52
def test_flat_map_2():
'''
Flatten 3D structure data
'''
import mindspore.dataset.transforms.nlp.utils as nlp
def flat_map_func_1(x):
data_dir = nlp.as_text(x[0])
d = ds.ImageFolderDatasetV2(data_dir)
return d
def flat_map_func_2(x):
text_file = nlp.as_text(x[0])
d = ds.TextFileDataset(text_file)
d = d.flat_map(flat_map_func_1)
return d
data = ds.TextFileDataset(INDEX_FILE)
data = data.flat_map(flat_map_func_2)
count = 0
for d in data:
assert isinstance(d[0], np.ndarray)
count += 1
assert count == 104
if __name__ == "__main__":
test_flat_map_1()
test_flat_map_2()