forked from mindspore-Ecosystem/mindspore
flat_map first commit
This commit is contained in:
parent
6b4a6e55a0
commit
cb2814b498
|
@ -268,6 +268,50 @@ class Dataset:
|
|||
"""
|
||||
return ShuffleDataset(self, buffer_size)
|
||||
|
||||
def flat_map(self, func):
    """
    Map `func` to each row in dataset and flatten the result.

    The specified `func` is a function that must take one 'Ndarray' as input
    and return a 'Dataset'. The rows of this dataset are iterated when
    `flat_map` is called; the Dataset returned for the first row is then
    combined with the Dataset returned for every following row via `+=`.

    Args:
        func (function): A function that must take one 'Ndarray' as an argument and
            return a 'Dataset'.

    Returns:
        Dataset, applied by the function.

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.transforms.nlp.utils as nlp
        >>> # declare a function which returns a Dataset object
        >>> def flat_map_func(x):
        ...     data_dir = nlp.as_text(x[0])
        ...     d = ds.ImageFolderDatasetV2(data_dir)
        ...     return d
        >>> # data is a Dataset object
        >>> data = ds.TextFileDataset(DATA_FILE)
        >>> data = data.flat_map(flat_map_func)

    Raises:
        TypeError: If `func` is not a function.
        TypeError: If `func` doesn't return a Dataset.
        TypeError: If this dataset yields no rows, so there is nothing to return.
    """
    if not callable(func):
        raise TypeError("func must be a function.")

    dataset = None
    for row_data in self:
        mapped = func(row_data)
        # Validate every result before combining it, so a bad return value is
        # reported with the documented error instead of whatever `+=` would do
        # with a non-Dataset operand.
        if not isinstance(mapped, Dataset):
            raise TypeError("flat_map must return a Dataset object.")
        if dataset is None:
            dataset = mapped
        else:
            dataset += mapped

    if dataset is None:
        # `func` was never called: the source dataset produced no rows.
        raise TypeError("flat_map must return a Dataset object, but the dataset has no rows to map.")
    return dataset
|
||||
|
||||
@check_map
|
||||
def map(self, input_columns=None, operations=None, output_columns=None, columns_order=None,
|
||||
num_parallel_workers=None, python_multiprocessing=False):
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
../data/dataset/test_flat_map/images1.txt
|
||||
../data/dataset/test_flat_map/images2.txt
|
|
@ -0,0 +1,3 @@
|
|||
../data/dataset/testPK/data
|
||||
../data/dataset/testImageNetData/train
|
||||
../data/dataset/testImageNetData2/train
|
|
@ -0,0 +1,3 @@
|
|||
../data/dataset/testPK/data
|
||||
../data/dataset/testImageNetData/train
|
||||
../data/dataset/testImageNetData2/train
|
|
@ -0,0 +1,3 @@
|
|||
../data/dataset/testPK/data
|
||||
../data/dataset/testImageNetData/train
|
||||
../data/dataset/testImageNetData2/train
|
|
@ -0,0 +1,72 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
import numpy as np
|
||||
import mindspore.dataset as ds
|
||||
|
||||
# Text file read line by line in test_flat_map_1; per that test's docstring it
# records the paths of image folders.
DATA_FILE = "../data/dataset/test_flat_map/images1.txt"
|
||||
# Text file read line by line in test_flat_map_2; each row is decoded and opened
# as another text file (presumably a folder list like DATA_FILE - TODO confirm).
INDEX_FILE = "../data/dataset/test_flat_map/image_index.txt"
|
||||
|
||||
|
||||
def test_flat_map_1():
    """
    Read image-folder paths from DATA_FILE and load the images of every folder
    through a single flat_map call.
    """
    import mindspore.dataset.transforms.nlp.utils as nlp

    def flat_map_func(x):
        # Each row holds one folder path; open that folder as an image dataset.
        return ds.ImageFolderDatasetV2(nlp.as_text(x[0]))

    data = ds.TextFileDataset(DATA_FILE).flat_map(flat_map_func)

    seen = 0
    for seen, row in enumerate(data, start=1):
        assert isinstance(row[0], np.ndarray)
    assert seen == 52
|
||||
|
||||
|
||||
def test_flat_map_2():
    """
    Flatten 3D structure data: INDEX_FILE rows name text files, whose rows in
    turn are opened as image folders.
    """
    import mindspore.dataset.transforms.nlp.utils as nlp

    def flat_map_func_1(x):
        # Inner level: one row -> one image folder dataset.
        return ds.ImageFolderDatasetV2(nlp.as_text(x[0]))

    def flat_map_func_2(x):
        # Outer level: one row -> one text file, flattened with the inner mapper.
        return ds.TextFileDataset(nlp.as_text(x[0])).flat_map(flat_map_func_1)

    data = ds.TextFileDataset(INDEX_FILE).flat_map(flat_map_func_2)

    seen = 0
    for seen, row in enumerate(data, start=1):
        assert isinstance(row[0], np.ndarray)
    assert seen == 104
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the flat_map tests in declaration order when executed as a script.
    for _case in (test_flat_map_1, test_flat_map_2):
        _case()
|
Loading…
Reference in New Issue