forked from mindspore-Ecosystem/mindspore
Merge pull request !2243 from panfengfeng/revert-merge-2189-master
This commit is contained in:
commit
f27de896a7
|
@ -2209,7 +2209,7 @@ class ConcatDataset(DatasetOp):
|
|||
Number, number of batches.
|
||||
"""
|
||||
children_sizes = [c.get_dataset_size() for c in self.input]
|
||||
dataset_size = sum(children_sizes)
|
||||
dataset_size = np.sum(children_sizes)
|
||||
return dataset_size
|
||||
|
||||
|
||||
|
@ -2219,8 +2219,8 @@ class RenameDataset(DatasetOp):
|
|||
|
||||
Args:
|
||||
input_dataset (Dataset): Input Dataset to be Renamed.
|
||||
input_columns (list[str]): list of names of the input columns.
|
||||
output_columns (list[str]): list of names of the output columns.
|
||||
input_column_names (list[str]): list of names of the input columns.
|
||||
output_column_names (list[str]): list of names of the output columns.
|
||||
"""
|
||||
|
||||
def __init__(self, input_dataset, input_columns, output_columns):
|
||||
|
@ -4737,39 +4737,58 @@ class _NumpySlicesDataset:
|
|||
def __init__(self, data, column_list=None):
|
||||
self.column_list = None
|
||||
# Convert dict data into tuple
|
||||
if isinstance(data, dict):
|
||||
if isinstance(data, dict) or isinstance(data[0], dict):
|
||||
data = self.process_dict(data)
|
||||
|
||||
if isinstance(data, tuple):
|
||||
self.data = ()
|
||||
data_len = len(data)
|
||||
for i in range(data_len):
|
||||
self.data = self.data + (np.array(data[i]),)
|
||||
if isinstance(data[0], tuple) or isinstance(data, tuple):
|
||||
self.is_tuple = True
|
||||
self.data = data
|
||||
if isinstance(data[0], tuple):
|
||||
for i in range(len(self.data)):
|
||||
self.data[i] = np.array(self.data[i])
|
||||
else:
|
||||
self.data = (np.array(data),)
|
||||
self.is_tuple = False
|
||||
self.data = np.array(data)
|
||||
|
||||
# Init column_name
|
||||
if column_list is not None:
|
||||
self.column_list = column_list
|
||||
elif self.column_list is None:
|
||||
self.column_list = []
|
||||
column_num = len(self.data)
|
||||
column_num = len(self.data) if self.is_tuple else 1
|
||||
for i in range(column_num):
|
||||
self.column_list.append("column_" + str(i))
|
||||
|
||||
def __getitem__(self, index):
|
||||
data_row = [d[index, ...] for d in self.data]
|
||||
data_res = tuple(data_row)
|
||||
if self.is_tuple:
|
||||
data_row = []
|
||||
for i in range(len(self.data)):
|
||||
data_row.append(self.data[i][index, ...])
|
||||
data_res = tuple(data_row)
|
||||
else:
|
||||
data_row = self.data[index, ...]
|
||||
data_row = [data_row]
|
||||
data_res = tuple(data_row)
|
||||
|
||||
return data_res
|
||||
|
||||
def __len__(self):
|
||||
return len(self.data[0])
|
||||
if self.is_tuple:
|
||||
return len(self.data[0])
|
||||
return len(self.data)
|
||||
|
||||
def process_dict(self, input_data):
|
||||
"""
|
||||
Convert dict-like data into tuple format. When the input is a tuple of dicts, they are first merged into a single dict.
|
||||
"""
|
||||
# Convert pandas like dict(has "values" column) into General dict
|
||||
# When input is a tuple of dict, composing it
|
||||
if isinstance(input_data, tuple) and isinstance(input_data[0], dict):
|
||||
data_dict = {}
|
||||
for d in input_data:
|
||||
data_dict.update(d)
|
||||
input_data = data_dict
|
||||
|
||||
# convert pandas like dict(has "values" column) into General dict
|
||||
data_keys = list(input_data.keys())
|
||||
data_col = input_data[data_keys[0]]
|
||||
if hasattr(data_col, "values"):
|
||||
|
@ -4780,12 +4799,13 @@ class _NumpySlicesDataset:
|
|||
input_data = new_dict
|
||||
|
||||
# Convert the data in dict into tuple
|
||||
data = ()
|
||||
keys = list(input_data.keys())
|
||||
self.column_list = keys
|
||||
data = []
|
||||
self.column_list = []
|
||||
keys = input_data.keys()
|
||||
for key in keys:
|
||||
self.column_list.append(key)
|
||||
value = input_data[key]
|
||||
data = data + (list(value),)
|
||||
data.append(tuple(value))
|
||||
|
||||
return data
|
||||
|
||||
|
@ -4824,7 +4844,7 @@ class NumpySlicesDataset(GeneratorDataset):
|
|||
- not allowed
|
||||
|
||||
Args:
|
||||
data (list, tuple or dict) Input of Given data, supported data type includes list, tuple, dict and other numpy
|
||||
data(list, tuple or dict)Input of Given data, supported data type includes list, tuple, dict and other numpy
|
||||
format. Input data will be sliced along the first dimension to generate multiple rows; large data is not recommended to
|
||||
load in this way, as the data is loaded into memory.
|
||||
column_names (list[str], optional): List of column names of the dataset (default=None). If column_names not
|
||||
|
@ -4848,8 +4868,8 @@ class NumpySlicesDataset(GeneratorDataset):
|
|||
>>> # 2) Input data can be a dict, and column_names will be its key
|
||||
>>> data = {"a": [1, 2], "b": [3, 4]}
|
||||
>>> dataset2 = ds.NumpySlicesDataset(data)
|
||||
>>> # 3) Input data can be a tuple of lists (or numpy arrays), each tuple element refers to data in each column
|
||||
>>> data = ([1, 2], [3, 4], [5, 6])
|
||||
>>> # 3) Input data can be a tuple (or list of tuple), and each tuple element refers to data in each column
|
||||
>>> data = ((1, 2), (3, 4), (5, 6))
|
||||
>>> dataset3 = ds.NumpySlicesDataset(data, column_names=["column_1", "column_2", "column_3"])
|
||||
>>> # 4) Load data from csv file
|
||||
>>> import pandas as pd
|
||||
|
|
|
@ -1484,11 +1484,8 @@ def check_numpyslicesdataset(method):
|
|||
# check data; required argument
|
||||
data = param_dict.get('data')
|
||||
if not isinstance(data, (list, tuple, dict, np.ndarray)):
|
||||
raise TypeError("Unsupported data type: {}, only support some common python data type, "
|
||||
"like list, tuple, dict, and numpy array.".format(type(data)))
|
||||
if isinstance(data, tuple) and not isinstance(data[0], (list, np.ndarray)):
|
||||
raise TypeError("Unsupported data type: when input is tuple, only support some common python "
|
||||
"data type, like tuple of lists and tuple of numpy arrays.")
|
||||
raise TypeError("Unsupported data type: {}, only support some common python data type, \
|
||||
like list, tuple, dict, and numpy array.".format(type(data)))
|
||||
if not data:
|
||||
raise ValueError("Input data is empty.")
|
||||
|
||||
|
@ -1502,17 +1499,20 @@ def check_numpyslicesdataset(method):
|
|||
if isinstance(data, dict):
|
||||
data_column = len(list(data.keys()))
|
||||
if column_num != data_column:
|
||||
raise ValueError("Num of input column names is {0}, but required is {1}."
|
||||
.format(column_num, data_column))
|
||||
raise ValueError("Num of column is {0}, but required is {1}.".format(column_num, data_column))
|
||||
|
||||
elif isinstance(data, tuple):
|
||||
# Consider input is a tuple of dict
|
||||
elif isinstance(data[0], dict):
|
||||
data_column = sum(len(list(data[i].keys())) for i in range(len(data)))
|
||||
if column_num != data_column:
|
||||
raise ValueError("Num of column is {0}, but required is {1}.".format(column_num, data_column))
|
||||
|
||||
elif isinstance(data[0], tuple) or isinstance(data, tuple):
|
||||
if column_num != len(data):
|
||||
raise ValueError("Num of input column names is {0}, but required is {1}."
|
||||
.format(column_num, len(data)))
|
||||
raise ValueError("Num of column is {0}, but required is {1}.".format(column_num, len(data)))
|
||||
else:
|
||||
if column_num != 1:
|
||||
raise ValueError("Num of input column names is {0}, but required is {1} as data is list."
|
||||
.format(column_num, 1))
|
||||
raise ValueError("Num of column is {0}, but required is {1} as data is list.".format(column_num, 1))
|
||||
|
||||
return method(*args, **kwargs)
|
||||
|
||||
|
|
|
@ -81,32 +81,34 @@ def test_numpy_slices_dict_1():
|
|||
assert data[1] == res[i][1]
|
||||
|
||||
|
||||
def test_numpy_slices_dict_2():
|
||||
logger.info("Test input data is a tuple of Dictionary structure data.")
|
||||
|
||||
data1, data2 = {"a": [1, 2]}, {"b": [3, 4]}
|
||||
ds = de.NumpySlicesDataset((data1, data2), column_names=["col1", "col2"], shuffle=False)
|
||||
res = [[1, 3], [2, 4]]
|
||||
|
||||
for i, data in enumerate(ds):
|
||||
assert data[0] == res[i][0]
|
||||
assert data[1] == res[i][1]
|
||||
|
||||
|
||||
def test_numpy_slices_tuple_1():
|
||||
logger.info("Test slicing a list of tuple.")
|
||||
|
||||
np_data = [([1, 2], [3, 4]), ([11, 12], [13, 14]), ([21, 22], [23, 24])]
|
||||
res = [[[1, 2], [11, 12], [21, 22]], [[3, 4], [13, 14], [23, 24]]]
|
||||
ds = de.NumpySlicesDataset(np_data, shuffle=False)
|
||||
|
||||
for i, data in enumerate(ds):
|
||||
assert np.equal(data, np_data[i]).all()
|
||||
|
||||
assert sum([1 for _ in ds]) == 3
|
||||
|
||||
|
||||
def test_numpy_slices_tuple_2():
|
||||
logger.info("Test slicing a tuple of list.")
|
||||
|
||||
np_data = ([1, 2], [3, 4], [5, 6])
|
||||
expected = [[1, 3, 5], [2, 4, 6]]
|
||||
ds = de.NumpySlicesDataset(np_data, shuffle=False)
|
||||
|
||||
for i, data in enumerate(ds):
|
||||
assert np.equal(data, expected[i]).all()
|
||||
assert np.equal(data[0], res[i][0]).all()
|
||||
assert np.equal(data[1], res[i][1]).all()
|
||||
assert np.equal(data[2], res[i][2]).all()
|
||||
|
||||
assert sum([1 for _ in ds]) == 2
|
||||
|
||||
|
||||
def test_numpy_slices_tuple_3():
|
||||
def test_numpy_slices_tuple_2():
|
||||
logger.info("Test reading different dimension of tuple data.")
|
||||
features, labels = np.random.sample((5, 2)), np.random.sample((5, 1))
|
||||
data = (features, labels)
|
||||
|
@ -189,9 +191,9 @@ if __name__ == "__main__":
|
|||
test_numpy_slices_list_3()
|
||||
test_numpy_slices_list_append()
|
||||
test_numpy_slices_dict_1()
|
||||
test_numpy_slices_dict_2()
|
||||
test_numpy_slices_tuple_1()
|
||||
test_numpy_slices_tuple_2()
|
||||
test_numpy_slices_tuple_3()
|
||||
test_numpy_slices_csv_value()
|
||||
test_numpy_slices_csv_dict()
|
||||
test_numpy_slices_num_samplers()
|
||||
|
|
Loading…
Reference in New Issue