2020-03-27 14:49:12 +08:00
|
|
|
# Copyright 2019 Huawei Technologies Co., Ltd
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
# ==============================================================================
|
|
|
|
import numpy as np
|
2020-04-21 04:24:19 +08:00
|
|
|
import pytest
|
2020-03-27 14:49:12 +08:00
|
|
|
|
|
|
|
import mindspore.dataset as ds
|
|
|
|
from mindspore import log as logger
|
|
|
|
|
|
|
|
# TFRecord input handed to ds.TFRecordDataset as its first argument (a list of paths).
DATA_DIR = ["../data/dataset/testPyfuncMap/data.data"]
# Schema file handed to ds.TFRecordDataset as its second argument.
SCHEMA_DIR = "../data/dataset/testPyfuncMap/schema.json"
# Column names; not referenced by the tests visible in this file.
COLUMNS = ["col0", "col1", "col2"]
# Not referenced by the tests visible in this file.
GENERATE_GOLDEN = False
|
|
|
|
|
|
|
|
|
|
|
|
def test_case_0():
    """
    Test PyFunc: one input column mapped to one output column.

    Reads the TFRecord data without shuffling, maps ``lambda x: x + x`` over
    "col0" into "out", and compares every row of "out" with the doubled
    sequential values.
    """
    logger.info("Test 1-1 PyFunc : lambda x : x + x")

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    data1 = data1.map(input_columns="col0", output_columns="out", operations=(lambda x: x + x))

    i = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        np.testing.assert_array_equal(item["out"], golden)
        # Each row covers four consecutive values, so the next row starts 4 later.
        i = i + 4
|
|
|
|
|
|
|
|
|
|
|
|
def test_case_1():
    """
    Test PyFunc: one input column mapped to two output columns.

    Maps ``lambda x: (x, x + x)`` over "col0" into "out0" and "out1", then
    checks that "out0" equals the sequential values and "out1" their doubles.
    """
    logger.info("Test 1-n PyFunc : lambda x : (x , x + x) ")

    col = "col0"

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.map(input_columns=col, output_columns=["out0", "out1"], operations=(lambda x: (x, x + x)),
                      columns_order=["out0", "out1"])

    i = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i, i + 1], [i + 2, i + 3]])
        np.testing.assert_array_equal(item["out0"], golden)
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        np.testing.assert_array_equal(item["out1"], golden)
        # Each row covers four consecutive values, so the next row starts 4 later.
        i = i + 4
|
|
|
|
|
|
|
|
|
|
|
|
def test_case_2():
    """
    Test PyFunc: two input columns mapped to one output column.

    Maps ``lambda x, y: x + y`` over "col0" and "col1" into "out" and checks
    every row of "out" against twice the sequential values.
    """
    logger.info("Test n-1 PyFunc : lambda x, y : x + y ")

    col = ["col0", "col1"]

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    data1 = data1.map(input_columns=col, output_columns="out", operations=(lambda x, y: x + y),
                      columns_order=["out"])

    i = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        np.testing.assert_array_equal(item["out"], golden)
        # Each row covers four consecutive values, so the next row starts 4 later.
        i = i + 4
|
|
|
|
|
|
|
|
|
|
|
|
def test_case_3():
    """
    Test PyFunc: two input columns mapped to three output columns.

    Maps ``lambda x, y: (x, x + y, x + y + 1)`` over "col0" and "col1" into
    "out0", "out1" and "out2", then checks each output column per row.
    """
    # The logged lambda now matches the operation that is actually applied below.
    logger.info("Test n-m PyFunc : lambda x, y : (x , x + y, x + y + 1)")

    col = ["col0", "col1"]

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    data1 = data1.map(input_columns=col, output_columns=["out0", "out1", "out2"],
                      operations=(lambda x, y: (x, x + y, x + y + 1)), columns_order=["out0", "out1", "out2"])

    i = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i, i + 1], [i + 2, i + 3]])
        np.testing.assert_array_equal(item["out0"], golden)
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        np.testing.assert_array_equal(item["out1"], golden)
        golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
        np.testing.assert_array_equal(item["out2"], golden)
        # Each row covers four consecutive values, so the next row starts 4 later.
        i = i + 4
|
|
|
|
|
|
|
|
|
|
|
|
def test_case_4():
    """
    Test PyFunc: two inputs to three outputs with parallel map workers.

    Same mapping and checks as the n-m case, but the map is created with
    ``num_parallel_workers=4``.
    """
    # The logged lambda now matches the operation that is actually applied below.
    logger.info("Test Parallel n-m PyFunc : lambda x, y : (x , x + y, x + y + 1)")

    col = ["col0", "col1"]

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    data1 = data1.map(input_columns=col, output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
                      operations=(lambda x, y: (x, x + y, x + y + 1)), columns_order=["out0", "out1", "out2"])

    i = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i, i + 1], [i + 2, i + 3]])
        np.testing.assert_array_equal(item["out0"], golden)
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        np.testing.assert_array_equal(item["out1"], golden)
        golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
        np.testing.assert_array_equal(item["out2"], golden)
        # Each row covers four consecutive values, so the next row starts 4 later.
        i = i + 4
|
|
|
|
|
|
|
|
|
|
|
|
# The execution of this function will acquire GIL
def func_5(x):
    """
    Return an array of ones with the same shape and dtype as ``x``.

    Args:
        x: Array-like object exposing ``shape`` and ``dtype`` attributes.

    Returns:
        numpy.ndarray filled with ones, built from ``x.shape`` and ``x.dtype``.
    """
    return np.ones(x.shape, dtype=x.dtype)
|
|
|
|
|
|
|
|
|
|
|
|
def test_case_5():
    """
    Test PyFunc: named function used as the map operation.

    Maps ``func_5`` over "col0" into "out" and checks that every row of "out"
    is a 2x2 array of ones.
    """
    logger.info("Test 1-1 PyFunc : lambda x: np.ones(x.shape)")

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    data1 = data1.map(input_columns="col0", output_columns="out", operations=func_5)

    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[1, 1], [1, 1]])
        np.testing.assert_array_equal(item["out"], golden)
|
2020-03-27 14:49:12 +08:00
|
|
|
|
|
|
|
|
|
|
|
def test_case_6():
    """
    Test PyFunc: a list of two functions passed as the map operations.

    Applies ``lambda x: x + x`` twice to "col0" and checks that every row of
    "out" equals four times the sequential values.
    """
    logger.info("Test PyFunc ComposeOp : (lambda x : x + x), (lambda x : x + x)")

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    data1 = data1.map(input_columns="col0", output_columns="out",
                      operations=[(lambda x: x + x), (lambda x: x + x)])

    i = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i * 4, (i + 1) * 4], [(i + 2) * 4, (i + 3) * 4]])
        np.testing.assert_array_equal(item["out"], golden)
        # Each row covers four consecutive values, so the next row starts 4 later.
        i = i + 4
|
|
|
|
|
|
|
|
|
2020-04-21 04:24:19 +08:00
|
|
|
def test_case_7():
    """
    Test PyFunc: one input to one output with Python multiprocessing.

    Maps ``lambda x: x + x`` over "col0" into "out" with
    ``num_parallel_workers=4`` and ``python_multiprocessing=True``, then
    checks every row against the doubled sequential values.
    """
    logger.info("Test 1-1 PyFunc Multiprocess: lambda x : x + x")

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    data1 = data1.map(input_columns="col0", output_columns="out", operations=(lambda x: x + x),
                      num_parallel_workers=4, python_multiprocessing=True)

    i = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        np.testing.assert_array_equal(item["out"], golden)
        # Each row covers four consecutive values, so the next row starts 4 later.
        i = i + 4
|
|
|
|
|
|
|
|
|
|
|
|
def test_case_8():
    """
    Test PyFunc: two inputs to three outputs with Python multiprocessing.

    Maps ``lambda x, y: (x, x + y, x + y + 1)`` over "col0" and "col1" with
    ``num_parallel_workers=4`` and ``python_multiprocessing=True``, then
    checks "out0", "out1" and "out2" per row.
    """
    # The logged lambda now matches the operation that is actually applied below.
    logger.info("Test Multiprocess n-m PyFunc : lambda x, y : (x , x + y, x + y + 1)")

    col = ["col0", "col1"]

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    data1 = data1.map(input_columns=col, output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
                      operations=(lambda x, y: (x, x + y, x + y + 1)), columns_order=["out0", "out1", "out2"],
                      python_multiprocessing=True)

    i = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i, i + 1], [i + 2, i + 3]])
        np.testing.assert_array_equal(item["out0"], golden)
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        np.testing.assert_array_equal(item["out1"], golden)
        golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
        np.testing.assert_array_equal(item["out2"], golden)
        # Each row covers four consecutive values, so the next row starts 4 later.
        i = i + 4
|
|
|
|
|
|
|
|
|
|
|
|
def test_case_9():
    """
    Test PyFunc: three chained functions with Python multiprocessing.

    Applies ``x + x``, then ``x + 1``, then ``x + 2`` to "col0" with
    ``num_parallel_workers=4`` and ``python_multiprocessing=True``, and checks
    that every row of "out" equals twice the sequential values plus 3.
    """
    logger.info("Test multiple 1-1 PyFunc Multiprocess: lambda x : x + x")

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    data1 = data1.map(input_columns="col0", output_columns="out", operations=[(lambda x: x + x), (lambda x: x + 1),
                                                                              (lambda x: x + 2)],
                      num_parallel_workers=4, python_multiprocessing=True)

    i = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i * 2 + 3, (i + 1) * 2 + 3], [(i + 2) * 2 + 3, (i + 3) * 2 + 3]])
        np.testing.assert_array_equal(item["out"], golden)
        # Each row covers four consecutive values, so the next row starts 4 later.
        i = i + 4
|
|
|
|
|
|
|
|
|
|
|
|
def test_pyfunc_execption():
    """
    Test that an exception raised inside a PyFunc surfaces as RuntimeError.

    Maps a function that always raises over "col0" with
    ``num_parallel_workers=4``, expects iteration to raise RuntimeError, and
    checks that the error text contains the original message.
    """
    logger.info("Test PyFunc Execption Throw: lambda x : raise Execption()")

    def pyfunc(x):
        # The argument is ignored on purpose; the function only exists to raise.
        raise Exception("Pyfunc Throw")

    with pytest.raises(RuntimeError) as info:
        # apply dataset operations
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
        data1 = data1.map(input_columns="col0", output_columns="out", operations=pyfunc,
                          num_parallel_workers=4)
        for _ in data1:
            pass
    # Checked after the `with` block: a statement placed after the raising
    # loop inside the block would never execute.
    assert "Pyfunc Throw" in str(info.value)
|
|
|
|
|
|
|
|
|
2020-05-02 03:14:07 +08:00
|
|
|
def skip_test_pyfunc_execption_multiprocess():
    """
    Multiprocess variant of the PyFunc exception test.

    Maps a function that always raises over "col0" with
    ``num_parallel_workers=4`` and ``python_multiprocessing=True``, expects
    iteration to raise RuntimeError, and checks that the error text contains
    the original message.

    NOTE(review): the name does not start with ``test_``, so default pytest
    discovery presumably does not collect it; it is still called from the
    ``__main__`` guard of this file.
    """
    logger.info("Test Multiprocess PyFunc Execption Throw: lambda x : raise Execption()")

    def pyfunc(x):
        # The argument is ignored on purpose; the function only exists to raise.
        raise Exception("MP Pyfunc Throw")

    with pytest.raises(RuntimeError) as info:
        # apply dataset operations
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
        data1 = data1.map(input_columns="col0", output_columns="out", operations=pyfunc,
                          num_parallel_workers=4, python_multiprocessing=True)
        for _ in data1:
            pass
    # Checked after the `with` block: a statement placed after the raising
    # loop inside the block would never execute.
    assert "MP Pyfunc Throw" in str(info.value)
|
|
|
|
|
|
|
|
|
2020-03-27 14:49:12 +08:00
|
|
|
# Run every test in definition order when the file is executed as a script.
if __name__ == "__main__":
    test_case_0()
    test_case_1()
    test_case_2()
    test_case_3()
    test_case_4()
    test_case_5()
    test_case_6()
    test_case_7()
    test_case_8()
    test_case_9()
    test_pyfunc_execption()
    # Called explicitly here; its name does not carry the `test_` prefix.
    skip_test_pyfunc_execption_multiprocess()
|