forked from mindspore-Ecosystem/mindspore
136 lines
4.5 KiB
Python
136 lines
4.5 KiB
Python
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
"""
|
|
Testing SlidingWindow in mindspore.dataset
|
|
"""
|
|
import numpy as np
|
|
import pytest
|
|
import mindspore.dataset as ds
|
|
import mindspore.dataset.text as text
|
|
|
|
|
|
def test_sliding_window_callable():
|
|
"""
|
|
Test sliding window op is callable
|
|
"""
|
|
op = text.SlidingWindow(2, 0)
|
|
|
|
input1 = ["大", "家", "早", "上", "好"]
|
|
expect = np.array([['大', '家'], ['家', '早'], ['早', '上'], ['上', '好']])
|
|
result = op(input1)
|
|
assert np.array_equal(result, expect)
|
|
|
|
# test 2D input
|
|
input2 = [["大", "家", "早", "上", "好"]]
|
|
with pytest.raises(RuntimeError) as info:
|
|
_ = op(input2)
|
|
assert "SlidingWindow: SlidingWindow supports 1D input only for now." in str(info.value)
|
|
|
|
# test input multiple tensors
|
|
with pytest.raises(RuntimeError) as info:
|
|
_ = op(input1, input1)
|
|
assert "The op is OneToOne, can only accept one tensor as input." in str(info.value)
|
|
|
|
|
|
def test_sliding_window_string():
|
|
""" test sliding_window with string type"""
|
|
inputs = [["大", "家", "早", "上", "好"]]
|
|
expect = np.array([['大', '家'], ['家', '早'], ['早', '上'], ['上', '好']])
|
|
|
|
dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False)
|
|
dataset = dataset.map(operations=text.SlidingWindow(2, 0), input_columns=["text"])
|
|
|
|
result = []
|
|
for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
|
|
for i in range(data['text'].shape[0]):
|
|
result.append([])
|
|
for j in range(data['text'].shape[1]):
|
|
result[i].append(data['text'][i][j].decode('utf8'))
|
|
result = np.array(result)
|
|
np.testing.assert_array_equal(result, expect)
|
|
|
|
|
|
def test_sliding_window_number():
|
|
inputs = [1]
|
|
expect = np.array([[1]])
|
|
|
|
def gen(nums):
|
|
yield (np.array(nums),)
|
|
|
|
dataset = ds.GeneratorDataset(gen(inputs), column_names=["number"])
|
|
dataset = dataset.map(operations=text.SlidingWindow(1, -1), input_columns=["number"])
|
|
|
|
for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
|
|
np.testing.assert_array_equal(data['number'], expect)
|
|
|
|
|
|
def test_sliding_window_big_width():
|
|
inputs = [[1, 2, 3, 4, 5]]
|
|
expect = np.array([])
|
|
|
|
dataset = ds.NumpySlicesDataset(inputs, column_names=["number"], shuffle=False)
|
|
dataset = dataset.map(operations=text.SlidingWindow(30, 0), input_columns=["number"])
|
|
|
|
for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
|
|
np.testing.assert_array_equal(data['number'], expect)
|
|
|
|
|
|
def test_sliding_window_exception():
|
|
try:
|
|
_ = text.SlidingWindow(0, 0)
|
|
assert False
|
|
except ValueError:
|
|
pass
|
|
|
|
try:
|
|
_ = text.SlidingWindow("1", 0)
|
|
assert False
|
|
except TypeError:
|
|
pass
|
|
|
|
try:
|
|
_ = text.SlidingWindow(1, "0")
|
|
assert False
|
|
except TypeError:
|
|
pass
|
|
|
|
try:
|
|
inputs = [[1, 2, 3, 4, 5]]
|
|
dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False)
|
|
dataset = dataset.map(operations=text.SlidingWindow(3, -100), input_columns=["text"])
|
|
for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
|
|
pass
|
|
assert False
|
|
except RuntimeError as e:
|
|
assert "axis supports 0 or -1 only for now." in str(e)
|
|
|
|
try:
|
|
inputs = ["aa", "bb", "cc"]
|
|
dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False)
|
|
dataset = dataset.map(operations=text.SlidingWindow(2, 0), input_columns=["text"])
|
|
for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
|
|
pass
|
|
assert False
|
|
except RuntimeError as e:
|
|
assert "SlidingWindow supports 1D input only for now." in str(e)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
test_sliding_window_callable()
|
|
test_sliding_window_string()
|
|
test_sliding_window_number()
|
|
test_sliding_window_big_width()
|
|
test_sliding_window_exception()
|