forked from OSchip/llvm-project
[mlir][taco] Use sparse_tensor.out to write sparse tensors to files.
Add a Python method, output_sparse_tensor, to use sparse_tensor.out to write a sparse tensor value to a file. Modify the method that evaluates a tensor expression to return a pointer of the MLIR sparse tensor for the result to delay the extraction of the coordinates and non-zero values. Implement the Tensor to_file method to evaluate the tensor assignment and write the result to a file. Add unit tests. Modify test golden files to reflect the change that TNS outputs now have a comment line and two meta data lines. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D118956
This commit is contained in:
parent
620d99b7ed
commit
61a3dd70ff
|
@ -1,3 +1,6 @@
|
|||
# See http://frostt.io/tensors/file-formats.html for FROSTT (.tns) format
|
||||
2 50
|
||||
2 25
|
||||
1 1 12
|
||||
1 2 12
|
||||
1 3 12
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
# See http://frostt.io/tensors/file-formats.html for FROSTT (.tns) format
|
||||
2 9
|
||||
3 3
|
||||
1.0 1.0 100.0
|
||||
1.0 2.0 107.0
|
||||
1.0 3.0 114.0
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
# See http://frostt.io/tensors/file-formats.html for FROSTT (.tns) format
|
||||
1 3
|
||||
3
|
||||
1 37102
|
||||
2 -20.4138
|
||||
3 804927
|
||||
|
|
|
@ -7,7 +7,9 @@ import tempfile
|
|||
|
||||
_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(_SCRIPT_PATH)
|
||||
|
||||
from tools import mlir_pytaco_api as pt
|
||||
from tools import testing_utils as utils
|
||||
|
||||
###### This PyTACO part is taken from the TACO open-source project. ######
|
||||
# See http://tensor-compiler.org/docs/data_analytics/index.html.
|
||||
|
@ -42,12 +44,12 @@ A[i, j] = B[i, k, l] * D[l, j] * C[k, j]
|
|||
|
||||
##########################################################################
|
||||
|
||||
# CHECK: Compare result True
|
||||
# Perform the MTTKRP computation and write the result to file.
|
||||
with tempfile.TemporaryDirectory() as test_dir:
|
||||
actual_file = os.path.join(test_dir, "A.tns")
|
||||
pt.write(actual_file, A)
|
||||
actual = np.loadtxt(actual_file, np.float64)
|
||||
expected = np.loadtxt(
|
||||
os.path.join(_SCRIPT_PATH, "data/gold_A.tns"), np.float64)
|
||||
print(f"Compare result {np.allclose(actual, expected, rtol=0.01)}")
|
||||
golden_file = os.path.join(_SCRIPT_PATH, "data/gold_A.tns")
|
||||
out_file = os.path.join(test_dir, "A.tns")
|
||||
pt.write(out_file, A)
|
||||
#
|
||||
# CHECK: Compare result True
|
||||
#
|
||||
print(f"Compare result {utils.compare_sparse_tns(golden_file, out_file)}")
|
||||
|
|
|
@ -10,6 +10,7 @@ _SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
|
|||
sys.path.append(_SCRIPT_PATH)
|
||||
|
||||
from tools import mlir_pytaco_api as pt
|
||||
from tools import testing_utils as utils
|
||||
|
||||
# Define the CSR format.
|
||||
csr = pt.format([pt.dense, pt.compressed], [0, 1])
|
||||
|
@ -33,6 +34,6 @@ with tempfile.TemporaryDirectory() as test_dir:
|
|||
out_file = os.path.join(test_dir, "C.tns")
|
||||
pt.write(out_file, C)
|
||||
#
|
||||
# CHECK: Compare files True
|
||||
# CHECK: Compare result True
|
||||
#
|
||||
print(f"Compare files {filecmp.cmp(golden_file, out_file)}")
|
||||
print(f"Compare result {utils.compare_sparse_tns(golden_file, out_file)}")
|
||||
|
|
|
@ -7,7 +7,9 @@ import tempfile
|
|||
|
||||
_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(_SCRIPT_PATH)
|
||||
|
||||
from tools import mlir_pytaco_api as pt
|
||||
from tools import testing_utils as utils
|
||||
|
||||
###### This PyTACO part is taken from the TACO open-source project. ######
|
||||
# See http://tensor-compiler.org/docs/scientific_computing/index.html.
|
||||
|
@ -43,12 +45,12 @@ y[i] = A[i, j] * x[j] + z[i]
|
|||
|
||||
##########################################################################
|
||||
|
||||
# CHECK: Compare result True
|
||||
# Perform the SpMV computation and write the result to file
|
||||
with tempfile.TemporaryDirectory() as test_dir:
|
||||
actual_file = os.path.join(test_dir, "y.tns")
|
||||
pt.write(actual_file, y)
|
||||
actual = np.loadtxt(actual_file, np.float64)
|
||||
expected = np.loadtxt(
|
||||
os.path.join(_SCRIPT_PATH, "data/gold_y.tns"), np.float64)
|
||||
print(f"Compare result {np.allclose(actual, expected, rtol=0.01)}")
|
||||
golden_file = os.path.join(_SCRIPT_PATH, "data/gold_y.tns")
|
||||
out_file = os.path.join(test_dir, "y.tns")
|
||||
pt.write(out_file, y)
|
||||
#
|
||||
# CHECK: Compare result True
|
||||
#
|
||||
print(f"Compare result {utils.compare_sparse_tns(golden_file, out_file)}")
|
||||
|
|
|
@ -667,6 +667,11 @@ class Tensor:
|
|||
"Must be a tuple or list for a shape or a single value"
|
||||
f"if initializing a scalar tensor: {value_or_shape}.")
|
||||
|
||||
def _set_packed_sparse_tensor(self, pointer: ctypes.c_void_p) -> None:
|
||||
"""Records the MLIR sparse tensor pointer."""
|
||||
self._sparse_value_location = _SparseValueInfo._PACKED
|
||||
self._packed_sparse_value = pointer
|
||||
|
||||
def is_unpacked(self) -> bool:
|
||||
"""Returns true if the tensor value is not packed as MLIR sparse tensor."""
|
||||
return (self._sparse_value_location == _SparseValueInfo._UNPACKED)
|
||||
|
@ -826,11 +831,39 @@ class Tensor:
|
|||
sparse_tensor, shape = utils.create_sparse_tensor(filename,
|
||||
fmt.format_pack.formats)
|
||||
tensor = Tensor(shape.tolist(), fmt)
|
||||
tensor._sparse_value_location = _SparseValueInfo._PACKED
|
||||
tensor._packed_sparse_value = sparse_tensor
|
||||
tensor._set_packed_sparse_tensor(sparse_tensor)
|
||||
|
||||
return tensor
|
||||
|
||||
def to_file(self, filename: str) -> None:
|
||||
"""Output the tensor value to a file.
|
||||
|
||||
This method evaluates any pending assignment to the tensor and outputs the
|
||||
tensor value.
|
||||
|
||||
Args:
|
||||
filename: A string file name.
|
||||
"""
|
||||
self._sync_value()
|
||||
if not self.is_unpacked():
|
||||
utils.output_sparse_tensor(self._packed_sparse_value, filename,
|
||||
self._format.format_pack.formats)
|
||||
return
|
||||
|
||||
# TODO: Use MLIR code to output the value.
|
||||
coords, values = self.get_coordinates_and_values()
|
||||
assert len(coords) == len(values)
|
||||
with open(filename, "w") as file:
|
||||
# Output a comment line and the meta data.
|
||||
file.write("; extended FROSTT format\n")
|
||||
file.write(f"{self.order} {len(coords)}\n")
|
||||
file.write(f"{' '.join(map(lambda i: str(i), self.shape))}\n")
|
||||
# Output each (coordinate value) pair in a line.
|
||||
for c, v in zip(coords, values):
|
||||
# The coordinates are 1-based in the text file and 0-based in memory.
|
||||
plus_one_to_str = lambda x: str(x + 1)
|
||||
file.write(f"{' '.join(map(plus_one_to_str,c))} {v}\n")
|
||||
|
||||
@property
|
||||
def dtype(self) -> DType:
|
||||
"""Returns the data type for the Tensor."""
|
||||
|
@ -914,9 +947,7 @@ class Tensor:
|
|||
assert isinstance(result, np.ndarray)
|
||||
self._dense_storage = result
|
||||
else:
|
||||
assert _all_instance_of(result, np.ndarray) and len(result) == 2
|
||||
assert (result[0].ndim, result[1].ndim) == (1, 2)
|
||||
(self._values, self._coords) = result
|
||||
self._set_packed_sparse_tensor(result)
|
||||
|
||||
def _sync_value(self) -> None:
|
||||
"""Updates the tensor value by evaluating the pending assignment."""
|
||||
|
@ -1349,7 +1380,7 @@ class IndexExpr(abc.ABC):
|
|||
self,
|
||||
dst: Tensor,
|
||||
dst_indices: Tuple[IndexVar, ...],
|
||||
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
|
||||
) -> Union[np.ndarray, ctypes.c_void_p]:
|
||||
"""Evaluates tensor assignment dst[dst_indices] = expression.
|
||||
|
||||
Args:
|
||||
|
@ -1357,9 +1388,8 @@ class IndexExpr(abc.ABC):
|
|||
dst_indices: The tuple of IndexVar used to access the destination tensor.
|
||||
|
||||
Returns:
|
||||
The result of the dense tensor represented in numpy ndarray or the sparse
|
||||
tensor represented by two numpy ndarray for its non-zero values and
|
||||
indices.
|
||||
The result of the dense tensor represented in numpy ndarray or the pointer
|
||||
to the MLIR sparse tensor.
|
||||
|
||||
Raises:
|
||||
ValueError: If the expression is not proper or not supported.
|
||||
|
@ -1397,17 +1427,8 @@ class IndexExpr(abc.ABC):
|
|||
if dst.is_dense():
|
||||
return runtime.ranked_memref_to_numpy(arg_pointers[0][0])
|
||||
|
||||
# Check and return the sparse tensor output.
|
||||
rank, nse, shape, values, indices = utils.sparse_tensor_to_coo_tensor(
|
||||
ctypes.cast(arg_pointers[-1][0], ctypes.c_void_p),
|
||||
np.float64,
|
||||
)
|
||||
assert (np.equal(rank, dst.order)
|
||||
and np.array_equal(shape, np.array(dst.shape)) and
|
||||
np.equal(values.ndim, 1) and np.equal(values.shape[0], nse) and
|
||||
np.equal(indices.ndim, 2) and np.equal(indices.shape[0], nse) and
|
||||
np.equal(indices.shape[1], rank))
|
||||
return (values, indices)
|
||||
# Return the sparse tensor pointer.
|
||||
return arg_pointers[-1][0]
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
|
@ -1438,6 +1459,13 @@ class Access(IndexExpr):
|
|||
raise ValueError("Invalid indices for rank: "
|
||||
f"str{self.tensor.order} != len({str(self.indices)}).")
|
||||
|
||||
def __repr__(self) -> str:
|
||||
# The Tensor __repr__ method evaluates the pending assignment to the tensor.
|
||||
# We want to define the __repr__ method here to avoid such evaluation of the
|
||||
# tensor assignment.
|
||||
indices_str = ", ".join(map(lambda i: i.name, self.indices))
|
||||
return (f"Tensor({self.tensor.name}) " f"Indices({indices_str})")
|
||||
|
||||
def _emit_expression(
|
||||
self,
|
||||
expr_to_opnd: Dict[IndexExpr, lang.OperandDef],
|
||||
|
|
|
@ -31,17 +31,6 @@ _MTX_FILENAME_SUFFIX = ".mtx"
|
|||
_TNS_FILENAME_SUFFIX = ".tns"
|
||||
|
||||
|
||||
def _write_tns(file: TextIO, tensor: Tensor) -> None:
|
||||
"""Outputs a tensor to a file using .tns format."""
|
||||
coords, non_zeros = tensor.get_coordinates_and_values()
|
||||
assert len(coords) == len(non_zeros)
|
||||
# Output a coordinate and the corresponding value in a line.
|
||||
for c, v in zip(coords, non_zeros):
|
||||
# The coordinates are 1-based in the text file and 0-based in memory.
|
||||
plus_one_to_str = lambda x: str(x + 1)
|
||||
file.write(f"{' '.join(map(plus_one_to_str,c))} {v}\n")
|
||||
|
||||
|
||||
def read(filename: str, fmt: Format) -> Tensor:
|
||||
"""Inputs a tensor from a given file.
|
||||
|
||||
|
@ -88,7 +77,4 @@ def write(filename: str, tensor: Tensor) -> None:
|
|||
if not isinstance(tensor, Tensor):
|
||||
raise ValueError(f"Expected a Tensor object: {tensor}.")
|
||||
|
||||
# TODO: combine the evaluation and the outputing into one step.
|
||||
tensor._sync_value()
|
||||
with open(filename, "w") as file:
|
||||
return _write_tns(file, tensor)
|
||||
tensor.to_file(filename)
|
||||
|
|
|
@ -270,3 +270,67 @@ def create_sparse_tensor(
|
|||
engine.invoke(_ENTRY_NAME, *arg_pointers)
|
||||
shape = runtime.ranked_memref_to_numpy(ctypes.pointer(c_tensor_desc.shape))
|
||||
return c_tensor_desc.storage, shape
|
||||
|
||||
|
||||
# TODO: With better support from MLIR, we may improve the current implementation
|
||||
# by using Python code to generate the kernel instead of doing MLIR text code
|
||||
# stitching.
|
||||
def _get_output_sparse_tensor_kernel(
|
||||
sparsity_codes: Sequence[sparse_tensor.DimLevelType]) -> str:
|
||||
"""Creates an MLIR text kernel to output a sparse tensor to a file.
|
||||
|
||||
The kernel returns void.
|
||||
"""
|
||||
rank = len(sparsity_codes)
|
||||
|
||||
# Use ? to represent a dimension in the dynamic shape string representation.
|
||||
shape = "x".join(map(lambda d: "?", range(rank)))
|
||||
|
||||
# Convert the encoded sparsity values to a string representation.
|
||||
sparsity = ", ".join(
|
||||
map(lambda s: '"compressed"' if s.value else '"dense"', sparsity_codes))
|
||||
|
||||
# Return the MLIR text kernel.
|
||||
return f"""
|
||||
!Ptr = type !llvm.ptr<i8>
|
||||
#enc = #sparse_tensor.encoding<{{
|
||||
dimLevelType = [ {sparsity} ]
|
||||
}}>
|
||||
func @{_ENTRY_NAME}(%t: tensor<{shape}xf64, #enc>, %filename: !Ptr)
|
||||
attributes {{ llvm.emit_c_interface }} {{
|
||||
sparse_tensor.out %t, %filename : tensor<{shape}xf64, #enc>, !Ptr
|
||||
std.return
|
||||
}}"""
|
||||
|
||||
|
||||
def output_sparse_tensor(
|
||||
tensor: ctypes.c_void_p, filename: str,
|
||||
sparsity: Sequence[sparse_tensor.DimLevelType]) -> None:
|
||||
"""Outputs an MLIR sparse tensor to the given file.
|
||||
|
||||
Args:
|
||||
tensor: A C pointer to the MLIR sparse tensor.
|
||||
filename: A string for the name of the file that contains the tensor data in
|
||||
a COO-flavored format.
|
||||
sparsity: A sequence of DimLevelType values, one for each dimension of the
|
||||
tensor.
|
||||
|
||||
Raises:
|
||||
OSError: If there is any problem in loading the supporting C shared library.
|
||||
ValueError: If the shared library doesn't contain the needed routine.
|
||||
"""
|
||||
with ir.Context() as ctx, ir.Location.unknown():
|
||||
module = _get_output_sparse_tensor_kernel(sparsity)
|
||||
module = ir.Module.parse(module)
|
||||
engine = compile_and_build_engine(module)
|
||||
|
||||
# Convert the filename to a byte stream.
|
||||
c_filename = ctypes.c_char_p(bytes(filename, "utf-8"))
|
||||
|
||||
arg_pointers = [
|
||||
ctypes.byref(ctypes.cast(tensor, ctypes.c_void_p)),
|
||||
ctypes.byref(c_filename)
|
||||
]
|
||||
|
||||
# Invoke the execution engine to run the module and return the result.
|
||||
engine.invoke(_ENTRY_NAME, *arg_pointers)
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
# This file contains the utilities to support testing.
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def compare_sparse_tns(expected: str, actual: str, rtol: float = 0.0001) -> bool:
|
||||
"""Compares sparse tensor actual output file with expected output file.
|
||||
|
||||
This routine assumes the input files are in FROSTT format. See
|
||||
http://frostt.io/tensors/file-formats.html for FROSTT (.tns) format.
|
||||
|
||||
It also assumes the first line in the output file is a comment line.
|
||||
|
||||
"""
|
||||
with open(actual, "r") as actual_f:
|
||||
with open(expected, "r") as expected_f:
|
||||
# Skip the first comment line.
|
||||
_ = actual_f.readline()
|
||||
_ = expected_f.readline()
|
||||
|
||||
# Compare the two lines of meta data
|
||||
if actual_f.readline() != expected_f.readline() or actual_f.readline(
|
||||
) != expected_f.readline():
|
||||
return FALSE
|
||||
|
||||
actual_data = np.loadtxt(actual, np.float64, skiprows=3)
|
||||
expected_data = np.loadtxt(expected, np.float64, skiprows=3)
|
||||
return np.allclose(actual_data, expected_data, rtol=rtol)
|
|
@ -108,3 +108,46 @@ def test_read_tns():
|
|||
passed += np.allclose(values, [2.0, 3.0, 4.0])
|
||||
# CHECK: 4
|
||||
print(passed)
|
||||
|
||||
|
||||
# CHECK-LABEL: test_write_unpacked_tns
|
||||
@_run
|
||||
def test_write_unpacked_tns():
|
||||
a = mlir_pytaco.Tensor([2, 3])
|
||||
a.insert([0, 1], 10)
|
||||
a.insert([1, 2], 40)
|
||||
a.insert([0, 0], 20)
|
||||
with tempfile.TemporaryDirectory() as test_dir:
|
||||
file_name = os.path.join(test_dir, "data.tns")
|
||||
mlir_pytaco_io.write(file_name, a)
|
||||
with open(file_name, "r") as file:
|
||||
lines = file.readlines()
|
||||
passed = 0
|
||||
# Skip the comment line in the output.
|
||||
if lines[1:] == ["2 3\n", "2 3\n", "1 2 10.0\n", "2 3 40.0\n", "1 1 20.0\n"]:
|
||||
passed = 1
|
||||
# CHECK: 1
|
||||
print(passed)
|
||||
|
||||
|
||||
# CHECK-LABEL: test_write_packed_tns
|
||||
@_run
|
||||
def test_write_packed_tns():
|
||||
a = mlir_pytaco.Tensor([2, 3])
|
||||
a.insert([0, 1], 10)
|
||||
a.insert([1, 2], 40)
|
||||
a.insert([0, 0], 20)
|
||||
b = mlir_pytaco.Tensor([2, 3])
|
||||
i, j = mlir_pytaco.get_index_vars(2)
|
||||
b[i, j] = a[i, j] + a[i, j]
|
||||
with tempfile.TemporaryDirectory() as test_dir:
|
||||
file_name = os.path.join(test_dir, "data.tns")
|
||||
mlir_pytaco_io.write(file_name, b)
|
||||
with open(file_name, "r") as file:
|
||||
lines = file.readlines()
|
||||
passed = 0
|
||||
# Skip the comment line in the output.
|
||||
if lines[1:] == ["2 3\n", "2 3\n", "1 1 40\n", "1 2 20\n", "2 3 80\n"]:
|
||||
passed = 1
|
||||
# CHECK: 1
|
||||
print(passed)
|
||||
|
|
Loading…
Reference in New Issue