[mlir][sparse][taco] Add support for float32.

Previously, we only supported float64. We now support float32 and float64. When
constructing a tensor without providing a data type, the default is float32.

Fix the tests for data type consistency. All PyTACO application tests now use
float32 to match the default data type of TACO. Other tests may use float32 or
float64.

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D120356
This commit is contained in:
Bixia Zheng 2022-02-22 14:04:30 -08:00
parent 47d18be58b
commit c8ae8cfb5d
8 changed files with 112 additions and 60 deletions

View File

@ -30,8 +30,8 @@ B = pt.read(os.path.join(_SCRIPT_PATH, "data/nell-2.tns"), csf)
# These two lines have been modified from the original program to use static
# data to support result comparison.
C = pt.from_array(np.full((B.shape[1], 25), 1, dtype=np.float64))
D = pt.from_array(np.full((B.shape[2], 25), 2, dtype=np.float64))
C = pt.from_array(np.full((B.shape[1], 25), 1, dtype=np.float32))
D = pt.from_array(np.full((B.shape[2], 25), 2, dtype=np.float32))
# Declare the result to be a dense matrix.
A = pt.tensor([B.shape[0], 25], rm)

View File

@ -15,8 +15,8 @@ from tools import testing_utils as utils
i, j, k = pt.get_index_vars(3)
# Set up dense matrices.
A = pt.from_array(np.full((8, 8), 2.0))
B = pt.from_array(np.full((8, 8), 3.0))
A = pt.from_array(np.full((8, 8), 2.0, dtype=np.float32))
B = pt.from_array(np.full((8, 8), 3.0, dtype=np.float32))
# Set up sparse matrices.
S = pt.tensor([8, 8], pt.format([pt.compressed, pt.compressed]))

View File

@ -31,8 +31,8 @@ A = pt.read(os.path.join(_SCRIPT_PATH, "data/pwtk.mtx"), csr)
# These two lines have been modified from the original program to use static
# data to support result comparison.
x = pt.from_array(np.full((A.shape[1],), 1, dtype=np.float64))
z = pt.from_array(np.full((A.shape[0],), 2, dtype=np.float64))
x = pt.from_array(np.full((A.shape[1],), 1, dtype=np.float32))
z = pt.from_array(np.full((A.shape[0],), 2, dtype=np.float32))
# Declare the result to be a dense vector
y = pt.tensor([A.shape[0]], dv)

View File

@ -14,7 +14,7 @@ A = pt.from_array(np.full([2,3], 1, dtype=np.float64))
B = pt.from_array(np.full([2,3], 2, dtype=np.float64))
# Define the result tensor as a true dense tensor. The parameter is_dense=True
# is an MLIR-PyTACO extension.
C = pt.tensor([2, 3], is_dense=True)
C = pt.tensor([2, 3], dtype=pt.float64, is_dense=True)
C[i, j] = A[i, j] + B[i, j]

View File

@ -96,7 +96,7 @@ class DType:
kind: A Type enum representing the data type.
value: The numpy data type for the TACO data type.
"""
kind: Type = Type.FLOAT64
kind: Type = Type.FLOAT32
def is_float(self) -> bool:
"""Returns whether the data type represents a floating point value."""
@ -112,6 +112,30 @@ class DType:
return self.kind.value
def _dtype_to_mlir_str(dtype: DType) -> str:
  """Maps a TACO data type to the spelling of its MLIR element type.

  Args:
    dtype: The TACO data type whose kind selects the MLIR type string.

  Returns:
    The MLIR type string, such as "f32" for Type.FLOAT32.

  Raises:
    KeyError: If the kind of dtype has no entry in the table below.
  """
  # One (TACO kind, MLIR spelling) pair per supported data type.
  mlir_names = dict((
      (Type.INT16, "i16"),
      (Type.INT32, "i32"),
      (Type.INT64, "i64"),
      (Type.FLOAT32, "f32"),
      (Type.FLOAT64, "f64"),
  ))
  kind = dtype.kind
  return mlir_names[kind]
def _nptype_to_taco_type(ty: np.dtype) -> DType:
  """Returns the TACO type for the given numpy type.

  Args:
    ty: The numpy data type, given either as a numpy scalar type such as
      np.float32 or as an np.dtype instance such as np.dtype("float32").

  Returns:
    A DType wrapping the Type enum member that matches the numpy type.

  Raises:
    KeyError: If the numpy type has no entry in the table below.
  """
  nptype_to_dtype = {
      np.int16: Type.INT16,
      np.int32: Type.INT32,
      np.int64: Type.INT64,
      np.float32: Type.FLOAT32,
      np.float64: Type.FLOAT64,
  }
  # An np.dtype instance compares equal to its scalar type but hashes
  # differently, so a direct dictionary lookup with it would fail. Normalize
  # to the scalar type first; scalar types pass through unchanged.
  scalar_type = np.dtype(ty).type
  return DType(nptype_to_dtype[scalar_type])
def _mlir_type_from_taco_type(dtype: DType) -> ir.Type:
"""Returns the MLIR type corresponding to the given TACO type."""
dtype_to_irtype = {
@ -123,7 +147,6 @@ def _mlir_type_from_taco_type(dtype: DType) -> ir.Type:
}
return dtype_to_irtype[dtype.kind]
def _ctype_pointer_from_array(array: np.ndarray) -> ctypes.pointer:
"""Returns the ctype pointer for the given numpy array."""
return ctypes.pointer(
@ -632,7 +655,7 @@ class Tensor:
"""
# Take care of the argument default values common to both sparse tensors
# and dense tensors.
dtype = dtype or DType(Type.FLOAT64)
dtype = dtype or DType(Type.FLOAT32)
self._name = name or self._get_unique_name()
self._assignment = None
self._sparse_value_location = _SparseValueInfo._UNPACKED
@ -688,7 +711,7 @@ class Tensor:
# Use the output MLIR sparse tensor pointer to retrieve the COO-flavored
# values and verify the values.
rank, nse, shape, values, indices = utils.sparse_tensor_to_coo_tensor(
self._packed_sparse_value, np.float64)
self._packed_sparse_value, self._dtype.value)
assert rank == self.order
assert np.allclose(self.shape, shape)
assert nse == len(values)
@ -757,7 +780,8 @@ class Tensor:
def from_array(array: np.ndarray) -> "Tensor":
"""Returns a dense tensor with the value copied from the input array.
We currently only support the conversion of float64 numpy arrays to Tensor.
We currently only support the conversion of float32 and float64 numpy arrays
to Tensor.
Args:
array: The numpy array that provides the data type, shape and value for
@ -767,11 +791,14 @@ class Tensor:
A Tensor object.
Raises:
ValueError if the data type of the numpy array is not float64.
ValueError if the data type of the numpy array is not supported.
"""
if array.dtype != np.float64:
raise ValueError(f"Expected float64 value type: {array.dtype}.")
tensor = Tensor(array.shape, is_dense=True)
if array.dtype != np.float32 and array.dtype != np.float64:
raise ValueError(f"Expected floating point value type: {array.dtype}.")
tensor = Tensor(
array.shape,
dtype=_nptype_to_taco_type(array.dtype.type),
is_dense=True)
tensor._dense_storage = np.copy(array)
return tensor
@ -808,7 +835,7 @@ class Tensor:
# The size of each dimension is one more than such a maximum coordinate
# value.
shape = [c + 1 for c in max_coordinate]
tensor = Tensor(shape, fmt)
tensor = Tensor(shape, fmt, dtype=dtype)
tensor._coords = coordinates
tensor._values = values
@ -833,8 +860,9 @@ class Tensor:
value is stored as an MLIR sparse tensor.
"""
sparse_tensor, shape = utils.create_sparse_tensor(filename,
fmt.format_pack.formats)
tensor = Tensor(shape.tolist(), fmt)
fmt.format_pack.formats,
_dtype_to_mlir_str(dtype))
tensor = Tensor(shape.tolist(), fmt, dtype=dtype)
tensor._set_packed_sparse_tensor(sparse_tensor)
return tensor
@ -862,7 +890,8 @@ class Tensor:
"supported.")
utils.output_sparse_tensor(self._packed_sparse_value, filename,
self._format.format_pack.formats)
self._format.format_pack.formats,
_dtype_to_mlir_str(self._dtype))
@property
def dtype(self) -> DType:

View File

@ -31,7 +31,8 @@ _MTX_FILENAME_SUFFIX = ".mtx"
_TNS_FILENAME_SUFFIX = ".tns"
def read(filename: str, fmt: Format) -> Tensor:
def read(filename: str, fmt: Format,
dtype: DType = DType(Type.FLOAT32)) -> Tensor:
"""Inputs a tensor from a given file.
The name suffix of the file specifies the format of the input tensor. We
@ -40,6 +41,7 @@ def read(filename: str, fmt: Format) -> Tensor:
Args:
filename: A string input filename.
fmt: The storage format of the tensor.
dtype: The data type, default to float32.
Raises:
ValueError: If filename doesn't end with .mtx or .tns, or fmt is not an
@ -52,7 +54,7 @@ def read(filename: str, fmt: Format) -> Tensor:
f"{_MTX_FILENAME_SUFFIX} or {_TNS_FILENAME_SUFFIX}: "
f"{filename}.")
return Tensor.from_file(filename, fmt, DType(Type.FLOAT64))
return Tensor.from_file(filename, fmt, dtype)
def write(filename: str, tensor: Tensor) -> None:

View File

@ -4,7 +4,7 @@
# This file contains the utilities to process sparse tensor outputs.
from typing import Sequence, Tuple
from typing import Callable, Dict, Sequence, Tuple
import ctypes
import functools
import numpy as np
@ -18,6 +18,10 @@ from mlir import runtime
from mlir.dialects import sparse_tensor
from mlir.passmanager import PassManager
# Type aliases for type annotation.
_SupportFunc = Callable[..., None]
_SupportFuncLocator = Callable[[np.dtype], Tuple[_SupportFunc, _SupportFunc]]
# The name for the environment variable that provides the full path for the
# supporting library.
_SUPPORTLIB_ENV_VAR = "SUPPORTLIB"
@ -36,15 +40,28 @@ def _get_support_lib_name() -> str:
return os.getenv(_SUPPORTLIB_ENV_VAR, _DEFAULT_SUPPORTLIB)
def _record_support_funcs(
    ty: np.dtype, to_func: _SupportFunc, from_func: _SupportFunc,
    ty_to_funcs: Dict[np.dtype, Tuple[_SupportFunc, _SupportFunc]]) -> None:
  """Registers the pair of conversion routines for one data type.

  Args:
    ty: The data type used as the key in ty_to_funcs.
    to_func: The first routine of the pair, stored at index 0.
    from_func: The second routine of the pair, stored at index 1.
    ty_to_funcs: The dictionary that receives the (to_func, from_func) entry.
  """
  func_pair = (to_func, from_func)
  # Declare the result type of both routines as a void pointer before they
  # are made available through the dictionary.
  for func in func_pair:
    func.restype = ctypes.c_void_p
  ty_to_funcs[ty] = func_pair
@functools.lru_cache()
def _get_c_shared_lib() -> ctypes.CDLL:
"""Loads the supporting C shared library with the needed routines.
def _get_support_func_locator() -> _SupportFuncLocator:
"""Constructs a function to locate the supporting functions for a data type.
Loads the supporting C shared library with the needed routines. Constructs a
dictionary from the supported data types to the routines for the data types,
and then a function to look up the dictionary for a given data type.
The name of the supporting C shared library is either provided by
an environment variable or a default value.
Returns:
The supporting C shared library.
The function to look up the supporting functions for a given data type.
Raises:
OSError: If there is any problem in loading the shared library.
@ -54,19 +71,25 @@ def _get_c_shared_lib() -> ctypes.CDLL:
# library.
c_lib = ctypes.CDLL(_get_support_lib_name())
type_to_funcs = {}
try:
c_lib.convertToMLIRSparseTensorF64.restype = ctypes.c_void_p
_record_support_funcs(np.float32, c_lib.convertToMLIRSparseTensorF32,
c_lib.convertFromMLIRSparseTensorF32, type_to_funcs)
except Exception as e:
raise ValueError("Missing function convertToMLIRSparseTensorF64 from "
f"the supporting C shared library: {e} ") from e
raise ValueError(f"Missing supporting function: {e}") from e
try:
c_lib.convertFromMLIRSparseTensorF64.restype = ctypes.c_void_p
_record_support_funcs(np.float64, c_lib.convertToMLIRSparseTensorF64,
c_lib.convertFromMLIRSparseTensorF64, type_to_funcs)
except Exception as e:
raise ValueError("Missing function convertFromMLIRSparseTensorF64 from "
f"the C shared library: {e} ") from e
raise ValueError(f"Missing supporting function: {e}") from e
return c_lib
def get_support_funcs(ty: np.dtype):
funcs = type_to_funcs[ty]
assert funcs is not None
return funcs
return get_support_funcs
def sparse_tensor_to_coo_tensor(
@ -93,17 +116,14 @@ def sparse_tensor_to_coo_tensor(
OSError: If there is any problem in loading the shared library.
ValueError: If the shared library doesn't contain the needed routines.
"""
c_lib = _get_c_shared_lib()
convert_from = _get_support_func_locator()(dtype)[1]
rank = ctypes.c_ulonglong(0)
nse = ctypes.c_ulonglong(0)
shape = ctypes.POINTER(ctypes.c_ulonglong)()
values = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))()
indices = ctypes.POINTER(ctypes.c_ulonglong)()
c_lib.convertFromMLIRSparseTensorF64(sparse_tensor, ctypes.byref(rank),
ctypes.byref(nse), ctypes.byref(shape),
ctypes.byref(values),
ctypes.byref(indices))
convert_from(sparse_tensor, ctypes.byref(rank), ctypes.byref(nse),
ctypes.byref(shape), ctypes.byref(values), ctypes.byref(indices))
# Convert the returned values to the corresponding numpy types.
shape = np.ctypeslib.as_array(shape, shape=[rank.value])
@ -138,8 +158,8 @@ def coo_tensor_to_sparse_tensor(np_shape: np.ndarray, np_values: np.ndarray,
ctypes.POINTER(np.ctypeslib.as_ctypes_type(np_values.dtype)))
indices = np_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
c_lib = _get_c_shared_lib()
ptr = c_lib.convertToMLIRSparseTensorF64(rank, nse, shape, values, indices)
convert_to = _get_support_func_locator()(np_values.dtype.type)[0]
ptr = convert_to(rank, nse, shape, values, indices)
assert ptr is not None, "Problem with calling convertToMLIRSparseTensorF64"
return ptr
@ -171,11 +191,11 @@ class _SparseTensorDescriptor(ctypes.Structure):
]
def _output_one_dim(dim: int, rank: int, shape: str) -> str:
def _output_one_dim(dim: int, rank: int, shape: str, type: str) -> str:
"""Produces the MLIR text code to output the size for the given dimension."""
return f"""
%c{dim} = arith.constant {dim} : index
%d{dim} = tensor.dim %t, %c{dim} : tensor<{shape}xf64, #enc>
%d{dim} = tensor.dim %t, %c{dim} : tensor<{shape}x{type}, #enc>
memref.store %d{dim}, %b[%c{dim}] : memref<{rank}xindex>
"""
@ -187,7 +207,7 @@ def _output_one_dim(dim: int, rank: int, shape: str) -> str:
# (2) Use scf.for instead of an unrolled loop to write out the dimension sizes
# when tensor.dim supports non-constant dimension value.
def _get_create_sparse_tensor_kernel(
sparsity_codes: Sequence[sparse_tensor.DimLevelType]) -> str:
sparsity_codes: Sequence[sparse_tensor.DimLevelType], type: str) -> str:
"""Creates an MLIR text kernel to contruct a sparse tensor from a file.
The kernel returns a _SparseTensorDescriptor structure.
@ -203,7 +223,7 @@ def _get_create_sparse_tensor_kernel(
# Get the MLIR text code to write the dimension sizes to the output buffer.
output_dims = "\n".join(
map(lambda d: _output_one_dim(d, rank, shape), range(rank)))
map(lambda d: _output_one_dim(d, rank, shape, type), range(rank)))
# Return the MLIR text kernel.
return f"""
@ -211,18 +231,18 @@ def _get_create_sparse_tensor_kernel(
#enc = #sparse_tensor.encoding<{{
dimLevelType = [ {sparsity} ]
}}>
func @{_ENTRY_NAME}(%filename: !Ptr) -> (tensor<{shape}xf64, #enc>, memref<{rank}xindex>)
func @{_ENTRY_NAME}(%filename: !Ptr) -> (tensor<{shape}x{type}, #enc>, memref<{rank}xindex>)
attributes {{ llvm.emit_c_interface }} {{
%t = sparse_tensor.new %filename : !Ptr to tensor<{shape}xf64, #enc>
%t = sparse_tensor.new %filename : !Ptr to tensor<{shape}x{type}, #enc>
%b = memref.alloc() : memref<{rank}xindex>
{output_dims}
return %t, %b : tensor<{shape}xf64, #enc>, memref<{rank}xindex>
return %t, %b : tensor<{shape}x{type}, #enc>, memref<{rank}xindex>
}}"""
def create_sparse_tensor(
filename: str, sparsity: Sequence[sparse_tensor.DimLevelType]
) -> Tuple[ctypes.c_void_p, np.ndarray]:
def create_sparse_tensor(filename: str,
sparsity: Sequence[sparse_tensor.DimLevelType],
type: str) -> Tuple[ctypes.c_void_p, np.ndarray]:
"""Creates an MLIR sparse tensor from the input file.
Args:
@ -241,7 +261,7 @@ def create_sparse_tensor(
ValueError: If the shared library doesn't contain the needed routine.
"""
with ir.Context() as ctx, ir.Location.unknown():
module = _get_create_sparse_tensor_kernel(sparsity)
module = _get_create_sparse_tensor_kernel(sparsity, type)
module = ir.Module.parse(module)
engine = compile_and_build_engine(module)
@ -265,7 +285,7 @@ def create_sparse_tensor(
# by using Python code to generate the kernel instead of doing MLIR text code
# stitching.
def _get_output_sparse_tensor_kernel(
sparsity_codes: Sequence[sparse_tensor.DimLevelType]) -> str:
sparsity_codes: Sequence[sparse_tensor.DimLevelType], type: str) -> str:
"""Creates an MLIR text kernel to output a sparse tensor to a file.
The kernel returns void.
@ -285,16 +305,16 @@ def _get_output_sparse_tensor_kernel(
#enc = #sparse_tensor.encoding<{{
dimLevelType = [ {sparsity} ]
}}>
func @{_ENTRY_NAME}(%t: tensor<{shape}xf64, #enc>, %filename: !Ptr)
func @{_ENTRY_NAME}(%t: tensor<{shape}x{type}, #enc>, %filename: !Ptr)
attributes {{ llvm.emit_c_interface }} {{
sparse_tensor.out %t, %filename : tensor<{shape}xf64, #enc>, !Ptr
sparse_tensor.out %t, %filename : tensor<{shape}x{type}, #enc>, !Ptr
std.return
}}"""
def output_sparse_tensor(
tensor: ctypes.c_void_p, filename: str,
sparsity: Sequence[sparse_tensor.DimLevelType]) -> None:
def output_sparse_tensor(tensor: ctypes.c_void_p, filename: str,
sparsity: Sequence[sparse_tensor.DimLevelType],
type: str) -> None:
"""Outputs an MLIR sparse tensor to the given file.
Args:
@ -303,13 +323,14 @@ def output_sparse_tensor(
a COO-flavored format.
sparsity: A sequence of DimLevelType values, one for each dimension of the
tensor.
type: The MLIR string for the data type.
Raises:
OSError: If there is any problem in loading the supporting C shared library.
ValueError: If the shared library doesn't contain the needed routine.
"""
with ir.Context() as ctx, ir.Location.unknown():
module = _get_output_sparse_tensor_kernel(sparsity)
module = _get_output_sparse_tensor_kernel(sparsity, type)
module = ir.Module.parse(module)
engine = compile_and_build_engine(module)

View File

@ -75,7 +75,7 @@ def _implement_read_tns_test(
# Read the data from the file and construct an MLIR sparse tensor.
sparse_tensor, o_shape = pytaco_utils.create_sparse_tensor(
file_name, sparsity_codes)
file_name, sparsity_codes, "f64")
passed = 0