forked from OSchip/llvm-project
122 lines
4.5 KiB
Python
122 lines
4.5 KiB
Python
"""This file contains benchmarks for sparse tensors. In particular, it
|
|
contains benchmarks for both the MLIR sparse tensor dialect and NumPy so that
they can be compared against each other.
|
|
"""
|
|
import ctypes
|
|
import numpy as np
|
|
import os
|
|
import re
|
|
import time
|
|
|
|
from mlir import ir
|
|
from mlir import runtime as rt
|
|
from mlir.dialects import builtin
|
|
from mlir.dialects.linalg.opdsl import lang as dsl
|
|
from mlir.execution_engine import ExecutionEngine
|
|
|
|
from common import create_sparse_np_tensor
|
|
from common import emit_timer_func
|
|
from common import emit_benchmark_wrapped_main_func
|
|
from common import get_kernel_func_from_module
|
|
from common import setup_passes
|
|
|
|
|
|
# Matrix-multiplication kernel expressed with the linalg OpDSL (`dsl`).  The
# operands are declared as parameter defaults and the body states the
# computation in index notation, so parameter names and defaults are part of
# the definition and must not be renamed casually.
@dsl.linalg_structured_op
def matmul_dsl(
    # Left operand, declared with symbolic dimensions (M, K).
    A=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.K),
    # Right operand, declared with symbolic dimensions (K, N).
    B=dsl.TensorDef(dsl.T, dsl.S.K, dsl.S.N),
    # Result operand, declared with symbolic dimensions (M, N) and flagged as
    # the output via `output=True`.
    C=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N, output=True)
):
    """Helper function for mlir sparse matrix multiplication benchmark."""
    # Accumulate the product A[m, k] * B[k, n] into C[m, n].
    C[dsl.D.m, dsl.D.n] += A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]
|
|
|
|
|
|
def _shared_lib_from_env(env_var):
    """Return the path held in environment variable `env_var`, checked to exist.

    An unset variable is read as the empty string, which fails the check.

    Raises:
        AssertionError: if the path does not exist.
    """
    lib_path = os.getenv(env_var, "")
    # NOTE(review): `assert` is skipped under `python -O`; it is kept so that
    # the failure is still reported as AssertionError, as before.
    assert os.path.exists(lib_path), (
        f"{lib_path} does not exist."
        f" Please pass a valid value for {env_var} environment variable."
    )
    return lib_path


def _memref_arg(array):
    """Wrap numpy `array` as a pointer to a pointer to its memref descriptor."""
    descriptor = rt.get_ranked_memref_descriptor(array)
    return ctypes.pointer(ctypes.pointer(descriptor))


def benchmark_sparse_mlir_multiplication():
    """Benchmark for mlir sparse matrix multiplication.

    Because it's an MLIR benchmark we need to return both a `compiler`
    function and a `runner` function.

    Returns:
        A `(compiler, runner)` pair.  `compiler()` builds the benchmark module
        and returns the execution engine's `invoke` callable.
        `runner(engine_invoke)` calls `main` once through that callable and
        returns, as an `int`, the value written to the one-element int64
        timer buffer.
    """
    # Single source of truth for the operand shapes: used both for the MLIR
    # tensor types and for the numpy buffers handed to the compiled program,
    # so the shapes never have to be recovered from a type's string form.
    lhs_shape = (1000, 1500)
    rhs_shape = (1500, 2000)
    out_shape = (1000, 2000)

    with ir.Context(), ir.Location.unknown():
        module = ir.Module.create()
        f64 = ir.F64Type.get()
        param1_type = ir.RankedTensorType.get(list(lhs_shape), f64)
        param2_type = ir.RankedTensorType.get(list(rhs_shape), f64)
        result_type = ir.RankedTensorType.get(list(out_shape), f64)
        with ir.InsertionPoint(module.body):
            @builtin.FuncOp.from_py_func(param1_type, param2_type, result_type)
            def sparse_kernel(x, y, z):
                return matmul_dsl(x, y, outs=[z])

    def compiler():
        with ir.Context(), ir.Location.unknown():
            kernel_func = get_kernel_func_from_module(module)
            timer_func = emit_timer_func()
            wrapped_func = emit_benchmark_wrapped_main_func(
                kernel_func,
                timer_func
            )
            # Re-parse the three functions' textual form as one module.
            main_module_with_benchmark = ir.Module.parse(
                str(timer_func) + str(wrapped_func) + str(kernel_func)
            )
            setup_passes(main_module_with_benchmark)
            c_runner_utils = _shared_lib_from_env("MLIR_C_RUNNER_UTILS")
            runner_utils = _shared_lib_from_env("MLIR_RUNNER_UTILS")

            engine = ExecutionEngine(
                main_module_with_benchmark,
                3,
                shared_libs=[c_runner_utils, runner_utils]
            )
            return engine.invoke

    def runner(engine_invoke):
        # One timer slot that the compiled `main` is expected to fill in.
        np_timers_ns = np.array([0], dtype=np.int64)
        # Buffers in the order they are passed to `main`: a zeroed
        # result-shaped buffer, the two sparse inputs, a second zeroed
        # result-shaped buffer, and finally the timer buffer.
        # The list also keeps every array referenced until the call returns;
        # presumably the descriptors point into these buffers.
        arrays = [
            np.zeros(list(out_shape), np.float64),
            create_sparse_np_tensor(list(lhs_shape), 1000),
            create_sparse_np_tensor(list(rhs_shape), 1000),
            np.zeros(list(out_shape), np.float64),
            np_timers_ns,
        ]
        compiled_program_args = [_memref_arg(array) for array in arrays]
        engine_invoke("main", *compiled_program_args)
        return int(np_timers_ns[0])

    return compiler, runner
|
|
|
|
|
|
def benchmark_np_matrix_multiplication(*, m=1000, k=1500, n=2000):
    """Benchmark for numpy matrix multiplication.

    Because it's a python benchmark, we don't have any `compiler` function
    returned. We just return the `runner` function.

    Args:
        m: Number of rows of the left operand.
        k: Number of columns of the left operand and rows of the right one.
        n: Number of columns of the right operand.

    Returns:
        `(None, runner)`.  `runner()` generates two random matrices of shapes
        `(m, k)` and `(k, n)` with entries drawn uniformly from [0, 100), and
        returns the duration of the `np.matmul` call in nanoseconds as an
        `int`.  Generating the inputs is not included in the measurement.
    """
    def runner():
        argument1 = np.random.uniform(low=0.0, high=100.0, size=(m, k))
        argument2 = np.random.uniform(low=0.0, high=100.0, size=(k, n))
        # Use the monotonic performance counter: unlike the wall clock it
        # cannot jump or run backwards while the interval is being measured.
        start_time = time.perf_counter_ns()
        np.matmul(argument1, argument2)
        return time.perf_counter_ns() - start_time

    return None, runner
|