llvm-project/mlir/benchmark/python/benchmark_sparse.py

"""This file contains benchmarks for sparse tensors. In particular, it
contains benchmarks for both mlir sparse tensor dialect and numpy so that they
can be compared against each other.
"""
import ctypes
import numpy as np
import os
import re
import time

from mlir import ir
from mlir import runtime as rt
from mlir.dialects import builtin
from mlir.dialects.linalg.opdsl import lang as dsl
from mlir.execution_engine import ExecutionEngine

from common import create_sparse_np_tensor
from common import emit_timer_func
from common import emit_benchmark_wrapped_main_func
from common import get_kernel_func_from_module
from common import setup_passes


# Matrix-multiply definition written in the linalg OpDSL. The tensor
# definitions give A the symbolic shape (M, K), B the shape (K, N) and the
# output C the shape (M, N), all sharing the element type variable `dsl.T`.
@dsl.linalg_structured_op
def matmul_dsl(
    A=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.K),
    B=dsl.TensorDef(dsl.T, dsl.S.K, dsl.S.N),
    C=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N, output=True)
):
    """Helper function for mlir sparse matrix multiplication benchmark."""
    # Accumulate A[m, k] * B[k, n] into C[m, n]. The index `k` is absent from
    # the output indexing, so the products are summed over it.
    C[dsl.D.m, dsl.D.n] += A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]


def benchmark_sparse_mlir_multiplication():
    """Benchmark for mlir sparse matrix multiplication.

    Because it's an MLIR benchmark we need to return both a `compiler`
    function and a `runner` function.

    The kernel applies `matmul_dsl` to a 1000x1500 f64 tensor and a
    1500x2000 f64 tensor, writing into a 1000x2000 f64 tensor.

    Returns:
        A `(compiler, runner)` tuple. `compiler()` builds the benchmark
        module and returns the execution engine's `invoke` method.
        `runner(engine_invoke)` invokes the compiled `main` function once
        and returns the integer found in the timer buffer afterwards.
    """
    with ir.Context(), ir.Location.unknown():
        module = ir.Module.create()
        f64 = ir.F64Type.get()
        param1_type = ir.RankedTensorType.get([1000, 1500], f64)
        param2_type = ir.RankedTensorType.get([1500, 2000], f64)
        result_type = ir.RankedTensorType.get([1000, 2000], f64)
        with ir.InsertionPoint(module.body):
            @builtin.FuncOp.from_py_func(param1_type, param2_type, result_type)
            def sparse_kernel(x, y, z):
                return matmul_dsl(x, y, outs=[z])

    def shared_lib_from_env(env_var):
        """Return the path stored in environment variable `env_var`.

        Raises:
            AssertionError: if the variable is unset or the path does not
                exist. An explicit raise is used instead of an `assert`
                statement so that the check is not stripped under
                `python -O`; the exception type and message are the same as
                the `assert` produced.
        """
        path = os.getenv(env_var, "")
        if not os.path.exists(path):
            raise AssertionError(
                f"{path} does not exist."
                f" Please pass a valid value for {env_var}"
                f" environment variable."
            )
        return path

    def compiler():
        """Build the benchmark module and return the engine's `invoke`.

        Raises:
            AssertionError: if MLIR_C_RUNNER_UTILS or MLIR_RUNNER_UTILS does
                not name an existing path.
        """
        with ir.Context(), ir.Location.unknown():
            kernel_func = get_kernel_func_from_module(module)
            timer_func = emit_timer_func()
            wrapped_func = emit_benchmark_wrapped_main_func(
                kernel_func,
                timer_func
            )
            # Re-parse the textual form of the three functions into a single
            # module that is then handed to the pass setup and the engine.
            main_module_with_benchmark = ir.Module.parse(
                str(timer_func) + str(wrapped_func) + str(kernel_func)
            )
            setup_passes(main_module_with_benchmark)
            c_runner_utils = shared_lib_from_env("MLIR_C_RUNNER_UTILS")
            runner_utils = shared_lib_from_env("MLIR_RUNNER_UTILS")

            # NOTE(review): the second positional argument (3) is presumably
            # the optimization level -- confirm against ExecutionEngine.
            engine = ExecutionEngine(
                main_module_with_benchmark,
                3,
                shared_libs=[c_runner_utils, runner_utils]
            )
            return engine.invoke

    def runner(engine_invoke):
        """Invoke the compiled `main` once and return the timer value.

        Args:
            engine_invoke: the callable returned by `compiler()`.

        Returns:
            The single int64 value held in the timer buffer after the call,
            as a Python int (presumably elapsed nanoseconds, going by the
            buffer's name -- confirm against the wrapped main function).
        """
        compiled_program_args = []
        for argument_type in [
            result_type, param1_type, param2_type, result_type
        ]:
            # Recover the dimensions from the type's textual form: strip
            # `<`, `>` and `tensor`, split on `x`, and drop the last piece
            # (presumably the element type, e.g. `f64`).
            argument_type_str = str(argument_type)
            dimensions_str = re.sub("<|>|tensor", "", argument_type_str)
            dimensions = [int(dim) for dim in dimensions_str.split("x")[:-1]]
            if argument_type == result_type:
                # Result-shaped buffers start out zero-filled.
                argument = np.zeros(dimensions, np.float64)
            else:
                # NOTE(review): the meaning of the second argument (1000) is
                # defined by `create_sparse_np_tensor` in `common` -- confirm
                # there.
                argument = create_sparse_np_tensor(dimensions, 1000)
            # Each argument is passed as a pointer to a pointer to the
            # array's ranked memref descriptor.
            compiled_program_args.append(
                ctypes.pointer(
                    ctypes.pointer(rt.get_ranked_memref_descriptor(argument))
                )
            )
        # One-element int64 buffer, passed last, that is read back after the
        # call as the benchmark's result.
        np_timers_ns = np.array([0], dtype=np.int64)
        compiled_program_args.append(
            ctypes.pointer(
                ctypes.pointer(rt.get_ranked_memref_descriptor(np_timers_ns))
            )
        )
        engine_invoke("main", *compiled_program_args)
        return int(np_timers_ns[0])

    return compiler, runner


def benchmark_np_matrix_multiplication():
    """Benchmark for numpy matrix multiplication.

    Because it's a python benchmark, we don't have any `compiler` function
    returned. We just return the `runner` function.

    Returns:
        A `(None, runner)` tuple. `runner()` generates two random matrices
        (1000x1500 and 1500x2000, uniform in [0.0, 100.0)), multiplies them
        with `np.matmul`, and returns the elapsed time of the multiplication
        alone, in nanoseconds, as an int.
    """
    def runner():
        argument1 = np.random.uniform(low=0.0, high=100.0, size=(1000, 1500))
        argument2 = np.random.uniform(low=0.0, high=100.0, size=(1500, 2000))
        # Use the monotonic high-resolution counter rather than the wall
        # clock: wall-clock time can jump when the system clock is adjusted,
        # which would corrupt (or even make negative) the measured interval.
        start_time = time.perf_counter_ns()
        np.matmul(argument1, argument2)
        return time.perf_counter_ns() - start_time

    return None, runner
[mlir] Set up boilerplate build for MLIR benchmarks This is is the start of the MLIR benchmarks. It sets up a command line tool along with conventions to define and run benchmarks using mlir's python bindings. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D115174 2022-01-28 05:35:34 +08:00			`"""This file contains benchmarks for sparse tensors. In particular, it`
			`contains benchmarks for both mlir sparse tensor dialect and numpy so that they`
			`can be compared against each other.`
			`"""`
			`import ctypes`
			`import numpy as np`
			`import os`
			`import re`
			`import time`

			`from mlir import ir`
			`from mlir import runtime as rt`
			`from mlir.dialects import builtin`
			`from mlir.dialects.linalg.opdsl import lang as dsl`
			`from mlir.execution_engine import ExecutionEngine`

			`from common import create_sparse_np_tensor`
			`from common import emit_timer_func`
			`from common import emit_benchmark_wrapped_main_func`
			`from common import get_kernel_func_from_module`
			`from common import setup_passes`


			`@dsl.linalg_structured_op`
			`def matmul_dsl(`
			`A=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.K),`
			`B=dsl.TensorDef(dsl.T, dsl.S.K, dsl.S.N),`
			`C=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N, output=True)`
			`):`
			`"""Helper function for mlir sparse matrix multiplication benchmark."""`
			`C[dsl.D.m, dsl.D.n] += A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]`


			`def benchmark_sparse_mlir_multiplication():`
			`"""Benchmark for mlir sparse matrix multiplication. Because its an`
			MLIR benchmark we need to return both a `compiler` function and a `runner`
			`function.`
			`"""`
			`with ir.Context(), ir.Location.unknown():`
			`module = ir.Module.create()`
			`f64 = ir.F64Type.get()`
			`param1_type = ir.RankedTensorType.get([1000, 1500], f64)`
			`param2_type = ir.RankedTensorType.get([1500, 2000], f64)`
			`result_type = ir.RankedTensorType.get([1000, 2000], f64)`
			`with ir.InsertionPoint(module.body):`
			`@builtin.FuncOp.from_py_func(param1_type, param2_type, result_type)`
			`def sparse_kernel(x, y, z):`
			`return matmul_dsl(x, y, outs=[z])`

			`def compiler():`
			`with ir.Context(), ir.Location.unknown():`
			`kernel_func = get_kernel_func_from_module(module)`
			`timer_func = emit_timer_func()`
			`wrapped_func = emit_benchmark_wrapped_main_func(`
			`kernel_func,`
			`timer_func`
			`)`
			`main_module_with_benchmark = ir.Module.parse(`
			`str(timer_func) + str(wrapped_func) + str(kernel_func)`
			`)`
			`setup_passes(main_module_with_benchmark)`
			`c_runner_utils = os.getenv("MLIR_C_RUNNER_UTILS", "")`
			`assert os.path.exists(c_runner_utils),\`
			`f"{c_runner_utils} does not exist." \`
			`f" Please pass a valid value for" \`
			`f" MLIR_C_RUNNER_UTILS environment variable."`
			`runner_utils = os.getenv("MLIR_RUNNER_UTILS", "")`
			`assert os.path.exists(runner_utils),\`
			`f"{runner_utils} does not exist." \`
			`f" Please pass a valid value for MLIR_RUNNER_UTILS" \`
			`f" environment variable."`

			`engine = ExecutionEngine(`
			`main_module_with_benchmark,`
			`3,`
			`shared_libs=[c_runner_utils, runner_utils]`
			`)`
			`return engine.invoke`

			`def runner(engine_invoke):`
			`compiled_program_args = []`
			`for argument_type in [`
			`result_type, param1_type, param2_type, result_type`
			`]:`
			`argument_type_str = str(argument_type)`
			`dimensions_str = re.sub("<\|>\|tensor", "", argument_type_str)`
			`dimensions = [int(dim) for dim in dimensions_str.split("x")[:-1]]`
			`if argument_type == result_type:`
			`argument = np.zeros(dimensions, np.float64)`
			`else:`
			`argument = create_sparse_np_tensor(dimensions, 1000)`
			`compiled_program_args.append(`
			`ctypes.pointer(`
			`ctypes.pointer(rt.get_ranked_memref_descriptor(argument))`
			`)`
			`)`
			`np_timers_ns = np.array([0], dtype=np.int64)`
			`compiled_program_args.append(`
			`ctypes.pointer(`
			`ctypes.pointer(rt.get_ranked_memref_descriptor(np_timers_ns))`
			`)`
			`)`
			`engine_invoke("main", *compiled_program_args)`
			`return int(np_timers_ns[0])`

			`return compiler, runner`


			`def benchmark_np_matrix_multiplication():`
			`"""Benchmark for numpy matrix multiplication. Because its a python`
			benchmark, we don't have any `compiler` function returned. We just return
			the `runner` function.
			`"""`
			`def runner():`
			`argument1 = np.random.uniform(low=0.0, high=100.0, size=(1000, 1500))`
			`argument2 = np.random.uniform(low=0.0, high=100.0, size=(1500, 2000))`
			`start_time = time.time_ns()`
			`np.matmul(argument1, argument2)`
			`return time.time_ns() - start_time`

			`return None, runner`