From 61ca9ff0b64b7c244982a8a92fcd99a04d980e3c Mon Sep 17 00:00:00 2001 From: louisfd Date: Fri, 28 Jun 2024 11:08:56 -0400 Subject: [PATCH] unroll transpose --- .../src/kernel/matmul/tiling2d_cube/load_shared_memory.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/burn-jit/src/kernel/matmul/tiling2d_cube/load_shared_memory.rs b/crates/burn-jit/src/kernel/matmul/tiling2d_cube/load_shared_memory.rs index 7ae3e49fa..2fb2dacbc 100644 --- a/crates/burn-jit/src/kernel/matmul/tiling2d_cube/load_shared_memory.rs +++ b/crates/burn-jit/src/kernel/matmul/tiling2d_cube/load_shared_memory.rs @@ -193,7 +193,9 @@ fn write_tile_transposed( } else { for i in range(0u32, Comptime::get(tile_size), unroll) { let mut transposed = F::vectorized(0., Comptime::get(tile_size)); - for j in range(0u32, Comptime::get(tile_size), unroll) { + + // Unrolling this one makes the difference + for j in range(0u32, Comptime::get(tile_size), Comptime::new(true)) { transposed[j] = tile[j][i]; }