forked from OSchip/llvm-project
[mlir] fix SPIR-V CPU and Vulkan runners after e2310704d8
The commit in question changed the syntax but did not update the runner tests. This also required registering the MemRef dialect for the custom parser to work correctly.
This commit is contained in:
parent
4aa510be78
commit
b868a3edad
|
@ -20,12 +20,12 @@ module attributes {
|
|||
%i4 = constant 4 : index
|
||||
%i5 = constant 5 : index
|
||||
|
||||
%x0 = load %arg0[%i0] : memref<6xi32>
|
||||
%x1 = load %arg0[%i1] : memref<6xi32>
|
||||
%x2 = load %arg0[%i2] : memref<6xi32>
|
||||
%x3 = load %arg0[%i3] : memref<6xi32>
|
||||
%x4 = load %arg0[%i4] : memref<6xi32>
|
||||
%x5 = load %arg0[%i5] : memref<6xi32>
|
||||
%x0 = memref.load %arg0[%i0] : memref<6xi32>
|
||||
%x1 = memref.load %arg0[%i1] : memref<6xi32>
|
||||
%x2 = memref.load %arg0[%i2] : memref<6xi32>
|
||||
%x3 = memref.load %arg0[%i3] : memref<6xi32>
|
||||
%x4 = memref.load %arg0[%i4] : memref<6xi32>
|
||||
%x5 = memref.load %arg0[%i5] : memref<6xi32>
|
||||
|
||||
%y0 = muli %x0, %factor : i32
|
||||
%y1 = muli %x1, %factor : i32
|
||||
|
@ -34,22 +34,22 @@ module attributes {
|
|||
%y4 = muli %x4, %factor : i32
|
||||
%y5 = muli %x5, %factor : i32
|
||||
|
||||
store %y0, %arg1[%i0] : memref<6xi32>
|
||||
store %y1, %arg1[%i1] : memref<6xi32>
|
||||
store %y2, %arg1[%i2] : memref<6xi32>
|
||||
store %y3, %arg1[%i3] : memref<6xi32>
|
||||
store %y4, %arg1[%i4] : memref<6xi32>
|
||||
store %y5, %arg1[%i5] : memref<6xi32>
|
||||
memref.store %y0, %arg1[%i0] : memref<6xi32>
|
||||
memref.store %y1, %arg1[%i1] : memref<6xi32>
|
||||
memref.store %y2, %arg1[%i2] : memref<6xi32>
|
||||
memref.store %y3, %arg1[%i3] : memref<6xi32>
|
||||
memref.store %y4, %arg1[%i4] : memref<6xi32>
|
||||
memref.store %y5, %arg1[%i5] : memref<6xi32>
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
func @main() {
|
||||
%input = alloc() : memref<6xi32>
|
||||
%output = alloc() : memref<6xi32>
|
||||
%input = memref.alloc() : memref<6xi32>
|
||||
%output = memref.alloc() : memref<6xi32>
|
||||
%four = constant 4 : i32
|
||||
%zero = constant 0 : i32
|
||||
%input_casted = memref_cast %input : memref<6xi32> to memref<?xi32>
|
||||
%output_casted = memref_cast %output : memref<6xi32> to memref<?xi32>
|
||||
%input_casted = memref.cast %input : memref<6xi32> to memref<?xi32>
|
||||
%output_casted = memref.cast %output : memref<6xi32> to memref<?xi32>
|
||||
call @fillI32Buffer(%input_casted, %four) : (memref<?xi32>, i32) -> ()
|
||||
call @fillI32Buffer(%output_casted, %zero) : (memref<?xi32>, i32) -> ()
|
||||
|
||||
|
@ -57,7 +57,7 @@ module attributes {
|
|||
gpu.launch_func @kernels::@double
|
||||
blocks in (%one, %one, %one) threads in (%one, %one, %one)
|
||||
args(%input : memref<6xi32>, %output : memref<6xi32>)
|
||||
%result = memref_cast %output : memref<6xi32> to memref<*xi32>
|
||||
%result = memref.cast %output : memref<6xi32> to memref<*xi32>
|
||||
call @print_memref_i32(%result) : (memref<*xi32>) -> ()
|
||||
return
|
||||
}
|
||||
|
|
|
@ -15,33 +15,33 @@ module attributes {
|
|||
%i1 = constant 1 : index
|
||||
%i2 = constant 2 : index
|
||||
|
||||
%x = load %arg0[%i0] : memref<3xf32>
|
||||
%y = load %arg1[%i0, %i0] : memref<3x3xf32>
|
||||
%x = memref.load %arg0[%i0] : memref<3xf32>
|
||||
%y = memref.load %arg1[%i0, %i0] : memref<3x3xf32>
|
||||
%sum = addf %x, %y : f32
|
||||
|
||||
store %sum, %arg2[%i0, %i0, %i0] : memref<3x3x3xf32>
|
||||
store %sum, %arg2[%i0, %i1, %i0] : memref<3x3x3xf32>
|
||||
store %sum, %arg2[%i0, %i2, %i0] : memref<3x3x3xf32>
|
||||
store %sum, %arg2[%i1, %i0, %i1] : memref<3x3x3xf32>
|
||||
store %sum, %arg2[%i1, %i1, %i1] : memref<3x3x3xf32>
|
||||
store %sum, %arg2[%i1, %i2, %i1] : memref<3x3x3xf32>
|
||||
store %sum, %arg2[%i2, %i0, %i2] : memref<3x3x3xf32>
|
||||
store %sum, %arg2[%i2, %i1, %i2] : memref<3x3x3xf32>
|
||||
store %sum, %arg2[%i2, %i2, %i2] : memref<3x3x3xf32>
|
||||
memref.store %sum, %arg2[%i0, %i0, %i0] : memref<3x3x3xf32>
|
||||
memref.store %sum, %arg2[%i0, %i1, %i0] : memref<3x3x3xf32>
|
||||
memref.store %sum, %arg2[%i0, %i2, %i0] : memref<3x3x3xf32>
|
||||
memref.store %sum, %arg2[%i1, %i0, %i1] : memref<3x3x3xf32>
|
||||
memref.store %sum, %arg2[%i1, %i1, %i1] : memref<3x3x3xf32>
|
||||
memref.store %sum, %arg2[%i1, %i2, %i1] : memref<3x3x3xf32>
|
||||
memref.store %sum, %arg2[%i2, %i0, %i2] : memref<3x3x3xf32>
|
||||
memref.store %sum, %arg2[%i2, %i1, %i2] : memref<3x3x3xf32>
|
||||
memref.store %sum, %arg2[%i2, %i2, %i2] : memref<3x3x3xf32>
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
|
||||
func @main() {
|
||||
%input1 = alloc() : memref<3xf32>
|
||||
%input2 = alloc() : memref<3x3xf32>
|
||||
%output = alloc() : memref<3x3x3xf32>
|
||||
%input1 = memref.alloc() : memref<3xf32>
|
||||
%input2 = memref.alloc() : memref<3x3xf32>
|
||||
%output = memref.alloc() : memref<3x3x3xf32>
|
||||
%0 = constant 0.0 : f32
|
||||
%3 = constant 3.4 : f32
|
||||
%4 = constant 4.3 : f32
|
||||
%input1_casted = memref_cast %input1 : memref<3xf32> to memref<?xf32>
|
||||
%input2_casted = memref_cast %input2 : memref<3x3xf32> to memref<?x?xf32>
|
||||
%output_casted = memref_cast %output : memref<3x3x3xf32> to memref<?x?x?xf32>
|
||||
%input1_casted = memref.cast %input1 : memref<3xf32> to memref<?xf32>
|
||||
%input2_casted = memref.cast %input2 : memref<3x3xf32> to memref<?x?xf32>
|
||||
%output_casted = memref.cast %output : memref<3x3x3xf32> to memref<?x?x?xf32>
|
||||
call @fillF32Buffer1D(%input1_casted, %3) : (memref<?xf32>, f32) -> ()
|
||||
call @fillF32Buffer2D(%input2_casted, %4) : (memref<?x?xf32>, f32) -> ()
|
||||
call @fillF32Buffer3D(%output_casted, %0) : (memref<?x?x?xf32>, f32) -> ()
|
||||
|
@ -50,7 +50,7 @@ module attributes {
|
|||
gpu.launch_func @kernels::@sum
|
||||
blocks in (%one, %one, %one) threads in (%one, %one, %one)
|
||||
args(%input1 : memref<3xf32>, %input2 : memref<3x3xf32>, %output : memref<3x3x3xf32>)
|
||||
%result = memref_cast %output : memref<3x3x3xf32> to memref<*xf32>
|
||||
%result = memref.cast %output : memref<3x3x3xf32> to memref<*xf32>
|
||||
call @print_memref_f32(%result) : (memref<*xf32>) -> ()
|
||||
return
|
||||
}
|
||||
|
|
|
@ -10,27 +10,27 @@ module attributes {
|
|||
gpu.func @kernel_add(%arg0 : memref<8xf32>, %arg1 : memref<8xf32>, %arg2 : memref<8xf32>)
|
||||
kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32> }} {
|
||||
%0 = "gpu.block_id"() {dimension = "x"} : () -> index
|
||||
%1 = load %arg0[%0] : memref<8xf32>
|
||||
%2 = load %arg1[%0] : memref<8xf32>
|
||||
%1 = memref.load %arg0[%0] : memref<8xf32>
|
||||
%2 = memref.load %arg1[%0] : memref<8xf32>
|
||||
%3 = addf %1, %2 : f32
|
||||
store %3, %arg2[%0] : memref<8xf32>
|
||||
memref.store %3, %arg2[%0] : memref<8xf32>
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
|
||||
func @main() {
|
||||
%arg0 = alloc() : memref<8xf32>
|
||||
%arg1 = alloc() : memref<8xf32>
|
||||
%arg2 = alloc() : memref<8xf32>
|
||||
%arg0 = memref.alloc() : memref<8xf32>
|
||||
%arg1 = memref.alloc() : memref<8xf32>
|
||||
%arg2 = memref.alloc() : memref<8xf32>
|
||||
%0 = constant 0 : i32
|
||||
%1 = constant 1 : i32
|
||||
%2 = constant 2 : i32
|
||||
%value0 = constant 0.0 : f32
|
||||
%value1 = constant 1.1 : f32
|
||||
%value2 = constant 2.2 : f32
|
||||
%arg3 = memref_cast %arg0 : memref<8xf32> to memref<?xf32>
|
||||
%arg4 = memref_cast %arg1 : memref<8xf32> to memref<?xf32>
|
||||
%arg5 = memref_cast %arg2 : memref<8xf32> to memref<?xf32>
|
||||
%arg3 = memref.cast %arg0 : memref<8xf32> to memref<?xf32>
|
||||
%arg4 = memref.cast %arg1 : memref<8xf32> to memref<?xf32>
|
||||
%arg5 = memref.cast %arg2 : memref<8xf32> to memref<?xf32>
|
||||
call @fillResource1DFloat(%arg3, %value1) : (memref<?xf32>, f32) -> ()
|
||||
call @fillResource1DFloat(%arg4, %value2) : (memref<?xf32>, f32) -> ()
|
||||
call @fillResource1DFloat(%arg5, %value0) : (memref<?xf32>, f32) -> ()
|
||||
|
@ -40,7 +40,7 @@ module attributes {
|
|||
gpu.launch_func @kernels::@kernel_add
|
||||
blocks in (%cst8, %cst1, %cst1) threads in (%cst1, %cst1, %cst1)
|
||||
args(%arg0 : memref<8xf32>, %arg1 : memref<8xf32>, %arg2 : memref<8xf32>)
|
||||
%arg6 = memref_cast %arg5 : memref<?xf32> to memref<*xf32>
|
||||
%arg6 = memref.cast %arg5 : memref<?xf32> to memref<*xf32>
|
||||
call @print_memref_f32(%arg6) : (memref<*xf32>) -> ()
|
||||
return
|
||||
}
|
||||
|
|
|
@ -12,24 +12,24 @@ module attributes {
|
|||
%x = "gpu.block_id"() {dimension = "x"} : () -> index
|
||||
%y = "gpu.block_id"() {dimension = "y"} : () -> index
|
||||
%z = "gpu.block_id"() {dimension = "z"} : () -> index
|
||||
%0 = load %arg0[%x] : memref<8xi32>
|
||||
%1 = load %arg1[%y, %x] : memref<8x8xi32>
|
||||
%0 = memref.load %arg0[%x] : memref<8xi32>
|
||||
%1 = memref.load %arg1[%y, %x] : memref<8x8xi32>
|
||||
%2 = addi %0, %1 : i32
|
||||
store %2, %arg2[%z, %y, %x] : memref<8x8x8xi32>
|
||||
memref.store %2, %arg2[%z, %y, %x] : memref<8x8x8xi32>
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
|
||||
func @main() {
|
||||
%arg0 = alloc() : memref<8xi32>
|
||||
%arg1 = alloc() : memref<8x8xi32>
|
||||
%arg2 = alloc() : memref<8x8x8xi32>
|
||||
%arg0 = memref.alloc() : memref<8xi32>
|
||||
%arg1 = memref.alloc() : memref<8x8xi32>
|
||||
%arg2 = memref.alloc() : memref<8x8x8xi32>
|
||||
%value0 = constant 0 : i32
|
||||
%value1 = constant 1 : i32
|
||||
%value2 = constant 2 : i32
|
||||
%arg3 = memref_cast %arg0 : memref<8xi32> to memref<?xi32>
|
||||
%arg4 = memref_cast %arg1 : memref<8x8xi32> to memref<?x?xi32>
|
||||
%arg5 = memref_cast %arg2 : memref<8x8x8xi32> to memref<?x?x?xi32>
|
||||
%arg3 = memref.cast %arg0 : memref<8xi32> to memref<?xi32>
|
||||
%arg4 = memref.cast %arg1 : memref<8x8xi32> to memref<?x?xi32>
|
||||
%arg5 = memref.cast %arg2 : memref<8x8x8xi32> to memref<?x?x?xi32>
|
||||
call @fillResource1DInt(%arg3, %value1) : (memref<?xi32>, i32) -> ()
|
||||
call @fillResource2DInt(%arg4, %value2) : (memref<?x?xi32>, i32) -> ()
|
||||
call @fillResource3DInt(%arg5, %value0) : (memref<?x?x?xi32>, i32) -> ()
|
||||
|
@ -39,7 +39,7 @@ module attributes {
|
|||
gpu.launch_func @kernels::@kernel_addi
|
||||
blocks in (%cst8, %cst8, %cst8) threads in (%cst1, %cst1, %cst1)
|
||||
args(%arg0 : memref<8xi32>, %arg1 : memref<8x8xi32>, %arg2 : memref<8x8x8xi32>)
|
||||
%arg6 = memref_cast %arg5 : memref<?x?x?xi32> to memref<*xi32>
|
||||
%arg6 = memref.cast %arg5 : memref<?x?x?xi32> to memref<*xi32>
|
||||
call @print_memref_i32(%arg6) : (memref<*xi32>) -> ()
|
||||
return
|
||||
}
|
||||
|
|
|
@ -12,25 +12,25 @@ module attributes {
|
|||
%x = "gpu.block_id"() {dimension = "x"} : () -> index
|
||||
%y = "gpu.block_id"() {dimension = "y"} : () -> index
|
||||
%z = "gpu.block_id"() {dimension = "z"} : () -> index
|
||||
%0 = load %arg0[%x] : memref<8xi8>
|
||||
%1 = load %arg1[%y, %x] : memref<8x8xi8>
|
||||
%0 = memref.load %arg0[%x] : memref<8xi8>
|
||||
%1 = memref.load %arg1[%y, %x] : memref<8x8xi8>
|
||||
%2 = addi %0, %1 : i8
|
||||
%3 = zexti %2 : i8 to i32
|
||||
store %3, %arg2[%z, %y, %x] : memref<8x8x8xi32>
|
||||
memref.store %3, %arg2[%z, %y, %x] : memref<8x8x8xi32>
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
|
||||
func @main() {
|
||||
%arg0 = alloc() : memref<8xi8>
|
||||
%arg1 = alloc() : memref<8x8xi8>
|
||||
%arg2 = alloc() : memref<8x8x8xi32>
|
||||
%arg0 = memref.alloc() : memref<8xi8>
|
||||
%arg1 = memref.alloc() : memref<8x8xi8>
|
||||
%arg2 = memref.alloc() : memref<8x8x8xi32>
|
||||
%value0 = constant 0 : i32
|
||||
%value1 = constant 1 : i8
|
||||
%value2 = constant 2 : i8
|
||||
%arg3 = memref_cast %arg0 : memref<8xi8> to memref<?xi8>
|
||||
%arg4 = memref_cast %arg1 : memref<8x8xi8> to memref<?x?xi8>
|
||||
%arg5 = memref_cast %arg2 : memref<8x8x8xi32> to memref<?x?x?xi32>
|
||||
%arg3 = memref.cast %arg0 : memref<8xi8> to memref<?xi8>
|
||||
%arg4 = memref.cast %arg1 : memref<8x8xi8> to memref<?x?xi8>
|
||||
%arg5 = memref.cast %arg2 : memref<8x8x8xi32> to memref<?x?x?xi32>
|
||||
call @fillResource1DInt8(%arg3, %value1) : (memref<?xi8>, i8) -> ()
|
||||
call @fillResource2DInt8(%arg4, %value2) : (memref<?x?xi8>, i8) -> ()
|
||||
call @fillResource3DInt(%arg5, %value0) : (memref<?x?x?xi32>, i32) -> ()
|
||||
|
@ -40,7 +40,7 @@ module attributes {
|
|||
gpu.launch_func @kernels::@kernel_addi
|
||||
blocks in (%cst8, %cst8, %cst8) threads in (%cst1, %cst1, %cst1)
|
||||
args(%arg0 : memref<8xi8>, %arg1 : memref<8x8xi8>, %arg2 : memref<8x8x8xi32>)
|
||||
%arg6 = memref_cast %arg5 : memref<?x?x?xi32> to memref<*xi32>
|
||||
%arg6 = memref.cast %arg5 : memref<?x?x?xi32> to memref<*xi32>
|
||||
call @print_memref_i32(%arg6) : (memref<*xi32>) -> ()
|
||||
return
|
||||
}
|
||||
|
|
|
@ -11,27 +11,27 @@ module attributes {
|
|||
kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32> }} {
|
||||
%x = "gpu.block_id"() {dimension = "x"} : () -> index
|
||||
%y = "gpu.block_id"() {dimension = "y"} : () -> index
|
||||
%1 = load %arg0[%x, %y] : memref<4x4xf32>
|
||||
%2 = load %arg1[%x, %y] : memref<4x4xf32>
|
||||
%1 = memref.load %arg0[%x, %y] : memref<4x4xf32>
|
||||
%2 = memref.load %arg1[%x, %y] : memref<4x4xf32>
|
||||
%3 = mulf %1, %2 : f32
|
||||
store %3, %arg2[%x, %y] : memref<4x4xf32>
|
||||
memref.store %3, %arg2[%x, %y] : memref<4x4xf32>
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
|
||||
func @main() {
|
||||
%arg0 = alloc() : memref<4x4xf32>
|
||||
%arg1 = alloc() : memref<4x4xf32>
|
||||
%arg2 = alloc() : memref<4x4xf32>
|
||||
%arg0 = memref.alloc() : memref<4x4xf32>
|
||||
%arg1 = memref.alloc() : memref<4x4xf32>
|
||||
%arg2 = memref.alloc() : memref<4x4xf32>
|
||||
%0 = constant 0 : i32
|
||||
%1 = constant 1 : i32
|
||||
%2 = constant 2 : i32
|
||||
%value0 = constant 0.0 : f32
|
||||
%value1 = constant 2.0 : f32
|
||||
%value2 = constant 3.0 : f32
|
||||
%arg3 = memref_cast %arg0 : memref<4x4xf32> to memref<?x?xf32>
|
||||
%arg4 = memref_cast %arg1 : memref<4x4xf32> to memref<?x?xf32>
|
||||
%arg5 = memref_cast %arg2 : memref<4x4xf32> to memref<?x?xf32>
|
||||
%arg3 = memref.cast %arg0 : memref<4x4xf32> to memref<?x?xf32>
|
||||
%arg4 = memref.cast %arg1 : memref<4x4xf32> to memref<?x?xf32>
|
||||
%arg5 = memref.cast %arg2 : memref<4x4xf32> to memref<?x?xf32>
|
||||
call @fillResource2DFloat(%arg3, %value1) : (memref<?x?xf32>, f32) -> ()
|
||||
call @fillResource2DFloat(%arg4, %value2) : (memref<?x?xf32>, f32) -> ()
|
||||
call @fillResource2DFloat(%arg5, %value0) : (memref<?x?xf32>, f32) -> ()
|
||||
|
@ -41,7 +41,7 @@ module attributes {
|
|||
gpu.launch_func @kernels::@kernel_mul
|
||||
blocks in (%cst4, %cst4, %cst1) threads in(%cst1, %cst1, %cst1)
|
||||
args(%arg0 : memref<4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<4x4xf32>)
|
||||
%arg6 = memref_cast %arg5 : memref<?x?xf32> to memref<*xf32>
|
||||
%arg6 = memref.cast %arg5 : memref<?x?xf32> to memref<*xf32>
|
||||
call @print_memref_f32(%arg6) : (memref<*xf32>) -> ()
|
||||
return
|
||||
}
|
||||
|
|
|
@ -12,27 +12,27 @@ module attributes {
|
|||
%x = "gpu.block_id"() {dimension = "x"} : () -> index
|
||||
%y = "gpu.block_id"() {dimension = "y"} : () -> index
|
||||
%z = "gpu.block_id"() {dimension = "z"} : () -> index
|
||||
%1 = load %arg0[%x, %y, %z] : memref<8x4x4xf32>
|
||||
%2 = load %arg1[%y, %z] : memref<4x4xf32>
|
||||
%1 = memref.load %arg0[%x, %y, %z] : memref<8x4x4xf32>
|
||||
%2 = memref.load %arg1[%y, %z] : memref<4x4xf32>
|
||||
%3 = subf %1, %2 : f32
|
||||
store %3, %arg2[%x, %y, %z] : memref<8x4x4xf32>
|
||||
memref.store %3, %arg2[%x, %y, %z] : memref<8x4x4xf32>
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
|
||||
func @main() {
|
||||
%arg0 = alloc() : memref<8x4x4xf32>
|
||||
%arg1 = alloc() : memref<4x4xf32>
|
||||
%arg2 = alloc() : memref<8x4x4xf32>
|
||||
%arg0 = memref.alloc() : memref<8x4x4xf32>
|
||||
%arg1 = memref.alloc() : memref<4x4xf32>
|
||||
%arg2 = memref.alloc() : memref<8x4x4xf32>
|
||||
%0 = constant 0 : i32
|
||||
%1 = constant 1 : i32
|
||||
%2 = constant 2 : i32
|
||||
%value0 = constant 0.0 : f32
|
||||
%value1 = constant 3.3 : f32
|
||||
%value2 = constant 1.1 : f32
|
||||
%arg3 = memref_cast %arg0 : memref<8x4x4xf32> to memref<?x?x?xf32>
|
||||
%arg4 = memref_cast %arg1 : memref<4x4xf32> to memref<?x?xf32>
|
||||
%arg5 = memref_cast %arg2 : memref<8x4x4xf32> to memref<?x?x?xf32>
|
||||
%arg3 = memref.cast %arg0 : memref<8x4x4xf32> to memref<?x?x?xf32>
|
||||
%arg4 = memref.cast %arg1 : memref<4x4xf32> to memref<?x?xf32>
|
||||
%arg5 = memref.cast %arg2 : memref<8x4x4xf32> to memref<?x?x?xf32>
|
||||
call @fillResource3DFloat(%arg3, %value1) : (memref<?x?x?xf32>, f32) -> ()
|
||||
call @fillResource2DFloat(%arg4, %value2) : (memref<?x?xf32>, f32) -> ()
|
||||
call @fillResource3DFloat(%arg5, %value0) : (memref<?x?x?xf32>, f32) -> ()
|
||||
|
@ -43,7 +43,7 @@ module attributes {
|
|||
gpu.launch_func @kernels::@kernel_sub
|
||||
blocks in (%cst8, %cst4, %cst4) threads in (%cst1, %cst1, %cst1)
|
||||
args(%arg0 : memref<8x4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<8x4x4xf32>)
|
||||
%arg6 = memref_cast %arg5 : memref<?x?x?xf32> to memref<*xf32>
|
||||
%arg6 = memref.cast %arg5 : memref<?x?x?xf32> to memref<*xf32>
|
||||
call @print_memref_f32(%arg6) : (memref<*xf32>) -> ()
|
||||
return
|
||||
}
|
||||
|
|
|
@ -17,27 +17,27 @@ module attributes {
|
|||
%cst = constant 128 : index
|
||||
%b = muli %bid, %cst : index
|
||||
%0 = addi %b, %tid : index
|
||||
%1 = load %arg0[%0] : memref<16384xf32>
|
||||
%2 = load %arg1[%0] : memref<16384xf32>
|
||||
%1 = memref.load %arg0[%0] : memref<16384xf32>
|
||||
%2 = memref.load %arg1[%0] : memref<16384xf32>
|
||||
%3 = addf %1, %2 : f32
|
||||
store %3, %arg2[%0] : memref<16384xf32>
|
||||
memref.store %3, %arg2[%0] : memref<16384xf32>
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
|
||||
func @main() {
|
||||
%arg0 = alloc() : memref<16384xf32>
|
||||
%arg1 = alloc() : memref<16384xf32>
|
||||
%arg2 = alloc() : memref<16384xf32>
|
||||
%arg0 = memref.alloc() : memref<16384xf32>
|
||||
%arg1 = memref.alloc() : memref<16384xf32>
|
||||
%arg2 = memref.alloc() : memref<16384xf32>
|
||||
%0 = constant 0 : i32
|
||||
%1 = constant 1 : i32
|
||||
%2 = constant 2 : i32
|
||||
%value0 = constant 0.0 : f32
|
||||
%value1 = constant 1.1 : f32
|
||||
%value2 = constant 2.2 : f32
|
||||
%arg3 = memref_cast %arg0 : memref<16384xf32> to memref<?xf32>
|
||||
%arg4 = memref_cast %arg1 : memref<16384xf32> to memref<?xf32>
|
||||
%arg5 = memref_cast %arg2 : memref<16384xf32> to memref<?xf32>
|
||||
%arg3 = memref.cast %arg0 : memref<16384xf32> to memref<?xf32>
|
||||
%arg4 = memref.cast %arg1 : memref<16384xf32> to memref<?xf32>
|
||||
%arg5 = memref.cast %arg2 : memref<16384xf32> to memref<?xf32>
|
||||
call @fillResource1DFloat(%arg3, %value1) : (memref<?xf32>, f32) -> ()
|
||||
call @fillResource1DFloat(%arg4, %value2) : (memref<?xf32>, f32) -> ()
|
||||
call @fillResource1DFloat(%arg5, %value0) : (memref<?xf32>, f32) -> ()
|
||||
|
@ -47,7 +47,7 @@ module attributes {
|
|||
gpu.launch_func @kernels::@kernel_add
|
||||
blocks in (%cst128, %cst1, %cst1) threads in (%cst128, %cst1, %cst1)
|
||||
args(%arg0 : memref<16384xf32>, %arg1 : memref<16384xf32>, %arg2 : memref<16384xf32>)
|
||||
%arg6 = memref_cast %arg5 : memref<?xf32> to memref<*xf32>
|
||||
%arg6 = memref.cast %arg5 : memref<?xf32> to memref<*xf32>
|
||||
return
|
||||
}
|
||||
func private @fillResource1DFloat(%0 : memref<?xf32>, %1 : f32)
|
||||
|
|
|
@ -23,6 +23,7 @@ if (MLIR_SPIRV_CPU_RUNNER_ENABLED)
|
|||
MLIRJitRunner
|
||||
MLIRLLVMIR
|
||||
MLIRLLVMToLLVMIRTranslation
|
||||
MLIRMemRef
|
||||
MLIRParser
|
||||
MLIRSPIRV
|
||||
MLIRStandard
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "mlir/Dialect/GPU/GPUDialect.h"
|
||||
#include "mlir/Dialect/GPU/Passes.h"
|
||||
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
|
||||
#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
|
||||
#include "mlir/Dialect/SPIRV/Transforms/Passes.h"
|
||||
|
@ -96,7 +97,8 @@ int main(int argc, char **argv) {
|
|||
|
||||
mlir::DialectRegistry registry;
|
||||
registry.insert<mlir::LLVM::LLVMDialect, mlir::gpu::GPUDialect,
|
||||
mlir::spirv::SPIRVDialect, mlir::StandardOpsDialect>();
|
||||
mlir::spirv::SPIRVDialect, mlir::StandardOpsDialect,
|
||||
mlir::memref::MemRefDialect>();
|
||||
mlir::registerLLVMDialectTranslation(registry);
|
||||
|
||||
return mlir::JitRunnerMain(argc, argv, registry, jitRunnerConfig);
|
||||
|
|
|
@ -63,6 +63,7 @@ if (MLIR_VULKAN_RUNNER_ENABLED)
|
|||
MLIRJitRunner
|
||||
MLIRLLVMIR
|
||||
MLIRLLVMToLLVMIRTranslation
|
||||
MLIRMemRef
|
||||
MLIRParser
|
||||
MLIRSPIRV
|
||||
MLIRSPIRVTransforms
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "mlir/Dialect/GPU/GPUDialect.h"
|
||||
#include "mlir/Dialect/GPU/Passes.h"
|
||||
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
|
||||
#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
|
||||
#include "mlir/Dialect/SPIRV/Transforms/Passes.h"
|
||||
|
@ -68,7 +69,8 @@ int main(int argc, char **argv) {
|
|||
|
||||
mlir::DialectRegistry registry;
|
||||
registry.insert<mlir::LLVM::LLVMDialect, mlir::gpu::GPUDialect,
|
||||
mlir::spirv::SPIRVDialect, mlir::StandardOpsDialect>();
|
||||
mlir::spirv::SPIRVDialect, mlir::StandardOpsDialect,
|
||||
mlir::memref::MemRefDialect>();
|
||||
mlir::registerLLVMDialectTranslation(registry);
|
||||
|
||||
return mlir::JitRunnerMain(argc, argv, registry, jitRunnerConfig);
|
||||
|
|
Loading…
Reference in New Issue