// File: llvm-project/mlir/test/mlir-cpu-runner/async.mlir
//
// Captured from a blame view (MLIR, 92 lines, 3.5 KiB). The view ignores the
// revisions listed in .git-blame-ignore-revs; the normal blame view and the
// raw file are available from the original page.

// RUN: mlir-opt %s -async-to-async-runtime \
// RUN: -async-runtime-ref-counting \
// RUN: -async-runtime-ref-counting-opt \
// RUN: -convert-async-to-llvm \
// RUN: -convert-linalg-to-loops \
// RUN: -convert-scf-to-std \
// RUN: -convert-linalg-to-llvm \
// RUN: -convert-memref-to-llvm \
// RUN: -convert-arith-to-llvm \
// RUN: -convert-std-to-llvm \
// Blame annotation for the `-reconcile-unrealized-casts` option that follows
// (commit dated 2021-09-09 22:06:10 +08:00):
//
//   [mlir] Factor type reconciliation out of Standard-to-LLVM conversion
//
//   Conversion to the LLVM dialect is being refactored to be more progressive
//   and is now performed as a series of independent passes converting
//   different dialects. These passes may produce `unrealized_conversion_cast`
//   operations that represent pending conversions between built-in and LLVM
//   dialect types. Historically, a more monolithic Standard-to-LLVM conversion
//   pass did not need these casts as all operations were converted in one
//   shot. Previous refactorings have led to the requirement of running the
//   Standard-to-LLVM conversion pass to clean up `unrealized_conversion_cast`s
//   even though the IR had no standard operations in it. The pass also had to
//   be run last among all to-LLVM passes, in contradiction with the partial
//   conversion logic. Additionally, the way it was set up could produce
//   invalid operations by removing casts between LLVM and built-in types even
//   when the consumer did not accept the uncasted type, or could lead to
//   cryptic conversion errors (recursive application of the rewrite pattern on
//   `unrealized_conversion_cast` as a means to indicate failure to eliminate
//   casts).
//
//   In fact, the need to eliminate A->B->A `unrealized_conversion_cast`s is
//   not specific to to-LLVM conversions and can be factored out into a
//   separate type reconciliation pass, which is achieved in this commit. While
//   the cast operation itself has a folder pattern, it is insufficient in most
//   conversion passes as the folder only applies to the second cast. Without
//   complex legality setup in the conversion target, the conversion infra will
//   either consider the cast operations valid and not fold them (a separate
//   canonicalization would be necessary to trigger the folding), or consider
//   the first cast invalid upon generation and stop with error. The pattern
//   provided by the reconciliation pass applies to the first cast operation
//   instead.
//
//   Furthermore, having a separate pass makes it clear when
//   `unrealized_conversion_cast`s could not have been eliminated since it is
//   the only reason why this pass can fail.
//
//   Reviewed By: nicolasvasilache
//   Differential Revision: https://reviews.llvm.org/D109507
// RUN: -reconcile-unrealized-casts \
// RUN: | mlir-cpu-runner \
// RUN: -e main -entry-point-result=void -O0 \
// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \
// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \
// RUN: | FileCheck %s
// Test entry point (the runner is invoked with `-e main`). Allocates a
// 4-element f32 buffer, zero-fills it, and then stores 1.0, 2.0, 3.0 and 4.0
// into elements 0..3 one at a time: first from the function body, then from
// nested async.execute regions. After each store the current thread id and the
// buffer contents are printed, and the FileCheck directives in this function
// pin the order and contents of that output. Each directive is written just
// before the operations that produce the line it matches.
func @main() {
// Index constants addressing the four elements of %A.
%i0 = arith.constant 0 : index
%i1 = arith.constant 1 : index
%i2 = arith.constant 2 : index
%i3 = arith.constant 3 : index
// f32 constants: %c0 is the fill value, %c1..%c4 are the values stored below.
%c0 = arith.constant 0.0 : f32
%c1 = arith.constant 1.0 : f32
%c2 = arith.constant 2.0 : f32
%c3 = arith.constant 3.0 : f32
%c4 = arith.constant 4.0 : f32
// Buffer under test; released by the memref.dealloc at the end of the function.
%A = memref.alloc() : memref<4xf32>
linalg.fill(%c0, %A) : f32, memref<4xf32>
// CHECK: [0, 0, 0, 0]
// %U is %A cast to an unranked memref, the argument type that
// @print_memref_f32 is declared with. Every print below goes through %U.
%U = memref.cast %A : memref<4xf32> to memref<*xf32>
call @print_memref_f32(%U): (memref<*xf32>) -> ()
// The thread id printed here is captured as MAIN and matched again by the last
// directive in this function, after %outer has been awaited.
// CHECK: Current thread id: [[MAIN:.*]]
// CHECK: [1, 0, 0, 0]
memref.store %c1, %A[%i0]: memref<4xf32>
call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
call @print_memref_f32(%U): (memref<*xf32>) -> ()
// Outer async region. Its token %outer is awaited by the function body before
// the final print. THREAD0..THREAD3 below are captured but never referenced
// again in this file, so the test does not constrain which threads run the
// async regions.
%outer = async.execute {
// CHECK: Current thread id: [[THREAD0:.*]]
// CHECK: [1, 2, 0, 0]
memref.store %c2, %A[%i1]: memref<4xf32>
call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
call @print_memref_f32(%U): (memref<*xf32>) -> ()
// No op async region to create a token for testing async dependency.
%noop = async.execute {
// CHECK: Current thread id: [[THREAD1:.*]]
call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
async.yield
}
// %inner lists %noop as a dependency, so the directives expect the output of
// %noop (THREAD1) to appear before the output of %inner (THREAD2).
%inner = async.execute [%noop] {
// CHECK: Current thread id: [[THREAD2:.*]]
// CHECK: [1, 2, 3, 0]
memref.store %c3, %A[%i2]: memref<4xf32>
call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
call @print_memref_f32(%U): (memref<*xf32>) -> ()
async.yield
}
// The outer region waits for %inner before storing the last element, which is
// why the next expected buffer state already contains 3.
async.await %inner : !async.token
// CHECK: Current thread id: [[THREAD3:.*]]
// CHECK: [1, 2, 3, 4]
memref.store %c4, %A[%i3]: memref<4xf32>
call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
call @print_memref_f32(%U): (memref<*xf32>) -> ()
async.yield
}
// Wait for the whole async chain, then print once more from the function body.
// The thread id printed now must equal the MAIN value captured earlier, and
// the buffer must hold all four stored values.
async.await %outer : !async.token
// CHECK: Current thread id: [[MAIN]]
// CHECK: [1, 2, 3, 4]
call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
call @print_memref_f32(%U): (memref<*xf32>) -> ()
memref.dealloc %A : memref<4xf32>
return
}
// External declaration (no body in this file) of the helper called by @main
// to emit the "Current thread id: ..." lines matched by the FileCheck
// directives. Takes no arguments and returns nothing.
// NOTE(review): presumably resolved from the libmlir_async_runtime library
// passed via -shared-libs; confirm against the runtime sources.
func private @mlirAsyncRuntimePrintCurrentThreadId() -> ()
// External declaration (no body in this file) of the helper called by @main
// to print the buffer contents. Takes an unranked f32 memref and carries the
// llvm.emit_c_interface attribute.
// NOTE(review): presumably resolved from one of the runner-utils libraries
// passed via -shared-libs; confirm which one provides the symbol.
func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }