// RUN: mlir-opt -allow-unregistered-dialect %s -test-loop-fusion -test-loop-fusion-dependence-check -split-input-file -verify-diagnostics | FileCheck %s

// -----

// Three sibling loop nests where loop nest 1 sits between loop nests 0 and 2
// in the dependence chain, so fusing 0 with 2 is reported as blocked in both
// directions.
// CHECK-LABEL: func @cannot_fuse_would_create_cycle() {
func.func @cannot_fuse_would_create_cycle() {
  %a = memref.alloc() : memref<10xf32>
  %b = memref.alloc() : memref<10xf32>
  %c = memref.alloc() : memref<10xf32>

  %cf7 = arith.constant 7.0 : f32

  // Set up the following dependences:
  // 1) loop0 -> loop1 on memref '%a' (loop0 loads, loop1 stores)
  // 2) loop0 -> loop2 on memref '%b' (loop0 stores, loop2 loads)
  // 3) loop1 -> loop2 on memref '%c' (loop1 loads, loop2 stores)

  // Fusing loop nest '%i0' and loop nest '%i2' would create a cycle.
  affine.for %i0 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}}
    %v0 = affine.load %a[%i0] : memref<10xf32>
    affine.store %cf7, %b[%i0] : memref<10xf32>
  }
  affine.for %i1 = 0 to 10 {
    affine.store %cf7, %a[%i1] : memref<10xf32>
    %v1 = affine.load %c[%i1] : memref<10xf32>
  }
  affine.for %i2 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}}
    %v2 = affine.load %b[%i2] : memref<10xf32>
    affine.store %cf7, %c[%i2] : memref<10xf32>
  }
  return
}

// -----

// Three sibling loop nests where loop nests 0 and 1 only share loads of '%a';
// no remark is attached to any loop in this function.
// CHECK-LABEL: func @can_fuse_rar_dependence() {
func.func @can_fuse_rar_dependence() {
  %a = memref.alloc() : memref<10xf32>
  %b = memref.alloc() : memref<10xf32>
  %c = memref.alloc() : memref<10xf32>

  %cf7 = arith.constant 7.0 : f32

  // Set up the following dependences:
  // 1) loop0 -> loop1 on memref '%a' is read-after-read only (both loops
  //    just load from '%a').
  // 2) loop0 -> loop2 on memref '%b'
  // 3) loop1 -> loop2 on memref '%c'

  // Should fuse: no fusion preventing remarks should be emitted for this test.
  affine.for %i0 = 0 to 10 {
    %v0 = affine.load %a[%i0] : memref<10xf32>
    affine.store %cf7, %b[%i0] : memref<10xf32>
  }
  affine.for %i1 = 0 to 10 {
    %v1 = affine.load %a[%i1] : memref<10xf32>
    %v2 = affine.load %c[%i1] : memref<10xf32>
  }
  affine.for %i2 = 0 to 10 {
    %v3 = affine.load %b[%i2] : memref<10xf32>
    affine.store %cf7, %c[%i2] : memref<10xf32>
  }
  return
}

// -----

// Three sibling loop nests where loop nest 1 stores to '%d', a memref that
// loop nest 0 never touches; no remark is attached to any loop in this
// function.
// CHECK-LABEL: func @can_fuse_different_memrefs() {
func.func @can_fuse_different_memrefs() {
  %a = memref.alloc() : memref<10xf32>
  %b = memref.alloc() : memref<10xf32>
  %c = memref.alloc() : memref<10xf32>
  %d = memref.alloc() : memref<10xf32>

  %cf7 = arith.constant 7.0 : f32

  // Set up the following dependences:
  // 1) none between loop0 and loop1: loop0 accesses '%a' and '%b', while
  //    loop1 stores to the unrelated memref '%d' and loads '%c'.
  // 2) loop0 -> loop2 on memref '%b'
  // 3) loop1 -> loop2 on memref '%c'

  // Should fuse: no fusion preventing remarks should be emitted for this test.
  affine.for %i0 = 0 to 10 {
    %v0 = affine.load %a[%i0] : memref<10xf32>
    affine.store %cf7, %b[%i0] : memref<10xf32>
  }
  affine.for %i1 = 0 to 10 {
    affine.store %cf7, %d[%i1] : memref<10xf32>
    %v1 = affine.load %c[%i1] : memref<10xf32>
  }
  affine.for %i2 = 0 to 10 {
    %v2 = affine.load %b[%i2] : memref<10xf32>
    affine.store %cf7, %c[%i2] : memref<10xf32>
  }
  return
}

// -----

// Two loop nests that load from '%0', separated by a top-level store to the
// same memref; fusion is reported as blocked in both directions.
// CHECK-LABEL: func @should_not_fuse_across_intermediate_store() {
func.func @should_not_fuse_across_intermediate_store() {
  %0 = memref.alloc() : memref<10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
    %v0 = affine.load %0[%i0] : memref<10xf32>
    "op0"(%v0) : (f32) -> ()
  }

  // Should not fuse loop nests '%i0' and '%i1' across top-level store.
  affine.store %cf7, %0[%c0] : memref<10xf32>

  affine.for %i1 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
    %v1 = affine.load %0[%i1] : memref<10xf32>
    "op1"(%v1) : (f32) -> ()
  }
  return
}

// -----

// Two loop nests that store to '%0', separated by a top-level load from the
// same memref; fusion is reported as blocked in both directions.
// CHECK-LABEL: func @should_not_fuse_across_intermediate_load() {
func.func @should_not_fuse_across_intermediate_load() {
  %0 = memref.alloc() : memref<10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
    affine.store %cf7, %0[%i0] : memref<10xf32>
  }

  // Should not fuse loop nests '%i0' and '%i1' across top-level load.
  %v0 = affine.load %0[%c0] : memref<10xf32>
  "op0"(%v0) : (f32) -> ()

  affine.for %i1 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
    affine.store %cf7, %0[%i1] : memref<10xf32>
  }

  return
}

// -----

// Two loop nests separated by a top-level load from '%1' (written by the
// first nest) and by the definition of '%c2' (used by the second nest);
// fusion is reported as blocked in both directions.
// CHECK-LABEL: func @should_not_fuse_across_ssa_value_def() {
func.func @should_not_fuse_across_ssa_value_def() {
  %0 = memref.alloc() : memref<10xf32>
  %1 = memref.alloc() : memref<10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
    %v0 = affine.load %0[%i0] : memref<10xf32>
    affine.store %v0, %1[%i0] : memref<10xf32>
  }

  // Loop nest '%i0' cannot be fused past load from '%1' due to RAW dependence.
  %v1 = affine.load %1[%c0] : memref<10xf32>
  "op0"(%v1) : (f32) -> ()

  // Loop nest '%i1' cannot be fused past SSA value def '%c2' which it uses.
  %c2 = arith.constant 2 : index

  affine.for %i1 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
    affine.store %cf7, %0[%c2] : memref<10xf32>
  }

  return
}

// -----

// Three sibling loop nests on '%0': the first and last each store then load,
// and the middle one only loads. Fusing loop nest 0 with loop nest 2 is
// reported as blocked in both directions.
// CHECK-LABEL: func @should_not_fuse_store_before_load() {
func.func @should_not_fuse_store_before_load() {
  %0 = memref.alloc() : memref<10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}}
    affine.store %cf7, %0[%i0] : memref<10xf32>
    %v0 = affine.load %0[%i0] : memref<10xf32>
  }

  // Intermediate loop nest that reads '%0' between the two store/load nests.
  affine.for %i1 = 0 to 10 {
    %v1 = affine.load %0[%i1] : memref<10xf32>
  }

  affine.for %i2 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}}
    affine.store %cf7, %0[%i2] : memref<10xf32>
    %v2 = affine.load %0[%i2] : memref<10xf32>
  }
  return
}

// -----

// Inside an outer loop, two inner loop nests that store to '%0' are separated
// by a load from the same memref; fusion at depth 1 is reported as blocked in
// both directions.
// CHECK-LABEL: func @should_not_fuse_across_load_at_depth1() {
func.func @should_not_fuse_across_load_at_depth1() {
  %0 = memref.alloc() : memref<10x10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
      affine.store %cf7, %0[%i0, %i1] : memref<10x10xf32>
    }

    // Load from '%0' sitting between the two inner store loops.
    %v1 = affine.load %0[%i0, %c0] : memref<10x10xf32>

    affine.for %i3 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
      affine.store %cf7, %0[%i0, %i3] : memref<10x10xf32>
    }
  }
  return
}

// -----

// Inside an outer loop, two inner loop nests that store to '%0' are separated
// by a third inner loop that loads from the same memref; fusing loop nest 0
// with loop nest 2 at depth 1 is reported as blocked in both directions.
// CHECK-LABEL: func @should_not_fuse_across_load_in_loop_at_depth1() {
func.func @should_not_fuse_across_load_in_loop_at_depth1() {
  %0 = memref.alloc() : memref<10x10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}}
      affine.store %cf7, %0[%i0, %i1] : memref<10x10xf32>
    }

    // Intermediate loop that loads from '%0' between the two store loops.
    affine.for %i2 = 0 to 10 {
      %v1 = affine.load %0[%i0, %i2] : memref<10x10xf32>
    }

    affine.for %i3 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}}
      affine.store %cf7, %0[%i0, %i3] : memref<10x10xf32>
    }
  }
  return
}

// -----

// Inside an outer loop, two inner loop nests that load from '%0' are
// separated by a store to the same memref; fusion at depth 1 is reported as
// blocked in both directions.
// CHECK-LABEL: func @should_not_fuse_across_store_at_depth1() {
func.func @should_not_fuse_across_store_at_depth1() {
  %0 = memref.alloc() : memref<10x10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
      %v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
    }

    // Store to '%0' sitting between the two inner load loops.
    affine.store %cf7, %0[%i0, %c0] : memref<10x10xf32>

    affine.for %i3 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
      %v1 = affine.load %0[%i0, %i3] : memref<10x10xf32>
    }
  }
  return
}

// -----

// Inside an outer loop, two inner loop nests that load from '%0' are
// separated by a third inner loop that stores to the same memref; fusing loop
// nest 0 with loop nest 2 at depth 1 is reported as blocked in both
// directions.
// CHECK-LABEL: func @should_not_fuse_across_store_in_loop_at_depth1() {
func.func @should_not_fuse_across_store_in_loop_at_depth1() {
  %0 = memref.alloc() : memref<10x10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}}
      %v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
    }

    // Intermediate loop that stores to '%0' between the two load loops.
    affine.for %i2 = 0 to 10 {
      affine.store %cf7, %0[%i0, %i2] : memref<10x10xf32>
    }

    affine.for %i3 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}}
      %v1 = affine.load %0[%i0, %i3] : memref<10x10xf32>
    }
  }
  return
}

// -----

// Inside an outer loop, two inner loop nests are separated by a load from
// '%1' (written by the first inner nest) and by the definition of '%c2'
// (used by the second inner nest); fusion at depth 1 is reported as blocked
// in both directions.
// CHECK-LABEL: func @should_not_fuse_across_ssa_value_def_at_depth1() {
func.func @should_not_fuse_across_ssa_value_def_at_depth1() {
  %0 = memref.alloc() : memref<10x10xf32>
  %1 = memref.alloc() : memref<10x10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
      %v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
      affine.store %v0, %1[%i0, %i1] : memref<10x10xf32>
    }

    // RAW dependence from the store in loop nest '%i1' to the load from '%1'
    // prevents fusion of loop nest '%i1' into loops after the load.
    %v1 = affine.load %1[%i0, %c0] : memref<10x10xf32>
    "op0"(%v1) : (f32) -> ()

    // Loop nest '%i2' cannot be fused past SSA value def '%c2' which it uses.
    %c2 = arith.constant 2 : index

    affine.for %i2 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
      affine.store %cf7, %0[%i0, %c2] : memref<10x10xf32>
    }
  }
  return
}