forked from OSchip/llvm-project
NFC: Rename the 'for' operation in the AffineOps dialect to 'affine.for'. This is the second step toward adding a namespace to the AffineOps dialect.
PiperOrigin-RevId: 232717775
This commit is contained in:
parent
905d84851d
commit
90d10b4e00
@@ -15,7 +15,7 @@ loops and if instructions), the result of a
 [`affine.apply` operation](#'affine.apply'-operation) that recursively takes as
 arguments any symbolic identifiers. Dimensions may be bound not only to anything
 that a symbol is bound to, but also to induction variables of enclosing
-[for instructions](#'for'-operation), and the result of an
+['affine.for' operations](#'affine.for'-operation), and the result of an
 [`affine.apply` operation](#'affine.apply'-operation) (which recursively may use
 other dimensions and symbols).

@@ -47,12 +47,12 @@ Example:
 %2 = affine.apply (i)[s0] -> (i+s0) (%42)[%n]
 ```

-#### 'for' operation {#'for'-operation}
+#### 'affine.for' operation {#'affine.for'-operation}

 Syntax:

 ``` {.ebnf}
-operation ::= `for` ssa-id `=` lower-bound `to` upper-bound
+operation ::= `affine.for` ssa-id `=` lower-bound `to` upper-bound
               (`step` integer-literal)? `{` inst* `}`

 lower-bound ::= `max`? affine-map dim-and-symbol-use-list | shorthand-bound
@@ -60,17 +60,17 @@ upper-bound ::= `min`? affine-map dim-and-symbol-use-list | shorthand-bound
 shorthand-bound ::= ssa-id | `-`? integer-literal
 ```

-The `for` operation represents an affine loop nest, defining an SSA value for
-its induction variable. This SSA value always has type
+The `affine.for` operation represents an affine loop nest, defining an SSA value
+for its induction variable. This SSA value always has type
 [`index`](LangRef.md#index-type), which is the size of the machine word.

-The `for` operation executes its body a number of times iterating from a lower
-bound to an upper bound by a stride. The stride, represented by `step`, is a
-positive constant integer which defaults to "1" if not present. The lower and
+The `affine.for` operation executes its body a number of times iterating from a
+lower bound to an upper bound by a stride. The stride, represented by `step`, is
+a positive constant integer which defaults to "1" if not present. The lower and
 upper bounds specify a half-open range: the range includes the lower bound but
 does not include the upper bound.

-The lower and upper bounds of a `for` operation are represented as an
+The lower and upper bounds of an `affine.for` operation are represented as an
 application of an affine mapping to a list of SSA values passed to the map. The
 [same restrictions](#restrictions-on-dimensions-and-symbols) hold for these SSA
 values as for all bindings of SSA values to dimensions and symbols.
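Putting the grammar above together, a minimal sketch of the renamed syntax (illustrative only; the bounds and names below are invented, not taken from this commit):

```mlir
// Shorthand constant bounds with an explicit step.
affine.for %i = 0 to 10 step 2 {
  ...
}

// Map-based compound bounds: iterate from max(0, %N - 8) to min(%N, 64).
affine.for %j = max (d0) -> (0, d0 - 8) (%N) to min (d0) -> (d0, 64) (%N) {
  ...
}
```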
@@ -94,8 +94,8 @@ Example showing reverse iteration of the inner loop:

 func @simple_example(%A: memref<?x?xf32>, %B: memref<?x?xf32>) {
   %N = dim %A, 0 : memref<?x?xf32>
-  for %i = 0 to %N step 1 {
-    for %j = 0 to %N { // implicitly steps by 1
+  affine.for %i = 0 to %N step 1 {
+    affine.for %j = 0 to %N { // implicitly steps by 1
       %0 = affine.apply #map57(%j)[%N]
       %tmp = call @F1(%A, %i, %0) : (memref<?x?xf32>, index, index)->(f32)
       call @F2(%tmp, %B, %i, %0) : (f32, memref<?x?xf32>, index, index)->()

@@ -130,8 +130,8 @@ Example:

 #set = (d0, d1)[s0]: (d0 - 10 >= 0, s0 - d0 - 9 >= 0,
                       d1 - 10 >= 0, s0 - d1 - 9 >= 0)
 func @reduced_domain_example(%A, %X, %N) : (memref<10xi32>, i32, i32) {
-  for %i = 0 to %N {
-    for %j = 0 to %N {
+  affine.for %i = 0 to %N {
+    affine.for %j = 0 to %N {
       %0 = affine.apply #map42(%j)
       %tmp = call @S1(%X, %i, %0)
       if #set(%i, %j)[%N] {
@@ -22,9 +22,9 @@ Examples:

 // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32> and
 // pad with %f0 to handle the boundary case:
 %f0 = constant 0.0f : f32
-for %i0 = 0 to %0 {
-  for %i1 = 0 to %1 step 256 {
-    for %i2 = 0 to %2 step 32 {
+affine.for %i0 = 0 to %0 {
+  affine.for %i1 = 0 to %1 step 256 {
+    affine.for %i2 = 0 to %2 step 32 {
       %v = vector_transfer_read %A, %i0, %i1, %i2, %f0
            {permutation_map: (d0, d1, d2) -> (d2, d1)} :
            (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>

@@ -33,8 +33,8 @@ for %i0 = 0 to %0 {

 // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into
 // vector<128xf32>. The underlying implementation will require a 1-D vector
 // broadcast:
-for %i0 = 0 to %0 {
-  for %i1 = 0 to %1 {
+affine.for %i0 = 0 to %0 {
+  affine.for %i1 = 0 to %1 {
     %3 = vector_transfer_read %A, %i0, %i1
          {permutation_map: (d0, d1) -> (0)} :
          (memref<?x?xf32>, index, index) -> vector<128xf32>

@@ -80,9 +80,9 @@ A notional lowering of vector_transfer_read could generate code resembling:

 // %expr1, %expr2, %expr3, %expr4 defined before this point
 %tmp = alloc() : vector<3x4x5xf32>
 %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
-for %i = 0 to 3 {
-  for %j = 0 to 4 {
-    for %k = 0 to 5 {
+affine.for %i = 0 to 3 {
+  affine.for %j = 0 to 4 {
+    affine.for %k = 0 to 5 {
       %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
       store %tmp[%i, %j, %k] : vector<3x4x5xf32>
 }}}

@@ -101,8 +101,8 @@ lowered code would resemble:

 // %expr1, %expr2, %expr3, %expr4 defined before this point
 %tmp = alloc() : vector<3x4x5xf32>
 %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
-for %i = 0 to 3 {
-  for %k = 0 to 5 {
+affine.for %i = 0 to 3 {
+  affine.for %k = 0 to 5 {
     %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
     store %tmp[%i, 0, %k] : vector<3x4x5xf32>
 }}

@@ -129,10 +129,10 @@ Examples:

 ```mlir {.mlir}
 // write vector<16x32x64xf32> into the slice `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
-for %i0 = 0 to %0 {
-  for %i1 = 0 to %1 step 32 {
-    for %i2 = 0 to %2 step 64 {
-      for %i3 = 0 to %3 step 16 {
+affine.for %i0 = 0 to %0 {
+  affine.for %i1 = 0 to %1 step 32 {
+    affine.for %i2 = 0 to %2 step 64 {
+      affine.for %i3 = 0 to %3 step 16 {
         %val = `ssa-value` : vector<16x32x64xf32>
         vector_transfer_write %val, %A, %i0, %i1, %i2, %i3
           {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :
@@ -40,7 +40,7 @@ which means that values are defined before use and have scope defined by their
 dominance relations. Operations may produce zero or more results, and each is a
 distinct SSA value with its own type defined by the [type system](#type-system).

-MLIR incorporates polyhedral compiler concepts, including `for` and `if`
+MLIR incorporates polyhedral compiler concepts, including `affine.for` and `if`
 operations defined by the [affine dialect](Dialects/Affine.md), which model
 affine loops and affine conditionals. It also includes affine maps integrated
 into the type system - they are key to the representation of data and

@@ -99,10 +99,10 @@ func @multiply(%A: memref<100x?xf32>, %B: memref<?x50xf32>)
   %C = alloc memref<100x50xf32>()

   // Multiplication loop nest.
-  for %i = 0 to 100 {
-    for %j = 0 to 50 {
+  affine.for %i = 0 to 100 {
+    affine.for %j = 0 to 50 {
       store 0 to %C[%i, %j] : memref<100x50xf32>
-      for %k = 0 to %n {
+      affine.for %k = 0 to %n {
         %a_v = load %A[%i, %k] : memref<100x?xf32>
         %b_v = load %B[%k, %j] : memref<?x50xf32>
         %prod = mulf %a_v, %b_v : f32

@@ -1434,8 +1434,8 @@ The arity of indices is the rank of the memref (i.e., if the memref loaded from
 is of rank 3, then 3 indices are required for the load following the memref
 identifier).

-In an `if` or `for` body, the indices of a load are restricted to SSA values
-bound to surrounding loop induction variables,
+In an `if` or `affine.for` body, the indices of a load are restricted to SSA
+values bound to surrounding loop induction variables,
 [symbols](#dimensions-and-symbols), results of a
 [`constant` operation](#'constant'-operation), or the result of an
 `affine.apply` operation that can in turn take as arguments all of the
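A small sketch of what that restriction permits (illustrative only; `%A` and the symbol `%M` are assumed to be defined appropriately and are not from the diff):

```mlir
affine.for %i = 0 to 100 {
  %c1 = constant 1 : index                            // result of a 'constant' op
  %idx = affine.apply (d0)[s0] -> (d0 + s0) (%i)[%M]  // affine.apply of the IV and a symbol
  %v = load %A[%idx, %c1] : memref<?x128xf32>         // every index is of a permitted kind
}
```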
@@ -1456,7 +1456,7 @@ Example:

 **Context:** The `load` and `store` instructions are specifically crafted to
 fully resolve a reference to an element of a memref, and (in affine `if` and
-`for` instructions) the compiler can follow use-def chains (e.g. through
+`affine.for` instructions) the compiler can follow use-def chains (e.g. through
 [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to
 precisely analyze references at compile-time using polyhedral techniques. This
 is possible because of the

@@ -1492,7 +1492,7 @@ store %100, %A[%1, 1023] : memref<4x?xf32, #layout, hbm>

 **Context:** The `load` and `store` instructions are specifically crafted to
 fully resolve a reference to an element of a memref, and (in polyhedral `if` and
-`for` instructions) the compiler can follow use-def chains (e.g. through
+`affine.for` instructions) the compiler can follow use-def chains (e.g. through
 [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to
 precisely analyze references at compile-time using polyhedral techniques. This
 is possible because of the
@@ -39,8 +39,8 @@ These restrictions may be lifted in the future.

 ### Output IR

-Functions with `for` and `if` instructions eliminated. These functions may
-contain operations from the Standard dialect in addition to those already
+Functions with `affine.for` and `if` instructions eliminated. These functions
+may contain operations from the Standard dialect in addition to those already
 present before the pass.

 ### Invariants
@@ -150,8 +150,8 @@ func bar(%A : memref<8x?xf32, #lmap>) {
   // dynamically using dim instruction.
   %N = dim %A, 1 : memref<8x?xf32, #lmap>

-  for %i = 0 to 8 {
-    for %j = 0 to %N {
+  affine.for %i = 0 to 8 {
+    affine.for %j = 0 to %N {
       // A[i,j] += 1
       %s1 = load %A [%i, %j] : memref<8x?xf32, #lmap>
       %s2 = add %s1, 1

@@ -534,7 +534,7 @@ nested in an outer function that using affine loops.
 func @search(memref<?x?xi32 %A, <?xi32> %S, i32 %key) {
   %ni = dim %A, 0 : memref<?x?xi32>
   // This loop can be parallelized
-  for %i = 0 to %ni {
+  affine.for %i = 0 to %ni {
     call @search_body (%A, %S, %i) : (memref<?x?xi32>, memref<?xi32>, i32)
   }
   return

@@ -568,10 +568,10 @@ func @search_body(%A: memref<?x?xi32>, %S: memref<?xi32>, %key: i32) {

 As per the [MLIR spec](LangRef.md), the restrictions on dimensions and symbol
 identifiers to be used with the affine.apply instruction only apply to accesses
-inside `for` and `if` instructions. However, an analysis of accesses inside the
-called function (`@search_body`) is necessary to determine if the `%i` loop
-could be parallelized: such function access analysis is calling context
-sensitive.
+inside `affine.for` and `if` instructions. However, an analysis of accesses
+inside the called function (`@search_body`) is necessary to determine if the
+`%i` loop could be parallelized: such function access analysis is calling
+context sensitive.

 ### Non-affine loop bounds {#non-affine-loop-bounds}

@@ -590,8 +590,8 @@ for (i=0; i <N; i++)

 ```mlir {.mlir}
 func @outer_nest(%n) : (i32) {
-  for %i = 0 to %n {
-    for %j = 0 to %n {
+  affine.for %i = 0 to %n {
+    affine.for %j = 0 to %n {
       call @inner_nest(%i, %j, %n)
     }
   }

@@ -606,8 +606,8 @@ func @inner_nest(%i: i32, %j: i32, %n: i32) {
 }

 func @inner_nest2(%m, %n) -> i32 {
-  for %k = 0 to %m {
-    for %l = 0 to %n {
+  affine.for %k = 0 to %m {
+    affine.for %l = 0 to %n {
       ...
     }
   }

@@ -649,13 +649,13 @@ in a dilated convolution.
 func @conv2d(memref<16x1024x1024x3xf32, #lm0, vmem> %input,
              memref<5x5x3x32xf32, #lm0, vmem> %kernel,
              memref<16x512x512x32xf32, #lm0, vmem> %output) {
-  for %b = 0 to %batch {
-    for %oh = 0 to %output_height {
-      for %ow = 0 to %output_width {
-        for %of = 0 to %output_feature {
-          for %kh = 0 to %kernel_height {
-            for %kw = 0 to %kernel_width {
-              for %if = 0 to %input_feature {
+  affine.for %b = 0 to %batch {
+    affine.for %oh = 0 to %output_height {
+      affine.for %ow = 0 to %output_width {
+        affine.for %of = 0 to %output_feature {
+          affine.for %kh = 0 to %kernel_height {
+            affine.for %kw = 0 to %kernel_width {
+              affine.for %if = 0 to %input_feature {
                 // Calculate input indices.
                 %1_0 = affine.apply #map1_0 (%0#1, %0#2, %0#4, %0#5)
                   [%h_stride, %w_stride, %h_kernel_dilation, %w_kernel_dilation,

@@ -899,14 +899,14 @@ func @dma_hbm_to_vmem(memref<1024 x f32, #layout_map0, hbm> %a,
 representation. 2(b) requires no change, but impacts how cost models look at
 index and layout maps.

-### `if` and `for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}
+### `if` and `affine.for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}

 We considered providing a representation for SSA values that are live out of
-`if/else` conditional bodies and loop carried in `for` loops. We ultimately
-abandoned this approach due to its complexity. In the current design of MLIR,
-scalar variables cannot escape for loops or if instructions. In situations,
-where escaping is necessary, we use zero-dimensional tensors and memrefs instead
-of scalars.
+`if/else` conditional bodies and loop carried in `affine.for` loops. We
+ultimately abandoned this approach due to its complexity. In the current design
+of MLIR, scalar variables cannot escape for loops or if instructions. In
+situations where escaping is necessary, we use zero-dimensional tensors and
+memrefs instead of scalars.

 **TODO**: This whole section is obsolete and should be updated to use block
 arguments and a yield like terminator in for/if instructions.
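A minimal sketch of that zero-dimensional-memref workaround (illustrative; the rank-0 memref syntax and all names here are assumptions, not taken from this commit):

```mlir
// Carry a running value across iterations through a rank-0 memref
// instead of an escaping scalar.
%acc = alloc() : memref<f32>
store %zero, %acc[] : memref<f32>
affine.for %i = 0 to 100 {
  %cur = load %acc[] : memref<f32>
  %next = addf %cur, %one : f32
  store %next, %acc[] : memref<f32>
}
%result = load %acc[] : memref<f32>
```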
@@ -919,7 +919,7 @@ Syntax:

 ``` {.ebnf}
 [<out-var-list> =]
-for %<index-variable-name> = <lower-bound> ... <upper-bound> step <step>
+affine.for %<index-variable-name> = <lower-bound> ... <upper-bound> step <step>
   [with <in-var-list>] { <loop-instruction-list> }
 ```

@@ -934,7 +934,7 @@ Example:

 // Return sum of elements in 1-dimensional memref A
 func int32 @sum(%A : memref<?xi32>, %N : i32) -> (i32) {
   %init = 0
-  %result = for %i = 0 to N with %tmp(%init) {
+  %result = affine.for %i = 0 to N with %tmp(%init) {
     %value = load %A[%i]
     %sum = %value + %tmp
     yield %sum

@@ -964,7 +964,7 @@ Example:

 // Compute sum of half of the array
 func int32 @sum_half(%A, %N) {
   %s0 = 0
-  %s1 = for %i = 1 ... N step 1 with %s2 (%s0) {
+  %s1 = affine.for %i = 1 ... N step 1 with %s2 (%s0) {
     %s3 = if (%i >= %N / 2) {
       %v0 = load %A[%i]
       %s4 = %s2 + %v0
@@ -184,8 +184,8 @@ Our simple example above would be represented as:

 ```mlir
 mlfunc @simple_example(... %N) {
-  for %i = 0 ... %N step 1 {
-    for %j = 0 ... %N step 1 {
+  affine.for %i = 0 ... %N step 1 {
+    affine.for %j = 0 ... %N step 1 {
       // identity noop in this case, but can exist in general.
       %0,%1 = affine.apply #57(%i, %j)

@@ -203,8 +203,8 @@ The example with the reduced domain would be represented with an if instruction:

 ```mlir
 mlfunc @reduced_domain_example(... %N) {
-  for %i = 0 ... %N step 1 {
-    for %j = 0 ... %N step 1 {
+  affine.for %i = 0 ... %N step 1 {
+    affine.for %j = 0 ... %N step 1 {
       // identity noop in this case, but can exist in general.
       %0,%1 = affinecall #57(%i, %j)

@@ -233,8 +233,8 @@ that transformations call into):

 ```mlir
 mlfunc @skewed_domain_example(... %N) {
-  for %t1 = 0 ... 2*N-2 step 1 {
-    for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
+  affine.for %t1 = 0 ... 2*N-2 step 1 {
+    affine.for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
       (%i, %j) = (%t1-%t2, %t2)
       ...
     }

@@ -373,7 +373,7 @@ mlfunc's (if we support them) will also have to have domains.
 ### Lack of redundancy in IR

 The traditional form has multiple encodings for the same sorts of behavior: you
-end up having bits on `for` loops to specify whether codegen should use
+end up having bits on `affine.for` loops to specify whether codegen should use
 "atomic/separate" policies, unroll loops, etc. Instructions can be split or can
 generate multiple copies of their instruction because of overlapping domains,
 etc.
@@ -90,15 +90,15 @@ private:
   explicit AffineApplyOp(const Instruction *state) : Op(state) {}
 };

-/// The "for" instruction represents an affine loop nest, defining an SSA value
-/// for its induction variable. The induction variable is represented as a
+/// The "affine.for" instruction represents an affine loop nest, defining an SSA
+/// value for its induction variable. The induction variable is represented as a
 /// BlockArgument to the entry block of the body. The body and induction
-/// variable can be created automatically for new "for" ops with 'createBody'.
-/// This SSA value always has type index, which is the size of the machine word.
-/// The stride, represented by step, is a positive constant integer which
-/// defaults to "1" if not present. The lower and upper bounds specify a
-/// half-open range: the range includes the lower bound but does not include the
-/// upper bound.
+/// variable can be created automatically for new "affine.for" ops with
+/// 'createBody'. This SSA value always has type index, which is the size of the
+/// machine word. The stride, represented by step, is a positive constant
+/// integer which defaults to "1" if not present. The lower and upper bounds
+/// specify a half-open range: the range includes the lower bound but does not
+/// include the upper bound.
 ///
 /// The lower and upper bounds of a for operation are represented as an
 /// application of an affine mapping to a list of SSA values passed to the map.

@@ -110,7 +110,7 @@ private:
 ///
 /// Example:
 ///
-///   for %i = 1 to 10 {
+///   affine.for %i = 1 to 10 {
 ///     ...
 ///   }
 ///

@@ -131,7 +131,7 @@ public:
   static void getCanonicalizationPatterns(OwningRewritePatternList &results,
                                           MLIRContext *context);

-  static StringRef getOperationName() { return "for"; }
+  static StringRef getOperationName() { return "affine.for"; }
   static StringRef getStepAttrName() { return "step"; }
   static StringRef getLowerBoundAttrName() { return "lower_bound"; }
   static StringRef getUpperBoundAttrName() { return "upper_bound"; }

@@ -253,15 +253,15 @@ ConstOpPointer<AffineForOp> getForInductionVarOwner(const Value *val);
 void extractForInductionVars(ArrayRef<OpPointer<AffineForOp>> forInsts,
                              SmallVectorImpl<Value *> *ivs);

-/// Adds constraints (lower and upper bounds) for the specified 'for'
+/// Adds constraints (lower and upper bounds) for the specified 'affine.for'
 /// instruction's Value using IR information stored in its bound maps. The
 /// right identifier is first looked up using forOp's Value. Returns
 /// false for the yet unimplemented/unsupported cases, and true if the
 /// information is successfully added. Asserts if the Value corresponding to
-/// the 'for' instruction isn't found in the constraint system. Any new
-/// identifiers that are found in the bound operands of the 'for' instruction
-/// are added as trailing identifiers (either dimensional or symbolic
-/// depending on whether the operand is a valid ML Function symbol).
+/// the 'affine.for' instruction isn't found in the constraint system. Any new
+/// identifiers that are found in the bound operands of the 'affine.for'
+/// instruction are added as trailing identifiers (either dimensional or
+/// symbolic depending on whether the operand is a valid ML Function symbol).
 // TODO(bondhugula): add support for non-unit strides.
 bool addAffineForOpDomain(ConstOpPointer<AffineForOp> forOp,
                           FlatAffineConstraints *constraints);

@@ -297,10 +297,10 @@ public:
   operand_range getOperands() const { return {operand_begin(), operand_end()}; }

 private:
-  // 'for' instruction that contains this bound.
+  // 'affine.for' instruction that contains this bound.
   ConstOpPointer<AffineForOp> inst;
   // Start and end positions of this affine bound operands in the list of
-  // the containing 'for' instruction operands.
+  // the containing 'affine.for' instruction operands.
   unsigned opStart, opEnd;
   // Affine map for this bound.
   AffineMap map;
@@ -52,7 +52,7 @@ bool dominates(const Instruction &a, const Instruction &b);
 bool properlyDominates(const Instruction &a, const Instruction &b);

 /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
-/// the outermost 'for' instruction to the innermost one.
+/// the outermost 'affine.for' instruction to the innermost one.
 // TODO(bondhugula): handle 'if' inst's.
 void getLoopIVs(const Instruction &inst,
                 SmallVectorImpl<OpPointer<AffineForOp>> *loops);

@@ -105,8 +105,8 @@ insertBackwardComputationSlice(Instruction *srcOpInst, Instruction *dstOpInst,
 /// surrounding such op's.
 // For example, the memref region for a load operation at loop depth = 1:
 //
-//   for %i = 0 to 32 {
-//     for %ii = %i to (d0) -> (d0 + 8) (%i) {
+//   affine.for %i = 0 to 32 {
+//     affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 //       load %A[%ii]
 //     }
 //   }

@@ -139,8 +139,8 @@ struct MemRefRegion {
 /// For example, the memref region for this operation at loopDepth = 1 will
 /// be:
 ///
-///   for %i = 0 to 32 {
-///     for %ii = %i to (d0) -> (d0 + 8) (%i) {
+///   affine.for %i = 0 to 32 {
+///     affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 ///       load %A[%ii]
 ///     }
 ///   }
@@ -76,9 +76,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// The following MLIR snippet:
 ///
 /// ```mlir
-///   for %i3 = 0 to %0 {
-///     for %i4 = 0 to %1 {
-///       for %i5 = 0 to %2 {
+///   affine.for %i3 = 0 to %0 {
+///     affine.for %i4 = 0 to %1 {
+///       affine.for %i5 = 0 to %2 {
 ///         %a5 = load %arg0[%i4, %i5, %i3] : memref<?x?x?xf32>
 ///   }}}
 /// ```

@@ -86,9 +86,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// may vectorize with {permutation_map: (d0, d1, d2) -> (d2, d1)} into:
 ///
 /// ```mlir
-///   for %i3 = 0 to %0 step 32 {
-///     for %i4 = 0 to %1 {
-///       for %i5 = 0 to %2 step 256 {
+///   affine.for %i3 = 0 to %0 step 32 {
+///     affine.for %i4 = 0 to %1 {
+///       affine.for %i5 = 0 to %2 step 256 {
 ///         %4 = vector_transfer_read %arg0, %i4, %i5, %i3
 ///              {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 ///              (memref<?x?x?xf32>, index, index) -> vector<32x256xf32>

@@ -103,7 +103,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 ///
 /// ```mlir
 ///   %cst0 = constant 0 : index
-///   for %i0 = 0 to %0 {
+///   affine.for %i0 = 0 to %0 {
 ///     %a0 = load %arg0[%cst0, %cst0] : memref<?x?xf32>
 ///   }
 /// ```

@@ -111,7 +111,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// may vectorize with {permutation_map: (d0) -> (0)} into:
 ///
 /// ```mlir
-///   for %i0 = 0 to %0 step 128 {
+///   affine.for %i0 = 0 to %0 step 128 {
 ///     %3 = vector_transfer_read %arg0, %c0_0, %c0_0
 ///          {permutation_map: (d0, d1) -> (0)} :
 ///          (memref<?x?xf32>, index, index) -> vector<128xf32>
@@ -83,9 +83,10 @@ AffineMap getUnrolledLoopUpperBound(ConstOpPointer<AffineForOp> forOp,
                                     unsigned unrollFactor,
                                     FuncBuilder *builder);

-/// Skew the instructions in the body of a 'for' instruction with the specified
-/// instruction-wise shifts. The shifts are with respect to the original
-/// execution order, and are multiplied by the loop 'step' before being applied.
+/// Skew the instructions in the body of an 'affine.for' instruction with the
+/// specified instruction-wise shifts. The shifts are with respect to the
+/// original execution order, and are multiplied by the loop 'step' before being
+/// applied.
 UtilResult instBodySkew(OpPointer<AffineForOp> forOp, ArrayRef<uint64_t> shifts,
                         bool unrollPrologueEpilogue = false);
@@ -94,14 +94,14 @@ Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc,
 ///
 /// Before
 ///
-///   for %i = 0 to #map(%N)
+///   affine.for %i = 0 to #map(%N)
 ///     %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 ///     send %A[%idx], ...
 ///     %v = "compute"(%idx, ...)
 ///
 /// After
 ///
-///   for %i = 0 to #map(%N)
+///   affine.for %i = 0 to #map(%N)
 ///     %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 ///     send %A[%idx], ...
 ///     %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
@@ -716,7 +716,7 @@ static void printBound(AffineBound bound, const char *prefix, OpAsmPrinter *p) {
 }

 void AffineForOp::print(OpAsmPrinter *p) const {
-  *p << "for ";
+  *p << "affine.for ";
   p->printOperand(getBody()->getArgument(0));
   *p << " = ";
   printBound(getLowerBound(), "max", p);

@@ -756,8 +756,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
 // For example, given the following MLIR code with "source" and
 // "destination" accesses to the same memref labeled, and symbols %M, %N, %K:
 //
-//   for %i0 = 0 to 100 {
-//     for %i1 = 0 to 50 {
+//   affine.for %i0 = 0 to 100 {
+//     affine.for %i1 = 0 to 50 {
 //       %a0 = affine.apply
 //         (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
 //       // Source memref access.

@@ -765,8 +765,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
 //     }
 //   }
 //
-//   for %i2 = 0 to 100 {
-//     for %i3 = 0 to 50 {
+//   affine.for %i2 = 0 to 100 {
+//     affine.for %i3 = 0 to 50 {
 //       %a1 = affine.apply
 //         (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
 //       // Destination memref access.
@@ -36,13 +36,13 @@
 using namespace mlir;

 /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
-/// the outermost 'for' instruction to the innermost one.
+/// the outermost 'affine.for' instruction to the innermost one.
 void mlir::getLoopIVs(const Instruction &inst,
                       SmallVectorImpl<OpPointer<AffineForOp>> *loops) {
   auto *currInst = inst.getParentInst();
   OpPointer<AffineForOp> currAffineForOp;
-  // Traverse up the hierarchy collecing all 'for' instruction while skipping
-  // over 'if' instructions.
+  // Traverse up the hierarchy collecting all 'affine.for' instructions while
+  // skipping over 'if' instructions.
   while (currInst && ((currAffineForOp = currInst->dyn_cast<AffineForOp>()) ||
                       currInst->isa<AffineIfOp>())) {
    if (currAffineForOp)
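To make the traversal concrete, consider a hypothetical nest (invented for illustration, not from this file): querying from the load below, getLoopIVs collects [%i, %j], with the intervening 'if' skipped over:

```mlir
affine.for %i = 0 to 32 {
  if #set0(%i) {                 // skipped: not a loop
    affine.for %j = 0 to 8 {
      %v = load %A[%i, %j] : memref<32x8xf32>   // query starts here
    }
  }
}
```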
@@ -111,8 +111,8 @@ bool MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
 // For example, the memref region for this load operation at loopDepth = 1 will
 // be as below:
 //
-//   for %i = 0 to 32 {
-//     for %ii = %i to (d0) -> (d0 + 8) (%i) {
+//   affine.for %i = 0 to 32 {
+//     affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 //       load %A[%ii]
 //     }
 //   }

@@ -614,7 +614,7 @@ Optional<int64_t> mlir::getMemoryFootprintBytes(const Block &block,
                                                 int memorySpace) {
   std::vector<std::unique_ptr<MemRefRegion>> regions;

-  // Walk this 'for' instruction to gather all memory regions.
+  // Walk this 'affine.for' instruction to gather all memory regions.
   bool error = false;
   const_cast<Block *>(&block)->walk([&](Instruction *opInst) {
     if (!opInst->isa<LoadOp>() && !opInst->isa<StoreOp>()) {
@@ -189,7 +189,7 @@ unsigned Block::getNumSuccessors() const {
     return terminator->getNumSuccessors();
   }
   assert(getParent() && "top-level block with no terminator");
-  // Blocks inside 'for'/'if' instructions don't have successors.
+  // Blocks inside 'affine.for'/'if' instructions don't have successors.
   return 0;
 }
@@ -338,7 +338,8 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
   auto fastMemRefType = top.getMemRefType(
       fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace);

-  // Create the fast memory space buffer just before the 'for' instruction.
+  // Create the fast memory space buffer just before the 'affine.for'
+  // instruction.
   fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType)->getResult();
   // Record it.
   fastBufferMap[memref] = fastMemRef;

@@ -456,7 +457,7 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
   // approach is conservative in some cases at the moment, we do a check later
   // and report an error with location info.
   // TODO(bondhugula): An 'if' instruction is being treated similar to an
-  // operation instruction. 'if''s could have 'for's in them; treat them
+  // operation instruction. 'if''s could have 'affine.for's in them; treat them
   // separately.

   // Get to the first load, store, or for op.

@@ -470,9 +471,9 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
   if (auto forOp = it->dyn_cast<AffineForOp>()) {
     // We'll assume for now that loops with steps are tiled loops, and so DMAs
     // are not performed for that depth, but only further inside.
-    // If the memory footprint of the 'for' loop is higher than fast memory
-    // capacity (when provided), we recurse to DMA at an inner level until
-    // we find a depth at which footprint fits in the capacity. If the
+    // If the memory footprint of the 'affine.for' loop is higher than fast
+    // memory capacity (when provided), we recurse to DMA at an inner level
+    // until we find a depth at which footprint fits in the capacity. If the
     // footprint can't be calculated, we assume for now it fits.

     // Returns true if the footprint is known to exceed capacity.

@@ -489,13 +490,13 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
       consumedCapacityBytes += runOnBlock(/*begin=*/curBegin, /*end=*/it);
       // Recurse onto the body of this loop.
       runOnBlock(forOp->getBody(), consumedCapacityBytes);
-      // The next region starts right after the 'for' instruction.
+      // The next region starts right after the 'affine.for' instruction.
       curBegin = std::next(it);
     } else {
       // We have enough capacity, i.e., DMAs will be computed for the portion
-      // of the block until 'it', and for the 'for' loop. For the latter, they
-      // are placed just before this loop (for incoming DMAs) and right after
-      // (for outgoing ones).
+      // of the block until 'it', and for the 'affine.for' loop. For the
+      // latter, they are placed just before this loop (for incoming DMAs) and
+      // right after (for outgoing ones).
       consumedCapacityBytes += runOnBlock(/*begin=*/curBegin, /*end=*/it);

       // Inner loop DMAs have their own scope - thus we don't update consumed
@@ -510,7 +510,8 @@ bool MemRefDependenceGraph::init(Function *f) {
     // all loads and store accesses it contains.
     LoopNestStateCollector collector;
     collector.collect(&inst);
-    // Return false if a non 'for' region was found (not currently supported).
+    // Return false if a non 'affine.for' region was found (not currently
+    // supported).
    if (collector.hasNonForRegion)
      return false;
    Node node(nextNodeId++, &inst);
@@ -231,7 +231,8 @@ UtilResult mlir::tileCodeGen(MutableArrayRef<OpPointer<AffineForOp>> band,
 static void
 getTileableBands(Function *f,
                  std::vector<SmallVector<OpPointer<AffineForOp>, 6>> *bands) {
-  // Get maximal perfect nest of 'for' insts starting from root (inclusive).
+  // Get maximal perfect nest of 'affine.for' insts starting from root
+  // (inclusive).
   auto getMaximalPerfectLoopNest = [&](OpPointer<AffineForOp> root) {
     SmallVector<OpPointer<AffineForOp>, 6> band;
     OpPointer<AffineForOp> currInst = root;
@@ -164,7 +164,7 @@ PassResult LoopUnroll::runOnFunction(Function *f) {
   return success();
 }

-/// Unrolls a 'for' inst. Returns true if the loop was unrolled, false
+/// Unrolls an 'affine.for' inst. Returns true if the loop was unrolled, false
 /// otherwise. The default unroll factor is 4.
 bool LoopUnroll::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
   // Use the function callback if one was provided.
@@ -105,7 +105,7 @@ PassResult LoopUnrollAndJam::runOnFunction(Function *f) {
   return success();
 }

-/// Unroll and jam a 'for' inst. Default unroll jam factor is
+/// Unroll and jam an 'affine.for' inst. Default unroll jam factor is
 /// kDefaultUnrollJamFactor. Return false if nothing was done.
 bool LoopUnrollAndJam::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
   // Unroll and jam by the factor that was passed if any.
@@ -283,7 +283,8 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate,
   return value;
 }

-// Convert a "for" loop to a flow of blocks. Return `false` on success.
+// Convert an "affine.for" loop to a flow of blocks. Return `false` on
+// success.
 //
 // Create an SESE region for the loop (including its body) and append it to the
 // end of the current region. The loop region consists of the initialization
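Roughly, the lowering described here produces explicit branches; a hand-written sketch of the output shape (the block names and exact op sequence are assumptions, not the pass's verbatim output):

```mlir
// 'affine.for %i = 0 to %N { ... }' becomes approximately:
  br ^cond(%c0 : index)
^cond(%i : index):               // loop header: test the exit condition
  %cmp = cmpi "slt", %i, %N : index
  cond_br %cmp, ^body, ^end
^body:                           // original loop body, then the increment
  ...
  %next = addi %i, %c1 : index
  br ^cond(%next : index)
^end:
```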
@@ -330,8 +331,9 @@ bool LowerAffinePass::lowerAffineFor(OpPointer<AffineForOp> forOp) {
   auto loc = forOp->getLoc();
   auto *forInst = forOp->getInstruction();

-  // Start by splitting the block containing the 'for' into two parts. The part
-  // before will get the init code, the part after will be the end point.
+  // Start by splitting the block containing the 'affine.for' into two parts.
+  // The part before will get the init code, the part after will be the end
+  // point.
   auto *initBlock = forInst->getBlock();
   auto *endBlock = initBlock->splitBlock(forInst);
@@ -126,9 +126,9 @@ private:
 ///   // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into
 ///   // vector<32x256xf32> and pad with %f0 to handle the boundary case:
 ///   %f0 = constant 0.0f : f32
-///   for %i0 = 0 to %0 {
-///     for %i1 = 0 to %1 step 256 {
-///       for %i2 = 0 to %2 step 32 {
+///   affine.for %i0 = 0 to %0 {
+///     affine.for %i1 = 0 to %1 step 256 {
+///       affine.for %i2 = 0 to %2 step 32 {
 ///         %v = vector_transfer_read %A, %i0, %i1, %i2, %f0
 ///              {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 ///              (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>

@@ -139,8 +139,8 @@ private:
 /// MLIR resembling:
 ///
 /// ```mlir
-///   for %d1 = 0 to 256 {
-///     for %d2 = 0 to 32 {
+///   affine.for %d1 = 0 to 256 {
+///     affine.for %d2 = 0 to 32 {
 ///       %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32
 ///       %tmp[%d2, %d1] = %s
 ///     }
@@ -101,10 +101,10 @@
 /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
 ///   %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
 ///   %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> :
-///   vector<4x4x4xf32> for %i0 = 0 to %M step 4 {
-///     for %i1 = 0 to %N step 4 {
-///       for %i2 = 0 to %O {
-///         for %i3 = 0 to %P step 4 {
+///   vector<4x4x4xf32> affine.for %i0 = 0 to %M step 4 {
+///     affine.for %i1 = 0 to %N step 4 {
+///       affine.for %i2 = 0 to %O {
+///         affine.for %i3 = 0 to %P step 4 {
 ///           vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3
 ///             {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} :
 ///             vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>,

@@ -120,10 +120,10 @@
 /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
 ///   %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
 ///   %f1 = constant splat<vector<4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
-///   for %i0 = 0 to %arg0 step 4 {
-///     for %i1 = 0 to %arg1 step 4 {
-///       for %i2 = 0 to %arg2 {
-///         for %i3 = 0 to %arg3 step 4 {
+///   affine.for %i0 = 0 to %arg0 step 4 {
+///     affine.for %i1 = 0 to %arg1 step 4 {
+///       affine.for %i2 = 0 to %arg2 {
+///         affine.for %i3 = 0 to %arg3 step 4 {
 ///           %1 = affine.apply (d0, d1, d2, d3) -> (d0, d1, d2, d3)
 ///                (%i0, %i1, %i2, %i3)
 ///           vector_transfer_write f1, %0, %1#0, %1#1, %1#2, %1#3

@@ -293,10 +293,10 @@ static Value *substitute(Value *v, VectorType hwVectorType,
 /// super-vectorization has been applied:
 ///
 /// ```mlir
-///   for %i0 = 0 to %M {
-///     for %i1 = 0 to %N step 3 {
-///       for %i2 = 0 to %O {
-///         for %i3 = 0 to %P step 32 {
+///   affine.for %i0 = 0 to %M {
+///     affine.for %i1 = 0 to %N step 3 {
+///       affine.for %i2 = 0 to %O {
+///         affine.for %i3 = 0 to %P step 32 {
 ///           %r = vector_transfer_read(%A, map(%i..)#0, map(%i..)#1, map(%i..)#2)
 ///                -> vector<3x32xf32>
 ///           ...
@@ -19,7 +19,7 @@
 // potentially getting rid of intermediate memref's entirely.
 // TODO(mlir-team): In the future, similar techniques could be used to eliminate
 // dead memref store's and perform more complex forwarding when support for
-// SSA scalars live out of 'for'/'if' statements is available.
+// SSA scalars live out of 'affine.for'/'if' statements is available.
 //===----------------------------------------------------------------------===//

 #include "mlir/Analysis/AffineAnalysis.h"

@@ -55,7 +55,7 @@ namespace {
 //
 // (* A dependence being satisfied at a block: a dependence that is satisfied by
 // virtue of the destination instruction appearing textually / lexically after
-// the source instruction within the body of a 'for' instruction; thus, a
+// the source instruction within the body of an 'affine.for' instruction; thus, a
 // dependence is always either satisfied by a loop or by a block).
 //
 // The above conditions are simple to check, sufficient, and powerful for most

@@ -145,8 +145,8 @@ void MemRefDataFlowOpt::forwardStoreToLoad(OpPointer<LoadOp> loadOp) {
   // Check if this store is a candidate for forwarding; we only forward if
   // the dependence from the store is carried by the *body* of innermost
   // common surrounding loop. As an example this filters out cases like:
-  // for %i0
-  //   for %i1
+  // affine.for %i0
+  //   affine.for %i1
   //     %idx = affine.apply (d0) -> (d0 + 1) (%i0)
   //     store %A[%idx]
   //     load %A[%i0]
@@ -71,11 +71,11 @@ static unsigned getTagMemRefPos(const Instruction &dmaInst) {
   return 0;
 }

-/// Doubles the buffer of the supplied memref on the specified 'for' instruction
-/// by adding a leading dimension of size two to the memref. Replaces all uses
-/// of the old memref by the new one while indexing the newly added dimension by
-/// the loop IV of the specified 'for' instruction modulo 2. Returns false if
-/// such a replacement cannot be performed.
+/// Doubles the buffer of the supplied memref on the specified 'affine.for'
+/// instruction by adding a leading dimension of size two to the memref.
+/// Replaces all uses of the old memref by the new one while indexing the newly
+/// added dimension by the loop IV of the specified 'affine.for' instruction
+/// modulo 2. Returns false if such a replacement cannot be performed.
 static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
   auto *forBody = forOp->getBody();
   FuncBuilder bInner(forBody, forBody->begin());
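In IR terms, the effect described by that comment looks roughly like the following (an illustrative sketch with invented names and shapes, not output of the pass):

```mlir
// Before: one buffer shared by all iterations.
%buf = alloc() : memref<256xf32>
affine.for %i = 0 to %N {
  ... uses of %buf ...
}

// After doubleBuffer: a leading dimension of size two, indexed by %i mod 2.
%buf2 = alloc() : memref<2x256xf32>
affine.for %i = 0 to %N {
  %idx = affine.apply (d0) -> (d0 mod 2) (%i)
  ... uses of %buf2[%idx, ...] ...
}
```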
@@ -108,7 +108,7 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
                                             dynamicDimCount++));
   }

-  // Create and place the alloc right before the 'for' instruction.
+  // Create and place the alloc right before the 'affine.for' instruction.
   // TODO(mlir-team): we are assuming scoped allocation here, and aren't
   // inserting a dealloc -- this isn't the right thing.
   Value *newMemRef =

@@ -137,9 +137,9 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
 /// Returns success if the IR is in a valid state.
 PassResult PipelineDataTransfer::runOnFunction(Function *f) {
   // Do a post order walk so that inner loop DMAs are processed first. This is
-  // necessary since 'for' instructions nested within would otherwise become
-  // invalid (erased) when the outer loop is pipelined (the pipelined one gets
-  // deleted and replaced by a prologue, a new steady-state loop and an
+  // necessary since 'affine.for' instructions nested within would otherwise
+  // become invalid (erased) when the outer loop is pipelined (the pipelined one
+  // gets deleted and replaced by a prologue, a new steady-state loop and an
   // epilogue).
   forOps.clear();
   f->walkPostOrder<AffineForOp>(
@@ -138,8 +138,8 @@ void mlir::promoteSingleIterationLoops(Function *f) {
       [](OpPointer<AffineForOp> forOp) { promoteIfSingleIteration(forOp); });
 }

-/// Generates a 'for' inst with the specified lower and upper bounds while
-/// generating the right IV remappings for the shifted instructions. The
+/// Generates an 'affine.for' inst with the specified lower and upper bounds
+/// while generating the right IV remappings for the shifted instructions. The
 /// instruction blocks that go into the loop are specified in instGroupQueue
 /// starting from the specified offset, and in that order; the first element of
 /// the pair specifies the shift applied to that group of instructions; note
@@ -194,10 +194,10 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
   return loopChunk;
 }

-/// Skew the instructions in the body of a 'for' instruction with the specified
-/// instruction-wise shifts. The shifts are with respect to the original
-/// execution order, and are multiplied by the loop 'step' before being applied.
-/// A shift of zero for each instruction will lead to no change.
+/// Skew the instructions in the body of an 'affine.for' instruction with the
+/// specified instruction-wise shifts. The shifts are with respect to the
+/// original execution order, and are multiplied by the loop 'step' before being
+/// applied. A shift of zero for each instruction will lead to no change.
 // The skewing of instructions with respect to one another can be used for
 // example to allow overlap of asynchronous operations (such as DMA
 // communication) with computation, or just relative shifting of instructions
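For intuition about the shift semantics, a hand-written sketch (not produced by instBodySkew; the op names are invented and the real prologue/epilogue generation is more involved): with shifts = [0, 1] on a step-1 loop over [0, 4), the second instruction ends up one iteration behind the first:

```mlir
// Before:
affine.for %i = 0 to 4 {
  "produce"(%i) : (index) -> ()
  "consume"(%i) : (index) -> ()
}

// After skewing "consume" by one iteration:
"produce"(%c0) : (index) -> ()              // prologue
affine.for %i = 1 to 4 {
  "produce"(%i) : (index) -> ()
  %im1 = affine.apply (d0) -> (d0 - 1) (%i)
  "consume"(%im1) : (index) -> ()
}
"consume"(%c3) : (index) -> ()              // epilogue
```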
@@ -246,7 +246,7 @@ UtilResult mlir::instBodySkew(OpPointer<AffineForOp> forOp,

   // An array of instruction groups sorted by shift amount; each group has all
   // instructions with the same shift in the order in which they appear in the
-  // body of the 'for' inst.
+  // body of the 'affine.for' inst.
   std::vector<std::vector<Instruction *>> sortedInstGroups(maxShift + 1);
   unsigned pos = 0;
   for (auto &inst : *forOp->getBody()) {
@@ -194,14 +194,14 @@ bool mlir::replaceAllMemRefUsesWith(const Value *oldMemRef, Value *newMemRef,
 ///
 /// Before
 ///
-///   for %i = 0 to #map(%N)
+///   affine.for %i = 0 to #map(%N)
 ///     %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 ///     "send"(%idx, %A, ...)
 ///     "compute"(%idx)
 ///
 /// After
 ///
-///   for %i = 0 to #map(%N)
+///   affine.for %i = 0 to #map(%N)
 ///     %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 ///     "send"(%idx, %A, ...)
 ///     %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
@@ -113,7 +113,7 @@ using namespace mlir;
 ///
 /// At a high level, a vectorized load in a loop will resemble:
 /// ```mlir
-///   for %i = ? to ? step ? {
+///   affine.for %i = ? to ? step ? {
 ///     %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
 ///            vector<128xf32>
 ///   }

@@ -309,7 +309,7 @@ using namespace mlir;
 /// ```mlir
 /// mlfunc @fill(%A : memref<128xf32>) -> () {
 ///   %f1 = constant 1.0 : f32
-///   for %i0 = 0 to 32 {
+///   affine.for %i0 = 0 to 32 {
 ///     store %f1, %A[%i0] : memref<128xf32, 0>
 ///   }
 ///   return

@@ -322,7 +322,7 @@ using namespace mlir;
 /// is still subject to exploratory tradeoffs. In particular, say we want to
 /// vectorize by a factor 128, we want to transform the following input:
 /// ```mlir
-///   for %i = %M to %N {
+///   affine.for %i = %M to %N {
 ///     %a = load A[%i] : memref<?xf32>
 ///   }
 /// ```

@@ -331,8 +331,8 @@ using namespace mlir;
 /// memory promotion etc) say after stripmining (and potentially unrolling in
 /// the case of LLVM's SLP vectorizer):
 /// ```mlir
-///   for %i = floor(%M, 128) to ceil(%N, 128) {
-///     for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
+///   affine.for %i = floor(%M, 128) to ceil(%N, 128) {
+///     affine.for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
 ///       %a = load A[%ii] : memref<?xf32>
 ///     }
 ///   }

@@ -341,7 +341,7 @@ using namespace mlir;
 /// Instead, we seek to vectorize early and freeze vector types before
 /// scheduling, so we want to generate a pattern that resembles:
 /// ```mlir
-///   for %i = ? to ? step ? {
+///   affine.for %i = ? to ? step ? {
 ///     %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
 ///            vector<128xf32>
 ///   }

@@ -362,7 +362,7 @@ using namespace mlir;
 /// For the simple strawman example above, vectorizing for a 1-D vector
 /// abstraction of size 128 returns code similar to:
 /// ```mlir
-///   for %i = %M to %N step 128 {
+///   affine.for %i = %M to %N step 128 {
 ///     %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
 ///            vector<128xf32>
 ///   }

@@ -391,20 +391,20 @@ using namespace mlir;
 ///   %C = alloc (%M, %N) : memref<?x?xf32, 0>
 ///   %f1 = constant 1.0 : f32
 ///   %f2 = constant 2.0 : f32
-///   for %i0 = 0 to %M {
-///     for %i1 = 0 to %N {
+///   affine.for %i0 = 0 to %M {
+///     affine.for %i1 = 0 to %N {
 ///       // non-scoped %f1
 ///       store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
 ///     }
 ///   }
-///   for %i2 = 0 to %M {
-///     for %i3 = 0 to %N {
+///   affine.for %i2 = 0 to %M {
+///     affine.for %i3 = 0 to %N {
 ///       // non-scoped %f2
 ///       store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
 ///     }
 ///   }
-///   for %i4 = 0 to %M {
-///     for %i5 = 0 to %N {
+///   affine.for %i4 = 0 to %M {
+///     affine.for %i5 = 0 to %N {
 ///       %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
 ///       %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
 ///       %s5 = addf %a5, %b5 : f32

@@ -438,24 +438,24 @@ using namespace mlir;
 ///   %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
 ///   %cst = constant 1.0 : f32
 ///   %cst_0 = constant 2.0 : f32
-///   for %i0 = 0 to %arg0 {
-///     for %i1 = 0 to %arg1 step 256 {
+///   affine.for %i0 = 0 to %arg0 {
+///     affine.for %i1 = 0 to %arg1 step 256 {
 ///       %cst_1 = constant splat<vector<256xf32>, 1.0> :
 ///                vector<256xf32>
 ///       "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
 ///         (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
 ///     }
 ///   }
-///   for %i2 = 0 to %arg0 {
-///     for %i3 = 0 to %arg1 step 256 {
+///   affine.for %i2 = 0 to %arg0 {
+///     affine.for %i3 = 0 to %arg1 step 256 {
 ///       %cst_2 = constant splat<vector<256xf32>, 2.0> :
 ///                vector<256xf32>
 ///       "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
 ///         (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
 ///     }
 ///   }
-///   for %i4 = 0 to %arg0 {
-///     for %i5 = 0 to %arg1 step 256 {
+///   affine.for %i4 = 0 to %arg0 {
+///     affine.for %i5 = 0 to %arg1 step 256 {
 ///       %3 = "vector_transfer_read"(%0, %i4, %i5) :
 ///            (memref<?x?xf32>, index, index) -> vector<256xf32>
 ///       %4 = "vector_transfer_read"(%1, %i4, %i5) :

@@ -494,24 +494,24 @@ using namespace mlir;
 ///   %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
 ///   %cst = constant 1.0 : f32
 ///   %cst_0 = constant 2.0 : f32
-///   for %i0 = 0 to %arg0 step 32 {
-///     for %i1 = 0 to %arg1 step 256 {
+///   affine.for %i0 = 0 to %arg0 step 32 {
+///     affine.for %i1 = 0 to %arg1 step 256 {
 ///       %cst_1 = constant splat<vector<32x256xf32>, 1.0> :
 ///                vector<32x256xf32>
 ///       "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
 ///         (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
 ///     }
 ///   }
-///   for %i2 = 0 to %arg0 step 32 {
-///     for %i3 = 0 to %arg1 step 256 {
+///   affine.for %i2 = 0 to %arg0 step 32 {
+///     affine.for %i3 = 0 to %arg1 step 256 {
 ///       %cst_2 = constant splat<vector<32x256xf32>, 2.0> :
 ///                vector<32x256xf32>
 ///       "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
 ///         (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
 ///     }
 ///   }
-///   for %i4 = 0 to %arg0 step 32 {
-///     for %i5 = 0 to %arg1 step 256 {
+///   affine.for %i4 = 0 to %arg0 step 32 {
+///     affine.for %i5 = 0 to %arg1 step 256 {
 ///       %3 = "vector_transfer_read"(%0, %i4, %i5) :
 ///            (memref<?x?xf32>, index, index) -> vector<32x256xf32>
 ///       %4 = "vector_transfer_read"(%1, %i4, %i5) :
@@ -32,7 +32,7 @@
 func @compose_affine_maps_1dto2d_no_symbols() {
   %0 = alloc() : memref<4x4xf32>

-  for %i0 = 0 to 15 {
+  affine.for %i0 = 0 to 15 {
     // Test load[%x, %x]

     %x0 = affine.apply (d0) -> (d0 - 1) (%i0)

@@ -78,7 +78,7 @@ func @compose_affine_maps_1dto2d_no_symbols() {
 func @compose_affine_maps_1dto2d_with_symbols() {
   %0 = alloc() : memref<4x4xf32>

-  for %i0 = 0 to 15 {
+  affine.for %i0 = 0 to 15 {
     // Test load[%x0, %x0] with symbol %c4
     %c4 = constant 4 : index
     %x0 = affine.apply (d0)[s0] -> (d0 - s0) (%i0)[%c4]

@@ -119,13 +119,13 @@ func @compose_affine_maps_2d_tile() {
   %c4 = constant 4 : index
   %c8 = constant 8 : index

-  for %i0 = 0 to 3 {
+  affine.for %i0 = 0 to 3 {
     %x0 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i0)[%c4]
-    for %i1 = 0 to 3 {
+    affine.for %i1 = 0 to 3 {
       %x1 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i1)[%c8]
-      for %i2 = 0 to 3 {
+      affine.for %i2 = 0 to 3 {
        %x2 = affine.apply (d0)[s0] -> (d0 mod s0) (%i2)[%c4]
-        for %i3 = 0 to 3 {
+        affine.for %i3 = 0 to 3 {
          %x3 = affine.apply (d0)[s0] -> (d0 mod s0) (%i3)[%c8]

          %x40 = affine.apply (d0, d1, d2, d3)[s0, s1] ->

@@ -151,9 +151,9 @@ func @compose_affine_maps_dependent_loads() {
   %0 = alloc() : memref<16x32xf32>
   %1 = alloc() : memref<16x32xf32>

-  for %i0 = 0 to 3 {
-    for %i1 = 0 to 3 {
-      for %i2 = 0 to 3 {
+  affine.for %i0 = 0 to 3 {
+    affine.for %i1 = 0 to 3 {
+      affine.for %i2 = 0 to 3 {
         %c3 = constant 3 : index
         %c7 = constant 7 : index

@@ -197,7 +197,7 @@ func @compose_affine_maps_dependent_loads() {
 func @compose_affine_maps_diamond_dependency() {
   %0 = alloc() : memref<4x4xf32>

-  for %i0 = 0 to 15 {
+  affine.for %i0 = 0 to 15 {
     %a = affine.apply (d0) -> (d0 - 1) (%i0)
     %b = affine.apply (d0) -> (d0 + 7) (%a)
     %c = affine.apply (d0) -> (d0 * 4) (%a)

@@ -217,8 +217,8 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
   %c9 = constant 9 : index
   %1 = alloc() : memref<100x100xf32, 1>
   %2 = alloc() : memref<1xi32>
-  for %i0 = 0 to 100 {
-    for %i1 = 0 to 100 {
+  affine.for %i0 = 0 to 100 {
+    affine.for %i1 = 0 to 100 {
       %3 = affine.apply (d0, d1)[s0, s1] -> (d1 + s0 + s1)
            (%i0, %i1)[%arg1, %c9]
       %4 = affine.apply (d0, d1, d3) -> (d3 - (d0 + d1))

@@ -238,7 +238,7 @@ func @trivial_maps() {
   %0 = alloc() : memref<10xf32>
   %c0 = constant 0 : index
   %cst = constant 0.000000e+00 : f32
-  for %i1 = 0 to 10 {
+  affine.for %i1 = 0 to 10 {
     %1 = affine.apply ()[s0] -> (s0)()[%c0]
     store %cst, %0[%1] : memref<10xf32>
     %2 = load %0[%c0] : memref<10xf32>

@@ -277,20 +277,20 @@ func @constant_fold_bounds(%N : index) {
   %c3 = affine.apply (d0, d1) -> (d0 + d1) (%c1, %c2)
   %l = "foo"() : () -> index

-  // CHECK: for %i0 = 5 to 7 {
-  for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
+  // CHECK: affine.for %i0 = 5 to 7 {
+  affine.for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
     "foo"(%i, %c3) : (index, index) -> ()
   }

   // Bound takes a non-constant argument but can still be folded.
-  // CHECK: for %i1 = 1 to 7 {
-  for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
+  // CHECK: affine.for %i1 = 1 to 7 {
+  affine.for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
     "foo"(%j, %c3) : (index, index) -> ()
   }

   // None of the bounds can be folded.
-  // CHECK: for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
-  for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
+  // CHECK: affine.for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
+  affine.for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
     "foo"(%k, %c3) : (index, index) -> ()
   }
   return
|
@@ -204,35 +204,35 @@ func @illegaltype(i0) // expected-error {{invalid integer width}}
// -----

func @malformed_for_percent() {
-  for i = 1 to 10 { // expected-error {{expected SSA operand}}
+  affine.for i = 1 to 10 { // expected-error {{expected SSA operand}}

// -----

func @malformed_for_equal() {
-  for %i 1 to 10 { // expected-error {{expected '='}}
+  affine.for %i 1 to 10 { // expected-error {{expected '='}}

// -----

func @malformed_for_to() {
-  for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
+  affine.for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
  }
}

// -----

func @incomplete_for() {
-  for %i = 1 to 10 step 2
+  affine.for %i = 1 to 10 step 2
} // expected-error {{expected '{' to begin block list}}

// -----

func @nonconstant_step(%1 : i32) {
-  for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}
+  affine.for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}

// -----

func @for_negative_stride() {
-  for %i = 1 to 10 step -1
+  affine.for %i = 1 to 10 step -1
} // expected-error@-1 {{expected step to be representable as a positive signed integer}}

// -----

@@ -244,7 +244,7 @@ func @non_instruction() {
// -----

func @invalid_if_conditional2() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i)[N] : (i >= ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
  }
}
@@ -252,7 +252,7 @@ func @invalid_if_conditional2() {
// -----

func @invalid_if_conditional3() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i)[N] : (i == 1) // expected-error {{expected '0' after '=='}}
  }
}
@@ -260,7 +260,7 @@ func @invalid_if_conditional3() {
// -----

func @invalid_if_conditional4() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i)[N] : (i >= 2) // expected-error {{expected '0' after '>='}}
  }
}
@@ -268,7 +268,7 @@ func @invalid_if_conditional4() {
// -----

func @invalid_if_conditional5() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i)[N] : (i <= 0 ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
  }
}
@@ -276,7 +276,7 @@ func @invalid_if_conditional5() {
// -----

func @invalid_if_conditional6() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i) : (i) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
  }
}
@@ -284,7 +284,7 @@ func @invalid_if_conditional6() {
// -----
// TODO (support if (1)?
func @invalid_if_conditional7() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i) : (1) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
  }
}
@@ -438,8 +438,8 @@ func @undef() {
// -----

func @duplicate_induction_var() {
-  for %i = 1 to 10 { // expected-error {{previously defined here}}
-    for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
+  affine.for %i = 1 to 10 { // expected-error {{previously defined here}}
+    affine.for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
    }
  }
  return
@@ -448,7 +448,7 @@ func @duplicate_induction_var() {
// -----

func @dominance_failure() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
  }
  "xxx"(%i) : (index)->() // expected-error {{operand #0 does not dominate this use}}
  return
@@ -475,7 +475,7 @@ func @return_type_mismatch() -> i32 {
// -----

func @return_inside_loop() -> i8 {
-  for %i = 1 to 100 {
+  affine.for %i = 1 to 100 {
    %a = "foo"() : ()->i8
    return %a : i8
    // expected-error@-1 {{'return' op may only be at the top level of a function}}
@@ -521,7 +521,7 @@ func @referer() {
#map1 = (i)[j] -> (i+j)

func @bound_symbol_mismatch(%N : index) {
-  for %i = #map1(%N) to 100 {
+  affine.for %i = #map1(%N) to 100 {
  // expected-error@-1 {{symbol operand count and integer set symbol count must match}}
  }
  return
@@ -532,7 +532,7 @@ func @bound_symbol_mismatch(%N : index) {
#map1 = (i)[j] -> (i+j)

func @bound_dim_mismatch(%N : index) {
-  for %i = #map1(%N, %N)[%N] to 100 {
+  affine.for %i = #map1(%N, %N)[%N] to 100 {
  // expected-error@-1 {{dim operand count and integer set dim count must match}}
  }
  return
@@ -541,7 +541,7 @@ func @bound_dim_mismatch(%N : index) {
// -----

func @large_bound() {
-  for %i = 1 to 9223372036854775810 {
+  affine.for %i = 1 to 9223372036854775810 {
  // expected-error@-1 {{integer constant out of range for attribute}}
  }
  return
@@ -550,7 +550,7 @@ func @large_bound() {
// -----

func @max_in_upper_bound(%N : index) {
-  for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
+  affine.for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
  }
  return
}
@@ -558,7 +558,7 @@ func @max_in_upper_bound(%N : index) {
// -----

func @step_typo() {
-  for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
+  affine.for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
  }
  return
}
@@ -566,7 +566,7 @@ func @step_typo() {
// -----

func @invalid_bound_map(%N : i32) {
-  for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
+  affine.for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
  }
  return
}
@@ -579,7 +579,7 @@ func @invalid_bound_map(%N : i32) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)

func @invalid_if_operands1(%N : index) {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if #set0(%i) {
    // expected-error@-1 {{symbol operand count and integer set symbol count must match}}

@@ -587,7 +587,7 @@ func @invalid_if_operands1(%N : index) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)

func @invalid_if_operands2(%N : index) {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if #set0()[%N] {
    // expected-error@-1 {{dim operand count and integer set dim count must match}}

@@ -595,7 +595,7 @@ func @invalid_if_operands2(%N : index) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)

func @invalid_if_operands3(%N : index) {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if #set0(%i)[%i] {
    // expected-error@-1 {{operand cannot be used as a symbol}}
    }
@@ -736,11 +736,11 @@ func @f(f32) {
// -----

func @f(%m : memref<?x?xf32>) {
-  for %i0 = 0 to 42 {
+  affine.for %i0 = 0 to 42 {
    // expected-error@+1 {{operand #2 does not dominate this use}}
    %x = load %m[%i0, %i1] : memref<?x?xf32>
  }
-  for %i1 = 0 to 42 {
+  affine.for %i1 = 0 to 42 {
  }
  return
}
@@ -790,7 +790,7 @@ func @type_alias_unknown(!unknown_alias) -> () { // expected-error {{undefined t

// Check ill-formed opaque tensor.
func @complex_loops() {
-  for %i1 = 1 to 100 {
+  affine.for %i1 = 1 to 100 {
  // expected-error @+1 {{expected '"' in string literal}}
  "opaqueIntTensor"(){bar: opaque<tensor<2x1x4xi32>, "0x686]>} : () -> ()

@@ -824,7 +824,7 @@ func @invalid_affine_structure() {

func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
  // expected-error @+1 {{lower loop bound affine map with multiple results requires 'max' prefix}}
-  for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
+  affine.for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
  }
  return
}
@@ -833,7 +833,7 @@ func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {

func @missing_for_min(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
  // expected-error @+1 {{upper loop bound affine map with multiple results requires 'min' prefix}}
-  for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
+  affine.for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
  }
  return
}
@@ -13,7 +13,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
  %2 = constant 4 : index loc(callsite("foo" at "mysource.cc":10:8))

  // CHECK: } loc(fused["foo", "mysource.cc":10:8])
-  for %i0 = 0 to 8 {
+  affine.for %i0 = 0 to 8 {
  } loc(fused["foo", "mysource.cc":10:8])

  // CHECK: } loc(fused<"myPass">["foo", "foo2"])
@@ -208,8 +208,8 @@ func @identity_functor(%a : () -> ()) -> (() -> ()) {
func @func_ops_in_loop() {
  // CHECK: %0 = "foo"() : () -> i64
  %a = "foo"() : ()->i64
-  // CHECK: for %i0 = 1 to 10 {
-  for %i = 1 to 10 {
+  // CHECK: affine.for %i0 = 1 to 10 {
+  affine.for %i = 1 to 10 {
    // CHECK: %1 = "doo"() : () -> f32
    %b = "doo"() : ()->f32
    // CHECK: "bar"(%0, %1) : (i64, f32) -> ()
@@ -224,10 +224,10 @@ func @func_ops_in_loop() {

// CHECK-LABEL: func @loops() {
func @loops() {
-  // CHECK: for %i0 = 1 to 100 step 2 {
-  for %i = 1 to 100 step 2 {
-    // CHECK: for %i1 = 1 to 200 {
-    for %j = 1 to 200 {
+  // CHECK: affine.for %i0 = 1 to 100 step 2 {
+  affine.for %i = 1 to 100 step 2 {
+    // CHECK: affine.for %i1 = 1 to 200 {
+    affine.for %j = 1 to 200 {
    } // CHECK: }
  } // CHECK: }
  return // CHECK: return
@@ -235,14 +235,14 @@ func @loops() {

// CHECK-LABEL: func @complex_loops() {
func @complex_loops() {
-  for %i1 = 1 to 100 { // CHECK: for %i0 = 1 to 100 {
-    for %j1 = 1 to 100 { // CHECK: for %i1 = 1 to 100 {
+  affine.for %i1 = 1 to 100 { // CHECK: affine.for %i0 = 1 to 100 {
+    affine.for %j1 = 1 to 100 { // CHECK: affine.for %i1 = 1 to 100 {
      // CHECK: "foo"(%i0, %i1) : (index, index) -> ()
      "foo"(%i1, %j1) : (index,index) -> ()
    } // CHECK: }
    "boo"() : () -> () // CHECK: "boo"() : () -> ()
-    for %j2 = 1 to 10 { // CHECK: for %i2 = 1 to 10 {
-      for %k2 = 1 to 10 { // CHECK: for %i3 = 1 to 10 {
+    affine.for %j2 = 1 to 10 { // CHECK: affine.for %i2 = 1 to 10 {
+      affine.for %k2 = 1 to 10 { // CHECK: affine.for %i3 = 1 to 10 {
        "goo"() : () -> () // CHECK: "goo"() : () -> ()
      } // CHECK: }
    } // CHECK: }
@@ -253,8 +253,8 @@ func @complex_loops() {
// CHECK: func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
  %c = constant 0 : i32 // CHECK: %c0_i32 = constant 0 : i32
-  for %i0 = 1 to %arg0 { // CHECK: for %i0 = 1 to %arg0 {
-    for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
+  affine.for %i0 = 1 to %arg0 { // CHECK: affine.for %i0 = 1 to %arg0 {
+    affine.for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
      store %c, %arg1[%i0, %i1] : memref<?x?xi32> // CHECK: store %c0_i32, %arg1[%i0, %i1]
    } // CHECK: }
  } // CHECK: }
@@ -263,8 +263,8 @@ func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {

// CHECK: func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
-  // CHECK: for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
-  for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
+  // CHECK: affine.for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
+  affine.for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
    // CHECK: "foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
    "foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
  } // CHECK: }
@@ -275,24 +275,24 @@ func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
func @loop_bounds(%N : index) {
  // CHECK: %0 = "foo"(%arg0) : (index) -> index
  %s = "foo"(%N) : (index) -> index
-  // CHECK: for %i0 = %0 to %arg0
-  for %i = %s to %N {
-    // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to 0
-    for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
+  // CHECK: affine.for %i0 = %0 to %arg0
+  affine.for %i = %s to %N {
+    // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to 0
+    affine.for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
      // CHECK: %1 = affine.apply #map{{.*}}(%i0, %i1)[%0]
      %w1 = affine.apply(d0, d1)[s0] -> (d0+d1) (%i, %j) [%s]
      // CHECK: %2 = affine.apply #map{{.*}}(%i0, %i1)[%0]
      %w2 = affine.apply(d0, d1)[s0] -> (s0+1) (%i, %j) [%s]
-      // CHECK: for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
-      for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
+      // CHECK: affine.for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
+      affine.for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
        // CHECK: "foo"(%i0, %i1, %i2) : (index, index, index) -> ()
        "foo"(%i, %j, %k) : (index, index, index)->()
        // CHECK: %c30 = constant 30 : index
        %c = constant 30 : index
        // CHECK: %3 = affine.apply #map{{.*}}(%arg0, %c30)
        %u = affine.apply (d0, d1)->(d0+d1) (%N, %c)
-        // CHECK: for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
-        for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
+        // CHECK: affine.for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
+        affine.for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
          // CHECK: "bar"(%i3) : (index) -> ()
          "bar"(%l) : (index) -> ()
        } // CHECK: }
@@ -305,7 +305,7 @@ func @loop_bounds(%N : index) {
// CHECK-LABEL: func @ifinst(%arg0: index) {
func @ifinst(%N: index) {
  %c = constant 200 : index // CHECK %c200 = constant 200
-  for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
+  affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
    if #set0(%i)[%N, %c] { // CHECK if #set0(%i0)[%arg0, %c200] {
      %x = constant 1 : i32
      // CHECK: %c1_i32 = constant 1 : i32
@@ -328,7 +328,7 @@ func @ifinst(%N: index) {
// CHECK-LABEL: func @simple_ifinst(%arg0: index) {
func @simple_ifinst(%N: index) {
  %c = constant 200 : index // CHECK %c200 = constant 200
-  for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
+  affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
    if #set0(%i)[%N, %c] { // CHECK if #set0(%i0)[%arg0, %c200] {
      %x = constant 1 : i32
      // CHECK: %c1_i32 = constant 1 : i32
@@ -544,18 +544,18 @@ func @funcattrwithblock() -> ()
#map_non_simple2 = ()[s0, s1] -> (s0 + s1)
#map_non_simple3 = ()[s0] -> (s0 + 3)
func @funcsimplemap(%arg0: index, %arg1: index) -> () {
-  for %i0 = 0 to #map_simple0()[] {
-  // CHECK: for %i0 = 0 to 10 {
-    for %i1 = 0 to #map_simple1()[%arg1] {
-    // CHECK: for %i1 = 0 to %arg1 {
-      for %i2 = 0 to #map_non_simple0(%i0)[] {
-      // CHECK: for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
-        for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
-        // CHECK: for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
-          for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
-          // CHECK: for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
-            for %i5 = 0 to #map_non_simple3()[%arg0] {
-            // CHECK: for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
+  affine.for %i0 = 0 to #map_simple0()[] {
+  // CHECK: affine.for %i0 = 0 to 10 {
+    affine.for %i1 = 0 to #map_simple1()[%arg1] {
+    // CHECK: affine.for %i1 = 0 to %arg1 {
+      affine.for %i2 = 0 to #map_non_simple0(%i0)[] {
+      // CHECK: affine.for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
+        affine.for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
+        // CHECK: affine.for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
+          affine.for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
+          // CHECK: affine.for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
+            affine.for %i5 = 0 to #map_non_simple3()[%arg0] {
+            // CHECK: affine.for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
              %c42_i32 = constant 42 : i32
            }
          }
@@ -749,9 +749,9 @@ func @sparsevectorattr() -> () {
// CHECK-LABEL: func @loops_with_blockids() {
func @loops_with_blockids() {
^block0:
-  for %i = 1 to 100 step 2 {
+  affine.for %i = 1 to 100 step 2 {
  ^block1:
-    for %j = 1 to 200 {
+    affine.for %j = 1 to 200 {
    ^block2:
    }
  }
@@ -18,7 +18,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
  %3 = constant 4 : index loc(callsite("foo" at callsite("mysource1.cc":10:8 at callsite("mysource2.cc":13:8 at "mysource3.cc":100:10))))

  // CHECK: } ["foo", mysource.cc:10:8]
-  for %i0 = 0 to 8 {
+  affine.for %i0 = 0 to 8 {
  } loc(fused["foo", "mysource.cc":10:8])

  // CHECK: } <"myPass">["foo", "foo2"]
@@ -6,8 +6,8 @@
// CHECK-LABEL: func @materialize_read_1d() {
func @materialize_read_1d() {
  %A = alloc () : memref<7x42xf32>
-  for %i0 = 0 to 7 step 4 {
-    for %i1 = 0 to 42 step 4 {
+  affine.for %i0 = 0 to 7 step 4 {
+    affine.for %i1 = 0 to 42 step 4 {
      %f1 = vector_transfer_read %A, %i0, %i1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
      %ip1 = affine.apply (d0) -> (d0 + 1) (%i1)
      %f2 = vector_transfer_read %A, %i0, %ip1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
@@ -29,11 +29,11 @@ func @materialize_read_1d() {
// CHECK-LABEL: func @materialize_read_1d_partially_specialized
func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) {
  %A = alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32>
-  for %i0 = 0 to 7 {
-    for %i1 = 0 to %dyn1 {
-      for %i2 = 0 to %dyn2 {
-        for %i3 = 0 to 42 step 2 {
-          for %i4 = 0 to %dyn4 {
+  affine.for %i0 = 0 to 7 {
+    affine.for %i1 = 0 to %dyn1 {
+      affine.for %i2 = 0 to %dyn2 {
+        affine.for %i3 = 0 to 42 step 2 {
+          affine.for %i4 = 0 to %dyn4 {
            %f1 = vector_transfer_read %A, %i0, %i1, %i2, %i3, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
            %i3p1 = affine.apply (d0) -> (d0 + 1) (%i3)
            %f2 = vector_transfer_read %A, %i0, %i1, %i2, %i3p1, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
@@ -54,10 +54,10 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d
// CHECK-LABEL: func @materialize_read(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
  // CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
-  // CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 {
-  // CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
-  // CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
+  // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 {
+  // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
+  // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
  // CHECK-NEXT: %[[C0:.*]] = constant 0 : index
  // CHECK-NEXT: %[[C1:.*]] = constant 1 : index
  // CHECK: {{.*}} = dim %0, 0 : memref<?x?x?x?xf32>
@@ -66,9 +66,9 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
  // CHECK-NEXT: {{.*}} = dim %0, 3 : memref<?x?x?x?xf32>
  // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
  // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast %[[ALLOC]] : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
-  // CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
-  // CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
-  // CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
+  // CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
+  // CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
+  // CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
  // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]]
  // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
  // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]]
@@ -109,10 +109,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
  // CHECK-NEXT: return
  // CHECK-NEXT:}
  %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
-  for %i0 = 0 to %M step 3 {
-    for %i1 = 0 to %N {
-      for %i2 = 0 to %O {
-        for %i3 = 0 to %P step 5 {
+  affine.for %i0 = 0 to %M step 3 {
+    affine.for %i1 = 0 to %N {
+      affine.for %i2 = 0 to %O {
+        affine.for %i3 = 0 to %P step 5 {
          %f = vector_transfer_read %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, 0, d0)} : (memref<?x?x?x?xf32, 0>, index, index, index, index) -> vector<5x4x3xf32>
        }
      }
@@ -125,10 +125,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
  // CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
  // CHECK-NEXT: %cst = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
-  // CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 step 4 {
-  // CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
-  // CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
+  // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 step 4 {
+  // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
+  // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
  // CHECK-NEXT: %[[C0:.*]] = constant 0 : index
  // CHECK-NEXT: %[[C1:.*]] = constant 1 : index
  // CHECK: {{.*}} = dim %0, 0 : memref<?x?x?x?xf32>
@@ -138,9 +138,9 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
  // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
  // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast {{.*}} : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
  // CHECK-NEXT: store %cst, {{.*}}[%[[C0]]] : memref<1xvector<5x4x3xf32>>
-  // CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
-  // CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
-  // CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
+  // CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
+  // CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
+  // CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
  // CHECK-NEXT: {{.*}} = load {{.*}}[%[[I6]], %[[I5]], %[[I4]]] : memref<5x4x3xf32>
  // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
  // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
@@ -184,10 +184,10 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
  // CHECK-NEXT:}
  %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
  %f1 = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
-  for %i0 = 0 to %M step 3 {
-    for %i1 = 0 to %N step 4 {
-      for %i2 = 0 to %O {
-        for %i3 = 0 to %P step 5 {
+  affine.for %i0 = 0 to %M step 3 {
+    affine.for %i1 = 0 to %N step 4 {
+      affine.for %i2 = 0 to %O {
+        affine.for %i3 = 0 to %P step 5 {
          vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : vector<5x4x3xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index
        }
      }
@@ -10,10 +10,10 @@
func @materialize(%M : index, %N : index, %O : index, %P : index) {
  %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
  %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
-  // CHECK: for %i0 = 0 to %arg0 step 4 {
-  // CHECK-NEXT: for %i1 = 0 to %arg1 step 4 {
-  // CHECK-NEXT: for %i2 = 0 to %arg2 {
-  // CHECK-NEXT: for %i3 = 0 to %arg3 step 4 {
+  // CHECK: affine.for %i0 = 0 to %arg0 step 4 {
+  // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 4 {
+  // CHECK-NEXT: affine.for %i2 = 0 to %arg2 {
+  // CHECK-NEXT: affine.for %i3 = 0 to %arg3 step 4 {
  // CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[ID1]](%i0)
  // CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[ID1]](%i1)
  // CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[ID1]](%i2)
@@ -25,10 +25,10 @@ func @materialize(%M : index, %N : index, %O : index, %P : index) {
  // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
  // CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0P3]](%i1)
  // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
-  for %i0 = 0 to %M step 4 {
-    for %i1 = 0 to %N step 4 {
-      for %i2 = 0 to %O {
-        for %i3 = 0 to %P step 4 {
+  affine.for %i0 = 0 to %M step 4 {
+    affine.for %i1 = 0 to %N step 4 {
+      affine.for %i2 = 0 to %O {
+        affine.for %i3 = 0 to %P step 4 {
          "vector_transfer_write"(%f1, %A, %i0, %i1, %i2, %i3) {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : (vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index) -> ()
        }
      }
@@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
  // 4x unroll (jammed by construction).
-  // CHECK: for %i0 = 0 to %arg0 {
-  // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i0 = 0 to %arg0 {
+  // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
  // CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
  // CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
  // CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
@@ -34,15 +34,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
  // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
  //
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
      // non-scoped %f1
      store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
    }
  }
  // 4x unroll (jammed by construction).
-  // CHECK: for %i2 = 0 to %arg0 {
-  // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i2 = 0 to %arg0 {
+  // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
  // CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
  // CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
  // CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
@@ -60,15 +60,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
  // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
  //
-  for %i2 = 0 to %M {
-    for %i3 = 0 to %N {
+  affine.for %i2 = 0 to %M {
+    affine.for %i3 = 0 to %N {
      // non-scoped %f2
      store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
    }
  }
  // 4x unroll (jammed by construction).
-  // CHECK: for %i4 = 0 to %arg0 {
-  // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i4 = 0 to %arg0 {
+  // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = vector_transfer_read
@@ -110,8 +110,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: vector_transfer_write
  //
-  for %i4 = 0 to %M {
-    for %i5 = 0 to %N {
+  affine.for %i4 = 0 to %M {
+    affine.for %i5 = 0 to %N {
      %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
      %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
      %s5 = addf %a5, %b5 : f32
@@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
  // (3x2)x unroll (jammed by construction).
-  // CHECK: for %i0 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i1 = 0 to %arg1 step 16 {
+  // CHECK: affine.for %i0 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 16 {
  // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
  // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
  // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
@@ -41,26 +41,26 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: [[VAL50:%.*]] = affine.apply [[D0P2]](%i0)
  // CHECK-NEXT: [[VAL51:%.*]] = affine.apply [[D0P8]](%i1)
  // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
      // non-scoped %f1
      store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
    }
  }
  // (3x2)x unroll (jammed by construction).
-  // CHECK: for %i2 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i3 = 0 to %arg1 step 16 {
+  // CHECK: affine.for %i2 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 16 {
  // .....
-  for %i2 = 0 to %M {
-    for %i3 = 0 to %N {
+  affine.for %i2 = 0 to %M {
+    affine.for %i3 = 0 to %N {
      // non-scoped %f2
      // CHECK does (3x4)x unrolling.
      store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
    }
  }
  // (3x2)x unroll (jammed by construction).
-  // CHECK: for %i4 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i5 = 0 to %arg1 step 16 {
+  // CHECK: affine.for %i4 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 16 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = vector_transfer_read
@@ -122,8 +122,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: vector_transfer_write
  //
-  for %i4 = 0 to %M {
-    for %i5 = 0 to %N {
+  affine.for %i4 = 0 to %M {
+    affine.for %i5 = 0 to %N {
      %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
      %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
      %s5 = addf %a5, %b5 : f32
@@ -13,8 +13,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
  // 2x unroll (jammed by construction).
-  // CHECK: for %i0 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i0 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
  // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
  // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
  // CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i0)
@@ -24,15 +24,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i1)
  // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
  //
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
      // non-scoped %f1
      store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
    }
  }
  // 2x unroll (jammed by construction).
-  // CHECK: for %i2 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i2 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
  // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
  // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
  // CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i2)
@@ -42,15 +42,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i3)
  // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
  //
-  for %i2 = 0 to %M {
-    for %i3 = 0 to %N {
+  affine.for %i2 = 0 to %M {
+    affine.for %i3 = 0 to %N {
      // non-scoped %f2
      store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
    }
  }
  // 2x unroll (jammed by construction).
-  // CHECK: for %i4 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i4 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = vector_transfer_read
@@ -72,8 +72,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: vector_transfer_write
  //
-  for %i4 = 0 to %M {
-    for %i5 = 0 to %N {
+  affine.for %i4 = 0 to %M {
+    affine.for %i5 = 0 to %N {
      %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
      %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
      %s5 = addf %a5, %b5 : f32
@@ -9,19 +9,19 @@

// CHECK-LABEL: func @simple()
func @simple() {
-  for %i0 = 0 to 7 {
+  affine.for %i0 = 0 to 7 {
    %0 = affine.apply (d0) -> (d0) (%i0)
    %1 = affine.apply (d0) -> (d0) (%0)
    %2 = affine.apply (d0, d1) -> (d0 + d1) (%0, %0)
    %3 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
  }
-  // CHECK-NEXT: for %i0 = 0 to 7
+  // CHECK-NEXT: affine.for %i0 = 0 to 7
  // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i0)
  // CHECK-NEXT: {{.*}} affine.apply #[[D0TIMES2]](%i0)
  // CHECK-NEXT: {{.*}} affine.apply #[[ZERO]]()

-  for %i1 = 0 to 7 {
-    for %i2 = 0 to 42 {
+  affine.for %i1 = 0 to 7 {
+    affine.for %i2 = 0 to 42 {
      %20 = affine.apply (d0, d1) -> (d1) (%i1, %i2)
      %21 = affine.apply (d0, d1) -> (d0) (%i1, %i2)
      %22 = affine.apply (d0, d1) -> (d0 + d1) (%20, %21)
@@ -29,15 +29,15 @@ func @simple() {
      %24 = affine.apply (d0, d1) -> (-d0 + d1) (%20, %21)
    }
  }
-  // CHECK: for %i1 = 0 to 7
-  // CHECK-NEXT: for %i2 = 0 to 42
+  // CHECK: affine.for %i1 = 0 to 7
+  // CHECK-NEXT: affine.for %i2 = 0 to 42
  // CHECK-NEXT: {{.*}} affine.apply #[[D0PLUSD1]](%i1, %i2)
  // CHECK-NEXT: {{.*}} affine.apply #[[MINSD0PLUSD1]](%i1, %i2)
  // CHECK-NEXT: {{.*}} affine.apply #[[D0MINUSD1]](%i1, %i2)

-  for %i3 = 0 to 16 {
-    for %i4 = 0 to 47 step 2 {
-      for %i5 = 0 to 78 step 16 {
+  affine.for %i3 = 0 to 16 {
+    affine.for %i4 = 0 to 47 step 2 {
+      affine.for %i5 = 0 to 78 step 16 {
        %50 = affine.apply (d0) -> (d0) (%i3)
        %51 = affine.apply (d0) -> (d0) (%i4)
        %52 = affine.apply (d0) -> (d0) (%i5)
@@ -47,9 +47,9 @@ func @simple() {
      }
    }
  }
-  // CHECK: for %i3 = 0 to 16
-  // CHECK-NEXT: for %i4 = 0 to 47 step 2
-  // CHECK-NEXT: for %i5 = 0 to 78 step 16
+  // CHECK: affine.for %i3 = 0 to 16
+  // CHECK-NEXT: affine.for %i4 = 0 to 47 step 2
+  // CHECK-NEXT: affine.for %i5 = 0 to 78 step 16
  // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i3)
  // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i4)
  // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i5)
@@ -23,17 +23,17 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  //
  // CHECK: for {{.*}} step 128
  // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
-  for %i0 = 0 to %M { // vectorized due to scalar -> vector
+  affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector
    %a0 = load %A[%cst0, %cst0] : memref<?x?xf32>
  }
  //
  // CHECK:for {{.*}} [[ARG_M]] {
-  for %i1 = 0 to %M { // not vectorized
+  affine.for %i1 = 0 to %M { // not vectorized
    %a1 = load %A[%i1, %i1] : memref<?x?xf32>
  }
  //
-  // CHECK: for %i{{[0-9]*}} = 0 to [[ARG_M]] {
-  for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
+  // CHECK: affine.for %i{{[0-9]*}} = 0 to [[ARG_M]] {
+  affine.for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
    %r2 = affine.apply (d0) -> (d0) (%i2)
    %a2 = load %A[%r2#0, %cst0] : memref<?x?xf32>
  }
@@ -41,7 +41,7 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  // CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
  // CHECK-NEXT: [[APP3:%[a-zA-Z0-9]+]] = affine.apply {{.*}}[[IV3]]
  // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[APP3]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
-  for %i3 = 0 to %M { // vectorized
+  affine.for %i3 = 0 to %M { // vectorized
    %r3 = affine.apply (d0) -> (d0) (%i3)
    %a3 = load %A[%cst0, %r3#0] : memref<?x?xf32>
  }
@@ -51,8 +51,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  // CHECK-NEXT: [[APP50:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
  // CHECK-NEXT: [[APP51:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
  // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP50]], [[APP51]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
-  for %i4 = 0 to %M { // vectorized
-    for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
+  affine.for %i4 = 0 to %M { // vectorized
+    affine.for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
      %r50 = affine.apply (d0, d1) -> (d1) (%i4, %i5)
      %r51 = affine.apply (d0, d1) -> (d0) (%i4, %i5)
      %a5 = load %A[%r50, %r51] : memref<?x?xf32>
@@ -61,8 +61,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  //
  // CHECK: for [[IV6:%[i0-9]*]] = 0 to [[ARG_M]] {
  // CHECK-NEXT: for [[IV7:%[i0-9]*]] = 0 to [[ARG_N]] {
-  for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
-    for %i7 = 0 to %N { // not vectorized, can never vectorize
+  affine.for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
+    affine.for %i7 = 0 to %N { // not vectorized, can never vectorize
      %r70 = affine.apply (d0, d1) -> (d1 + d0) (%i6, %i7)
      %r71 = affine.apply (d0, d1) -> (d0) (%i6, %i7)
      %a7 = load %A[%r70, %r71] : memref<?x?xf32>
@@ -74,8 +74,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  // CHECK-NEXT: [[APP9_0:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
  // CHECK-NEXT: [[APP9_1:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
  // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP9_0]], [[APP9_1]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
-  for %i8 = 0 to %M { // vectorized
-    for %i9 = 0 to %N {
+  affine.for %i8 = 0 to %M { // vectorized
+    affine.for %i9 = 0 to %N {
      %r90 = affine.apply (d0, d1) -> (d1) (%i8, %i9)
      %r91 = affine.apply (d0, d1) -> (d0 + d1) (%i8, %i9)
      %a9 = load %A[%r90, %r91] : memref<?x?xf32>
@@ -84,8 +84,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  //
  // CHECK: for [[IV10:%[i0-9]*]] = 0 to %{{[0-9]*}} {
  // CHECK: for [[IV11:%[i0-9]*]] = 0 to %{{[0-9]*}} {
-  for %i10 = 0 to %M { // not vectorized, need per load transposes
-    for %i11 = 0 to %N { // not vectorized, need per load transposes
+  affine.for %i10 = 0 to %M { // not vectorized, need per load transposes
+    affine.for %i11 = 0 to %N { // not vectorized, need per load transposes
      %r11_0 = affine.apply (d0, d1) -> (d0) (%i10, %i11)
      %r11_1 = affine.apply (d0, d1) -> (d1) (%i10, %i11)
      %a11 = load %A[%r11_0, %r11_1] : memref<?x?xf32>
@@ -98,9 +98,9 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  // CHECK: for [[IV12:%[i0-9]*]] = 0 to %{{[0-9]*}} {
  // CHECK: for [[IV13:%[i0-9]*]] = 0 to %{{[0-9]*}} {
  // CHECK: for [[IV14:%[i0-9]+]] = 0 to [[ARG_P]] step 128
-  for %i12 = 0 to %M { // not vectorized, can never vectorize
-    for %i13 = 0 to %N { // not vectorized, can never vectorize
-      for %i14 = 0 to %P { // vectorized
+  affine.for %i12 = 0 to %M { // not vectorized, can never vectorize
+    affine.for %i13 = 0 to %N { // not vectorized, can never vectorize
+      affine.for %i14 = 0 to %P { // vectorized
        %r14_0 = affine.apply (d0, d1, d2) -> (d1) (%i12, %i13, %i14)
        %r14_1 = affine.apply (d0, d1, d2) -> (d0 + d1) (%i12, %i13, %i14)
        %r14_2 = affine.apply (d0, d1, d2) -> (d0 + d2) (%i12, %i13, %i14)
@@ -109,24 +109,24 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
    }
  }
  //
-  // CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
-  for %i15 = 0 to %M { // not vectorized due to condition below
+  // CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
+  affine.for %i15 = 0 to %M { // not vectorized due to condition below
    if #set0(%i15) {
      %a15 = load %A[%cst0, %cst0] : memref<?x?xf32>
    }
  }
  //
-  // CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
-  for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
+  // CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
+  affine.for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
    %a16 = alloc(%M) : memref<?xvector<2xf32>>
    %l16 = load %a16[%i16] : memref<?xvector<2xf32>>
  }
  //
-  // CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
+  // CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
  // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
  // CHECK: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : {{.*}} -> vector<128xf32>
-  for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
-    for %i18 = 0 to %M { // vectorized due to scalar -> vector
+  affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
+    affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector
      %a18 = load %A[%cst0, %cst0] : memref<?x?xf32>
    }
  }
@@ -139,24 +139,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  %C = alloc (%M, %N) : memref<?x?xf32, 0>
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
      // CHECK: [[C1:%.*]] = constant splat<vector<128xf32>, 1.000000e+00> : vector<128xf32>
      // CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
      // non-scoped %f1
      store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
    }
  }
-  for %i2 = 0 to %M {
-    for %i3 = 0 to %N {
+  affine.for %i2 = 0 to %M {
+    affine.for %i3 = 0 to %N {
      // CHECK: [[C3:%.*]] = constant splat<vector<128xf32>, 2.000000e+00> : vector<128xf32>
      // CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
      // non-scoped %f2
      store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
    }
  }
-  for %i4 = 0 to %M {
-    for %i5 = 0 to %N {
+  affine.for %i4 = 0 to %M {
+    affine.for %i5 = 0 to %N {
      // CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
      // CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
      // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<128xf32>
@@ -188,10 +188,10 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-LABEL: @vec_rejected
func @vec_rejected(%A : memref<?x?xf32>, %C : memref<?x?xf32>) {
  %N = dim %A, 0 : memref<?x?xf32>
-  for %i = 0 to %N {
+  affine.for %i = 0 to %N {
    // CHECK-NOT: vector
    %a = load %A[%i, %i] : memref<?x?xf32> // not vectorized
-    for %j = 0 to %N {
+    affine.for %j = 0 to %N {
      %b = load %A[%i, %j] : memref<?x?xf32> // may be vectorized
      // CHECK-NOT: vector
      %c = addf %a, %b : f32 // not vectorized because %a wasn't
@@ -11,13 +11,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  // CHECK: for {{.*}} = 0 to %1 step 32
  // CHECK: for {{.*}} = 0 to %2 step 256
  // Example:
-  // for %i0 = 0 to %0 {
-  //   for %i1 = 0 to %1 step 32 {
-  //     for %i2 = 0 to %2 step 256 {
+  // affine.for %i0 = 0 to %0 {
+  //   affine.for %i1 = 0 to %1 step 32 {
+  //     affine.for %i2 = 0 to %2 step 256 {
  //       %3 = "vector_transfer_read"(%arg0, %i0, %i1, %i2) : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
-      for %i2 = 0 to %P {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
+      affine.for %i2 = 0 to %P {
        %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
      }
    }
@@ -27,9 +27,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  // CHECK: for {{.*}} = 0 to %2 {
  // For the case: --test-fastest-varying=1 --test-fastest-varying=0 no
  // vectorization happens because of loop nesting order .
-  for %i3 = 0 to %M {
-    for %i4 = 0 to %N {
-      for %i5 = 0 to %P {
+  affine.for %i3 = 0 to %M {
+    affine.for %i4 = 0 to %N {
+      affine.for %i5 = 0 to %P {
        %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
      }
    }
@@ -43,24 +43,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  %C = alloc (%M, %N) : memref<?x?xf32, 0>
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
      // CHECK: [[C1:%.*]] = constant splat<vector<32x256xf32>, 1.000000e+00> : vector<32x256xf32>
      // CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
      // non-scoped %f1
      store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
    }
  }
-  for %i2 = 0 to %M {
-    for %i3 = 0 to %N {
+  affine.for %i2 = 0 to %M {
+    affine.for %i3 = 0 to %N {
      // CHECK: [[C3:%.*]] = constant splat<vector<32x256xf32>, 2.000000e+00> : vector<32x256xf32>
      // CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
      // non-scoped %f2
      store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
    }
  }
-  for %i4 = 0 to %M {
-    for %i5 = 0 to %N {
+  affine.for %i4 = 0 to %M {
+    affine.for %i5 = 0 to %N {
      // CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
      // CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
      // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32>
@@ -7,17 +7,17 @@ func @vec3d(%A : memref<?x?x?xf32>) {
  %0 = dim %A, 0 : memref<?x?x?xf32>
  %1 = dim %A, 1 : memref<?x?x?xf32>
  %2 = dim %A, 2 : memref<?x?x?xf32>
-  // CHECK: for %i0 = 0 to %0 {
-  // CHECK: for %i1 = 0 to %0 {
-  // CHECK: for %i2 = 0 to %0 step 32 {
-  // CHECK: for %i3 = 0 to %1 step 64 {
-  // CHECK: for %i4 = 0 to %2 step 256 {
+  // CHECK: affine.for %i0 = 0 to %0 {
+  // CHECK: affine.for %i1 = 0 to %0 {
+  // CHECK: affine.for %i2 = 0 to %0 step 32 {
+  // CHECK: affine.for %i3 = 0 to %1 step 64 {
+  // CHECK: affine.for %i4 = 0 to %2 step 256 {
  // CHECK: %3 = vector_transfer_read %arg0, %i2, %i3, %i4 {permutation_map: #[[map_proj_d0d1d2_d0d1d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x64x256xf32>
-  for %t0 = 0 to %0 {
-    for %t1 = 0 to %0 {
-      for %i0 = 0 to %0 {
-        for %i1 = 0 to %1 {
-          for %i2 = 0 to %2 {
+  affine.for %t0 = 0 to %0 {
+    affine.for %t1 = 0 to %0 {
+      affine.for %i0 = 0 to %0 {
+        affine.for %i1 = 0 to %1 {
+          affine.for %i2 = 0 to %2 {
            %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
          }
        }
@@ -7,13 +7,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  %M = dim %A, 0 : memref<?x?x?xf32>
  %N = dim %A, 1 : memref<?x?x?xf32>
  %P = dim %A, 2 : memref<?x?x?xf32>
-  // CHECK: for %i0 = 0 to %0 step 32
-  // CHECK: for %i1 = 0 to %1 {
-  // CHECK: for %i2 = 0 to %2 step 256
+  // CHECK: affine.for %i0 = 0 to %0 step 32
+  // CHECK: affine.for %i1 = 0 to %1 {
+  // CHECK: affine.for %i2 = 0 to %2 step 256
  // CHECK: {{.*}} = vector_transfer_read %arg0, %i0, %i1, %i2 {permutation_map: #[[map_proj_d0d1d2_d0d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
-      for %i2 = 0 to %P {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
+      affine.for %i2 = 0 to %P {
        %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
      }
    }
@@ -23,9 +23,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  // CHECK: for {{.*}} = 0 to %2 {
  // For the case: --test-fastest-varying=2 --test-fastest-varying=0 no
  // vectorization happens because of loop nesting order
-  for %i3 = 0 to %M {
-    for %i4 = 0 to %N {
-      for %i5 = 0 to %P {
+  affine.for %i3 = 0 to %M {
+    affine.for %i4 = 0 to %N {
+      affine.for %i5 = 0 to %P {
        %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
      }
    }
@@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  // CHECK: for {{.*}} = 0 to %2 {
  // For the case: --test-fastest-varying=0 --test-fastest-varying=2 no
  // vectorization happens because of loop nesting order.
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
-      for %i2 = 0 to %P {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
+      affine.for %i2 = 0 to %P {
        %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
      }
    }
  }
-  // CHECK: for %i3 = 0 to %0 step 32
-  // CHECK: for %i4 = 0 to %1 step 256
-  // CHECK: for %i5 = 0 to %2 {
+  // CHECK: affine.for %i3 = 0 to %0 step 32
+  // CHECK: affine.for %i4 = 0 to %1 step 256
+  // CHECK: affine.for %i5 = 0 to %2 {
  // CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i3 = 0 to %M {
-    for %i4 = 0 to %N {
-      for %i5 = 0 to %P {
+  affine.for %i3 = 0 to %M {
+    affine.for %i4 = 0 to %N {
+      affine.for %i5 = 0 to %P {
        %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
      }
    }
@@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
  %0 = dim %A, 0 : memref<?x?x?xf32>
  %1 = dim %A, 1 : memref<?x?x?xf32>
  %2 = dim %A, 2 : memref<?x?x?xf32>
-  // CHECK: for %i0 = 0 to %0 step 32 {
-  // CHECK: for %i1 = 0 to %1 {
-  // CHECK: for %i2 = 0 to %2 step 256 {
+  // CHECK: affine.for %i0 = 0 to %0 step 32 {
+  // CHECK: affine.for %i1 = 0 to %1 {
+  // CHECK: affine.for %i2 = 0 to %2 step 256 {
  // CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  // CHECK: for %i3 = 0 to %1 step 256 {
-  // CHECK: for %i4 = 0 to %2 {
+  // CHECK: affine.for %i3 = 0 to %1 step 256 {
+  // CHECK: affine.for %i4 = 0 to %2 {
  // CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  // CHECK: for %i5 = 0 to %2 {
+  // CHECK: affine.for %i5 = 0 to %2 {
  // CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i0 = 0 to %0 {
-    for %i1 = 0 to %1 {
-      for %i2 = 0 to %2 {
+  affine.for %i0 = 0 to %0 {
+    affine.for %i1 = 0 to %1 {
+      affine.for %i2 = 0 to %2 {
        %a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
      }
    }
-    for %i3 = 0 to %1 {
-      for %i4 = 0 to %2 {
+    affine.for %i3 = 0 to %1 {
+      affine.for %i4 = 0 to %2 {
        %a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
      }
-      for %i5 = 0 to %2 {
+      affine.for %i5 = 0 to %2 {
        %a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
      }
    }
@@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  // CHECK: for {{.*}} = 0 to %2 {
  // For the case: --test-fastest-varying=0 --test-fastest-varying=1 no
  // vectorization happens because of loop nesting order.
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
-      for %i2 = 0 to %P {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
+      affine.for %i2 = 0 to %P {
        %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
      }
    }
  }
-  // CHECK: for %i3 = 0 to %0 step 32
-  // CHECK: for %i4 = 0 to %1 {
-  // CHECK: for %i5 = 0 to %2 step 256
+  // CHECK: affine.for %i3 = 0 to %0 step 32
+  // CHECK: affine.for %i4 = 0 to %1 {
+  // CHECK: affine.for %i5 = 0 to %2 step 256
  // CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i3 = 0 to %M {
-    for %i4 = 0 to %N {
-      for %i5 = 0 to %P {
+  affine.for %i3 = 0 to %M {
+    affine.for %i4 = 0 to %N {
+      affine.for %i5 = 0 to %P {
        %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
      }
    }
@@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
  %0 = dim %A, 0 : memref<?x?x?xf32>
  %1 = dim %A, 1 : memref<?x?x?xf32>
  %2 = dim %A, 2 : memref<?x?x?xf32>
-  // CHECK: for %i0 = 0 to %0 step 32 {
-  // CHECK: for %i1 = 0 to %1 step 256 {
-  // CHECK: for %i2 = 0 to %2 {
+  // CHECK: affine.for %i0 = 0 to %0 step 32 {
+  // CHECK: affine.for %i1 = 0 to %1 step 256 {
+  // CHECK: affine.for %i2 = 0 to %2 {
  // CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  // CHECK: for %i3 = 0 to %1 {
-  // CHECK: for %i4 = 0 to %2 step 256 {
+  // CHECK: affine.for %i3 = 0 to %1 {
+  // CHECK: affine.for %i4 = 0 to %2 step 256 {
  // CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  // CHECK: for %i5 = 0 to %2 step 256 {
+  // CHECK: affine.for %i5 = 0 to %2 step 256 {
  // CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i0 = 0 to %0 {
-    for %i1 = 0 to %1 {
-      for %i2 = 0 to %2 {
+  affine.for %i0 = 0 to %0 {
+    affine.for %i1 = 0 to %1 {
+      affine.for %i2 = 0 to %2 {
        %a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
      }
    }
-    for %i3 = 0 to %1 {
-      for %i4 = 0 to %2 {
+    affine.for %i3 = 0 to %1 {
+      affine.for %i4 = 0 to %2 {
        %a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
      }
-      for %i5 = 0 to %2 {
+      affine.for %i5 = 0 to %2 {
        %a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
      }
    }
@@ -213,10 +213,10 @@ func @dyn_shape_fold(%L : index, %M : index) -> (memref<? x ? x i32>, memref<? x
// CHECK-NEXT: %2 = alloc() : memref<512x1024xi32>
%c = alloc(%K, %N) : memref<? x ? x i32>

// CHECK: for %i0 =
for %i = 0 to %L {
// CHECK-NEXT: for %i1 =
for %j = 0 to 10 {
// CHECK: affine.for %i0 =
affine.for %i = 0 to %L {
// CHECK-NEXT: affine.for %i1 =
affine.for %j = 0 to 10 {
// CHECK-NEXT: %4 = load %0[%i0, %i1] : memref<?x1024xf32>
// CHECK-NEXT: store %4, %1[%c0, %c0, %i0, %i1, %c0] : memref<4x1024x8x512x?xf32>
%v = load %a[%i, %j] : memref<?x?xf32>
@@ -242,8 +242,8 @@ func @merge_constants() -> (index, index) {
// CHECK-LABEL: func @hoist_constant
func @hoist_constant(%arg0: memref<8xi32>) {
// CHECK-NEXT: %c42_i32 = constant 42 : i32
// CHECK-NEXT: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK-NEXT: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
// CHECK-NEXT: store %c42_i32, %arg0[%i0]
%c42_i32 = constant 42 : i32
store %c42_i32, %arg0[%i0] : memref<8xi32>
@@ -2,8 +2,8 @@

// CHECK-LABEL: @test(%arg0: memref<f32>) {
func @test(%p : memref<f32>) {
for %i0 = 0 to 128 {
for %i1 = 0 to 8 { // CHECK: for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 { // CHECK: affine.for %i1 = 0 to 8 {
%0 = constant 4.5 : f32
%1 = constant 1.5 : f32

@@ -123,8 +123,8 @@ func @down_propagate_for_ml() {
// CHECK: %c1_i32 = constant 1 : i32
%0 = constant 1 : i32

// CHECK-NEXT: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: "foo"(%c1_i32, %c1_i32) : (i32, i32) -> ()
%1 = constant 1 : i32
"foo"(%0, %1) : (i32, i32) -> ()
@@ -155,8 +155,8 @@ func @down_propagate_cfg() -> i32 {
/// Check that operation definitions are NOT propagated up the dominance tree.
// CHECK-LABEL: @up_propagate_ml
func @up_propagate_ml() -> i32 {
// CHECK: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: %c1_i32 = constant 1 : i32
// CHECK-NEXT: "foo"(%c1_i32) : (i32) -> ()
%0 = constant 1 : i32
@@ -32,7 +32,7 @@ func @loop_nest_1d() {
// Second DMA transfer.
// CHECK: dma_start %1[%c256], %5[%c0], %c256_0, %6[%c0] : memref<512xf32>, memref<256xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %6[%c0], %c256_0 : memref<1xi32>
// CHECK: for %i0 = 0 to 256 {
// CHECK: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %7 = load %3[%i0] : memref<256xf32, 1>
// CHECK: %8 = affine.apply [[MAP_PLUS_256]](%i0)
// CHECK: %9 = affine.apply [[MAP_MINUS_256]](%8)
@@ -41,7 +41,7 @@ func @loop_nest_1d() {
// CHECK: %11 = load %2[%i0] : memref<256xf32, 1>
// CHECK-NEXT: }
// CHECK-NEXT: return
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
load %A[%i] : memref<256 x f32>
%idx = affine.apply (d0) -> (d0 + 256)(%i)
load %B[%idx] : memref<512 x f32>
@@ -68,20 +68,20 @@ func @loop_nest_1d() {
// INCOMING DMA for C.
// CHECK-DAG: dma_start %arg2[%c0, %c0], [[BUFC]][%c0, %c0], %c16384_0, [[TAGC]][%c0] : memref<512x32xf32>, memref<512x32xf32, 1>, memref<1xi32>
// CHECK-DAG: dma_wait [[TAGC]][%c0], %c16384_0 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 32 {
// CHECK-NEXT: for %i1 = 0 to 32 {
// CHECK-NEXT: for %i2 = 0 to 32 {
// CHECK-NEXT: for %i3 = 0 to 16 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 32 {
// CHECK-NEXT: affine.for %i3 = 0 to 16 {
// CHECK-NEXT: %7 = affine.apply #map{{[0-9]+}}(%i1, %i3)
// CHECK-NEXT: %8 = load [[BUFB]][%7, %i0] : memref<512x32xf32, 1>
// CHECK-NEXT: "foo"(%8) : (f32) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: for %i4 = 0 to 16 {
// CHECK-NEXT: affine.for %i4 = 0 to 16 {
// CHECK-NEXT: %9 = affine.apply #map{{[0-9]+}}(%i2, %i4)
// CHECK-NEXT: %10 = load [[BUFA]][%9, %i1] : memref<512x32xf32, 1>
// CHECK-NEXT: "bar"(%10) : (f32) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: for %i5 = 0 to 16 {
// CHECK-NEXT: affine.for %i5 = 0 to 16 {
// CHECK-NEXT: %11 = "abc_compute"() : () -> f32
// CHECK-NEXT: %12 = affine.apply #map{{[0-9]+}}(%i2, %i5)
// CHECK-NEXT: %13 = load [[BUFC]][%12, %i0] : memref<512x32xf32, 1>
@@ -102,20 +102,20 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// DMAs will be performed at this level (jT is the first loop without a stride).
// A and B are read, while C is both read and written. A total of three new buffers
// are allocated and existing loads/stores are replaced by accesses to those buffers.
for %jT = 0 to 32 {
for %kT = 0 to 32 {
for %iT = 0 to 32 {
for %kk = 0 to 16 { // k intratile
affine.for %jT = 0 to 32 {
affine.for %kT = 0 to 32 {
affine.for %iT = 0 to 32 {
affine.for %kk = 0 to 16 { // k intratile
%k = affine.apply (d0, d1) -> (16*d0 + d1) (%kT, %kk)
%v0 = load %B[%k, %jT] : memref<512 x 32 x f32>
"foo"(%v0) : (f32) -> ()
}
for %ii = 0 to 16 { // i intratile.
affine.for %ii = 0 to 16 { // i intratile.
%i = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii)
%v1 = load %A[%i, %kT] : memref<512 x 32 x f32>
"bar"(%v1) : (f32) -> ()
}
for %ii_ = 0 to 16 { // i intratile.
affine.for %ii_ = 0 to 16 { // i intratile.
%v2 = "abc_compute"() : () -> f32
%i_ = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii_)
%v3 = load %C[%i_, %jT] : memref<512 x 32 x f32>
@@ -134,13 +134,13 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
//
// CHECK-LABEL: func @loop_nest_modulo() {
// CHECK: %0 = alloc() : memref<256x8xf32>
// CHECK-NEXT: for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
// CHECK-NEXT: %2 = alloc() : memref<1x2xf32, 1>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%1, %c0], %2[%c0, %c0], %c2, %3[%c0] : memref<256x8xf32>, memref<1x2xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %3[%c0], %c2 : memref<1xi32>
// CHECK-NEXT: for %i1 = 0 to 8 {
// CHECK-NEXT: affine.for %i1 = 0 to 8 {
// ...
// ...
// CHECK: }
@@ -148,9 +148,9 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// CHECK-NEXT: return
func @loop_nest_modulo() {
%A = alloc() : memref<256 x 8 x f32>
for %i = 0 to 32 step 4 {
affine.for %i = 0 to 32 step 4 {
// DMAs will be performed at this level (%j is the first unit stride loop)
for %j = 0 to 8 {
affine.for %j = 0 to 8 {
%idx = affine.apply (d0) -> (d0 mod 2) (%j)
// A buffer of size 32 x 2 will be allocated (original buffer was 256 x 8).
%v = load %A[%i, %idx] : memref<256 x 8 x f32>
@@ -164,17 +164,17 @@ func @loop_nest_modulo() {
// CHECK-LABEL: func @loop_nest_tiled() -> memref<256x1024xf32> {
func @loop_nest_tiled() -> memref<256x1024xf32> {
%0 = alloc() : memref<256x1024xf32>
for %i0 = 0 to 256 step 32 {
for %i1 = 0 to 1024 step 32 {
affine.for %i0 = 0 to 256 step 32 {
affine.for %i1 = 0 to 1024 step 32 {
// CHECK: %3 = alloc() : memref<32x32xf32, 1>
// CHECK-NEXT: %4 = alloc() : memref<1xi32>
// Strided DMA here: 32 x 32 tile in a 256 x 1024 memref.
// CHECK-NEXT: dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024_0, %c32 : memref<256x1024xf32>, memref<32x32xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait
// CHECK-NEXT: for %i2 = #map
// CHECK-NEXT: for %i3 = #map
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: affine.for %i2 = #map
// CHECK-NEXT: affine.for %i3 = #map
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: %5 = affine.apply [[MAP_INDEX_DIFF_EVEN]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %6 = affine.apply [[MAP_INDEX_DIFF_ODD]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %7 = load %3[%5, %6] : memref<32x32xf32, 1>
@@ -196,8 +196,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// No strided DMA needed here.
// CHECK: dma_start %arg0[%c1, %c0], %0[%c0, %c0], %c100, %1[%c0] : memref<100x100xf32>, memref<1x100xf32, 1>,
// CHECK-NEXT: dma_wait %1[%c0], %c100 : memref<1xi32>
for %i = 0 to 100 {
for %j = 0 to ()[s0] -> (s0) ()[%N] {
affine.for %i = 0 to 100 {
affine.for %j = 0 to ()[s0] -> (s0) ()[%N] {
// CHECK: %2 = affine.apply [[MAP_D0_MINUS_ONE]](%c1_0, %i1)
// CHECK: %3 = affine.apply [[MAP_D1]](%c1_0, %i1)
// CHECK-NEXT: %4 = load %0[%2, %3] : memref<1x100xf32, 1>
@@ -210,8 +210,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// CHECK-LABEL: func @dma_with_symbolic_accesses
func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
%N = constant 9 : index
for %i = 0 to 100 {
for %j = 0 to 100 {
affine.for %i = 0 to 100 {
affine.for %j = 0 to 100 {
%idy = affine.apply (d0, d1) [s0, s1] -> (d1 + s0 + s1)(%i, %j)[%M, %N]
load %A[%i, %idy] : memref<100 x 100 x f32>
}
@@ -221,8 +221,8 @@ func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
// CHECK-NEXT: %2 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %0], %1[%c0, %c0], %c10000, %2[%c0]
// CHECK-NEXT: dma_wait %2[%c0], %c10000
// CHECK-NEXT: for %i0 = 0 to 100 {
// CHECK-NEXT: for %i1 = 0 to 100 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 {
// CHECK-NEXT: affine.for %i1 = 0 to 100 {
// CHECK-NEXT: %3 = affine.apply [[MAP_SYM_SHIFT]](%i0, %i1)[%arg1, %c9]
// CHECK-NEXT: %4 = affine.apply [[MAP_3D_D1]](%arg1, %i0, %3)
// CHECK-NEXT: %5 = affine.apply [[MAP_SUB_OFFSET]](%arg1, %i0, %3)
@@ -241,8 +241,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
// CHECK-NEXT: %1 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %c0], %0[%c0, %c0], %c10000, %1[%c0] : memref<100x100xf32>, memref<100x100xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %1[%c0], %c10000 : memref<1xi32>
for %i = 0 to 100 {
for %j = %M to %N {
affine.for %i = 0 to 100 {
affine.for %j = %M to %N {
%idy = affine.apply (d1) [s0] -> (d1 + s0)(%j)[%K]
load %A[%i, %idy] : memref<100 x 100 x f32>
}
@@ -256,8 +256,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
func @dma_unknown_size(%arg0: memref<?x?xf32>) {
%M = dim %arg0, 0 : memref<? x ? x f32>
%N = dim %arg0, 0 : memref<? x ? x f32>
for %i = 0 to %M {
for %j = 0 to %N {
affine.for %i = 0 to %M {
affine.for %j = 0 to %N {
// If this loop nest isn't tiled, the access requires a non-constant DMA
// size -- not yet implemented.
// CHECK: %2 = load %arg0[%i0, %i1] : memref<?x?xf32>
@@ -272,9 +272,9 @@ func @dma_unknown_size(%arg0: memref<?x?xf32>) {

// CHECK-LABEL: func @dma_memref_3d
func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
for %i = 0 to 1024 {
for %j = 0 to 1024 {
for %k = 0 to 1024 {
affine.for %i = 0 to 1024 {
affine.for %j = 0 to 1024 {
affine.for %k = 0 to 1024 {
%idx = affine.apply (d0) -> (d0 mod 128)(%i)
%idy = affine.apply (d0) -> (d0 mod 128)(%j)
%idz = affine.apply (d0) -> (d0 mod 128)(%k)
@@ -308,8 +308,8 @@ func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
// CHECK-LABEL: func @multi_load_store_union() {
func @multi_load_store_union() {
%A = alloc() : memref<512 x 512 x f32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx = affine.apply (d0) -> (d0 + 64)(%i)
%idy = affine.apply (d0) -> (d0 + 128)(%j)
%ishift = affine.apply (d0) -> (d0 + 2)(%i)
@@ -333,8 +333,8 @@ func @multi_load_store_union() {
// CHECK-NEXT: dma_start %0[%c2_1, %c2_2], %1[%c0, %c0], %c170372_3, %2[%c0], %c512_4, %c446_5 : memref<512x512xf32>, memref<382x446xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %2[%c0], %c170372_3 : memref<1xi32>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: for %i1 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = affine.apply [[MAP_PLUS_64]](%i0)
// CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_128]](%i1)
// CHECK-NEXT: %6 = affine.apply [[MAP_PLUS_2]](%i0)
@@ -370,7 +370,7 @@ func @dma_loop_straightline_interspersed() {
%c255 = constant 255 : index
%A = alloc() : memref<256 x f32>
%v = load %A[%c0] : memref<256 x f32>
for %i = 1 to 255 {
affine.for %i = 1 to 255 {
load %A[%i] : memref<256 x f32>
}
%l = load %A[%c255] : memref<256 x f32>
@@ -389,7 +389,7 @@ func @dma_loop_straightline_interspersed() {
// CHECK-NEXT: %5 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%c1_0], %4[%c0], %c254, %5[%c0] : memref<256xf32>, memref<254xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %5[%c0], %c254 : memref<1xi32>
// CHECK-NEXT: for %i0 = 1 to 255 {
// CHECK-NEXT: affine.for %i0 = 1 to 255 {
// CHECK-NEXT: %6 = affine.apply [[MAP_MINUS_ONE]](%i0)
// CHECK-NEXT: %7 = load %4[%6] : memref<254xf32, 1>
// CHECK-NEXT: }
@@ -410,10 +410,10 @@ func @dma_loop_straightline_interspersed() {
func @dma_mixed_loop_blocks() {
%c0 = constant 0 : index
%A = alloc() : memref<256 x 256 x vector<8 x f32>>
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
%v = load %A[%c0, %c0] : memref<256 x 256 x vector<8 x f32>>
"foo"(%v) : (vector<8 x f32>) -> ()
for %j = 0 to 256 {
affine.for %j = 0 to 256 {
%w = load %A[%i, %j] : memref<256 x 256 x vector<8 x f32>>
"bar"(%w) : (vector<8 x f32>) -> ()
}
@@ -425,7 +425,7 @@ func @dma_mixed_loop_blocks() {
// CHECK-DAG: [[TAG:%[0-9]+]] = alloc() : memref<1xi32>
// CHECK: dma_start [[MEM]][%c0, %c0], [[BUF]][%c0, %c0], %c65536, [[TAG]][%c0] : memref<256x256xvector<8xf32>>, memref<256x256xvector<8xf32>, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait [[TAG]][%c0], %c65536 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %3 = load [[BUF]][%c0_0, %c0_0] : memref<256x256xvector<8xf32>, 1>
// CHECK: for %i1 = 0 to 256 {
// CHECK: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = load [[BUF]][%i0, %i1] : memref<256x256xvector<8xf32>, 1>
(File diff suppressed because it is too large.)
@@ -8,12 +8,12 @@
// CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, s0, 4096 floordiv s1)

// CHECK-LABEL: func @loop_tiling()
// CHECK-NEXT: for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: affine.for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: affine.for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: affine.for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: affine.for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: "foo"(%i3, %i4, %i5) : (index, index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -21,32 +21,32 @@
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: affine.for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: affine.for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: "bar"(%i7, %i7) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: affine.for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: affine.for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: "foobar"(%i9) : (index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: return
func @loop_tiling() {
for %i = 0 to 256 {
for %j = 0 to 512 {
for %k = 0 to 1024 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 512 {
affine.for %k = 0 to 1024 {
"foo"(%i, %j, %k) : (index, index, index) -> ()
}
}
}

for %x = 0 to 50 {
affine.for %x = 0 to 50 {
"bar"(%x, %x) : (index, index) -> ()
}

// Intra-tile loop won't need a min expression.
for %y = 0 to 21 {
affine.for %y = 0 to 21 {
"foobar"(%y) : (index) -> ()
}

@@ -58,12 +58,12 @@ func @loop_tiling() {
// CHECK-LABEL: func @loop_max_min_bound(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {
%M = dim %A, 0 : memref<? x i32>
for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
affine.for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
%out = affine.apply (d0) -> (d0) (%iTT)
}
return
// CHECK: for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK: affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK-NEXT: %1 = affine.apply [[IDENTITY]](%i1)
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -24,7 +24,7 @@ func @body(index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @simple_loop() {
for %i = 1 to 42 {
affine.for %i = 1 to 42 {
call @body(%i) : (index) -> ()
}
return
@@ -65,9 +65,9 @@ func @post(index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
}
call @post(%i) : (index) -> ()
@@ -122,13 +122,13 @@ func @body3(index, index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @more_imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
}
call @mid(%i) : (index) -> ()
for %k = 18 to 37 step 3 {
affine.for %k = 18 to 37 step 3 {
call @body3(%i, %k) : (index, index) -> ()
}
call @post(%i) : (index) -> ()
@@ -161,8 +161,8 @@ func @more_imperfectly_nested_loops() {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @affine_apply_loops_shorthand(%N : index) {
for %i = 0 to %N {
for %j = %i to 42 {
affine.for %i = 0 to %N {
affine.for %j = %i to 42 {
call @body2(%i, %j) : (index, index) -> ()
}
}
@@ -360,7 +360,7 @@ func @if_for() {
// CHECK-NEXT: [[outerEndBB]]:
// CHECK-NEXT: br [[outerLoopInit:\^bb[0-9]+]]
if #set1(%i) {
for %j = 0 to 42 {
affine.for %j = 0 to 42 {
if #set2(%j) {
call @body2(%i, %j) : (index, index) -> ()
}
@@ -397,9 +397,9 @@ func @if_for() {
// CHECK-NEXT: %c1_9 = constant 1 : index
// CHECK-NEXT: %16 = addi %9, %c1_9 : index
// CHECK-NEXT: br [[outerLoopCond]](%16 : index)
for %k = 0 to 42 {
affine.for %k = 0 to 42 {
if #set2(%k) {
for %l = 0 to 42 {
affine.for %l = 0 to 42 {
call @body3(%k, %l) : (index, index) -> ()
}
}
@@ -446,8 +446,8 @@ func @if_for() {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @loop_min_max(%N : index) {
for %i = 0 to 42 {
for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
affine.for %i = 0 to 42 {
affine.for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
call @body2(%i, %j) : (index, index) -> ()
}
}
@@ -486,7 +486,7 @@ func @loop_min_max(%N : index) {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @min_reduction_tree(%v : index) {
for %i = 0 to min #map_7_values(%v)[] {
affine.for %i = 0 to min #map_7_values(%v)[] {
call @body(%i) : (index) -> ()
}
return
@@ -11,8 +11,8 @@ func @test() {
%A = alloc() : memref<9 x 9 x i32>
%B = alloc() : memref<111 x i32>

for %i = -1 to 10 {
for %j = -1 to 10 {
affine.for %i = -1 to 10 {
affine.for %j = -1 to 10 {
%idx0 = affine.apply (d0, d1) -> (d0)(%i, %j)
%idx1 = affine.apply (d0, d1) -> (d1)(%i, %j)
// Out of bound access.
@@ -27,7 +27,7 @@ func @test() {
}
}

for %k = 0 to 10 {
affine.for %k = 0 to 10 {
// In bound.
%u = load %B[%zero] : memref<111 x i32>
// Out of bounds.
@@ -43,8 +43,8 @@ func @test_mod_floordiv_ceildiv() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@@ -69,8 +69,8 @@ func @test_no_out_of_bounds() {
%C = alloc() : memref<257 x i32>
%B = alloc() : memref<1 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
// All of these accesses are in bound; check that no errors are emitted.
// CHECK: %3 = affine.apply {{#map.*}}(%i0, %i1)
// CHECK-NEXT: %4 = load %0[%3, %c0] : memref<257x256xi32>
@@ -93,8 +93,8 @@ func @mod_div() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@@ -115,8 +115,8 @@ func @mod_div() {
// CHECK-LABEL: func @mod_floordiv_nested() {
func @mod_floordiv_nested() {
%A = alloc() : memref<256 x 256 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1) -> ((d0 mod 1024) floordiv 4)(%i, %j)
%idx1 = affine.apply (d0, d1) -> ((((d1 mod 128) mod 32) ceildiv 4) * 32)(%i, %j)
load %A[%idx0, %idx1] : memref<256 x 256 x i32> // expected-error {{'load' op memref out of upper bound access along dimension #2}}
@@ -128,7 +128,7 @@ func @mod_floordiv_nested() {
// CHECK-LABEL: func @test_semi_affine_bailout
func @test_semi_affine_bailout(%N : index) {
%B = alloc() : memref<10 x i32>
for %i = 0 to 10 {
affine.for %i = 0 to 10 {
%idx = affine.apply (d0)[s0] -> (d0 * s0)(%i)[%N]
%y = load %B[%idx] : memref<10 x i32>
}
@@ -138,7 +138,7 @@ func @test_semi_affine_bailout(%N : index) {
// CHECK-LABEL: func @multi_mod_floordiv
func @multi_mod_floordiv() {
%A = alloc() : memref<2x2xi32>
for %ii = 0 to 64 {
affine.for %ii = 0 to 64 {
%idx0 = affine.apply (d0) -> ((d0 mod 147456) floordiv 1152) (%ii)
%idx1 = affine.apply (d0) -> (((d0 mod 147456) mod 1152) floordiv 384) (%ii)
%v = load %A[%idx0, %idx1] : memref<2x2xi32>
@@ -153,8 +153,8 @@ func @delinearize_mod_floordiv() {
%out = alloc() : memref<64x9xi32>

// Reshape '%in' into '%out'.
for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)
@@ -189,7 +189,7 @@ func @out_of_bounds() {
%in = alloc() : memref<1xi32>
%c9 = constant 9 : i32

for %i0 = 10 to 11 {
affine.for %i0 = 10 to 11 {
%idy = affine.apply (d0) -> (100 * d0 floordiv 1000) (%i0)
store %c9, %in[%idy] : memref<1xi32> // expected-error {{'store' op memref out of upper bound access along dimension #1}}
}
@@ -10,14 +10,14 @@
func @simple_store_load() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
}
return
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: }
// CHECK-NEXT: return
@@ -30,7 +30,7 @@ func @multi_store_load() {
%cf8 = constant 8.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -45,7 +45,7 @@ func @multi_store_load() {
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %cst_1 = constant 9.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: %1 = mulf %cst_1, %cst_1 : f32
// CHECK-NEXT: }
@@ -59,8 +59,8 @@ func @multi_store_load() {
func @store_load_affine_apply() -> memref<10x10xf32> {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10x10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1)
%t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1)
%idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1)
@@ -75,8 +75,8 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
return %m : memref<10x10xf32>
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %0 = alloc() : memref<10x10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1)
// CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1)
// CHECK-NEXT: %3 = affine.apply [[MAP2]](%1, %2)
@@ -92,17 +92,17 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
func @store_load_nested(%N : index) {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
}
}
return
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -117,12 +117,12 @@ func @multi_store_load_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
}
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -138,9 +138,9 @@ func @store_load_store_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -159,16 +159,16 @@ func @multi_store_load_nested_fwd(%N : index) {
%cf9 = constant 9.0 : f32
%cf10 = constant 10.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
}
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
store %cf9, %m[%i2] : memref<10xf32>
}
store %cf10, %m[%i0] : memref<10xf32>
for %i3 = 0 to %N {
affine.for %i3 = 0 to %N {
// CHECK-NOT: %{{[0-9]+}} = load
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -182,10 +182,10 @@ func @multi_store_load_nested_fwd(%N : index) {
func @store_load_no_fwd() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK: load %{{[0-9]+}}
%v0 = load %m[%i2] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -202,9 +202,9 @@ func @store_load_fwd() {
%c0 = constant 0 : index
%m = alloc() : memref<10xf32>
store %cf7, %m[%c0] : memref<10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK-NOT: load %{{[0-9]}}+
%v0 = load %m[%c0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -223,9 +223,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%c0 = constant 0 : index
%c1 = constant 1 : index
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
%idx = affine.apply (d0) -> (d0 + 1) (%i0)
@@ -236,9 +236,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%v3 = load %m[%c1] : memref<10xf32>
return %v3 : f32
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32>
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: %2 = affine.apply [[MAP4]](%i0)
// CHECK-NEXT: store %cst_0, %0[%2] : memref<10xf32>
@@ -13,14 +13,14 @@ func @store_may_execute_before_load() {
// ancestor IfOp of the store, dominates the ancestor ForInst of the load,
// and thus the store "may" conditionally execute before the load.
if #set0(%c0) {
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
}
}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%v0 = load %m[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@@ -37,13 +37,13 @@ func @dependent_loops() {
%cst = constant 7.000000e+00 : f32
// There is a dependence from 0 to 1 at depth 1 (common surrounding loops 0)
// because the first loop with the store dominates the second loop.
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cst, %0[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%1 = load %0[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@@ -231,7 +231,7 @@ func @store_range_load_after_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -254,7 +254,7 @@ func @store_load_func_symbol(%arg0: index, %arg1: index) {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to %arg1 {
affine.for %i0 = 0 to %arg1 {
%a0 = affine.apply (d0) -> (d0) (%arg0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = [1, +inf]}}
@@ -277,7 +277,7 @@ func @store_range_load_last_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// For dependence from 0 to 1, we do not have a loop carried dependence
// because only the final write in the loop accesses the same element as the
@@ -305,7 +305,7 @@ func @store_range_load_before_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -328,7 +328,7 @@ func @store_range_load_first_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// Dependence from 0 to 1 at depth 1 is a range because all loads at
// constant index zero are reads after first store at index zero during
@@ -353,7 +353,7 @@ func @store_range_load_first_in_range() {
func @store_plus_3() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0 + 3) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -375,7 +375,7 @@ func @store_plus_3() {
func @load_minus_2() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 2 to 11 {
affine.for %i0 = 2 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -397,8 +397,8 @@ func @load_minus_2() {
func @perfectly_nested_loops_loop_independent() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 11 {
for %i1 = 0 to 11 {
affine.for %i0 = 0 to 11 {
affine.for %i1 = 0 to 11 {
// Dependence from access 0 to 1 is loop independent at depth = 3.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@@ -428,8 +428,8 @@ func @perfectly_nested_loops_loop_independent() {
func @perfectly_nested_loops_loop_carried_at_depth1() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 9 {
for %i1 = 0 to 9 {
affine.for %i0 = 0 to 9 {
affine.for %i1 = 0 to 9 {
// Dependence from access 0 to 1 is loop carried at depth 1.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@@ -459,8 +459,8 @@ func @perfectly_nested_loops_loop_carried_at_depth2() {
func @perfectly_nested_loops_loop_carried_at_depth2() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
// Dependence from access 0 to 1 is loop carried at depth 2.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@@ -491,8 +491,8 @@ func @one_common_loop() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
// There is a loop-independent dependence from access 0 to 1 at depth 2.
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
store %c7, %m[%a00, %a01] : memref<10x10xf32>
@@ -502,7 +502,7 @@ func @one_common_loop() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = false}}
// expected-note@-5 {{dependence from 0 to 1 at depth 2 = true}}
}
for %i2 = 0 to 9 {
affine.for %i2 = 0 to 9 {
%a10 = affine.apply (d0, d1) -> (d0) (%i0, %i2)
%a11 = affine.apply (d0, d1) -> (d1) (%i0, %i2)
%v0 = load %m[%a10, %a11] : memref<10x10xf32>
@@ -525,7 +525,7 @@ func @dependence_cycle() {
// Dependences:
// *) loop-independent dependence from access 1 to 2 at depth 2.
// *) loop-carried dependence from access 3 to 0 at depth 1.
for %i0 = 0 to 9 {
affine.for %i0 = 0 to 9 {
%a0 = affine.apply (d0) -> (d0) (%i0)
%v0 = load %m.a[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -575,8 +575,8 @@ func @dependence_cycle() {
func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to %arg0 {
for %i1 = 0 to %arg1 {
affine.for %i0 = 0 to %arg0 {
affine.for %i1 = 0 to %arg1 {
%a00 = affine.apply (d0, d1) -> (d0 - 1) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1 + 1) (%i0, %i1)
%v0 = load %m[%a00, %a01] : memref<10x10xf32>
@@ -605,8 +605,8 @@ func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
func @war_raw_waw_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 + 1) (%i1)
%v0 = load %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -633,7 +633,7 @@ func @war_raw_waw_deps() {
func @mod_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 mod 2) (%i0)
// Results are conservative here since we currently don't have a way to
// represent strided sets in FlatAffineConstraints.
@@ -658,8 +658,8 @@ func @loop_nest_depth() {
%0 = alloc() : memref<100x100xf32>
%c7 = constant 7.0 : f32

for %i0 = 0 to 128 {
for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 {
store %c7, %0[%i0, %i1] : memref<100x100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@@ -667,10 +667,10 @@ func @loop_nest_depth() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = true}}
}
}
for %i2 = 0 to 8 {
for %i3 = 0 to 8 {
for %i4 = 0 to 8 {
for %i5 = 0 to 16 {
affine.for %i2 = 0 to 8 {
affine.for %i3 = 0 to 8 {
affine.for %i4 = 0 to 8 {
affine.for %i5 = 0 to 16 {
%8 = affine.apply (d0, d1) -> (d0 * 16 + d1)(%i4, %i5)
%9 = load %0[%8, %i3] : memref<100x100xf32>
// expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}}
@@ -693,9 +693,9 @@ func @loop_nest_depth() {
func @mod_div_3d() {
%M = alloc() : memref<2x2x2xi32>
%c0 = constant 0 : i32
for %i0 = 0 to 8 {
for %i1 = 0 to 8 {
for %i2 = 0 to 8 {
affine.for %i0 = 0 to 8 {
affine.for %i1 = 0 to 8 {
affine.for %i2 = 0 to 8 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 floordiv 4) (%i0, %i1, %i2)
%idx1 = affine.apply (d0, d1, d2) -> (d1 mod 2) (%i0, %i1, %i2)
%idx2 = affine.apply (d0, d1, d2) -> (d2 floordiv 4) (%i0, %i1, %i2)
@@ -719,12 +719,12 @@ func @delinearize_mod_floordiv() {
%in = alloc() : memref<2x2x3x3x16x1xi32>
%out = alloc() : memref<64x9xi32>

for %i0 = 0 to 2 {
for %i1 = 0 to 2 {
for %i2 = 0 to 3 {
for %i3 = 0 to 3 {
for %i4 = 0 to 16 {
for %i5 = 0 to 1 {
affine.for %i0 = 0 to 2 {
affine.for %i1 = 0 to 2 {
affine.for %i2 = 0 to 3 {
affine.for %i3 = 0 to 3 {
affine.for %i4 = 0 to 16 {
affine.for %i5 = 0 to 1 {
store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@@ -742,8 +742,8 @@ func @delinearize_mod_floordiv() {
}
}

for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)
@@ -16,13 +16,13 @@ func @loop_nest_dma() {
%zero = constant 0 : index
%num_elts = constant 128 : index

for %i = 0 to 8 {
affine.for %i = 0 to 8 {
dma_start %A[%i], %Ah[%i], %num_elts, %tag[%zero] : memref<256 x f32>, memref<32 x f32, 1>, memref<1 x f32>
dma_wait %tag[%zero], %num_elts : memref<1 x f32>
%v = load %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
%r = "compute"(%v) : (f32) -> (f32)
store %r, %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
for %j = 0 to 128 {
affine.for %j = 0 to 128 {
"do_more_compute"(%i, %j) : (index, index) -> ()
}
}
@@ -34,7 +34,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %3 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: %4 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: dma_start %0[%c0], %1[%3, %c0], %c128, %2[%4, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
// CHECK-NEXT: for %i0 = 1 to 8 {
// CHECK-NEXT: affine.for %i0 = 1 to 8 {
// CHECK-NEXT: %5 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: %6 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: dma_start %0[%i0], %1[%5, %i0], %c128, %2[%6, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
@@ -45,7 +45,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %10 = load %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: %11 = "compute"(%10) : (f32) -> f32
// CHECK-NEXT: store %11, %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i1 = 0 to 128 {
// CHECK-NEXT: affine.for %i1 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%7, %i1) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -56,7 +56,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %15 = load %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: %16 = "compute"(%15) : (f32) -> f32
// CHECK-NEXT: store %16, %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i2 = 0 to 128 {
// CHECK-NEXT: affine.for %i2 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%12, %i2) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: return
@@ -68,7 +68,7 @@ func @loop_step(%arg0: memref<512xf32>,
%arg1: memref<512xf32>) {
%c0 = constant 0 : index
%c4 = constant 4 : index
for %i0 = 0 to 512 step 4 {
affine.for %i0 = 0 to 512 step 4 {
%1 = alloc() : memref<4xf32, 1>
%2 = alloc() : memref<1xi32>
dma_start %arg0[%i0], %1[%c0], %c4, %2[%c0]
@@ -82,7 +82,7 @@ func @loop_step(%arg0: memref<512xf32>,
// CHECK: %2 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK: %3 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK-NEXT: dma_start %arg0[%c0], %0[%2, %c0_0], %c4, [[TAG]][%3, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
// CHECK-NEXT: for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: affine.for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: %4 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: %5 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: dma_start %arg0[%i0], %0[%4, %c0_0], %c4, [[TAG]][%5, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
@@ -114,8 +114,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// Prologue for DMA overlap on arg2.
// CHECK:[[TAG_ARG2:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg2[
// CHECK: for %i0 = 1 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 1 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@@ -127,8 +127,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK-NEXT for %i1 = 1 to 8 {
for %i1 = 0 to 8 {
// CHECK-NEXT affine.for %i1 = 1 to 8 {
affine.for %i1 = 0 to 8 {
%7 = affine.apply #map1(%i0, %i1)
%8 = affine.apply #map2(%i1)
dma_start %arg0[%7, %c0], %0[%c0, %c0], %num_elts, %3[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
@@ -140,8 +140,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0]]
// CHECK: dma_wait [[TAG_ARG1]]
// CHECK-NEXT: for %i2 = 0 to 4 {
for %i2 = 0 to 4 {
// CHECK-NEXT: affine.for %i2 = 0 to 4 {
affine.for %i2 = 0 to 4 {
"foo"() : () -> ()
}
}
@@ -155,16 +155,16 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1_NESTED:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: for %i4 = 1 to 8 {
// CHECK: affine.for %i4 = 1 to 8 {
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i5 = 0 to 4 {
// CHECK: affine.for %i5 = 0 to 4 {
// CHECK: "foo"() : () -> ()
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i6 = 0 to 4 {
// CHECK: affine.for %i6 = 0 to 4 {
}
return
// CHECK: }
@@ -185,8 +185,8 @@ func @loop_dma_dependent(%arg2: memref<512x32xvector<8xf32>>) {
// The two DMAs below are dependent (incoming and outgoing on the same
// memref) in the same iteration; so no pipelining here.
// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>>, memref<64x4xvector<8xf32>, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@@ -206,8 +206,8 @@ func @escaping_use(%arg0: memref<512 x 32 x f32>) {
%tag = alloc() : memref<1 x i32>

// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
@@ -230,14 +230,14 @@ func @live_out_use(%arg0: memref<512 x 32 x f32>) -> f32 {
%tag = alloc() : memref<1 x i32>

// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
}
// Use live out of 'for' inst; no DMA pipelining will be done.
// Use live out of 'affine.for' inst; no DMA pipelining will be done.
%v = load %Av[%zero, %zero] : memref<32 x 32 x f32, 2>
return %v : f32
// CHECK: %{{[0-9]+}} = load %{{[0-9]+}}[%c0, %c0] : memref<32x32xf32, 2>
@@ -261,14 +261,14 @@ func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>) {
// CHECK: %5 = affine.apply [[MOD_2]](%c0)
// CHECK: %6 = affine.apply [[MOD_2]](%c0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%5, %c0_0, %c0_0], %c512, %4[%6, %c0_0]
for %kTT = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<? x ? x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
}
return
// CHECK-NEXT: for %i0 = 1 to 16 {
// CHECK-NEXT: affine.for %i0 = 1 to 16 {
// CHECK: %7 = affine.apply [[MOD_2]](%i0)
// CHECK: %8 = affine.apply [[MOD_2]](%i0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%7, %c0_0, %c0_0], %c512, %4[%8, %c0_0]
@@ -73,8 +73,8 @@

 // CHECK-LABEL: func @test_gaussian_elimination_empty_set0() {
 func @test_gaussian_elimination_empty_set0() {
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: [[SET_EMPTY_2D]](%i0, %i1)
 if (d0, d1) : (2 == 0)(%i0, %i1) {
 }

@@ -85,8 +85,8 @@ func @test_gaussian_elimination_empty_set0() {

 // CHECK-LABEL: func @test_gaussian_elimination_empty_set1() {
 func @test_gaussian_elimination_empty_set1() {
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: [[SET_EMPTY_2D]](%i0, %i1)
 if (d0, d1) : (1 >= 0, -1 >= 0) (%i0, %i1) {
 }

@@ -97,8 +97,8 @@ func @test_gaussian_elimination_empty_set1() {

 // CHECK-LABEL: func @test_gaussian_elimination_non_empty_set2() {
 func @test_gaussian_elimination_non_empty_set2() {
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: #set1(%i0, %i1)
 if #set2(%i0, %i1) {
 }

@@ -111,8 +111,8 @@ func @test_gaussian_elimination_non_empty_set2() {
 func @test_gaussian_elimination_empty_set3() {
 %c7 = constant 7 : index
 %c11 = constant 11 : index
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: #set2(%i0, %i1)[%c7, %c11]
 if #set3(%i0, %i1)[%c7, %c11] {
 }

@@ -125,8 +125,8 @@ func @test_gaussian_elimination_empty_set3() {
 func @test_gaussian_elimination_non_empty_set4() {
 %c7 = constant 7 : index
 %c11 = constant 11 : index
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: #set3(%i0, %i1)[%c7, %c11]
 if #set4(%i0, %i1)[%c7, %c11] {
 }

@@ -139,8 +139,8 @@ func @test_gaussian_elimination_non_empty_set4() {
 func @test_gaussian_elimination_empty_set5() {
 %c7 = constant 7 : index
 %c11 = constant 11 : index
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: #set2(%i0, %i1)[%c7, %c11]
 if #set5(%i0, %i1)[%c7, %c11] {
 }

@@ -151,8 +151,8 @@ func @test_gaussian_elimination_empty_set5() {

 // CHECK-LABEL: func @test_fuzz_explosion
 func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) {
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 if #set_fuzz_virus(%i0, %i1, %arg0, %arg1, %arg2, %arg3) {
 }
 }

@@ -163,8 +163,8 @@ func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i

 // CHECK-LABEL: func @test_empty_set(%arg0: index) {
 func @test_empty_set(%N : index) {
-for %i = 0 to 10 {
-for %j = 0 to 10 {
+affine.for %i = 0 to 10 {
+affine.for %j = 0 to 10 {
 // CHECK: if [[SET_EMPTY_2D]](%i0, %i1)
 if (d0, d1) : (d0 - d1 >= 0, d1 - d0 - 1 >= 0)(%i, %j) {
 "foo"() : () -> ()

@@ -198,8 +198,8 @@ func @test_empty_set(%N : index) {
 }
 }
 // The tests below test GCDTightenInequalities().
-for %k = 0 to 10 {
-for %l = 0 to 10 {
+affine.for %k = 0 to 10 {
+affine.for %l = 0 to 10 {
 // Empty because no multiple of 8 lies between 4 and 7.
 // CHECK: if [[SET_EMPTY_1D]](%i2)
 if (d0) : (8*d0 - 4 >= 0, -8*d0 + 7 >= 0)(%k) {

@@ -226,7 +226,7 @@ func @test_empty_set(%N : index) {
 }
 }

-for %m = 0 to 10 {
+affine.for %m = 0 to 10 {
 // CHECK: if [[SET_EMPTY_1D]](%i{{[0-9]+}})
 if (d0) : (d0 mod 2 - 3 == 0) (%m) {
 "foo"() : () -> ()
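The `GCDTightenInequalities()` tests rely on one arithmetic step worth spelling out: when every variable coefficient of an inequality is divisible by g, the inequality can be divided through by g with the constant rounded toward the feasible side. For `8*d0 - 4 >= 0` and `-8*d0 + 7 >= 0` this yields `d0 >= 1` (from ceil(4/8)) and `d0 <= 0` (from floor(7/8)), an empty range. A sketch of the same idea with different constants, not one of the tests above:

```mlir
affine.for %i = 0 to 10 {
  // 3*d0 must lie in [1, 2]; tightening gives 1 <= d0 <= 0, so the
  // simplifier can rewrite the condition to the canonical empty set.
  if (d0) : (3*d0 - 1 >= 0, -3*d0 + 2 >= 0)(%i) {
    "foo"() : () -> ()
  }
}
```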
@@ -10,7 +10,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
 %1 = "foo"() : () -> i32 loc("foo")

 // CHECK: } loc(unknown)
-for %i0 = 0 to 8 {
+affine.for %i0 = 0 to 8 {
 } loc(fused["foo", "mysource.cc":10:8])

 // CHECK: } loc(unknown)
@@ -7,13 +7,13 @@

 // CHECK-LABEL: func @unroll_jam_imperfect_nest() {
 func @unroll_jam_imperfect_nest() {
 // CHECK: %c100 = constant 100 : index
-// CHECK-NEXT: for %i0 = 0 to 99 step 2 {
-for %i = 0 to 101 {
+// CHECK-NEXT: affine.for %i0 = 0 to 99 step 2 {
+affine.for %i = 0 to 101 {
 // CHECK: %0 = "addi32"(%i0, %i0) : (index, index) -> i32
 // CHECK-NEXT: %1 = affine.apply [[MAP_PLUS_1]](%i0)
 // CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
 %x = "addi32"(%i, %i) : (index, index) -> i32
-for %j = 0 to 17 {
+affine.for %j = 0 to 17 {
 // CHECK: %3 = "addi32"(%i0, %i0) : (index, index) -> i32
 // CHECK-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
 // CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_1]](%i0)

@@ -29,7 +29,7 @@ func @unroll_jam_imperfect_nest() {
 } // CHECK }
 // cleanup loop (single iteration)
 // CHECK: %11 = "addi32"(%c100, %c100) : (index, index) -> i32
-// CHECK-NEXT: for %i2 = 0 to 17 {
+// CHECK-NEXT: affine.for %i2 = 0 to 17 {
 // CHECK-NEXT: %12 = "addi32"(%c100, %c100) : (index, index) -> i32
 // CHECK-NEXT: %13 = "addi32"(%12, %12) : (i32, i32) -> i32
 // CHECK-NEXT: }

@@ -39,8 +39,8 @@ func @unroll_jam_imperfect_nest() {

 // UNROLL-BY-4-LABEL: func @loop_nest_unknown_count_1(%arg0: index) {
 func @loop_nest_unknown_count_1(%N : index) {
-// UNROLL-BY-4-NEXT: for %i0 = 1 to #map{{[0-9]+}}()[%arg0] step 4 {
-// UNROLL-BY-4-NEXT: for %i1 = 1 to 100 {
+// UNROLL-BY-4-NEXT: affine.for %i0 = 1 to #map{{[0-9]+}}()[%arg0] step 4 {
+// UNROLL-BY-4-NEXT: affine.for %i1 = 1 to 100 {
 // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32

@@ -48,14 +48,14 @@ func @loop_nest_unknown_count_1(%N : index) {
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: }
 // A cleanup loop should be generated here.
-// UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
-// UNROLL-BY-4-NEXT: for %i3 = 1 to 100 {
+// UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
+// UNROLL-BY-4-NEXT: affine.for %i3 = 1 to 100 {
 // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
 // UNROLL-BY-4_NEXT: }
 // UNROLL-BY-4_NEXT: }
 // Specify the lower bound in a form so that both lb and ub operands match.
-for %i = ()[s0] -> (1)()[%N] to %N {
-for %j = 1 to 100 {
+affine.for %i = ()[s0] -> (1)()[%N] to %N {
+affine.for %j = 1 to 100 {
 %x = "foo"() : () -> i32
 }
 }

@@ -64,8 +64,8 @@ func @loop_nest_unknown_count_1(%N : index) {

 // UNROLL-BY-4-LABEL: func @loop_nest_unknown_count_2(%arg0: index) {
 func @loop_nest_unknown_count_2(%arg : index) {
-// UNROLL-BY-4-NEXT: for %i0 = %arg0 to #map{{[0-9]+}}()[%arg0] step 4 {
-// UNROLL-BY-4-NEXT: for %i1 = 1 to 100 {
+// UNROLL-BY-4-NEXT: affine.for %i0 = %arg0 to #map{{[0-9]+}}()[%arg0] step 4 {
+// UNROLL-BY-4-NEXT: affine.for %i1 = 1 to 100 {
 // UNROLL-BY-4-NEXT: %0 = "foo"(%i0) : (index) -> i32
 // UNROLL-BY-4-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
 // UNROLL-BY-4-NEXT: %2 = "foo"(%1) : (index) -> i32

@@ -77,12 +77,12 @@ func @loop_nest_unknown_count_2(%arg : index) {
 // UNROLL-BY-4-NEXT: }
 // The cleanup loop is a single iteration one and is promoted.
 // UNROLL-BY-4-NEXT: %7 = affine.apply [[M1:#map{{[0-9]+}}]]()[%arg0]
-// UNROLL-BY-4-NEXT: for %i3 = 1 to 100 {
+// UNROLL-BY-4-NEXT: affine.for %i3 = 1 to 100 {
 // UNROLL-BY-4-NEXT: %8 = "foo"() : () -> i32
 // UNROLL-BY-4_NEXT: }
 // Specify the lower bound in a form so that both lb and ub operands match.
-for %i = ()[s0] -> (s0) ()[%arg] to ()[s0] -> (s0+8) ()[%arg] {
-for %j = 1 to 100 {
+affine.for %i = ()[s0] -> (s0) ()[%arg] to ()[s0] -> (s0+8) ()[%arg] {
+affine.for %j = 1 to 100 {
 %x = "foo"(%i) : (index) -> i32
 }
 }
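As a reference point for the CHECK lines above, `unroll_jam_imperfect_nest` pins down the trip-count arithmetic: with factor 2 and trip count 101, the jammed loop `0 to 99 step 2` executes the body for `%i` and `%i + 1` on each step (covering iterations 0 through 99), and the single leftover iteration becomes a promoted cleanup at `%c100`. A conceptual before/after sketch; the "S1"/"S2" ops are placeholders, not taken from the test file:

```mlir
// Before unroll-and-jam:
affine.for %i = 0 to 101 {
  "S1"(%i) : (index) -> ()
  affine.for %j = 0 to 17 {
    "S2"(%i, %j) : (index, index) -> ()
  }
}

// After unroll-and-jam by 2: the two body copies are fused ("jammed")
// inside the inner loop, and the odd iteration is peeled and promoted.
affine.for %i = 0 to 99 step 2 {
  %i1 = affine.apply (d0) -> (d0 + 1) (%i)
  "S1"(%i) : (index) -> ()
  "S1"(%i1) : (index) -> ()
  affine.for %j = 0 to 17 {
    "S2"(%i, %j) : (index, index) -> ()
    "S2"(%i1, %j) : (index, index) -> ()
  }
}
%c100 = constant 100 : index
"S1"(%c100) : (index) -> ()
affine.for %j = 0 to 17 {
  "S2"(%c100, %j) : (index, index) -> ()
}
```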
@@ -46,13 +46,13 @@

 // CHECK-LABEL: func @loop_nest_simplest() {
 func @loop_nest_simplest() {
-// CHECK: for %i0 = 0 to 100 step 2 {
-for %i = 0 to 100 step 2 {
+// CHECK: affine.for %i0 = 0 to 100 step 2 {
+affine.for %i = 0 to 100 step 2 {
 // CHECK: %c1_i32 = constant 1 : i32
 // CHECK-NEXT: %c1_i32_0 = constant 1 : i32
 // CHECK-NEXT: %c1_i32_1 = constant 1 : i32
 // CHECK-NEXT: %c1_i32_2 = constant 1 : i32
-for %j = 0 to 4 {
+affine.for %j = 0 to 4 {
 %x = constant 1 : i32
 }
 } // CHECK: }

@@ -62,8 +62,8 @@ func @loop_nest_simplest() {
 // CHECK-LABEL: func @loop_nest_simple_iv_use() {
 func @loop_nest_simple_iv_use() {
 // CHECK: %c0 = constant 0 : index
-// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
-for %i = 0 to 100 step 2 {
+// CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 {
+affine.for %i = 0 to 100 step 2 {
 // CHECK: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
 // CHECK: %1 = affine.apply [[MAP0]](%c0)
 // CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32

@@ -71,7 +71,7 @@ func @loop_nest_simple_iv_use() {
 // CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
 // CHECK: %5 = affine.apply [[MAP2]](%c0)
 // CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
-for %j = 0 to 4 {
+affine.for %j = 0 to 4 {
 %x = "addi32"(%j, %j) : (index, index) -> i32
 }
 } // CHECK: }

@@ -82,8 +82,8 @@ func @loop_nest_simple_iv_use() {
 // CHECK-LABEL: func @loop_nest_body_def_use() {
 func @loop_nest_body_def_use() {
 // CHECK: %c0 = constant 0 : index
-// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
-for %i = 0 to 100 step 2 {
+// CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 {
+affine.for %i = 0 to 100 step 2 {
 // CHECK: %c0_0 = constant 0 : index
 %c0 = constant 0 : index
 // CHECK: %0 = affine.apply [[MAP0]](%c0)

@@ -97,7 +97,7 @@ func @loop_nest_body_def_use() {
 // CHECK-NEXT: %8 = affine.apply [[MAP2]](%c0)
 // CHECK-NEXT: %9 = affine.apply [[MAP0]](%8)
 // CHECK-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index
-for %j = 0 to 4 {
+affine.for %j = 0 to 4 {
 %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %y = "addi32"(%x, %c0) : (index, index) -> index
@@ -110,14 +110,14 @@ func @loop_nest_body_def_use() {
 func @loop_nest_strided() {
 // CHECK: %c2 = constant 2 : index
 // CHECK-NEXT: %c2_0 = constant 2 : index
-// CHECK-NEXT: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// CHECK-NEXT: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // CHECK: %0 = affine.apply [[MAP0]](%c2_0)
 // CHECK-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
 // CHECK-NEXT: %2 = affine.apply [[MAP1]](%c2_0)
 // CHECK-NEXT: %3 = affine.apply [[MAP0]](%2)
 // CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index
-for %j = 2 to 6 step 2 {
+affine.for %j = 2 to 6 step 2 {
 %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %y = "addi32"(%x, %x) : (index, index) -> index

@@ -130,7 +130,7 @@ func @loop_nest_strided() {
 // CHECK-NEXT: %10 = affine.apply [[MAP3]](%c2)
 // CHECK-NEXT: %11 = affine.apply [[MAP0]](%10)
 // CHECK-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index
-for %k = 2 to 7 step 2 {
+affine.for %k = 2 to 7 step 2 {
 %z = "affine.apply" (%k) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %w = "addi32"(%z, %z) : (index, index) -> index

@@ -142,8 +142,8 @@ func @loop_nest_strided() {
 // CHECK-LABEL: func @loop_nest_multiple_results() {
 func @loop_nest_multiple_results() {
 // CHECK: %c0 = constant 0 : index
-// CHECK-NEXT: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// CHECK-NEXT: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // CHECK: %0 = affine.apply [[MAP4]](%i0, %c0)
 // CHECK-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
 // CHECK-NEXT: %2 = affine.apply #map{{.*}}(%i0, %c0)

@@ -153,7 +153,7 @@ func @loop_nest_multiple_results() {
 // CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
 // CHECK-NEXT: %7 = affine.apply #map{{.*}}(%i0, %4)
 // CHECK-NEXT: %8 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index)
-for %j = 0 to 2 step 1 {
+affine.for %j = 0 to 2 step 1 {
 %x = affine.apply (d0, d1) -> (d0 + 1) (%i, %j)
 %y = "addi32"(%x, %x) : (index, index) -> index
 %z = affine.apply (d0, d1) -> (d0 + 3) (%i, %j)

@@ -170,8 +170,8 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
 // CHECK: %c0 = constant 0 : index
 // CHECK-NEXT: %c128 = constant 128 : index
 %c128 = constant 128 : index
-// CHECK: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// CHECK: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // CHECK: %0 = "vld"(%i0) : (index) -> i32
 %ld = "vld"(%i) : (index) -> i32
 // CHECK: %1 = affine.apply [[MAP0]](%c0)

@@ -189,7 +189,7 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
 // CHECK-NEXT: %13 = affine.apply [[MAP0]](%12)
 // CHECK-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index
 // CHECK-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index
-for %j = 0 to 4 {
+affine.for %j = 0 to 4 {
 %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %y = "vmulf"(%j, %x) : (index, index) -> index

@@ -218,7 +218,7 @@ func @loop_nest_seq_multiple() {
 // CHECK-NEXT: %5 = affine.apply [[MAP2]](%c0_0)
 // CHECK-NEXT: %6 = affine.apply [[MAP0]](%5)
 // CHECK-NEXT: "mul"(%6, %6) : (index, index) -> ()
-for %j = 0 to 4 {
+affine.for %j = 0 to 4 {
 %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 "mul"(%x, %x) : (index, index) -> ()

@@ -226,8 +226,8 @@ func @loop_nest_seq_multiple() {

 // CHECK: %c99 = constant 99 : index
 %k = "constant"(){value: 99} : () -> index
-// CHECK: for %i0 = 0 to 100 step 2 {
-for %m = 0 to 100 step 2 {
+// CHECK: affine.for %i0 = 0 to 100 step 2 {
+affine.for %m = 0 to 100 step 2 {
 // CHECK: %7 = affine.apply [[MAP0]](%c0)
 // CHECK-NEXT: %8 = affine.apply [[MAP6]](%c0)[%c99]
 // CHECK-NEXT: %9 = affine.apply [[MAP0]](%c0)

@@ -239,7 +239,7 @@ func @loop_nest_seq_multiple() {
 // CHECK-NEXT: %15 = affine.apply [[MAP2]](%c0)
 // CHECK-NEXT: %16 = affine.apply [[MAP0]](%15)
 // CHECK-NEXT: %17 = affine.apply [[MAP6]](%15)[%c99]
-for %n = 0 to 4 {
+affine.for %n = 0 to 4 {
 %y = "affine.apply" (%n) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %z = "affine.apply" (%n, %k) { map: (d0) [s0] -> (d0 + s0 + 1) } :

@@ -251,16 +251,16 @@ func @loop_nest_seq_multiple() {

 // SHORT-LABEL: func @loop_nest_outer_unroll() {
 func @loop_nest_outer_unroll() {
-// SHORT: for %i0 = 0 to 4 {
+// SHORT: affine.for %i0 = 0 to 4 {
 // SHORT-NEXT: %0 = affine.apply [[MAP0]](%i0)
 // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
 // SHORT-NEXT: }
-// SHORT-NEXT: for %i1 = 0 to 4 {
+// SHORT-NEXT: affine.for %i1 = 0 to 4 {
 // SHORT-NEXT: %2 = affine.apply [[MAP0]](%i1)
 // SHORT-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> index
 // SHORT-NEXT: }
-for %i = 0 to 2 {
-for %j = 0 to 4 {
+affine.for %i = 0 to 2 {
+affine.for %j = 0 to 4 {
 %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %y = "addi32"(%x, %x) : (index, index) -> index
@@ -284,28 +284,28 @@ func @loop_nest_seq_long() -> i32 {

 %zero_idx = constant 0 : index

-for %n0 = 0 to 512 {
-for %n1 = 0 to 8 {
+affine.for %n0 = 0 to 512 {
+affine.for %n1 = 0 to 8 {
 store %one, %A[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
 store %two, %B[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
 store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
 }
 }

-for %i0 = 0 to 2 {
-for %i1 = 0 to 2 {
-for %i2 = 0 to 8 {
+affine.for %i0 = 0 to 2 {
+affine.for %i1 = 0 to 2 {
+affine.for %i2 = 0 to 8 {
 %b2 = "affine.apply" (%i1, %i2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
 %x = load %B[%i0, %b2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
 "op1"(%x) : (i32) -> ()
 }
-for %j1 = 0 to 8 {
-for %j2 = 0 to 8 {
+affine.for %j1 = 0 to 8 {
+affine.for %j2 = 0 to 8 {
 %a2 = "affine.apply" (%i1, %j2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
 %v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
 "op2"(%v203) : (i32) -> ()
 }
-for %k2 = 0 to 8 {
+affine.for %k2 = 0 to 8 {
 %s0 = "op3"() : () -> i32
 %c2 = "affine.apply" (%i0, %k2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
 %s1 = load %C[%j1, %c2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
@@ -322,8 +322,8 @@ func @loop_nest_seq_long() -> i32 {

 // UNROLL-BY-4-LABEL: func @unroll_unit_stride_no_cleanup() {
 func @unroll_unit_stride_no_cleanup() {
-// UNROLL-BY-4: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {
 // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32

@@ -337,13 +337,13 @@ func @unroll_unit_stride_no_cleanup() {
 // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
 // UNROLL-BY-4-NEXT: }
-for %j = 0 to 8 {
+affine.for %j = 0 to 8 {
 %x = "addi32"(%j, %j) : (index, index) -> i32
 %y = "addi32"(%x, %x) : (i32, i32) -> i32
 }
 // empty loop
-// UNROLL-BY-4: for %i2 = 0 to 8 {
-for %k = 0 to 8 {
+// UNROLL-BY-4: affine.for %i2 = 0 to 8 {
+affine.for %k = 0 to 8 {
 }
 }
 return

@@ -351,8 +351,8 @@ func @unroll_unit_stride_no_cleanup() {

 // UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
 func @unroll_unit_stride_cleanup() {
-// UNROLL-BY-4: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 7 step 4 {
 // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32

@@ -370,7 +370,7 @@ func @unroll_unit_stride_cleanup() {
 // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
 // UNROLL-BY-4-NEXT: }
-for %j = 0 to 10 {
+affine.for %j = 0 to 10 {
 %x = "addi32"(%j, %j) : (index, index) -> i32
 %y = "addi32"(%x, %x) : (i32, i32) -> i32
 }

@@ -380,8 +380,8 @@ func @unroll_unit_stride_cleanup() {

 // UNROLL-BY-4-LABEL: func @unroll_non_unit_stride_cleanup() {
 func @unroll_non_unit_stride_cleanup() {
-// UNROLL-BY-4: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 2 to 37 step 20 {
 // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32

@@ -399,7 +399,7 @@ func @unroll_non_unit_stride_cleanup() {
 // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
 // UNROLL-BY-4-NEXT: }
-for %j = 2 to 48 step 5 {
+affine.for %j = 2 to 48 step 5 {
 %x = "addi32"(%j, %j) : (index, index) -> i32
 %y = "addi32"(%x, %x) : (i32, i32) -> i32
 }
@@ -411,8 +411,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
 func @loop_nest_single_iteration_after_unroll(%N: index) {
 // UNROLL-BY-4: %c0 = constant 0 : index
 // UNROLL-BY-4: %c4 = constant 4 : index
-// UNROLL-BY-4: for %i0 = 0 to %arg0 {
-for %i = 0 to %N {
+// UNROLL-BY-4: affine.for %i0 = 0 to %arg0 {
+affine.for %i = 0 to %N {
 // UNROLL-BY-4: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %1 = affine.apply [[MAP0]](%c0)
 // UNROLL-BY-4-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32

@@ -422,7 +422,7 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
 // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %7 = "addi32"(%c4, %c4) : (index, index) -> i32
 // UNROLL-BY-4-NOT: for
-for %j = 0 to 5 {
+affine.for %j = 0 to 5 {
 %x = "addi32"(%j, %j) : (index, index) -> i32
 } // UNROLL-BY-4-NOT: }
 } // UNROLL-BY-4: }

@@ -434,8 +434,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
 // No cleanup will be generated here.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand1() {
 func @loop_nest_operand1() {
-// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-// UNROLL-BY-4-NEXT: for %i1 = [[MAP10]](%i0) to #map{{[0-9]+}}(%i0) step 4
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+// UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP10]](%i0) to #map{{[0-9]+}}(%i0) step 4
 // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32

@@ -443,8 +443,8 @@ func @loop_nest_operand1() {
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: return
-for %i = 0 to 100 step 2 {
-for %j = (d0) -> (0) (%i) to (d0) -> (d0 - d0 mod 4) (%i) {
+affine.for %i = 0 to 100 step 2 {
+affine.for %j = (d0) -> (0) (%i) to (d0) -> (d0 - d0 mod 4) (%i) {
 %x = "foo"() : () -> i32
 }
 }

@@ -454,8 +454,8 @@ func @loop_nest_operand1() {
 // No cleanup will be generated here.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand2() {
 func @loop_nest_operand2() {
-// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-// UNROLL-BY-4-NEXT: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+// UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
 // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32

@@ -463,8 +463,8 @@ func @loop_nest_operand2() {
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: return
-for %i = 0 to 100 step 2 {
-for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
+affine.for %i = 0 to 100 step 2 {
+affine.for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
 %x = "foo"() : () -> i32
 }
 }
@@ -475,16 +475,16 @@ func @loop_nest_operand2() {
 // factor. The cleanup loop happens to be a single iteration one and is promoted.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand3() {
 func @loop_nest_operand3() {
-// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-for %i = 0 to 100 step 2 {
-// UNROLL-BY-4: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+affine.for %i = 0 to 100 step 2 {
+// UNROLL-BY-4: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
 // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
-for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
+affine.for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
 %x = "foo"() : () -> i32
 }
 } // UNROLL-BY-4: }

@@ -493,20 +493,20 @@ func @loop_nest_operand3() {

 // UNROLL-BY-4-LABEL: func @loop_nest_operand4(%arg0: index) {
 func @loop_nest_operand4(%N : index) {
-// UNROLL-BY-4: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
-// UNROLL-BY-4: for %i1 = [[MAP12]]()[%arg0] to #map{{[0-9]+}}()[%arg0] step 4 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
+// UNROLL-BY-4: affine.for %i1 = [[MAP12]]()[%arg0] to #map{{[0-9]+}}()[%arg0] step 4 {
 // UNROLL-BY-4: %0 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: }
 // A cleanup loop will be generated here.
-// UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
+// UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
 // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
 // UNROLL-BY-4_NEXT: }
 // Specify the lower bound so that both lb and ub operands match.
-for %j = ()[s0] -> (0)()[%N] to %N {
+affine.for %j = ()[s0] -> (0)()[%N] to %N {
 %x = "foo"() : () -> i32
 }
 }

@@ -518,7 +518,7 @@ func @loop_nest_unroll_full() {
 // CHECK-NEXT: %0 = "foo"() : () -> i32
 // CHECK-NEXT: %1 = "bar"() : () -> i32
 // CHECK-NEXT: return
-for %i = 0 to 1 {
+affine.for %i = 0 to 1 {
 %x = "foo"() : () -> i32
 %y = "bar"() : () -> i32
 }

@@ -527,7 +527,7 @@ func @loop_nest_unroll_full() {

 // UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop()
 func @unroll_by_one_should_promote_single_iteration_loop() {
-for %i = 0 to 1 {
+affine.for %i = 0 to 1 {
 %x = "foo"(%i) : (index) -> i32
 }
 return
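The non-unit-stride test above is the easiest place to check the unrolling arithmetic: `affine.for %j = 2 to 48 step 5` has trip count ceil((48 - 2) / 5) = 10, so unroll-by-4 keeps floor(10 / 4) * 4 = 8 iterations in the main loop, which now advances by 4 * 5 = 20 with upper bound 37, and the remaining 2 iterations go to a cleanup loop starting at 42. A sketch of that transformation; the "use" op is a placeholder, not from the test file:

```mlir
// Before:
affine.for %j = 2 to 48 step 5 {
  "use"(%j) : (index) -> ()
}

// After unroll-by-4: %j takes 2 and 22, and the affine.apply ops
// materialize the three intermediate induction values per step.
affine.for %j = 2 to 37 step 20 {
  %j1 = affine.apply (d0) -> (d0 + 5) (%j)
  %j2 = affine.apply (d0) -> (d0 + 10) (%j)
  %j3 = affine.apply (d0) -> (d0 + 15) (%j)
  "use"(%j) : (index) -> ()
  "use"(%j1) : (index) -> ()
  "use"(%j2) : (index) -> ()
  "use"(%j3) : (index) -> ()
}
// Cleanup for the last two iterations (j = 42 and j = 47):
affine.for %j = 42 to 48 step 5 {
  "use"(%j) : (index) -> ()
}
```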
@@ -42,7 +42,7 @@
 ;; Keywords
 `(,(regexp-opt
 '(;; Toplevel entities
-"br" "ceildiv" "cfgfunc" "cond_br" "else" "extfunc" "false" "floordiv" "for" "if" "mlfunc" "mod" "return" "size" "step" "to" "true" "??" ) 'symbols) . font-lock-keyword-face))
+"br" "ceildiv" "func" "cond_br" "else" "extfunc" "false" "floordiv" "affine.for" "if" "mod" "return" "size" "step" "to" "true" "??" ) 'symbols) . font-lock-keyword-face))
 "Syntax highlighting for MLIR.")

 ;; Emacs 23 compatibility.
@@ -10,9 +10,9 @@ syn keyword mlirType index i1 i2 i4 i8 i13 i16 i32 i64
 \ f16 f32 tf_control
 syn keyword mlirType memref tensor vector

-syntax keyword mlirKeywords extfunc cfgfunc mlfunc for to step return
+syntax keyword mlirKeywords extfunc func to step return
 syntax keyword mlirConditional if else
-syntax keyword mlirCoreOps dim addf addi subf subi mulf muli cmpi select constant affine.apply call call_indirect extract_element getTensor memref_cast tensor_cast load store alloc dealloc dma_start dma_wait
+syntax keyword mlirCoreOps dim addf addi subf subi mulf muli cmpi select constant affine.apply affine.for call call_indirect extract_element getTensor memref_cast tensor_cast load store alloc dealloc dma_start dma_wait

 syn match mlirInt "-\=\<\d\+\>"
 syn match mlirFloat "-\=\<\d\+\.\d\+\>"