forked from OSchip/llvm-project
NFC: Rename the 'for' operation in the AffineOps dialect to 'affine.for'. This is the second step to adding a namespace to the AffineOps dialect.
PiperOrigin-RevId: 232717775
This commit is contained in:
@ -15,7 +15,7 @@ loops and if instructions), the result of a
[`affine.apply` operation](#'affine.apply'-operation) that recursively takes as
arguments any symbolic identifiers. Dimensions may be bound not only to anything
that a symbol is bound to, but also to induction variables of enclosing
[for instructions](#'for'-operation), and the result of an
['affine.for' operations](#'affine.for'-operation), and the result of an
[`affine.apply` operation](#'affine.apply'-operation) (which recursively may use
other dimensions and symbols).
@ -47,12 +47,12 @@ Example:
%2 = affine.apply (i)[s0] -> (i+s0) (%42)[%n]
#### 'for' operation {#'for'-operation}
#### 'affine.for' operation {#'affine.for'-operation}
``` {.ebnf}
operation ::= `for` ssa-id `=` lower-bound `to` upper-bound
operation ::= `affine.for` ssa-id `=` lower-bound `to` upper-bound
(`step` integer-literal)? `{` inst* `}`
lower-bound ::= `max`? affine-map dim-and-symbol-use-list | shorthand-bound
@ -60,17 +60,17 @@ upper-bound ::= `min`? affine-map dim-and-symbol-use-list | shorthand-bound
shorthand-bound ::= ssa-id | `-`? integer-literal
The `for` operation represents an affine loop nest, defining an SSA value for
its induction variable. This SSA value always has type
The `affine.for` operation represents an affine loop nest, defining an SSA value
for its induction variable. This SSA value always has type
`index`, which is the size of the machine word.
The `for` operation executes its body a number of times iterating from a lower
bound to an upper bound by a stride. The stride, represented by `step`, is a
positive constant integer which defaults to "1" if not present. The lower and
The `affine.for` operation executes its body a number of times iterating from a
lower bound to an upper bound by a stride. The stride, represented by `step`, is
a positive constant integer which defaults to "1" if not present. The lower and
upper bounds specify a half-open range: the range includes the lower bound but
does not include the upper bound.
The lower and upper bounds of a `for` operation are represented as an
The lower and upper bounds of an `affine.for` operation are represented as an
application of an affine mapping to a list of SSA values passed to the map. The
[same restrictions](#restrictions-on-dimensions-and-symbols) hold for these SSA
values as for all bindings of SSA values to dimensions and symbols.
@ -94,8 +94,8 @@ Example showing reverse iteration of the inner loop:
func @simple_example(%A: memref<?x?xf32>, %B: memref<?x?xf32>) {
%N = dim %A, 0 : memref<?x?xf32>
for %i = 0 to %N step 1 {
for %j = 0 to %N { // implicitly steps by 1
affine.for %i = 0 to %N step 1 {
affine.for %j = 0 to %N { // implicitly steps by 1
%0 = affine.apply #map57(%j)[%N]
%tmp = call @F1(%A, %i, %0) : (memref<?x?xf32>, index, index)->(f32)
call @F2(%tmp, %B, %i, %0) : (f32, memref<?x?xf32>, index, index)->()
@ -130,8 +130,8 @@ Example:
#set = (d0, d1)[s0]: (d0 - 10 >= 0, s0 - d0 - 9 >= 0,
d1 - 10 >= 0, s0 - d1 - 9 >= 0)
func @reduced_domain_example(%A, %X, %N) : (memref<10xi32>, i32, i32) {
for %i = 0 to %N {
for %j = 0 to %N {
affine.for %i = 0 to %N {
affine.for %j = 0 to %N {
%0 = affine.apply #map42(%j)
%tmp = call @S1(%X, %i, %0)
if #set(%i, %j)[%N] {
@ -22,9 +22,9 @@ Examples:
// Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32> and
// pad with %f0 to handle the boundary case:
%f0 = constant 0.0f : f32
for %i0 = 0 to %0 {
for %i1 = 0 to %1 step 256 {
for %i2 = 0 to %2 step 32 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 step 256 {
affine.for %i2 = 0 to %2 step 32 {
%v = vector_transfer_read %A, %i0, %i1, %i2, %f0
{permutation_map: (d0, d1, d2) -> (d2, d1)} :
(memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>
@ -33,8 +33,8 @@ for %i0 = 0 to %0 {
// Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into
// vector<128xf32>. The underlying implementation will require a 1-D vector
// broadcast:
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
%3 = vector_transfer_read %A, %i0, %i1
{permutation_map: (d0, d1) -> (0)} :
(memref<?x?xf32>, index, index) -> vector<128xf32>
@ -80,9 +80,9 @@ A notional lowering of vector_transfer_read could generate code resembling:
// %expr1, %expr2, %expr3, %expr4 defined before this point
%tmp = alloc() : vector<3x4x5xf32>
%view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
for %i = 0 to 3 {
for %j = 0 to 4 {
for %k = 0 to 5 {
affine.for %i = 0 to 3 {
affine.for %j = 0 to 4 {
affine.for %k = 0 to 5 {
%a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
store %tmp[%i, %j, %k] : vector<3x4x5xf32>
@ -101,8 +101,8 @@ lowered code would resemble:
// %expr1, %expr2, %expr3, %expr4 defined before this point
%tmp = alloc() : vector<3x4x5xf32>
%view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
for %i = 0 to 3 {
for %k = 0 to 5 {
affine.for %i = 0 to 3 {
affine.for %k = 0 to 5 {
%a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
store %tmp[%i, 0, %k] : vector<3x4x5xf32>
@ -129,10 +129,10 @@ Examples:
```mlir {.mlir}
// write vector<16x32x64xf32> into the slice `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
for %i0 = 0 to %0 {
for %i1 = 0 to %1 step 32 {
for %i2 = 0 to %2 step 64 {
for %i3 = 0 to %3 step 16 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 step 32 {
affine.for %i2 = 0 to %2 step 64 {
affine.for %i3 = 0 to %3 step 16 {
%val = `ssa-value` : vector<16x32x64xf32>
vector_transfer_write %val, %A, %i0, %i1, %i2, %i3
{permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :
@ -40,7 +40,7 @@ which means that values are defined before use and have scope defined by their
dominance relations. Operations may produce zero or more results, and each is a
distinct SSA value with its own type defined by the [type system](#type-system).
MLIR incorporates polyhedral compiler concepts, including `for` and `if`
MLIR incorporates polyhedral compiler concepts, including `affine.for` and `if`
operations defined by the affine dialect, which model
affine loops and affine conditionals. It also includes affine maps integrated
into the type system - they are key to the representation of data and
@ -99,10 +99,10 @@ func @multiply(%A: memref<100x?xf32>, %B: memref<?x50xf32>)
%C = alloc memref<100x50xf32>()
// Multiplication loop nest.
for %i = 0 to 100 {
for %j = 0 to 50 {
affine.for %i = 0 to 100 {
affine.for %j = 0 to 50 {
store 0 to %C[%i, %j] : memref<100x50xf32>
for %k = 0 to %n {
affine.for %k = 0 to %n {
%a_v = load %A[%i, %k] : memref<100x?xf32>
%b_v = load %B[%k, %j] : memref<?x50xf32>
%prod = mulf %a_v, %b_v : f32
@ -1434,8 +1434,8 @@ The arity of indices is the rank of the memref (i.e., if the memref loaded from
is of rank 3, then 3 indices are required for the load following the memref
In an `if` or `for` body, the indices of a load are restricted to SSA values
bound to surrounding loop induction variables,
In an `if` or `affine.for` body, the indices of a load are restricted to SSA
values bound to surrounding loop induction variables,
[symbols](#dimensions-and-symbols), results of a
[`constant` operation](#'constant'-operation), or the result of an
`affine.apply` operation that can in turn take as arguments all of the
@ -1456,7 +1456,7 @@ Example:
**Context:** The `load` and `store` instructions are specifically crafted to
fully resolve a reference to an element of a memref, and (in affine `if` and
`for` instructions) the compiler can follow use-def chains (e.g. through
`affine.for` instructions) the compiler can follow use-def chains (e.g. through
[`affine.apply`](Dialects/'affine.apply'-operation) operations) to
precisely analyze references at compile-time using polyhedral techniques. This
is possible because of the
@ -1492,7 +1492,7 @@ store %100, %A[%1, 1023] : memref<4x?xf32, #layout, hbm>
**Context:** The `load` and `store` instructions are specifically crafted to
fully resolve a reference to an element of a memref, and (in polyhedral `if` and
`for` instructions) the compiler can follow use-def chains (e.g. through
`affine.for` instructions) the compiler can follow use-def chains (e.g. through
[`affine.apply`](Dialects/'affine.apply'-operation) operations) to
precisely analyze references at compile-time using polyhedral techniques. This
is possible because of the
@ -39,8 +39,8 @@ These restrictions may be lifted in the future.
### Output IR
Functions with `for` and `if` instructions eliminated. These functions may
contain operations from the Standard dialect in addition to those already
Functions with `affine.for` and `if` instructions eliminated. These functions
may contain operations from the Standard dialect in addition to those already
present before the pass.
### Invariants
@ -150,8 +150,8 @@ func bar(%A : memref<8x?xf32, #lmap>) {
// dynamically using dim instruction.
%N = dim %A, 1 : memref<8x?xf32, #lmap>
for %i = 0 to 8 {
for %j = 0 to %N {
affine.for %i = 0 to 8 {
affine.for %j = 0 to %N {
// A[i,j] += 1
%s1 = load %A [%i, %j] : memref<8x?xf32, #lmap>
%s2 = add %s1, 1
@ -534,7 +534,7 @@ nested in an outer function that using affine loops.
func @search(memref<?x?xi32 %A, <?xi32> %S, i32 %key) {
%ni = dim %A, 0 : memref<?x?xi32>
// This loop can be parallelized
for %i = 0 to %ni {
affine.for %i = 0 to %ni {
call @search_body (%A, %S, %i) : (memref<?x?xi32>, memref<?xi32>, i32)
@ -568,10 +568,10 @@ func @search_body(%A: memref<?x?xi32>, %S: memref<?xi32>, %key: i32) {
As per the MLIR spec, the restrictions on dimensions and symbol
identifiers to be used with the affine.apply instruction only apply to accesses
inside `for` and `if` instructions. However, an analysis of accesses inside the
called function (`@search_body`) is necessary to determine if the `%i` loop
could be parallelized: such function access analysis is calling context
inside `affine.for` and `if` instructions. However, an analysis of accesses
inside the called function (`@search_body`) is necessary to determine if the
`%i` loop could be parallelized: such function access analysis is calling
context sensitive.
### Non-affine loop bounds {#non-affine-loop-bounds}
@ -590,8 +590,8 @@ for (i=0; i <N; i++)
```mlir {.mlir}
func @outer_nest(%n) : (i32) {
for %i = 0 to %n {
for %j = 0 to %n {
affine.for %i = 0 to %n {
affine.for %j = 0 to %n {
call @inner_nest(%i, %j, %n)
@ -606,8 +606,8 @@ func @inner_nest(%i: i32, %j: i32, %n: i32) {
func @inner_nest2(%m, %n) -> i32 {
for %k = 0 to %m {
for %l = 0 to %n {
affine.for %k = 0 to %m {
affine.for %l = 0 to %n {
@ -649,13 +649,13 @@ in a dilated convolution.
func @conv2d(memref<16x1024x1024x3xf32, #lm0, vmem> %input,
memref<5x5x3x32xf32, #lm0, vmem> %kernel,
memref<16x512x512x32xf32, #lm0, vmem> %output) {
for %b = 0 to %batch {
for %oh = 0 to %output_height {
for %ow = 0 to %output_width {
for %of = 0 to %output_feature {
for %kh = 0 to %kernel_height {
for %kw = 0 to %kernel_width {
for %if = 0 to %input_feature {
affine.for %b = 0 to %batch {
affine.for %oh = 0 to %output_height {
affine.for %ow = 0 to %output_width {
affine.for %of = 0 to %output_feature {
affine.for %kh = 0 to %kernel_height {
affine.for %kw = 0 to %kernel_width {
affine.for %if = 0 to %input_feature {
// Calculate input indices.
%1_0 = affine.apply #map1_0 (%0#1, %0#2, %0#4, %0#5)
[%h_stride, %w_stride, %h_kernel_dilation, %w_kernel_dilation,
@ -899,14 +899,14 @@ func @dma_hbm_to_vmem(memref<1024 x f32, #layout_map0, hbm> %a,
representation. 2(b) requires no change, but impacts how cost models look at
index and layout maps.
### `if` and `for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}
### `if` and `affine.for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}
We considered providing a representation for SSA values that are live out of
`if/else` conditional bodies and loop carried in `for` loops. We ultimately
abandoned this approach due to its complexity. In the current design of MLIR,
scalar variables cannot escape for loops or if instructions. In situations,
where escaping is necessary, we use zero-dimensional tensors and memrefs instead
of scalars.
`if/else` conditional bodies and loop carried in `affine.for` loops. We
ultimately abandoned this approach due to its complexity. In the current design
of MLIR, scalar variables cannot escape for loops or if instructions. In
situations where escaping is necessary, we use zero-dimensional tensors and
memrefs instead of scalars.
**TODO**: This whole section is obsolete and should be updated to use block
arguments and a yield like terminator in for/if instructions.
@ -919,7 +919,7 @@ Syntax:
``` {.ebnf}
[<out-var-list> =]
for %<index-variable-name> = <lower-bound> ... <upper-bound> step <step>
affine.for %<index-variable-name> = <lower-bound> ... <upper-bound> step <step>
[with <in-var-list>] { <loop-instruction-list> }
@ -934,7 +934,7 @@ Example:
// Return sum of elements in 1-dimensional memref A
func int32 @sum(%A : memref<?xi32>, %N : i32) -> (i32) {
%init = 0
%result = for %i = 0 to N with %tmp(%init) {
%result = affine.for %i = 0 to N with %tmp(%init) {
%value = load %A[%i]
%sum = %value + %tmp
yield %sum
@ -964,7 +964,7 @@ Example:
// Compute sum of half of the array
func int32 @sum_half(%A, %N) {
%s0 = 0
%s1 = for %i = 1 ... N step 1 with %s2 (%s0) {
%s1 = affine.for %i = 1 ... N step 1 with %s2 (%s0) {
%s3 = if (%i >= %N / 2) {
%v0 = load %A[%i]
%s4 = %s2 + %v0
@ -184,8 +184,8 @@ Our simple example above would be represented as:
mlfunc @simple_example(... %N) {
for %i = 0 ... %N step 1 {
for %j = 0 ... %N step 1 {
affine.for %i = 0 ... %N step 1 {
affine.for %j = 0 ... %N step 1 {
// identity noop in this case, but can exist in general.
%0,%1 = affine.apply #57(%i, %j)
@ -203,8 +203,8 @@ The example with the reduced domain would be represented with an if instruction:
mlfunc @reduced_domain_example(... %N) {
for %i = 0 ... %N step 1 {
for %j = 0 ... %N step 1 {
affine.for %i = 0 ... %N step 1 {
affine.for %j = 0 ... %N step 1 {
// identity noop in this case, but can exist in general.
%0,%1 = affinecall #57(%i, %j)
@ -233,8 +233,8 @@ that transformations call into):
mlfunc @skewed_domain_example(... %N) {
for %t1 = 0 ... 2*N-2 step 1 {
for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
affine.for %t1 = 0 ... 2*N-2 step 1 {
affine.for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
(%i, %j) = (%t1-%t2, %t2)
@ -373,7 +373,7 @@ mlfunc's (if we support them) will also have to have domains.
### Lack of redundancy in IR
The traditional form has multiple encodings for the same sorts of behavior: you
end up having bits on `for` loops to specify whether codegen should use
end up having bits on `affine.for` loops to specify whether codegen should use
"atomic/separate" policies, unroll loops, etc. Instructions can be split or can
generate multiple copies of their instruction because of overlapping domains,
@ -90,15 +90,15 @@ private:
explicit AffineApplyOp(const Instruction *state) : Op(state) {}
/// The "for" instruction represents an affine loop nest, defining an SSA value
/// for its induction variable. The induction variable is represented as a
/// The "affine.for" instruction represents an affine loop nest, defining an SSA
/// value for its induction variable. The induction variable is represented as a
/// BlockArgument to the entry block of the body. The body and induction
/// variable can be created automatically for new "for" ops with 'createBody'.
/// This SSA value always has type index, which is the size of the machine word.
/// The stride, represented by step, is a positive constant integer which
/// defaults to "1" if not present. The lower and upper bounds specify a
/// half-open range: the range includes the lower bound but does not include the
/// upper bound.
/// variable can be created automatically for new "affine.for" ops with
/// 'createBody'. This SSA value always has type index, which is the size of the
/// machine word. The stride, represented by step, is a positive constant
/// integer which defaults to "1" if not present. The lower and upper bounds
/// specify a half-open range: the range includes the lower bound but does not
/// include the upper bound.
/// The lower and upper bounds of a for operation are represented as an
/// application of an affine mapping to a list of SSA values passed to the map.
@ -110,7 +110,7 @@ private:
/// Example:
/// for %i = 1 to 10 {
/// affine.for %i = 1 to 10 {
/// ...
/// }
@ -131,7 +131,7 @@ public:
static void getCanonicalizationPatterns(OwningRewritePatternList &results,
MLIRContext *context);
static StringRef getOperationName() { return "for"; }
static StringRef getOperationName() { return "affine.for"; }
static StringRef getStepAttrName() { return "step"; }
static StringRef getLowerBoundAttrName() { return "lower_bound"; }
static StringRef getUpperBoundAttrName() { return "upper_bound"; }
@ -253,15 +253,15 @@ ConstOpPointer<AffineForOp> getForInductionVarOwner(const Value *val);
void extractForInductionVars(ArrayRef<OpPointer<AffineForOp>> forInsts,
SmallVectorImpl<Value *> *ivs);
/// Adds constraints (lower and upper bounds) for the specified 'for'
/// Adds constraints (lower and upper bounds) for the specified 'affine.for'
/// instruction's Value using IR information stored in its bound maps. The
/// right identifier is first looked up using forOp's Value. Returns
/// false for the yet unimplemented/unsupported cases, and true if the
/// information is successfully added. Asserts if the Value corresponding to
/// the 'for' instruction isn't found in the constraint system. Any new
/// identifiers that are found in the bound operands of the 'for' instruction
/// are added as trailing identifiers (either dimensional or symbolic
/// depending on whether the operand is a valid ML Function symbol).
/// the 'affine.for' instruction isn't found in the constraint system. Any new
/// identifiers that are found in the bound operands of the 'affine.for'
/// instruction are added as trailing identifiers (either dimensional or
/// symbolic depending on whether the operand is a valid ML Function symbol).
// TODO(bondhugula): add support for non-unit strides.
bool addAffineForOpDomain(ConstOpPointer<AffineForOp> forOp,
FlatAffineConstraints *constraints);
@ -297,10 +297,10 @@ public:
operand_range getOperands() const { return {operand_begin(), operand_end()}; }
// 'for' instruction that contains this bound.
// 'affine.for' instruction that contains this bound.
ConstOpPointer<AffineForOp> inst;
// Start and end positions of this affine bound operands in the list of
// the containing 'for' instruction operands.
// the containing 'affine.for' instruction operands.
unsigned opStart, opEnd;
// Affine map for this bound.
AffineMap map;
@ -52,7 +52,7 @@ bool dominates(const Instruction &a, const Instruction &b);
bool properlyDominates(const Instruction &a, const Instruction &b);
/// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
/// the outermost 'for' instruction to the innermost one.
/// the outermost 'affine.for' instruction to the innermost one.
// TODO(bondhugula): handle 'if' inst's.
void getLoopIVs(const Instruction &inst,
SmallVectorImpl<OpPointer<AffineForOp>> *loops);
@ -105,8 +105,8 @@ insertBackwardComputationSlice(Instruction *srcOpInst, Instruction *dstOpInst,
/// surrounding such op's.
// For example, the memref region for a load operation at loop depth = 1:
// for %i = 0 to 32 {
// for %ii = %i to (d0) -> (d0 + 8) (%i) {
// affine.for %i = 0 to 32 {
// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
// load %A[%ii]
// }
// }
@ -139,8 +139,8 @@ struct MemRefRegion {
/// For example, the memref region for this operation at loopDepth = 1 will
/// be:
/// for %i = 0 to 32 {
/// for %ii = %i to (d0) -> (d0 + 8) (%i) {
/// affine.for %i = 0 to 32 {
/// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
/// load %A[%ii]
/// }
/// }
@ -76,9 +76,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
/// The following MLIR snippet:
/// ```mlir
/// for %i3 = 0 to %0 {
/// for %i4 = 0 to %1 {
/// for %i5 = 0 to %2 {
/// affine.for %i3 = 0 to %0 {
/// affine.for %i4 = 0 to %1 {
/// affine.for %i5 = 0 to %2 {
/// %a5 = load %arg0[%i4, %i5, %i3] : memref<?x?x?xf32>
/// }}}
/// ```
@ -86,9 +86,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
/// may vectorize with {permutation_map: (d0, d1, d2) -> (d2, d1)} into:
/// ```mlir
/// for %i3 = 0 to %0 step 32 {
/// for %i4 = 0 to %1 {
/// for %i5 = 0 to %2 step 256 {
/// affine.for %i3 = 0 to %0 step 32 {
/// affine.for %i4 = 0 to %1 {
/// affine.for %i5 = 0 to %2 step 256 {
/// %4 = vector_transfer_read %arg0, %i4, %i5, %i3
/// {permutation_map: (d0, d1, d2) -> (d2, d1)} :
/// (memref<?x?x?xf32>, index, index) -> vector<32x256xf32>
@ -103,7 +103,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
/// ```mlir
/// %cst0 = constant 0 : index
/// for %i0 = 0 to %0 {
/// affine.for %i0 = 0 to %0 {
/// %a0 = load %arg0[%cst0, %cst0] : memref<?x?xf32>
/// }
/// ```
@ -111,7 +111,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
/// may vectorize with {permutation_map: (d0) -> (0)} into:
/// ```mlir
/// for %i0 = 0 to %0 step 128 {
/// affine.for %i0 = 0 to %0 step 128 {
/// %3 = vector_transfer_read %arg0, %c0_0, %c0_0
/// {permutation_map: (d0, d1) -> (0)} :
/// (memref<?x?xf32>, index, index) -> vector<128xf32>
@ -83,9 +83,10 @@ AffineMap getUnrolledLoopUpperBound(ConstOpPointer<AffineForOp> forOp,
unsigned unrollFactor,
FuncBuilder *builder);
/// Skew the instructions in the body of a 'for' instruction with the specified
/// instruction-wise shifts. The shifts are with respect to the original
/// execution order, and are multiplied by the loop 'step' before being applied.
/// Skew the instructions in the body of an 'affine.for' instruction with the
/// specified instruction-wise shifts. The shifts are with respect to the
/// original execution order, and are multiplied by the loop 'step' before being
/// applied.
UtilResult instBodySkew(OpPointer<AffineForOp> forOp, ArrayRef<uint64_t> shifts,
bool unrollPrologueEpilogue = false);
@ -94,14 +94,14 @@ Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc,
/// Before
/// for %i = 0 to #map(%N)
/// affine.for %i = 0 to #map(%N)
/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
/// send %A[%idx], ...
/// %v = "compute"(%idx, ...)
/// After
/// for %i = 0 to #map(%N)
/// affine.for %i = 0 to #map(%N)
/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
/// send %A[%idx], ...
/// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
@ -716,7 +716,7 @@ static void printBound(AffineBound bound, const char *prefix, OpAsmPrinter *p) {
void AffineForOp::print(OpAsmPrinter *p) const {
*p << "for ";
*p << "affine.for ";
*p << " = ";
printBound(getLowerBound(), "max", p);
@ -756,8 +756,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
// For example, given the following MLIR code with "source" and
// "destination" accesses to the same memref labeled, and symbols %M, %N, %K:
// for %i0 = 0 to 100 {
// for %i1 = 0 to 50 {
// affine.for %i0 = 0 to 100 {
// affine.for %i1 = 0 to 50 {
// %a0 = affine.apply
// (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
// // Source memref access.
@ -765,8 +765,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
// }
// }
// for %i2 = 0 to 100 {
// for %i3 = 0 to 50 {
// affine.for %i2 = 0 to 100 {
// affine.for %i3 = 0 to 50 {
// %a1 = affine.apply
// (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
// // Destination memref access.
@ -36,13 +36,13 @@
using namespace mlir;
/// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
/// the outermost 'for' instruction to the innermost one.
/// the outermost 'affine.for' instruction to the innermost one.
void mlir::getLoopIVs(const Instruction &inst,
SmallVectorImpl<OpPointer<AffineForOp>> *loops) {
auto *currInst = inst.getParentInst();
OpPointer<AffineForOp> currAffineForOp;
// Traverse up the hierarchy collecting all 'for' instructions while skipping
// over 'if' instructions.
// Traverse up the hierarchy collecting all 'affine.for' instructions while
// skipping over 'if' instructions.
while (currInst && ((currAffineForOp = currInst->dyn_cast<AffineForOp>()) ||
currInst->isa<AffineIfOp>())) {
if (currAffineForOp)
@ -111,8 +111,8 @@ bool MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
// For example, the memref region for this load operation at loopDepth = 1 will
// be as below:
// for %i = 0 to 32 {
// for %ii = %i to (d0) -> (d0 + 8) (%i) {
// affine.for %i = 0 to 32 {
// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
// load %A[%ii]
// }
// }
@ -614,7 +614,7 @@ Optional<int64_t> mlir::getMemoryFootprintBytes(const Block &block,
int memorySpace) {
std::vector<std::unique_ptr<MemRefRegion>> regions;
// Walk this 'for' instruction to gather all memory regions.
// Walk this 'affine.for' instruction to gather all memory regions.
bool error = false;
const_cast<Block *>(&block)->walk([&](Instruction *opInst) {
if (!opInst->isa<LoadOp>() && !opInst->isa<StoreOp>()) {
@ -189,7 +189,7 @@ unsigned Block::getNumSuccessors() const {
return terminator->getNumSuccessors();
assert(getParent() && "top-level block with no terminator");
// Blocks inside 'for'/'if' instructions don't have successors.
// Blocks inside 'affine.for'/'if' instructions don't have successors.
return 0;
@ -338,7 +338,8 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
auto fastMemRefType = top.getMemRefType(
fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace);
// Create the fast memory space buffer just before the 'for' instruction.
// Create the fast memory space buffer just before the 'affine.for'
// instruction.
fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType)->getResult();
// Record it.
fastBufferMap[memref] = fastMemRef;
@ -456,7 +457,7 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
// approach is conservative in some cases at the moment, we do a check later
// and report an error with location info.
// TODO(bondhugula): An 'if' instruction is being treated similar to an
// operation instruction. 'if''s could have 'for's in them; treat them
// operation instruction. 'if''s could have 'affine.for's in them; treat them
// separately.
// Get to the first load, store, or for op.
@ -470,9 +471,9 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
if (auto forOp = it->dyn_cast<AffineForOp>()) {
// We'll assume for now that loops with steps are tiled loops, and so DMAs
// are not performed for that depth, but only further inside.
// If the memory footprint of the 'for' loop is higher than fast memory
// capacity (when provided), we recurse to DMA at an inner level until
// we find a depth at which footprint fits in the capacity. If the
// If the memory footprint of the 'affine.for' loop is higher than fast
// memory capacity (when provided), we recurse to DMA at an inner level
// until we find a depth at which footprint fits in the capacity. If the
// footprint can't be calculated, we assume for now it fits.
// Returns true if the footprint is known to exceed capacity.
@ -489,13 +490,13 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
consumedCapacityBytes += runOnBlock(/*begin=*/curBegin, /*end=*/it);
// Recurse onto the body of this loop.
runOnBlock(forOp->getBody(), consumedCapacityBytes);
// The next region starts right after the 'for' instruction.
// The next region starts right after the 'affine.for' instruction.
curBegin = std::next(it);
} else {
// We have enough capacity, i.e., DMAs will be computed for the portion
// of the block until 'it', and for the 'for' loop. For the latter, they
// are placed just before this loop (for incoming DMAs) and right after
// (for outgoing ones).
// of the block until 'it', and for the 'affine.for' loop. For the
// latter, they are placed just before this loop (for incoming DMAs) and
// right after (for outgoing ones).
consumedCapacityBytes += runOnBlock(/*begin=*/curBegin, /*end=*/it);
// Inner loop DMAs have their own scope - we don't thus update consumed
@ -510,7 +510,8 @@ bool MemRefDependenceGraph::init(Function *f) {
// all loads and store accesses it contains.
LoopNestStateCollector collector;
// Return false if a non 'for' region was found (not currently supported).
// Return false if a non 'affine.for' region was found (not currently
// supported).
if (collector.hasNonForRegion)
return false;
Node node(nextNodeId++, &inst);
@ -231,7 +231,8 @@ UtilResult mlir::tileCodeGen(MutableArrayRef<OpPointer<AffineForOp>> band,
static void
getTileableBands(Function *f,
std::vector<SmallVector<OpPointer<AffineForOp>, 6>> *bands) {
// Get maximal perfect nest of 'for' insts starting from root (inclusive).
// Get maximal perfect nest of 'affine.for' insts starting from root
// (inclusive).
auto getMaximalPerfectLoopNest = [&](OpPointer<AffineForOp> root) {
SmallVector<OpPointer<AffineForOp>, 6> band;
OpPointer<AffineForOp> currInst = root;
@ -164,7 +164,7 @@ PassResult LoopUnroll::runOnFunction(Function *f) {
return success();
/// Unrolls a 'for' inst. Returns true if the loop was unrolled, false
/// Unrolls an 'affine.for' inst. Returns true if the loop was unrolled, false
/// otherwise. The default unroll factor is 4.
bool LoopUnroll::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
// Use the function callback if one was provided.
@ -105,7 +105,7 @@ PassResult LoopUnrollAndJam::runOnFunction(Function *f) {
return success();
/// Unroll and jam a 'for' inst. Default unroll jam factor is
/// Unroll and jam an 'affine.for' inst. Default unroll jam factor is
/// kDefaultUnrollJamFactor. Return false if nothing was done.
bool LoopUnrollAndJam::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
// Unroll and jam by the factor that was passed if any.
@ -283,7 +283,8 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate,
return value;
// Convert a "for" loop to a flow of blocks. Return `false` on success.
// Convert an "affine.for" loop to a flow of blocks. Return `false` on
// success.
// Create an SESE region for the loop (including its body) and append it to the
// end of the current region. The loop region consists of the initialization
@ -330,8 +331,9 @@ bool LowerAffinePass::lowerAffineFor(OpPointer<AffineForOp> forOp) {
auto loc = forOp->getLoc();
auto *forInst = forOp->getInstruction();
// Start by splitting the block containing the 'for' into two parts. The part
// before will get the init code, the part after will be the end point.
// Start by splitting the block containing the 'affine.for' into two parts.
// The part before will get the init code, the part after will be the end
// point.
auto *initBlock = forInst->getBlock();
auto *endBlock = initBlock->splitBlock(forInst);
@ -126,9 +126,9 @@ private:
/// // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into
/// // vector<32x256xf32> and pad with %f0 to handle the boundary case:
/// %f0 = constant 0.0f : f32
/// for %i0 = 0 to %0 {
/// for %i1 = 0 to %1 step 256 {
/// for %i2 = 0 to %2 step 32 {
/// affine.for %i0 = 0 to %0 {
/// affine.for %i1 = 0 to %1 step 256 {
/// affine.for %i2 = 0 to %2 step 32 {
/// %v = vector_transfer_read %A, %i0, %i1, %i2, %f0
/// {permutation_map: (d0, d1, d2) -> (d2, d1)} :
/// (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>
@ -139,8 +139,8 @@ private:
/// MLIR resembling:
/// ```mlir
/// for %d1 = 0 to 256 {
/// for %d2 = 0 to 32 {
/// affine.for %d1 = 0 to 256 {
/// affine.for %d2 = 0 to 32 {
/// %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32
/// %tmp[%d2, %d1] = %s
/// }
@ -101,10 +101,10 @@
/// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
/// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
/// %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> :
/// vector<4x4x4xf32> for %i0 = 0 to %M step 4 {
/// for %i1 = 0 to %N step 4 {
/// for %i2 = 0 to %O {
/// for %i3 = 0 to %P step 4 {
/// vector<4x4x4xf32>
/// affine.for %i0 = 0 to %M step 4 {
/// affine.for %i1 = 0 to %N step 4 {
/// affine.for %i2 = 0 to %O {
/// affine.for %i3 = 0 to %P step 4 {
/// vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3
/// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} :
/// vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>,
@ -120,10 +120,10 @@
/// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
/// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
/// %f1 = constant splat<vector<4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
/// for %i0 = 0 to %arg0 step 4 {
/// for %i1 = 0 to %arg1 step 4 {
/// for %i2 = 0 to %arg2 {
/// for %i3 = 0 to %arg3 step 4 {
/// affine.for %i0 = 0 to %arg0 step 4 {
/// affine.for %i1 = 0 to %arg1 step 4 {
/// affine.for %i2 = 0 to %arg2 {
/// affine.for %i3 = 0 to %arg3 step 4 {
/// %1 = affine.apply (d0, d1, d2, d3) -> (d0, d1, d2, d3)
/// (%i0, %i1, %i2, %i3)
/// vector_transfer_write f1, %0, %1#0, %1#1, %1#2, %1#3
@ -293,10 +293,10 @@ static Value *substitute(Value *v, VectorType hwVectorType,
/// super-vectorization has been applied:
/// ```mlir
/// for %i0 = 0 to %M {
/// for %i1 = 0 to %N step 3 {
/// for %i2 = 0 to %O {
/// for %i3 = 0 to %P step 32 {
/// affine.for %i0 = 0 to %M {
/// affine.for %i1 = 0 to %N step 3 {
/// affine.for %i2 = 0 to %O {
/// affine.for %i3 = 0 to %P step 32 {
/// %r = vector_transfer_read(%A, map(%i..)#0, map(%i..)#1, map(%i..)#2)
/// -> vector<3x32xf32>
/// ...
@ -19,7 +19,7 @@
// potentially getting rid of intermediate memrefs entirely.
// TODO(mlir-team): In the future, similar techniques could be used to eliminate
// dead memref stores and perform more complex forwarding when support for
// SSA scalars live out of 'for'/'if' statements is available.
// SSA scalars live out of 'affine.for'/'if' statements is available.
#include "mlir/Analysis/AffineAnalysis.h"
@ -55,7 +55,7 @@ namespace {
// (* A dependence being satisfied at a block: a dependence that is satisfied by
// virtue of the destination instruction appearing textually / lexically after
// the source instruction within the body of a 'for' instruction; thus, a
// the source instruction within the body of an 'affine.for' instruction; thus,
// a dependence is always either satisfied by a loop or by a block).
// The above conditions are simple to check, sufficient, and powerful for most
@ -145,8 +145,8 @@ void MemRefDataFlowOpt::forwardStoreToLoad(OpPointer<LoadOp> loadOp) {
// Check if this store is a candidate for forwarding; we only forward if
// the dependence from the store is carried by the *body* of innermost
// common surrounding loop. As an example this filters out cases like:
// for %i0
// for %i1
// affine.for %i0
// affine.for %i1
// %idx = affine.apply (d0) -> (d0 + 1) (%i0)
// store %A[%idx]
// load %A[%i0]
@ -71,11 +71,11 @@ static unsigned getTagMemRefPos(const Instruction &dmaInst) {
return 0;
/// Doubles the buffer of the supplied memref on the specified 'for' instruction
/// by adding a leading dimension of size two to the memref. Replaces all uses
/// of the old memref by the new one while indexing the newly added dimension by
/// the loop IV of the specified 'for' instruction modulo 2. Returns false if
/// such a replacement cannot be performed.
/// Doubles the buffer of the supplied memref on the specified 'affine.for'
/// instruction by adding a leading dimension of size two to the memref.
/// Replaces all uses of the old memref by the new one while indexing the newly
/// added dimension by the loop IV of the specified 'affine.for' instruction
/// modulo 2. Returns false if such a replacement cannot be performed.
static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
auto *forBody = forOp->getBody();
FuncBuilder bInner(forBody, forBody->begin());
@ -108,7 +108,7 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
// Create and place the alloc right before the 'for' instruction.
// Create and place the alloc right before the 'affine.for' instruction.
// TODO(mlir-team): we are assuming scoped allocation here, and aren't
// inserting a dealloc -- this isn't the right thing.
Value *newMemRef =
@ -137,9 +137,9 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
/// Returns success if the IR is in a valid state.
PassResult PipelineDataTransfer::runOnFunction(Function *f) {
// Do a post order walk so that inner loop DMAs are processed first. This is
// necessary since 'for' instructions nested within would otherwise become
// invalid (erased) when the outer loop is pipelined (the pipelined one gets
// deleted and replaced by a prologue, a new steady-state loop and an
// necessary since 'affine.for' instructions nested within would otherwise
// become invalid (erased) when the outer loop is pipelined (the pipelined one
// gets deleted and replaced by a prologue, a new steady-state loop and an
// epilogue).
@ -138,8 +138,8 @@ void mlir::promoteSingleIterationLoops(Function *f) {
[](OpPointer<AffineForOp> forOp) { promoteIfSingleIteration(forOp); });
/// Generates a 'for' inst with the specified lower and upper bounds while
/// generating the right IV remappings for the shifted instructions. The
/// Generates an 'affine.for' inst with the specified lower and upper bounds
/// while generating the right IV remappings for the shifted instructions. The
/// instruction blocks that go into the loop are specified in instGroupQueue
/// starting from the specified offset, and in that order; the first element of
/// the pair specifies the shift applied to that group of instructions; note
@ -194,10 +194,10 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
return loopChunk;
/// Skew the instructions in the body of a 'for' instruction with the specified
/// instruction-wise shifts. The shifts are with respect to the original
/// execution order, and are multiplied by the loop 'step' before being applied.
/// A shift of zero for each instruction will lead to no change.
/// Skew the instructions in the body of an 'affine.for' instruction with the
/// specified instruction-wise shifts. The shifts are with respect to the
/// original execution order, and are multiplied by the loop 'step' before being
/// applied. A shift of zero for each instruction will lead to no change.
// The skewing of instructions with respect to one another can be used for
// example to allow overlap of asynchronous operations (such as DMA
// communication) with computation, or just relative shifting of instructions
@ -246,7 +246,7 @@ UtilResult mlir::instBodySkew(OpPointer<AffineForOp> forOp,
// An array of instruction groups sorted by shift amount; each group has all
// instructions with the same shift in the order in which they appear in the
// body of the 'for' inst.
// body of the 'affine.for' inst.
std::vector<std::vector<Instruction *>> sortedInstGroups(maxShift + 1);
unsigned pos = 0;
for (auto &inst : *forOp->getBody()) {
@ -194,14 +194,14 @@ bool mlir::replaceAllMemRefUsesWith(const Value *oldMemRef, Value *newMemRef,
/// Before
/// for %i = 0 to #map(%N)
/// affine.for %i = 0 to #map(%N)
/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
/// "send"(%idx, %A, ...)
/// "compute"(%idx)
/// After
/// for %i = 0 to #map(%N)
/// affine.for %i = 0 to #map(%N)
/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
/// "send"(%idx, %A, ...)
/// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
@ -113,7 +113,7 @@ using namespace mlir;
/// At a high level, a vectorized load in a loop will resemble:
/// ```mlir
/// for %i = ? to ? step ? {
/// affine.for %i = ? to ? step ? {
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
/// vector<128xf32>
/// }
@ -309,7 +309,7 @@ using namespace mlir;
/// ```mlir
/// mlfunc @fill(%A : memref<128xf32>) -> () {
/// %f1 = constant 1.0 : f32
/// for %i0 = 0 to 32 {
/// affine.for %i0 = 0 to 32 {
/// store %f1, %A[%i0] : memref<128xf32, 0>
/// }
/// return
@ -322,7 +322,7 @@ using namespace mlir;
/// is still subject to exploratory tradeoffs. In particular, say we want to
/// vectorize by a factor 128, we want to transform the following input:
/// ```mlir
/// for %i = %M to %N {
/// affine.for %i = %M to %N {
/// %a = load A[%i] : memref<?xf32>
/// }
/// ```
@ -331,8 +331,8 @@ using namespace mlir;
/// memory promotion etc) say after stripmining (and potentially unrolling in
/// the case of LLVM's SLP vectorizer):
/// ```mlir
/// for %i = floor(%M, 128) to ceil(%N, 128) {
/// for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
/// affine.for %i = floor(%M, 128) to ceil(%N, 128) {
/// affine.for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
/// %a = load A[%ii] : memref<?xf32>
/// }
/// }
@ -341,7 +341,7 @@ using namespace mlir;
/// Instead, we seek to vectorize early and freeze vector types before
/// scheduling, so we want to generate a pattern that resembles:
/// ```mlir
/// for %i = ? to ? step ? {
/// affine.for %i = ? to ? step ? {
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
/// vector<128xf32>
/// }
@ -362,7 +362,7 @@ using namespace mlir;
/// For the simple strawman example above, vectorizing for a 1-D vector
/// abstraction of size 128 returns code similar to:
/// ```mlir
/// for %i = %M to %N step 128 {
/// affine.for %i = %M to %N step 128 {
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
/// vector<128xf32>
/// }
@ -391,20 +391,20 @@ using namespace mlir;
/// %C = alloc (%M, %N) : memref<?x?xf32, 0>
/// %f1 = constant 1.0 : f32
/// %f2 = constant 2.0 : f32
/// for %i0 = 0 to %M {
/// for %i1 = 0 to %N {
/// affine.for %i0 = 0 to %M {
/// affine.for %i1 = 0 to %N {
/// // non-scoped %f1
/// store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
/// }
/// }
/// for %i2 = 0 to %M {
/// for %i3 = 0 to %N {
/// affine.for %i2 = 0 to %M {
/// affine.for %i3 = 0 to %N {
/// // non-scoped %f2
/// store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
/// }
/// }
/// for %i4 = 0 to %M {
/// for %i5 = 0 to %N {
/// affine.for %i4 = 0 to %M {
/// affine.for %i5 = 0 to %N {
/// %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
/// %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
/// %s5 = addf %a5, %b5 : f32
@ -438,24 +438,24 @@ using namespace mlir;
/// %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
/// %cst = constant 1.0 : f32
/// %cst_0 = constant 2.0 : f32
/// for %i0 = 0 to %arg0 {
/// for %i1 = 0 to %arg1 step 256 {
/// affine.for %i0 = 0 to %arg0 {
/// affine.for %i1 = 0 to %arg1 step 256 {
/// %cst_1 = constant splat<vector<256xf32>, 1.0> :
/// vector<256xf32>
/// "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
/// (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
/// for %i2 = 0 to %arg0 {
/// for %i3 = 0 to %arg1 step 256 {
/// affine.for %i2 = 0 to %arg0 {
/// affine.for %i3 = 0 to %arg1 step 256 {
/// %cst_2 = constant splat<vector<256xf32>, 2.0> :
/// vector<256xf32>
/// "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
/// (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
/// for %i4 = 0 to %arg0 {
/// for %i5 = 0 to %arg1 step 256 {
/// affine.for %i4 = 0 to %arg0 {
/// affine.for %i5 = 0 to %arg1 step 256 {
/// %3 = "vector_transfer_read"(%0, %i4, %i5) :
/// (memref<?x?xf32>, index, index) -> vector<256xf32>
/// %4 = "vector_transfer_read"(%1, %i4, %i5) :
@ -494,24 +494,24 @@ using namespace mlir;
/// %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
/// %cst = constant 1.0 : f32
/// %cst_0 = constant 2.0 : f32
/// for %i0 = 0 to %arg0 step 32 {
/// for %i1 = 0 to %arg1 step 256 {
/// affine.for %i0 = 0 to %arg0 step 32 {
/// affine.for %i1 = 0 to %arg1 step 256 {
/// %cst_1 = constant splat<vector<32x256xf32>, 1.0> :
/// vector<32x256xf32>
/// "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
/// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
/// for %i2 = 0 to %arg0 step 32 {
/// for %i3 = 0 to %arg1 step 256 {
/// affine.for %i2 = 0 to %arg0 step 32 {
/// affine.for %i3 = 0 to %arg1 step 256 {
/// %cst_2 = constant splat<vector<32x256xf32>, 2.0> :
/// vector<32x256xf32>
/// "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
/// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
/// for %i4 = 0 to %arg0 step 32 {
/// for %i5 = 0 to %arg1 step 256 {
/// affine.for %i4 = 0 to %arg0 step 32 {
/// affine.for %i5 = 0 to %arg1 step 256 {
/// %3 = "vector_transfer_read"(%0, %i4, %i5) :
/// (memref<?x?xf32>, index, index) -> vector<32x256xf32>
/// %4 = "vector_transfer_read"(%1, %i4, %i5) :
@ -32,7 +32,7 @@
func @compose_affine_maps_1dto2d_no_symbols() {
%0 = alloc() : memref<4x4xf32>
for %i0 = 0 to 15 {
affine.for %i0 = 0 to 15 {
// Test load[%x, %x]
%x0 = affine.apply (d0) -> (d0 - 1) (%i0)
@ -78,7 +78,7 @@ func @compose_affine_maps_1dto2d_no_symbols() {
func @compose_affine_maps_1dto2d_with_symbols() {
%0 = alloc() : memref<4x4xf32>
for %i0 = 0 to 15 {
affine.for %i0 = 0 to 15 {
// Test load[%x0, %x0] with symbol %c4
%c4 = constant 4 : index
%x0 = affine.apply (d0)[s0] -> (d0 - s0) (%i0)[%c4]
@ -119,13 +119,13 @@ func @compose_affine_maps_2d_tile() {
%c4 = constant 4 : index
%c8 = constant 8 : index
for %i0 = 0 to 3 {
affine.for %i0 = 0 to 3 {
%x0 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i0)[%c4]
for %i1 = 0 to 3 {
affine.for %i1 = 0 to 3 {
%x1 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i1)[%c8]
for %i2 = 0 to 3 {
affine.for %i2 = 0 to 3 {
%x2 = affine.apply (d0)[s0] -> (d0 mod s0) (%i2)[%c4]
for %i3 = 0 to 3 {
affine.for %i3 = 0 to 3 {
%x3 = affine.apply (d0)[s0] -> (d0 mod s0) (%i3)[%c8]
%x40 = affine.apply (d0, d1, d2, d3)[s0, s1] ->
@ -151,9 +151,9 @@ func @compose_affine_maps_dependent_loads() {
%0 = alloc() : memref<16x32xf32>
%1 = alloc() : memref<16x32xf32>
for %i0 = 0 to 3 {
for %i1 = 0 to 3 {
for %i2 = 0 to 3 {
affine.for %i0 = 0 to 3 {
affine.for %i1 = 0 to 3 {
affine.for %i2 = 0 to 3 {
%c3 = constant 3 : index
%c7 = constant 7 : index
@ -197,7 +197,7 @@ func @compose_affine_maps_dependent_loads() {
func @compose_affine_maps_diamond_dependency() {
%0 = alloc() : memref<4x4xf32>
for %i0 = 0 to 15 {
affine.for %i0 = 0 to 15 {
%a = affine.apply (d0) -> (d0 - 1) (%i0)
%b = affine.apply (d0) -> (d0 + 7) (%a)
%c = affine.apply (d0) -> (d0 * 4) (%a)
@ -217,8 +217,8 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
%c9 = constant 9 : index
%1 = alloc() : memref<100x100xf32, 1>
%2 = alloc() : memref<1xi32>
for %i0 = 0 to 100 {
for %i1 = 0 to 100 {
affine.for %i0 = 0 to 100 {
affine.for %i1 = 0 to 100 {
%3 = affine.apply (d0, d1)[s0, s1] -> (d1 + s0 + s1)
(%i0, %i1)[%arg1, %c9]
%4 = affine.apply (d0, d1, d3) -> (d3 - (d0 + d1))
@ -238,7 +238,7 @@ func @trivial_maps() {
%0 = alloc() : memref<10xf32>
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%1 = affine.apply ()[s0] -> (s0)()[%c0]
store %cst, %0[%1] : memref<10xf32>
%2 = load %0[%c0] : memref<10xf32>
@ -277,20 +277,20 @@ func @constant_fold_bounds(%N : index) {
%c3 = affine.apply (d0, d1) -> (d0 + d1) (%c1, %c2)
%l = "foo"() : () -> index
// CHECK: for %i0 = 5 to 7 {
for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
// CHECK: affine.for %i0 = 5 to 7 {
affine.for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
"foo"(%i, %c3) : (index, index) -> ()
// Bound takes a non-constant argument but can still be folded.
// CHECK: for %i1 = 1 to 7 {
for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
// CHECK: affine.for %i1 = 1 to 7 {
affine.for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
"foo"(%j, %c3) : (index, index) -> ()
// None of the bounds can be folded.
// CHECK: for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
// CHECK: affine.for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
affine.for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
"foo"(%k, %c3) : (index, index) -> ()
@ -204,35 +204,35 @@ func @illegaltype(i0) // expected-error {{invalid integer width}}
// -----
func @malformed_for_percent() {
for i = 1 to 10 { // expected-error {{expected SSA operand}}
affine.for i = 1 to 10 { // expected-error {{expected SSA operand}}
// -----
func @malformed_for_equal() {
for %i 1 to 10 { // expected-error {{expected '='}}
affine.for %i 1 to 10 { // expected-error {{expected '='}}
// -----
func @malformed_for_to() {
for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
affine.for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
// -----
func @incomplete_for() {
for %i = 1 to 10 step 2
affine.for %i = 1 to 10 step 2
} // expected-error {{expected '{' to begin block list}}
// -----
func @nonconstant_step(%1 : i32) {
for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}
affine.for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}
// -----
func @for_negative_stride() {
for %i = 1 to 10 step -1
affine.for %i = 1 to 10 step -1
} // expected-error@-1 {{expected step to be representable as a positive signed integer}}
// -----
@ -244,7 +244,7 @@ func @non_instruction() {
// -----
func @invalid_if_conditional2() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
if (i)[N] : (i >= ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
@ -252,7 +252,7 @@ func @invalid_if_conditional2() {
// -----
func @invalid_if_conditional3() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
if (i)[N] : (i == 1) // expected-error {{expected '0' after '=='}}
@ -260,7 +260,7 @@ func @invalid_if_conditional3() {
// -----
func @invalid_if_conditional4() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
if (i)[N] : (i >= 2) // expected-error {{expected '0' after '>='}}
@ -268,7 +268,7 @@ func @invalid_if_conditional4() {
// -----
func @invalid_if_conditional5() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
if (i)[N] : (i <= 0 ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
@ -276,7 +276,7 @@ func @invalid_if_conditional5() {
// -----
func @invalid_if_conditional6() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
if (i) : (i) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
@ -284,7 +284,7 @@ func @invalid_if_conditional6() {
// -----
// TODO: support `if (1)`?
func @invalid_if_conditional7() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
if (i) : (1) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
@ -438,8 +438,8 @@ func @undef() {
// -----
func @duplicate_induction_var() {
for %i = 1 to 10 { // expected-error {{previously defined here}}
for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
affine.for %i = 1 to 10 { // expected-error {{previously defined here}}
affine.for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
@ -448,7 +448,7 @@ func @duplicate_induction_var() {
// -----
func @dominance_failure() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
"xxx"(%i) : (index)->() // expected-error {{operand #0 does not dominate this use}}
@ -475,7 +475,7 @@ func @return_type_mismatch() -> i32 {
// -----
func @return_inside_loop() -> i8 {
for %i = 1 to 100 {
affine.for %i = 1 to 100 {
%a = "foo"() : ()->i8
return %a : i8
// expected-error@-1 {{'return' op may only be at the top level of a function}}
@ -521,7 +521,7 @@ func @referer() {
#map1 = (i)[j] -> (i+j)
func @bound_symbol_mismatch(%N : index) {
for %i = #map1(%N) to 100 {
affine.for %i = #map1(%N) to 100 {
// expected-error@-1 {{symbol operand count and integer set symbol count must match}}
@ -532,7 +532,7 @@ func @bound_symbol_mismatch(%N : index) {
#map1 = (i)[j] -> (i+j)
func @bound_dim_mismatch(%N : index) {
for %i = #map1(%N, %N)[%N] to 100 {
affine.for %i = #map1(%N, %N)[%N] to 100 {
// expected-error@-1 {{dim operand count and integer set dim count must match}}
@ -541,7 +541,7 @@ func @bound_dim_mismatch(%N : index) {
// -----
func @large_bound() {
for %i = 1 to 9223372036854775810 {
affine.for %i = 1 to 9223372036854775810 {
// expected-error@-1 {{integer constant out of range for attribute}}
@ -550,7 +550,7 @@ func @large_bound() {
// -----
func @max_in_upper_bound(%N : index) {
for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
affine.for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
@ -558,7 +558,7 @@ func @max_in_upper_bound(%N : index) {
// -----
func @step_typo() {
for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
affine.for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
@ -566,7 +566,7 @@ func @step_typo() {
// -----
func @invalid_bound_map(%N : i32) {
for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
affine.for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
@ -579,7 +579,7 @@ func @invalid_bound_map(%N : i32) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)
func @invalid_if_operands1(%N : index) {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
if #set0(%i) {
// expected-error@-1 {{symbol operand count and integer set symbol count must match}}
@ -587,7 +587,7 @@ func @invalid_if_operands1(%N : index) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)
func @invalid_if_operands2(%N : index) {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
if #set0()[%N] {
// expected-error@-1 {{dim operand count and integer set dim count must match}}
@ -595,7 +595,7 @@ func @invalid_if_operands2(%N : index) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)
func @invalid_if_operands3(%N : index) {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
if #set0(%i)[%i] {
// expected-error@-1 {{operand cannot be used as a symbol}}
@ -736,11 +736,11 @@ func @f(f32) {
// -----
func @f(%m : memref<?x?xf32>) {
for %i0 = 0 to 42 {
affine.for %i0 = 0 to 42 {
// expected-error@+1 {{operand #2 does not dominate this use}}
%x = load %m[%i0, %i1] : memref<?x?xf32>
for %i1 = 0 to 42 {
affine.for %i1 = 0 to 42 {
@ -790,7 +790,7 @@ func @type_alias_unknown(!unknown_alias) -> () { // expected-error {{undefined t
// Check ill-formed opaque tensor.
func @complex_loops() {
for %i1 = 1 to 100 {
affine.for %i1 = 1 to 100 {
// expected-error @+1 {{expected '"' in string literal}}
"opaqueIntTensor"(){bar: opaque<tensor<2x1x4xi32>, "0x686]>} : () -> ()
@ -824,7 +824,7 @@ func @invalid_affine_structure() {
func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
// expected-error @+1 {{lower loop bound affine map with multiple results requires 'max' prefix}}
for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
affine.for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
@ -833,7 +833,7 @@ func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
func @missing_for_min(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
// expected-error @+1 {{upper loop bound affine map with multiple results requires 'min' prefix}}
for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
affine.for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
@ -13,7 +13,7 @@ func @inline_notation() -> i32 loc("":10:8) {
%2 = constant 4 : index loc(callsite("foo" at "":10:8))
// CHECK: } loc(fused["foo", "":10:8])
for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
} loc(fused["foo", "":10:8])
// CHECK: } loc(fused<"myPass">["foo", "foo2"])
@ -208,8 +208,8 @@ func @identity_functor(%a : () -> ()) -> (() -> ()) {
func @func_ops_in_loop() {
// CHECK: %0 = "foo"() : () -> i64
%a = "foo"() : ()->i64
// CHECK: for %i0 = 1 to 10 {
for %i = 1 to 10 {
// CHECK: affine.for %i0 = 1 to 10 {
affine.for %i = 1 to 10 {
// CHECK: %1 = "doo"() : () -> f32
%b = "doo"() : ()->f32
// CHECK: "bar"(%0, %1) : (i64, f32) -> ()
@ -224,10 +224,10 @@ func @func_ops_in_loop() {
// CHECK-LABEL: func @loops() {
func @loops() {
// CHECK: for %i0 = 1 to 100 step 2 {
for %i = 1 to 100 step 2 {
// CHECK: for %i1 = 1 to 200 {
for %j = 1 to 200 {
// CHECK: affine.for %i0 = 1 to 100 step 2 {
affine.for %i = 1 to 100 step 2 {
// CHECK: affine.for %i1 = 1 to 200 {
affine.for %j = 1 to 200 {
} // CHECK: }
} // CHECK: }
return // CHECK: return
@ -235,14 +235,14 @@ func @loops() {
// CHECK-LABEL: func @complex_loops() {
func @complex_loops() {
for %i1 = 1 to 100 { // CHECK: for %i0 = 1 to 100 {
for %j1 = 1 to 100 { // CHECK: for %i1 = 1 to 100 {
affine.for %i1 = 1 to 100 { // CHECK: affine.for %i0 = 1 to 100 {
affine.for %j1 = 1 to 100 { // CHECK: affine.for %i1 = 1 to 100 {
// CHECK: "foo"(%i0, %i1) : (index, index) -> ()
"foo"(%i1, %j1) : (index,index) -> ()
} // CHECK: }
"boo"() : () -> () // CHECK: "boo"() : () -> ()
for %j2 = 1 to 10 { // CHECK: for %i2 = 1 to 10 {
for %k2 = 1 to 10 { // CHECK: for %i3 = 1 to 10 {
affine.for %j2 = 1 to 10 { // CHECK: affine.for %i2 = 1 to 10 {
affine.for %k2 = 1 to 10 { // CHECK: affine.for %i3 = 1 to 10 {
"goo"() : () -> () // CHECK: "goo"() : () -> ()
} // CHECK: }
} // CHECK: }
@ -253,8 +253,8 @@ func @complex_loops() {
// CHECK: func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
%c = constant 0 : i32 // CHECK: %c0_i32 = constant 0 : i32
for %i0 = 1 to %arg0 { // CHECK: for %i0 = 1 to %arg0 {
for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
affine.for %i0 = 1 to %arg0 { // CHECK: affine.for %i0 = 1 to %arg0 {
affine.for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
store %c, %arg1[%i0, %i1] : memref<?x?xi32> // CHECK: store %c0_i32, %arg1[%i0, %i1]
} // CHECK: }
} // CHECK: }
@ -263,8 +263,8 @@ func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
// CHECK: func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
// CHECK: for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
// CHECK: affine.for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
affine.for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
// CHECK: "foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
"foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
} // CHECK: }
@ -275,24 +275,24 @@ func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
func @loop_bounds(%N : index) {
// CHECK: %0 = "foo"(%arg0) : (index) -> index
%s = "foo"(%N) : (index) -> index
// CHECK: for %i0 = %0 to %arg0
for %i = %s to %N {
// CHECK: for %i1 = #map{{[0-9]+}}(%i0) to 0
for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
// CHECK: affine.for %i0 = %0 to %arg0
affine.for %i = %s to %N {
// CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to 0
affine.for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
// CHECK: %1 = affine.apply #map{{.*}}(%i0, %i1)[%0]
%w1 = affine.apply(d0, d1)[s0] -> (d0+d1) (%i, %j) [%s]
// CHECK: %2 = affine.apply #map{{.*}}(%i0, %i1)[%0]
%w2 = affine.apply(d0, d1)[s0] -> (s0+1) (%i, %j) [%s]
// CHECK: for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
// CHECK: affine.for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
affine.for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
// CHECK: "foo"(%i0, %i1, %i2) : (index, index, index) -> ()
"foo"(%i, %j, %k) : (index, index, index)->()
// CHECK: %c30 = constant 30 : index
%c = constant 30 : index
// CHECK: %3 = affine.apply #map{{.*}}(%arg0, %c30)
%u = affine.apply (d0, d1)->(d0+d1) (%N, %c)
// CHECK: for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
// CHECK: affine.for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
affine.for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
// CHECK: "bar"(%i3) : (index) -> ()
"bar"(%l) : (index) -> ()
} // CHECK: }
@ -305,7 +305,7 @@ func @loop_bounds(%N : index) {
// CHECK-LABEL: func @ifinst(%arg0: index) {
func @ifinst(%N: index) {
%c = constant 200 : index // CHECK %c200 = constant 200
for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
if #set0(%i)[%N, %c] { // CHECK if #set0(%i0)[%arg0, %c200] {
%x = constant 1 : i32
// CHECK: %c1_i32 = constant 1 : i32
@ -328,7 +328,7 @@ func @ifinst(%N: index) {
// CHECK-LABEL: func @simple_ifinst(%arg0: index) {
func @simple_ifinst(%N: index) {
%c = constant 200 : index // CHECK %c200 = constant 200
for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
if #set0(%i)[%N, %c] { // CHECK if #set0(%i0)[%arg0, %c200] {
%x = constant 1 : i32
// CHECK: %c1_i32 = constant 1 : i32
@ -544,18 +544,18 @@ func @funcattrwithblock() -> ()
#map_non_simple2 = ()[s0, s1] -> (s0 + s1)
#map_non_simple3 = ()[s0] -> (s0 + 3)
func @funcsimplemap(%arg0: index, %arg1: index) -> () {
for %i0 = 0 to #map_simple0()[] {
// CHECK: for %i0 = 0 to 10 {
for %i1 = 0 to #map_simple1()[%arg1] {
// CHECK: for %i1 = 0 to %arg1 {
for %i2 = 0 to #map_non_simple0(%i0)[] {
// CHECK: for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
// CHECK: for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
// CHECK: for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
for %i5 = 0 to #map_non_simple3()[%arg0] {
// CHECK: for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
affine.for %i0 = 0 to #map_simple0()[] {
// CHECK: affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to #map_simple1()[%arg1] {
// CHECK: affine.for %i1 = 0 to %arg1 {
affine.for %i2 = 0 to #map_non_simple0(%i0)[] {
// CHECK: affine.for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
affine.for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
// CHECK: affine.for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
affine.for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
// CHECK: affine.for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
affine.for %i5 = 0 to #map_non_simple3()[%arg0] {
// CHECK: affine.for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
%c42_i32 = constant 42 : i32
@ -749,9 +749,9 @@ func @sparsevectorattr() -> () {
// CHECK-LABEL: func @loops_with_blockids() {
func @loops_with_blockids() {
for %i = 1 to 100 step 2 {
affine.for %i = 1 to 100 step 2 {
for %j = 1 to 200 {
affine.for %j = 1 to 200 {
@ -18,7 +18,7 @@ func @inline_notation() -> i32 loc("":10:8) {
%3 = constant 4 : index loc(callsite("foo" at callsite("":10:8 at callsite("":13:8 at "":100:10))))
// CHECK: } ["foo",]
for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
} loc(fused["foo", "":10:8])
// CHECK: } <"myPass">["foo", "foo2"]
@ -6,8 +6,8 @@
// CHECK-LABEL: func @materialize_read_1d() {
func @materialize_read_1d() {
%A = alloc () : memref<7x42xf32>
for %i0 = 0 to 7 step 4 {
for %i1 = 0 to 42 step 4 {
affine.for %i0 = 0 to 7 step 4 {
affine.for %i1 = 0 to 42 step 4 {
%f1 = vector_transfer_read %A, %i0, %i1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
%ip1 = affine.apply (d0) -> (d0 + 1) (%i1)
%f2 = vector_transfer_read %A, %i0, %ip1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
@ -29,11 +29,11 @@ func @materialize_read_1d() {
// CHECK-LABEL: func @materialize_read_1d_partially_specialized
func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) {
%A = alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32>
for %i0 = 0 to 7 {
for %i1 = 0 to %dyn1 {
for %i2 = 0 to %dyn2 {
for %i3 = 0 to 42 step 2 {
for %i4 = 0 to %dyn4 {
affine.for %i0 = 0 to 7 {
affine.for %i1 = 0 to %dyn1 {
affine.for %i2 = 0 to %dyn2 {
affine.for %i3 = 0 to 42 step 2 {
affine.for %i4 = 0 to %dyn4 {
%f1 = vector_transfer_read %A, %i0, %i1, %i2, %i3, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
%i3p1 = affine.apply (d0) -> (d0 + 1) (%i3)
%f2 = vector_transfer_read %A, %i0, %i1, %i2, %i3p1, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
@ -54,10 +54,10 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d
// CHECK-LABEL: func @materialize_read(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
// CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
// CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 {
// CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
// CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 {
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
// CHECK: {{.*}} = dim %0, 0 : memref<?x?x?x?xf32>
@ -66,9 +66,9 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: {{.*}} = dim %0, 3 : memref<?x?x?x?xf32>
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast %[[ALLOC]] : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
// CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
// CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
// CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
// CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
// CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
// CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]]
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]]
@ -109,10 +109,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: return
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
for %i0 = 0 to %M step 3 {
for %i1 = 0 to %N {
for %i2 = 0 to %O {
for %i3 = 0 to %P step 5 {
affine.for %i0 = 0 to %M step 3 {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %O {
affine.for %i3 = 0 to %P step 5 {
%f = vector_transfer_read %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, 0, d0)} : (memref<?x?x?x?xf32, 0>, index, index, index, index) -> vector<5x4x3xf32>
@ -125,10 +125,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
// CHECK-NEXT: %cst = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
// CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
// CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 step 4 {
// CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
// CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 step 4 {
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
// CHECK: {{.*}} = dim %0, 0 : memref<?x?x?x?xf32>
@ -138,9 +138,9 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast {{.*}} : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
// CHECK-NEXT: store %cst, {{.*}}[%[[C0]]] : memref<1xvector<5x4x3xf32>>
// CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
// CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
// CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
// CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
// CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
// CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
// CHECK-NEXT: {{.*}} = load {{.*}}[%[[I6]], %[[I5]], %[[I4]]] : memref<5x4x3xf32>
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
@ -184,10 +184,10 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
%f1 = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
for %i0 = 0 to %M step 3 {
for %i1 = 0 to %N step 4 {
for %i2 = 0 to %O {
for %i3 = 0 to %P step 5 {
affine.for %i0 = 0 to %M step 3 {
affine.for %i1 = 0 to %N step 4 {
affine.for %i2 = 0 to %O {
affine.for %i3 = 0 to %P step 5 {
vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : vector<5x4x3xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index
@ -10,10 +10,10 @@
func @materialize(%M : index, %N : index, %O : index, %P : index) {
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
%f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
// CHECK: for %i0 = 0 to %arg0 step 4 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 4 {
// CHECK-NEXT: for %i2 = 0 to %arg2 {
// CHECK-NEXT: for %i3 = 0 to %arg3 step 4 {
// CHECK: affine.for %i0 = 0 to %arg0 step 4 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 4 {
// CHECK-NEXT: affine.for %i2 = 0 to %arg2 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg3 step 4 {
// CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[ID1]](%i0)
// CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[ID1]](%i1)
// CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[ID1]](%i2)
@ -25,10 +25,10 @@ func @materialize(%M : index, %N : index, %O : index, %P : index) {
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
// CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0P3]](%i1)
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
for %i0 = 0 to %M step 4 {
for %i1 = 0 to %N step 4 {
for %i2 = 0 to %O {
for %i3 = 0 to %P step 4 {
affine.for %i0 = 0 to %M step 4 {
affine.for %i1 = 0 to %N step 4 {
affine.for %i2 = 0 to %O {
affine.for %i3 = 0 to %P step 4 {
"vector_transfer_write"(%f1, %A, %i0, %i1, %i2, %i3) {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : (vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index) -> ()
@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
// 4x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
// CHECK: affine.for %i0 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
@ -34,15 +34,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
// 4x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 {
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
// CHECK: affine.for %i2 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
@ -60,15 +60,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
// 4x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 {
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
// CHECK: affine.for %i4 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
@ -110,8 +110,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: vector_transfer_write
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
%s5 = addf %a5, %b5 : f32
@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
// (3x2)x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 16 {
// CHECK: affine.for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 16 {
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
@ -41,26 +41,26 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL50:%.*]] = affine.apply [[D0P2]](%i0)
// CHECK-NEXT: [[VAL51:%.*]] = affine.apply [[D0P8]](%i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
// (3x2)x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i3 = 0 to %arg1 step 16 {
// CHECK: affine.for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 16 {
// .....
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// non-scoped %f2
// CHECK does (3x4)x unrolling.
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
// (3x2)x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i5 = 0 to %arg1 step 16 {
// CHECK: affine.for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 16 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
@ -122,8 +122,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: vector_transfer_write
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
%s5 = addf %a5, %b5 : f32
@ -13,8 +13,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
// 2x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
// CHECK: affine.for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i0)
@ -24,15 +24,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
// 2x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
// CHECK: affine.for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i2)
@ -42,15 +42,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i3)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
// 2x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
// CHECK: affine.for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
@ -72,8 +72,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: vector_transfer_write
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
%s5 = addf %a5, %b5 : f32
@ -9,19 +9,19 @@
// CHECK-LABEL: func @simple()
func @simple() {
for %i0 = 0 to 7 {
affine.for %i0 = 0 to 7 {
%0 = affine.apply (d0) -> (d0) (%i0)
%1 = affine.apply (d0) -> (d0) (%0)
%2 = affine.apply (d0, d1) -> (d0 + d1) (%0, %0)
%3 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
// CHECK-NEXT: for %i0 = 0 to 7
// CHECK-NEXT: affine.for %i0 = 0 to 7
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i0)
// CHECK-NEXT: {{.*}} affine.apply #[[D0TIMES2]](%i0)
// CHECK-NEXT: {{.*}} affine.apply #[[ZERO]]()
for %i1 = 0 to 7 {
for %i2 = 0 to 42 {
affine.for %i1 = 0 to 7 {
affine.for %i2 = 0 to 42 {
%20 = affine.apply (d0, d1) -> (d1) (%i1, %i2)
%21 = affine.apply (d0, d1) -> (d0) (%i1, %i2)
%22 = affine.apply (d0, d1) -> (d0 + d1) (%20, %21)
@ -29,15 +29,15 @@ func @simple() {
%24 = affine.apply (d0, d1) -> (-d0 + d1) (%20, %21)
// CHECK: for %i1 = 0 to 7
// CHECK-NEXT: for %i2 = 0 to 42
// CHECK: affine.for %i1 = 0 to 7
// CHECK-NEXT: affine.for %i2 = 0 to 42
// CHECK-NEXT: {{.*}} affine.apply #[[D0PLUSD1]](%i1, %i2)
// CHECK-NEXT: {{.*}} affine.apply #[[MINSD0PLUSD1]](%i1, %i2)
// CHECK-NEXT: {{.*}} affine.apply #[[D0MINUSD1]](%i1, %i2)
for %i3 = 0 to 16 {
for %i4 = 0 to 47 step 2 {
for %i5 = 0 to 78 step 16 {
affine.for %i3 = 0 to 16 {
affine.for %i4 = 0 to 47 step 2 {
affine.for %i5 = 0 to 78 step 16 {
%50 = affine.apply (d0) -> (d0) (%i3)
%51 = affine.apply (d0) -> (d0) (%i4)
%52 = affine.apply (d0) -> (d0) (%i5)
@ -47,9 +47,9 @@ func @simple() {
// CHECK: for %i3 = 0 to 16
// CHECK-NEXT: for %i4 = 0 to 47 step 2
// CHECK-NEXT: for %i5 = 0 to 78 step 16
// CHECK: affine.for %i3 = 0 to 16
// CHECK-NEXT: affine.for %i4 = 0 to 47 step 2
// CHECK-NEXT: affine.for %i5 = 0 to 78 step 16
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i3)
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i4)
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i5)
@ -23,17 +23,17 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK: for {{.*}} step 128
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
for %i0 = 0 to %M { // vectorized due to scalar -> vector
affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector
%a0 = load %A[%cst0, %cst0] : memref<?x?xf32>
// CHECK:for {{.*}} [[ARG_M]] {
for %i1 = 0 to %M { // not vectorized
affine.for %i1 = 0 to %M { // not vectorized
%a1 = load %A[%i1, %i1] : memref<?x?xf32>
// CHECK: for %i{{[0-9]*}} = 0 to [[ARG_M]] {
for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
// CHECK: affine.for %i{{[0-9]*}} = 0 to [[ARG_M]] {
affine.for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
%r2 = affine.apply (d0) -> (d0) (%i2)
%a2 = load %A[%r2#0, %cst0] : memref<?x?xf32>
@ -41,7 +41,7 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
// CHECK-NEXT: [[APP3:%[a-zA-Z0-9]+]] = affine.apply {{.*}}[[IV3]]
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[APP3]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i3 = 0 to %M { // vectorized
affine.for %i3 = 0 to %M { // vectorized
%r3 = affine.apply (d0) -> (d0) (%i3)
%a3 = load %A[%cst0, %r3#0] : memref<?x?xf32>
@ -51,8 +51,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK-NEXT: [[APP50:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
// CHECK-NEXT: [[APP51:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP50]], [[APP51]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i4 = 0 to %M { // vectorized
for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
affine.for %i4 = 0 to %M { // vectorized
affine.for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
%r50 = affine.apply (d0, d1) -> (d1) (%i4, %i5)
%r51 = affine.apply (d0, d1) -> (d0) (%i4, %i5)
%a5 = load %A[%r50, %r51] : memref<?x?xf32>
@ -61,8 +61,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK: for [[IV6:%[i0-9]*]] = 0 to [[ARG_M]] {
// CHECK-NEXT: for [[IV7:%[i0-9]*]] = 0 to [[ARG_N]] {
for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
for %i7 = 0 to %N { // not vectorized, can never vectorize
affine.for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
affine.for %i7 = 0 to %N { // not vectorized, can never vectorize
%r70 = affine.apply (d0, d1) -> (d1 + d0) (%i6, %i7)
%r71 = affine.apply (d0, d1) -> (d0) (%i6, %i7)
%a7 = load %A[%r70, %r71] : memref<?x?xf32>
@ -74,8 +74,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK-NEXT: [[APP9_0:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
// CHECK-NEXT: [[APP9_1:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP9_0]], [[APP9_1]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i8 = 0 to %M { // vectorized
for %i9 = 0 to %N {
affine.for %i8 = 0 to %M { // vectorized
affine.for %i9 = 0 to %N {
%r90 = affine.apply (d0, d1) -> (d1) (%i8, %i9)
%r91 = affine.apply (d0, d1) -> (d0 + d1) (%i8, %i9)
%a9 = load %A[%r90, %r91] : memref<?x?xf32>
@ -84,8 +84,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK: for [[IV10:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV11:%[i0-9]*]] = 0 to %{{[0-9]*}} {
for %i10 = 0 to %M { // not vectorized, need per load transposes
for %i11 = 0 to %N { // not vectorized, need per load transposes
affine.for %i10 = 0 to %M { // not vectorized, need per load transposes
affine.for %i11 = 0 to %N { // not vectorized, need per load transposes
%r11_0 = affine.apply (d0, d1) -> (d0) (%i10, %i11)
%r11_1 = affine.apply (d0, d1) -> (d1) (%i10, %i11)
%a11 = load %A[%r11_0, %r11_1] : memref<?x?xf32>
@ -98,9 +98,9 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK: for [[IV12:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV13:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV14:%[i0-9]+]] = 0 to [[ARG_P]] step 128
for %i12 = 0 to %M { // not vectorized, can never vectorize
for %i13 = 0 to %N { // not vectorized, can never vectorize
for %i14 = 0 to %P { // vectorized
affine.for %i12 = 0 to %M { // not vectorized, can never vectorize
affine.for %i13 = 0 to %N { // not vectorized, can never vectorize
affine.for %i14 = 0 to %P { // vectorized
%r14_0 = affine.apply (d0, d1, d2) -> (d1) (%i12, %i13, %i14)
%r14_1 = affine.apply (d0, d1, d2) -> (d0 + d1) (%i12, %i13, %i14)
%r14_2 = affine.apply (d0, d1, d2) -> (d0 + d2) (%i12, %i13, %i14)
@ -109,24 +109,24 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
for %i15 = 0 to %M { // not vectorized due to condition below
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
affine.for %i15 = 0 to %M { // not vectorized due to condition below
if #set0(%i15) {
%a15 = load %A[%cst0, %cst0] : memref<?x?xf32>
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
affine.for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
%a16 = alloc(%M) : memref<?xvector<2xf32>>
%l16 = load %a16[%i16] : memref<?xvector<2xf32>>
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
// CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
// CHECK: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : {{.*}} -> vector<128xf32>
for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
for %i18 = 0 to %M { // vectorized due to scalar -> vector
affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector
%a18 = load %A[%cst0, %cst0] : memref<?x?xf32>
@ -139,24 +139,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%C = alloc (%M, %N) : memref<?x?xf32, 0>
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// CHECK: [[C1:%.*]] = constant splat<vector<128xf32>, 1.000000e+00> : vector<128xf32>
// CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// CHECK: [[C3:%.*]] = constant splat<vector<128xf32>, 2.000000e+00> : vector<128xf32>
// CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
// CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
// CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
// CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<128xf32>
@ -188,10 +188,10 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-LABEL: @vec_rejected
func @vec_rejected(%A : memref<?x?xf32>, %C : memref<?x?xf32>) {
%N = dim %A, 0 : memref<?x?xf32>
for %i = 0 to %N {
affine.for %i = 0 to %N {
// CHECK-NOT: vector
%a = load %A[%i, %i] : memref<?x?xf32> // not vectorized
for %j = 0 to %N {
affine.for %j = 0 to %N {
%b = load %A[%i, %j] : memref<?x?xf32> // may be vectorized
// CHECK-NOT: vector
%c = addf %a, %b : f32 // not vectorized because %a wasn't
@ -11,13 +11,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %1 step 32
// CHECK: for {{.*}} = 0 to %2 step 256
// Example:
// for %i0 = 0 to %0 {
// for %i1 = 0 to %1 step 32 {
// for %i2 = 0 to %2 step 256 {
// affine.for %i0 = 0 to %0 {
// affine.for %i1 = 0 to %1 step 32 {
// affine.for %i2 = 0 to %2 step 256 {
// %3 = "vector_transfer_read"(%arg0, %i0, %i1, %i2) : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
@ -27,9 +27,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=1 --test-fastest-varying=0 no
// vectorization happens because of loop nesting order.
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
@ -43,24 +43,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%C = alloc (%M, %N) : memref<?x?xf32, 0>
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// CHECK: [[C1:%.*]] = constant splat<vector<32x256xf32>, 1.000000e+00> : vector<32x256xf32>
// CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// CHECK: [[C3:%.*]] = constant splat<vector<32x256xf32>, 2.000000e+00> : vector<32x256xf32>
// CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
// CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
// CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
// CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32>
@ -7,17 +7,17 @@ func @vec3d(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 {
// CHECK: for %i1 = 0 to %0 {
// CHECK: for %i2 = 0 to %0 step 32 {
// CHECK: for %i3 = 0 to %1 step 64 {
// CHECK: for %i4 = 0 to %2 step 256 {
// CHECK: affine.for %i0 = 0 to %0 {
// CHECK: affine.for %i1 = 0 to %0 {
// CHECK: affine.for %i2 = 0 to %0 step 32 {
// CHECK: affine.for %i3 = 0 to %1 step 64 {
// CHECK: affine.for %i4 = 0 to %2 step 256 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i3, %i4 {permutation_map: #[[map_proj_d0d1d2_d0d1d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x64x256xf32>
for %t0 = 0 to %0 {
for %t1 = 0 to %0 {
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %t0 = 0 to %0 {
affine.for %t1 = 0 to %0 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
@ -7,13 +7,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
%M = dim %A, 0 : memref<?x?x?xf32>
%N = dim %A, 1 : memref<?x?x?xf32>
%P = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32
// CHECK: for %i1 = 0 to %1 {
// CHECK: for %i2 = 0 to %2 step 256
// CHECK: affine.for %i0 = 0 to %0 step 32
// CHECK: affine.for %i1 = 0 to %1 {
// CHECK: affine.for %i2 = 0 to %2 step 256
// CHECK: {{.*}} = vector_transfer_read %arg0, %i0, %i1, %i2 {permutation_map: #[[map_proj_d0d1d2_d0d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
@ -23,9 +23,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=2 --test-fastest-varying=0 no
// vectorization happens because of loop nesting order.
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=0 --test-fastest-varying=2 no
// vectorization happens because of loop nesting order.
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
// CHECK: for %i3 = 0 to %0 step 32
// CHECK: for %i4 = 0 to %1 step 256
// CHECK: for %i5 = 0 to %2 {
// CHECK: affine.for %i3 = 0 to %0 step 32
// CHECK: affine.for %i4 = 0 to %1 step 256
// CHECK: affine.for %i5 = 0 to %2 {
// CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32 {
// CHECK: for %i1 = 0 to %1 {
// CHECK: for %i2 = 0 to %2 step 256 {
// CHECK: affine.for %i0 = 0 to %0 step 32 {
// CHECK: affine.for %i1 = 0 to %1 {
// CHECK: affine.for %i2 = 0 to %2 step 256 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i3 = 0 to %1 step 256 {
// CHECK: for %i4 = 0 to %2 {
// CHECK: affine.for %i3 = 0 to %1 step 256 {
// CHECK: affine.for %i4 = 0 to %2 {
// CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i5 = 0 to %2 {
// CHECK: affine.for %i5 = 0 to %2 {
// CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
for %i3 = 0 to %1 {
for %i4 = 0 to %2 {
affine.for %i3 = 0 to %1 {
affine.for %i4 = 0 to %2 {
%a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
for %i5 = 0 to %2 {
affine.for %i5 = 0 to %2 {
%a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=0 --test-fastest-varying=1 no
// vectorization happens because of loop nesting order.
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
// CHECK: for %i3 = 0 to %0 step 32
// CHECK: for %i4 = 0 to %1 {
// CHECK: for %i5 = 0 to %2 step 256
// CHECK: affine.for %i3 = 0 to %0 step 32
// CHECK: affine.for %i4 = 0 to %1 {
// CHECK: affine.for %i5 = 0 to %2 step 256
// CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32 {
// CHECK: for %i1 = 0 to %1 step 256 {
// CHECK: for %i2 = 0 to %2 {
// CHECK: affine.for %i0 = 0 to %0 step 32 {
// CHECK: affine.for %i1 = 0 to %1 step 256 {
// CHECK: affine.for %i2 = 0 to %2 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i3 = 0 to %1 {
// CHECK: for %i4 = 0 to %2 step 256 {
// CHECK: affine.for %i3 = 0 to %1 {
// CHECK: affine.for %i4 = 0 to %2 step 256 {
// CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i5 = 0 to %2 step 256 {
// CHECK: affine.for %i5 = 0 to %2 step 256 {
// CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
for %i3 = 0 to %1 {
for %i4 = 0 to %2 {
affine.for %i3 = 0 to %1 {
affine.for %i4 = 0 to %2 {
%a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
for %i5 = 0 to %2 {
affine.for %i5 = 0 to %2 {
%a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
@ -213,10 +213,10 @@ func @dyn_shape_fold(%L : index, %M : index) -> (memref<? x ? x i32>, memref<? x
// CHECK-NEXT: %2 = alloc() : memref<512x1024xi32>
%c = alloc(%K, %N) : memref<? x ? x i32>
// CHECK: for %i0 =
for %i = 0 to %L {
// CHECK-NEXT: for %i1 =
for %j = 0 to 10 {
// CHECK: affine.for %i0 =
affine.for %i = 0 to %L {
// CHECK-NEXT: affine.for %i1 =
affine.for %j = 0 to 10 {
// CHECK-NEXT: %4 = load %0[%i0, %i1] : memref<?x1024xf32>
// CHECK-NEXT: store %4, %1[%c0, %c0, %i0, %i1, %c0] : memref<4x1024x8x512x?xf32>
%v = load %a[%i, %j] : memref<?x?xf32>
@ -242,8 +242,8 @@ func @merge_constants() -> (index, index) {
// CHECK-LABEL: func @hoist_constant
func @hoist_constant(%arg0: memref<8xi32>) {
// CHECK-NEXT: %c42_i32 = constant 42 : i32
// CHECK-NEXT: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK-NEXT: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
// CHECK-NEXT: store %c42_i32, %arg0[%i0]
%c42_i32 = constant 42 : i32
store %c42_i32, %arg0[%i0] : memref<8xi32>
@ -2,8 +2,8 @@
// CHECK-LABEL: @test(%arg0: memref<f32>) {
func @test(%p : memref<f32>) {
for %i0 = 0 to 128 {
for %i1 = 0 to 8 { // CHECK: for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 { // CHECK: affine.for %i1 = 0 to 8 {
%0 = constant 4.5 : f32
%1 = constant 1.5 : f32
@ -123,8 +123,8 @@ func @down_propagate_for_ml() {
// CHECK: %c1_i32 = constant 1 : i32
%0 = constant 1 : i32
// CHECK-NEXT: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: "foo"(%c1_i32, %c1_i32) : (i32, i32) -> ()
%1 = constant 1 : i32
"foo"(%0, %1) : (i32, i32) -> ()
@ -155,8 +155,8 @@ func @down_propagate_cfg() -> i32 {
/// Check that operation definitions are NOT propagated up the dominance tree.
// CHECK-LABEL: @up_propagate_ml
func @up_propagate_ml() -> i32 {
// CHECK: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: %c1_i32 = constant 1 : i32
// CHECK-NEXT: "foo"(%c1_i32) : (i32) -> ()
%0 = constant 1 : i32
@ -32,7 +32,7 @@ func @loop_nest_1d() {
// Second DMA transfer.
// CHECK: dma_start %1[%c256], %5[%c0], %c256_0, %6[%c0] : memref<512xf32>, memref<256xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %6[%c0], %c256_0 : memref<1xi32>
// CHECK: for %i0 = 0 to 256 {
// CHECK: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %7 = load %3[%i0] : memref<256xf32, 1>
// CHECK: %8 = affine.apply [[MAP_PLUS_256]](%i0)
// CHECK: %9 = affine.apply [[MAP_MINUS_256]](%8)
@ -41,7 +41,7 @@ func @loop_nest_1d() {
// CHECK: %11 = load %2[%i0] : memref<256xf32, 1>
// CHECK-NEXT: return
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
load %A[%i] : memref<256 x f32>
%idx = affine.apply (d0) -> (d0 + 256)(%i)
load %B[%idx] : memref<512 x f32>
@ -68,20 +68,20 @@ func @loop_nest_1d() {
// CHECK-DAG: dma_start %arg2[%c0, %c0], [[BUFC]][%c0, %c0], %c16384_0, [[TAGC]][%c0] : memref<512x32xf32>, memref<512x32xf32, 1>, memref<1xi32>
// CHECK-DAG: dma_wait [[TAGC]][%c0], %c16384_0 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 32 {
// CHECK-NEXT: for %i1 = 0 to 32 {
// CHECK-NEXT: for %i2 = 0 to 32 {
// CHECK-NEXT: for %i3 = 0 to 16 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 32 {
// CHECK-NEXT: affine.for %i3 = 0 to 16 {
// CHECK-NEXT: %7 = affine.apply #map{{[0-9]+}}(%i1, %i3)
// CHECK-NEXT: %8 = load [[BUFB]][%7, %i0] : memref<512x32xf32, 1>
// CHECK-NEXT: "foo"(%8) : (f32) -> ()
// CHECK-NEXT: for %i4 = 0 to 16 {
// CHECK-NEXT: affine.for %i4 = 0 to 16 {
// CHECK-NEXT: %9 = affine.apply #map{{[0-9]+}}(%i2, %i4)
// CHECK-NEXT: %10 = load [[BUFA]][%9, %i1] : memref<512x32xf32, 1>
// CHECK-NEXT: "bar"(%10) : (f32) -> ()
// CHECK-NEXT: for %i5 = 0 to 16 {
// CHECK-NEXT: affine.for %i5 = 0 to 16 {
// CHECK-NEXT: %11 = "abc_compute"() : () -> f32
// CHECK-NEXT: %12 = affine.apply #map{{[0-9]+}}(%i2, %i5)
// CHECK-NEXT: %13 = load [[BUFC]][%12, %i0] : memref<512x32xf32, 1>
@ -102,20 +102,20 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// DMAs will be performed at this level (jT is the first loop without a stride).
// A and B are read, while C is both read and written. A total of three new buffers
// are allocated and existing loads/stores are replaced by accesses to those buffers.
for %jT = 0 to 32 {
for %kT = 0 to 32 {
for %iT = 0 to 32 {
for %kk = 0 to 16 { // k intratile
affine.for %jT = 0 to 32 {
affine.for %kT = 0 to 32 {
affine.for %iT = 0 to 32 {
affine.for %kk = 0 to 16 { // k intratile
%k = affine.apply (d0, d1) -> (16*d0 + d1) (%kT, %kk)
%v0 = load %B[%k, %jT] : memref<512 x 32 x f32>
"foo"(%v0) : (f32) -> ()
for %ii = 0 to 16 { // i intratile.
affine.for %ii = 0 to 16 { // i intratile.
%i = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii)
%v1 = load %A[%i, %kT] : memref<512 x 32 x f32>
"bar"(%v1) : (f32) -> ()
for %ii_ = 0 to 16 { // i intratile.
affine.for %ii_ = 0 to 16 { // i intratile.
%v2 = "abc_compute"() : () -> f32
%i_ = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii_)
%v3 = load %C[%i_, %jT] : memref<512 x 32 x f32>
@ -134,13 +134,13 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// CHECK-LABEL: func @loop_nest_modulo() {
// CHECK: %0 = alloc() : memref<256x8xf32>
// CHECK-NEXT: for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
// CHECK-NEXT: %2 = alloc() : memref<1x2xf32, 1>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%1, %c0], %2[%c0, %c0], %c2, %3[%c0] : memref<256x8xf32>, memref<1x2xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %3[%c0], %c2 : memref<1xi32>
// CHECK-NEXT: for %i1 = 0 to 8 {
// CHECK-NEXT: affine.for %i1 = 0 to 8 {
// ...
// ...
// CHECK: }
@ -148,9 +148,9 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// CHECK-NEXT: return
func @loop_nest_modulo() {
%A = alloc() : memref<256 x 8 x f32>
for %i = 0 to 32 step 4 {
affine.for %i = 0 to 32 step 4 {
// DMAs will be performed at this level (%j is the first unit stride loop)
for %j = 0 to 8 {
affine.for %j = 0 to 8 {
%idx = affine.apply (d0) -> (d0 mod 2) (%j)
// A buffer of size 1 x 2 will be allocated (original buffer was 256 x 8).
%v = load %A[%i, %idx] : memref<256 x 8 x f32>
@ -164,17 +164,17 @@ func @loop_nest_modulo() {
// CHECK-LABEL: func @loop_nest_tiled() -> memref<256x1024xf32> {
func @loop_nest_tiled() -> memref<256x1024xf32> {
%0 = alloc() : memref<256x1024xf32>
for %i0 = 0 to 256 step 32 {
for %i1 = 0 to 1024 step 32 {
affine.for %i0 = 0 to 256 step 32 {
affine.for %i1 = 0 to 1024 step 32 {
// CHECK: %3 = alloc() : memref<32x32xf32, 1>
// CHECK-NEXT: %4 = alloc() : memref<1xi32>
// Strided DMA here: 32 x 32 tile in a 256 x 1024 memref.
// CHECK-NEXT: dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024_0, %c32 : memref<256x1024xf32>, memref<32x32xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait
// CHECK-NEXT: for %i2 = #map
// CHECK-NEXT: for %i3 = #map
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: affine.for %i2 = #map
// CHECK-NEXT: affine.for %i3 = #map
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: %5 = affine.apply [[MAP_INDEX_DIFF_EVEN]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %6 = affine.apply [[MAP_INDEX_DIFF_ODD]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %7 = load %3[%5, %6] : memref<32x32xf32, 1>
@ -196,8 +196,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// No strided DMA needed here.
// CHECK: dma_start %arg0[%c1, %c0], %0[%c0, %c0], %c100, %1[%c0] : memref<100x100xf32>, memref<1x100xf32, 1>,
// CHECK-NEXT: dma_wait %1[%c0], %c100 : memref<1xi32>
for %i = 0 to 100 {
for %j = 0 to ()[s0] -> (s0) ()[%N] {
affine.for %i = 0 to 100 {
affine.for %j = 0 to ()[s0] -> (s0) ()[%N] {
// CHECK: %2 = affine.apply [[MAP_D0_MINUS_ONE]](%c1_0, %i1)
// CHECK: %3 = affine.apply [[MAP_D1]](%c1_0, %i1)
// CHECK-NEXT: %4 = load %0[%2, %3] : memref<1x100xf32, 1>
@ -210,8 +210,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// CHECK-LABEL: func @dma_with_symbolic_accesses
func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
%N = constant 9 : index
for %i = 0 to 100 {
for %j = 0 to 100 {
affine.for %i = 0 to 100 {
affine.for %j = 0 to 100 {
%idy = affine.apply (d0, d1) [s0, s1] -> (d1 + s0 + s1)(%i, %j)[%M, %N]
load %A[%i, %idy] : memref<100 x 100 x f32>
@ -221,8 +221,8 @@ func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
// CHECK-NEXT: %2 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %0], %1[%c0, %c0], %c10000, %2[%c0]
// CHECK-NEXT: dma_wait %2[%c0], %c10000
// CHECK-NEXT: for %i0 = 0 to 100 {
// CHECK-NEXT: for %i1 = 0 to 100 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 {
// CHECK-NEXT: affine.for %i1 = 0 to 100 {
// CHECK-NEXT: %3 = affine.apply [[MAP_SYM_SHIFT]](%i0, %i1)[%arg1, %c9]
// CHECK-NEXT: %4 = affine.apply [[MAP_3D_D1]](%arg1, %i0, %3)
// CHECK-NEXT: %5 = affine.apply [[MAP_SUB_OFFSET]](%arg1, %i0, %3)
@ -241,8 +241,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
// CHECK-NEXT: %1 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %c0], %0[%c0, %c0], %c10000, %1[%c0] : memref<100x100xf32>, memref<100x100xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %1[%c0], %c10000 : memref<1xi32>
for %i = 0 to 100 {
for %j = %M to %N {
affine.for %i = 0 to 100 {
affine.for %j = %M to %N {
%idy = affine.apply (d1) [s0] -> (d1 + s0)(%j)[%K]
load %A[%i, %idy] : memref<100 x 100 x f32>
@ -256,8 +256,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
func @dma_unknown_size(%arg0: memref<?x?xf32>) {
%M = dim %arg0, 0 : memref<? x ? x f32>
%N = dim %arg0, 0 : memref<? x ? x f32>
for %i = 0 to %M {
for %j = 0 to %N {
affine.for %i = 0 to %M {
affine.for %j = 0 to %N {
// If this loop nest isn't tiled, the access requires a non-constant DMA
// size -- not yet implemented.
// CHECK: %2 = load %arg0[%i0, %i1] : memref<?x?xf32>
@ -272,9 +272,9 @@ func @dma_unknown_size(%arg0: memref<?x?xf32>) {
// CHECK-LABEL: func @dma_memref_3d
func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
for %i = 0 to 1024 {
for %j = 0 to 1024 {
for %k = 0 to 1024 {
affine.for %i = 0 to 1024 {
affine.for %j = 0 to 1024 {
affine.for %k = 0 to 1024 {
%idx = affine.apply (d0) -> (d0 mod 128)(%i)
%idy = affine.apply (d0) -> (d0 mod 128)(%j)
%idz = affine.apply (d0) -> (d0 mod 128)(%k)
@ -308,8 +308,8 @@ func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
// CHECK-LABEL: func @multi_load_store_union() {
func @multi_load_store_union() {
%A = alloc() : memref<512 x 512 x f32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx = affine.apply (d0) -> (d0 + 64)(%i)
%idy = affine.apply (d0) -> (d0 + 128)(%j)
%ishift = affine.apply (d0) -> (d0 + 2)(%i)
@ -333,8 +333,8 @@ func @multi_load_store_union() {
// CHECK-NEXT: dma_start %0[%c2_1, %c2_2], %1[%c0, %c0], %c170372_3, %2[%c0], %c512_4, %c446_5 : memref<512x512xf32>, memref<382x446xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %2[%c0], %c170372_3 : memref<1xi32>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: for %i1 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = affine.apply [[MAP_PLUS_64]](%i0)
// CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_128]](%i1)
// CHECK-NEXT: %6 = affine.apply [[MAP_PLUS_2]](%i0)
@ -370,7 +370,7 @@ func @dma_loop_straightline_interspersed() {
%c255 = constant 255 : index
%A = alloc() : memref<256 x f32>
%v = load %A[%c0] : memref<256 x f32>
for %i = 1 to 255 {
affine.for %i = 1 to 255 {
load %A[%i] : memref<256 x f32>
%l = load %A[%c255] : memref<256 x f32>
@ -389,7 +389,7 @@ func @dma_loop_straightline_interspersed() {
// CHECK-NEXT: %5 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%c1_0], %4[%c0], %c254, %5[%c0] : memref<256xf32>, memref<254xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %5[%c0], %c254 : memref<1xi32>
// CHECK-NEXT: for %i0 = 1 to 255 {
// CHECK-NEXT: affine.for %i0 = 1 to 255 {
// CHECK-NEXT: %6 = affine.apply [[MAP_MINUS_ONE]](%i0)
// CHECK-NEXT: %7 = load %4[%6] : memref<254xf32, 1>
@ -410,10 +410,10 @@ func @dma_loop_straightline_interspersed() {
func @dma_mixed_loop_blocks() {
%c0 = constant 0 : index
%A = alloc() : memref<256 x 256 x vector<8 x f32>>
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
%v = load %A[%c0, %c0] : memref<256 x 256 x vector<8 x f32>>
"foo"(%v) : (vector<8 x f32>) -> ()
for %j = 0 to 256 {
affine.for %j = 0 to 256 {
%w = load %A[%i, %j] : memref<256 x 256 x vector<8 x f32>>
"bar"(%w) : (vector<8 x f32>) -> ()
@ -425,7 +425,7 @@ func @dma_mixed_loop_blocks() {
// CHECK-DAG: [[TAG:%[0-9]+]] = alloc() : memref<1xi32>
// CHECK: dma_start [[MEM]][%c0, %c0], [[BUF]][%c0, %c0], %c65536, [[TAG]][%c0] : memref<256x256xvector<8xf32>>, memref<256x256xvector<8xf32>, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait [[TAG]][%c0], %c65536 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %3 = load [[BUF]][%c0_0, %c0_0] : memref<256x256xvector<8xf32>, 1>
// CHECK: for %i1 = 0 to 256 {
// CHECK: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = load [[BUF]][%i0, %i1] : memref<256x256xvector<8xf32>, 1>
File diff suppressed because it is too large
Load Diff
@ -8,12 +8,12 @@
// CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, s0, 4096 floordiv s1)
// CHECK-LABEL: func @loop_tiling()
// CHECK-NEXT: for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: affine.for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: affine.for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: affine.for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: affine.for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: "foo"(%i3, %i4, %i5) : (index, index, index) -> ()
@ -21,32 +21,32 @@
// CHECK-NEXT: for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: affine.for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: affine.for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: "bar"(%i7, %i7) : (index, index) -> ()
// CHECK-NEXT: for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: affine.for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: affine.for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: "foobar"(%i9) : (index) -> ()
// CHECK-NEXT: return
func @loop_tiling() {
for %i = 0 to 256 {
for %j = 0 to 512 {
for %k = 0 to 1024 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 512 {
affine.for %k = 0 to 1024 {
"foo"(%i, %j, %k) : (index, index, index) -> ()
for %x = 0 to 50 {
affine.for %x = 0 to 50 {
"bar"(%x, %x) : (index, index) -> ()
// Intra-tile loop won't need a min expression.
for %y = 0 to 21 {
affine.for %y = 0 to 21 {
"foobar"(%y) : (index) -> ()
@ -58,12 +58,12 @@ func @loop_tiling() {
// CHECK-LABEL: func @loop_max_min_bound(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {
%M = dim %A, 0 : memref<? x i32>
for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
affine.for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
%out = affine.apply (d0) -> (d0) (%iTT)
// CHECK: for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK: affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK-NEXT: %1 = affine.apply [[IDENTITY]](%i1)
@ -24,7 +24,7 @@ func @body(index) -> ()
// CHECK-NEXT: return
func @simple_loop() {
for %i = 1 to 42 {
affine.for %i = 1 to 42 {
call @body(%i) : (index) -> ()
@ -65,9 +65,9 @@ func @post(index) -> ()
// CHECK-NEXT: return
func @imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
call @post(%i) : (index) -> ()
@ -122,13 +122,13 @@ func @body3(index, index) -> ()
// CHECK-NEXT: return
func @more_imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
call @mid(%i) : (index) -> ()
for %k = 18 to 37 step 3 {
affine.for %k = 18 to 37 step 3 {
call @body3(%i, %k) : (index, index) -> ()
call @post(%i) : (index) -> ()
@ -161,8 +161,8 @@ func @more_imperfectly_nested_loops() {
// CHECK-NEXT: return
func @affine_apply_loops_shorthand(%N : index) {
for %i = 0 to %N {
for %j = %i to 42 {
affine.for %i = 0 to %N {
affine.for %j = %i to 42 {
call @body2(%i, %j) : (index, index) -> ()
@ -360,7 +360,7 @@ func @if_for() {
// CHECK-NEXT: [[outerEndBB]]:
// CHECK-NEXT: br [[outerLoopInit:\^bb[0-9]+]]
if #set1(%i) {
for %j = 0 to 42 {
affine.for %j = 0 to 42 {
if #set2(%j) {
call @body2(%i, %j) : (index, index) -> ()
@ -397,9 +397,9 @@ func @if_for() {
// CHECK-NEXT: %c1_9 = constant 1 : index
// CHECK-NEXT: %16 = addi %9, %c1_9 : index
// CHECK-NEXT: br [[outerLoopCond]](%16 : index)
for %k = 0 to 42 {
affine.for %k = 0 to 42 {
if #set2(%k) {
for %l = 0 to 42 {
affine.for %l = 0 to 42 {
call @body3(%k, %l) : (index, index) -> ()
@ -446,8 +446,8 @@ func @if_for() {
// CHECK-NEXT: return
func @loop_min_max(%N : index) {
for %i = 0 to 42 {
for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
affine.for %i = 0 to 42 {
affine.for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
call @body2(%i, %j) : (index, index) -> ()
@ -486,7 +486,7 @@ func @loop_min_max(%N : index) {
// CHECK-NEXT: return
func @min_reduction_tree(%v : index) {
for %i = 0 to min #map_7_values(%v)[] {
affine.for %i = 0 to min #map_7_values(%v)[] {
call @body(%i) : (index) -> ()
@ -11,8 +11,8 @@ func @test() {
%A = alloc() : memref<9 x 9 x i32>
%B = alloc() : memref<111 x i32>
for %i = -1 to 10 {
for %j = -1 to 10 {
affine.for %i = -1 to 10 {
affine.for %j = -1 to 10 {
%idx0 = affine.apply (d0, d1) -> (d0)(%i, %j)
%idx1 = affine.apply (d0, d1) -> (d1)(%i, %j)
// Out of bound access.
@ -27,7 +27,7 @@ func @test() {
for %k = 0 to 10 {
affine.for %k = 0 to 10 {
// In bound.
%u = load %B[%zero] : memref<111 x i32>
// Out of bounds.
@ -43,8 +43,8 @@ func @test_mod_floordiv_ceildiv() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@ -69,8 +69,8 @@ func @test_no_out_of_bounds() {
%C = alloc() : memref<257 x i32>
%B = alloc() : memref<1 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
// All of these accesses are in bound; check that no errors are emitted.
// CHECK: %3 = affine.apply {{#map.*}}(%i0, %i1)
// CHECK-NEXT: %4 = load %0[%3, %c0] : memref<257x256xi32>
@ -93,8 +93,8 @@ func @mod_div() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@ -115,8 +115,8 @@ func @mod_div() {
// CHECK-LABEL: func @mod_floordiv_nested() {
func @mod_floordiv_nested() {
%A = alloc() : memref<256 x 256 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1) -> ((d0 mod 1024) floordiv 4)(%i, %j)
%idx1 = affine.apply (d0, d1) -> ((((d1 mod 128) mod 32) ceildiv 4) * 32)(%i, %j)
load %A[%idx0, %idx1] : memref<256 x 256 x i32> // expected-error {{'load' op memref out of upper bound access along dimension #2}}
@ -128,7 +128,7 @@ func @mod_floordiv_nested() {
// CHECK-LABEL: func @test_semi_affine_bailout
func @test_semi_affine_bailout(%N : index) {
%B = alloc() : memref<10 x i32>
for %i = 0 to 10 {
affine.for %i = 0 to 10 {
%idx = affine.apply (d0)[s0] -> (d0 * s0)(%i)[%N]
%y = load %B[%idx] : memref<10 x i32>
@ -138,7 +138,7 @@ func @test_semi_affine_bailout(%N : index) {
// CHECK-LABEL: func @multi_mod_floordiv
func @multi_mod_floordiv() {
%A = alloc() : memref<2x2xi32>
for %ii = 0 to 64 {
affine.for %ii = 0 to 64 {
%idx0 = affine.apply (d0) -> ((d0 mod 147456) floordiv 1152) (%ii)
%idx1 = affine.apply (d0) -> (((d0 mod 147456) mod 1152) floordiv 384) (%ii)
%v = load %A[%idx0, %idx1] : memref<2x2xi32>
@ -153,8 +153,8 @@ func @delinearize_mod_floordiv() {
%out = alloc() : memref<64x9xi32>
// Reshape '%in' into '%out'.
for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)
@ -189,7 +189,7 @@ func @out_of_bounds() {
%in = alloc() : memref<1xi32>
%c9 = constant 9 : i32
for %i0 = 10 to 11 {
affine.for %i0 = 10 to 11 {
%idy = affine.apply (d0) -> (100 * d0 floordiv 1000) (%i0)
store %c9, %in[%idy] : memref<1xi32> // expected-error {{'store' op memref out of upper bound access along dimension #1}}
@ -10,14 +10,14 @@
func @simple_store_load() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: return
@ -30,7 +30,7 @@ func @multi_store_load() {
%cf8 = constant 8.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -45,7 +45,7 @@ func @multi_store_load() {
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %cst_1 = constant 9.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: %1 = mulf %cst_1, %cst_1 : f32
@ -59,8 +59,8 @@ func @multi_store_load() {
func @store_load_affine_apply() -> memref<10x10xf32> {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10x10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1)
%t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1)
%idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1)
@ -75,8 +75,8 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
return %m : memref<10x10xf32>
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %0 = alloc() : memref<10x10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1)
// CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1)
// CHECK-NEXT: %3 = affine.apply [[MAP2]](%1, %2)
@ -92,17 +92,17 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
func @store_load_nested(%N : index) {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
@ -117,12 +117,12 @@ func @multi_store_load_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -138,9 +138,9 @@ func @store_load_store_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -159,16 +159,16 @@ func @multi_store_load_nested_fwd(%N : index) {
%cf9 = constant 9.0 : f32
%cf10 = constant 10.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
store %cf9, %m[%i2] : memref<10xf32>
store %cf10, %m[%i0] : memref<10xf32>
for %i3 = 0 to %N {
affine.for %i3 = 0 to %N {
// CHECK-NOT: %{{[0-9]+}} = load
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -182,10 +182,10 @@ func @multi_store_load_nested_fwd(%N : index) {
func @store_load_no_fwd() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK: load %{{[0-9]+}}
%v0 = load %m[%i2] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -202,9 +202,9 @@ func @store_load_fwd() {
%c0 = constant 0 : index
%m = alloc() : memref<10xf32>
store %cf7, %m[%c0] : memref<10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK-NOT: load %{{[0-9]}}+
%v0 = load %m[%c0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -223,9 +223,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%c0 = constant 0 : index
%c1 = constant 1 : index
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
%idx = affine.apply (d0) -> (d0 + 1) (%i0)
@ -236,9 +236,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%v3 = load %m[%c1] : memref<10xf32>
return %v3 : f32
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32>
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: %2 = affine.apply [[MAP4]](%i0)
// CHECK-NEXT: store %cst_0, %0[%2] : memref<10xf32>
@ -13,14 +13,14 @@ func @store_may_execute_before_load() {
// ancestor IfOp of the store, dominates the ancestor 'affine.for' of the load,
// and thus the store "may" conditionally execute before the load.
if #set0(%c0) {
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%v0 = load %m[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@ -37,13 +37,13 @@ func @dependent_loops() {
%cst = constant 7.000000e+00 : f32
// There is a dependence from 0 to 1 at depth 1 (common surrounding loops 0)
// because the first loop with the store dominates the second loop.
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cst, %0[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%1 = load %0[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@ -231,7 +231,7 @@ func @store_range_load_after_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -254,7 +254,7 @@ func @store_load_func_symbol(%arg0: index, %arg1: index) {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to %arg1 {
affine.for %i0 = 0 to %arg1 {
%a0 = affine.apply (d0) -> (d0) (%arg0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = [1, +inf]}}
@ -277,7 +277,7 @@ func @store_range_load_last_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// For dependence from 0 to 1, we do not have a loop carried dependence
// because only the final write in the loop accesses the same element as the
@ -305,7 +305,7 @@ func @store_range_load_before_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -328,7 +328,7 @@ func @store_range_load_first_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// Dependence from 0 to 1 at depth 1 is a range because all loads at
// constant index zero are reads after first store at index zero during
@ -353,7 +353,7 @@ func @store_range_load_first_in_range() {
func @store_plus_3() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0 + 3) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -375,7 +375,7 @@ func @store_plus_3() {
func @load_minus_2() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 2 to 11 {
affine.for %i0 = 2 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -397,8 +397,8 @@ func @load_minus_2() {
func @perfectly_nested_loops_loop_independent() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 11 {
for %i1 = 0 to 11 {
affine.for %i0 = 0 to 11 {
affine.for %i1 = 0 to 11 {
// Dependence from access 0 to 1 is loop independent at depth = 3.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -428,8 +428,8 @@ func @perfectly_nested_loops_loop_independent() {
func @perfectly_nested_loops_loop_carried_at_depth1() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 9 {
for %i1 = 0 to 9 {
affine.for %i0 = 0 to 9 {
affine.for %i1 = 0 to 9 {
// Dependence from access 0 to 1 is loop carried at depth 1.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -459,8 +459,8 @@ func @perfectly_nested_loops_loop_carried_at_depth1() {
func @perfectly_nested_loops_loop_carried_at_depth2() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
// Dependence from access 0 to 1 is loop carried at depth 2.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -491,8 +491,8 @@ func @one_common_loop() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
// There is a loop-independent dependence from access 0 to 1 at depth 2.
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
store %c7, %m[%a00, %a01] : memref<10x10xf32>
@ -502,7 +502,7 @@ func @one_common_loop() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = false}}
// expected-note@-5 {{dependence from 0 to 1 at depth 2 = true}}
for %i2 = 0 to 9 {
affine.for %i2 = 0 to 9 {
%a10 = affine.apply (d0, d1) -> (d0) (%i0, %i2)
%a11 = affine.apply (d0, d1) -> (d1) (%i0, %i2)
%v0 = load %m[%a10, %a11] : memref<10x10xf32>
@ -525,7 +525,7 @@ func @dependence_cycle() {
// Dependences:
// *) loop-independent dependence from access 1 to 2 at depth 2.
// *) loop-carried dependence from access 3 to 0 at depth 1.
for %i0 = 0 to 9 {
affine.for %i0 = 0 to 9 {
%a0 = affine.apply (d0) -> (d0) (%i0)
%v0 = load %m.a[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -575,8 +575,8 @@ func @dependence_cycle() {
func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to %arg0 {
for %i1 = 0 to %arg1 {
affine.for %i0 = 0 to %arg0 {
affine.for %i1 = 0 to %arg1 {
%a00 = affine.apply (d0, d1) -> (d0 - 1) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1 + 1) (%i0, %i1)
%v0 = load %m[%a00, %a01] : memref<10x10xf32>
@ -605,8 +605,8 @@ func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
func @war_raw_waw_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 + 1) (%i1)
%v0 = load %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -633,7 +633,7 @@ func @war_raw_waw_deps() {
func @mod_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 mod 2) (%i0)
// Results are conservative here since we currently don't have a way to
// represent strided sets in FlatAffineConstraints.
@ -658,8 +658,8 @@ func @loop_nest_depth() {
%0 = alloc() : memref<100x100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 128 {
for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 {
store %c7, %0[%i0, %i1] : memref<100x100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@ -667,10 +667,10 @@ func @loop_nest_depth() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = true}}
for %i2 = 0 to 8 {
for %i3 = 0 to 8 {
for %i4 = 0 to 8 {
for %i5 = 0 to 16 {
affine.for %i2 = 0 to 8 {
affine.for %i3 = 0 to 8 {
affine.for %i4 = 0 to 8 {
affine.for %i5 = 0 to 16 {
%8 = affine.apply (d0, d1) -> (d0 * 16 + d1)(%i4, %i5)
%9 = load %0[%8, %i3] : memref<100x100xf32>
// expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}}
@ -693,9 +693,9 @@ func @loop_nest_depth() {
func @mod_div_3d() {
%M = alloc() : memref<2x2x2xi32>
%c0 = constant 0 : i32
for %i0 = 0 to 8 {
for %i1 = 0 to 8 {
for %i2 = 0 to 8 {
affine.for %i0 = 0 to 8 {
affine.for %i1 = 0 to 8 {
affine.for %i2 = 0 to 8 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 floordiv 4) (%i0, %i1, %i2)
%idx1 = affine.apply (d0, d1, d2) -> (d1 mod 2) (%i0, %i1, %i2)
%idx2 = affine.apply (d0, d1, d2) -> (d2 floordiv 4) (%i0, %i1, %i2)
@ -719,12 +719,12 @@ func @delinearize_mod_floordiv() {
%in = alloc() : memref<2x2x3x3x16x1xi32>
%out = alloc() : memref<64x9xi32>
for %i0 = 0 to 2 {
for %i1 = 0 to 2 {
for %i2 = 0 to 3 {
for %i3 = 0 to 3 {
for %i4 = 0 to 16 {
for %i5 = 0 to 1 {
affine.for %i0 = 0 to 2 {
affine.for %i1 = 0 to 2 {
affine.for %i2 = 0 to 3 {
affine.for %i3 = 0 to 3 {
affine.for %i4 = 0 to 16 {
affine.for %i5 = 0 to 1 {
store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@ -742,8 +742,8 @@ func @delinearize_mod_floordiv() {
for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)
@ -16,13 +16,13 @@ func @loop_nest_dma() {
%zero = constant 0 : index
%num_elts = constant 128 : index
for %i = 0 to 8 {
affine.for %i = 0 to 8 {
dma_start %A[%i], %Ah[%i], %num_elts, %tag[%zero] : memref<256 x f32>, memref<32 x f32, 1>, memref<1 x f32>
dma_wait %tag[%zero], %num_elts : memref<1 x f32>
%v = load %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
%r = "compute"(%v) : (f32) -> (f32)
store %r, %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
for %j = 0 to 128 {
affine.for %j = 0 to 128 {
"do_more_compute"(%i, %j) : (index, index) -> ()
@ -34,7 +34,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %3 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: %4 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: dma_start %0[%c0], %1[%3, %c0], %c128, %2[%4, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
// CHECK-NEXT: for %i0 = 1 to 8 {
// CHECK-NEXT: affine.for %i0 = 1 to 8 {
// CHECK-NEXT: %5 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: %6 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: dma_start %0[%i0], %1[%5, %i0], %c128, %2[%6, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
@ -45,7 +45,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %10 = load %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: %11 = "compute"(%10) : (f32) -> f32
// CHECK-NEXT: store %11, %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i1 = 0 to 128 {
// CHECK-NEXT: affine.for %i1 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%7, %i1) : (index, index) -> ()
@ -56,7 +56,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %15 = load %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: %16 = "compute"(%15) : (f32) -> f32
// CHECK-NEXT: store %16, %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i2 = 0 to 128 {
// CHECK-NEXT: affine.for %i2 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%12, %i2) : (index, index) -> ()
// CHECK-NEXT: return
@ -68,7 +68,7 @@ func @loop_step(%arg0: memref<512xf32>,
%arg1: memref<512xf32>) {
%c0 = constant 0 : index
%c4 = constant 4 : index
for %i0 = 0 to 512 step 4 {
affine.for %i0 = 0 to 512 step 4 {
%1 = alloc() : memref<4xf32, 1>
%2 = alloc() : memref<1xi32>
dma_start %arg0[%i0], %1[%c0], %c4, %2[%c0]
@ -82,7 +82,7 @@ func @loop_step(%arg0: memref<512xf32>,
// CHECK: %2 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK: %3 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK-NEXT: dma_start %arg0[%c0], %0[%2, %c0_0], %c4, [[TAG]][%3, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
// CHECK-NEXT: for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: affine.for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: %4 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: %5 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: dma_start %arg0[%i0], %0[%4, %c0_0], %c4, [[TAG]][%5, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
@ -114,8 +114,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// Prologue for DMA overlap on arg2.
// CHECK:[[TAG_ARG2:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg2[
// CHECK: for %i0 = 1 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 1 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@ -127,8 +127,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK-NEXT for %i1 = 1 to 8 {
for %i1 = 0 to 8 {
// CHECK-NEXT: affine.for %i1 = 1 to 8 {
affine.for %i1 = 0 to 8 {
%7 = affine.apply #map1(%i0, %i1)
%8 = affine.apply #map2(%i1)
dma_start %arg0[%7, %c0], %0[%c0, %c0], %num_elts, %3[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
@ -140,8 +140,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0]]
// CHECK: dma_wait [[TAG_ARG1]]
// CHECK-NEXT: for %i2 = 0 to 4 {
for %i2 = 0 to 4 {
// CHECK-NEXT: affine.for %i2 = 0 to 4 {
affine.for %i2 = 0 to 4 {
"foo"() : () -> ()
@ -155,16 +155,16 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1_NESTED:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: for %i4 = 1 to 8 {
// CHECK: affine.for %i4 = 1 to 8 {
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i5 = 0 to 4 {
// CHECK: affine.for %i5 = 0 to 4 {
// CHECK: "foo"() : () -> ()
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i6 = 0 to 4 {
// CHECK: affine.for %i6 = 0 to 4 {
// CHECK: }
@ -185,8 +185,8 @@ func @loop_dma_dependent(%arg2: memref<512x32xvector<8xf32>>) {
// The two DMAs below are dependent (incoming and outgoing on the same
// memref) in the same iteration; so no pipelining here.
// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>>, memref<64x4xvector<8xf32>, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@ -206,8 +206,8 @@ func @escaping_use(%arg0: memref<512 x 32 x f32>) {
%tag = alloc() : memref<1 x i32>
// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
@ -230,14 +230,14 @@ func @live_out_use(%arg0: memref<512 x 32 x f32>) -> f32 {
%tag = alloc() : memref<1 x i32>
// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
// Use live out of 'for' inst; no DMA pipelining will be done.
// Use live out of 'affine.for' inst; no DMA pipelining will be done.
%v = load %Av[%zero, %zero] : memref<32 x 32 x f32, 2>
return %v : f32
// CHECK: %{{[0-9]+}} = load %{{[0-9]+}}[%c0, %c0] : memref<32x32xf32, 2>
@ -261,14 +261,14 @@ func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>) {
// CHECK: %5 = affine.apply [[MOD_2]](%c0)
// CHECK: %6 = affine.apply [[MOD_2]](%c0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%5, %c0_0, %c0_0], %c512, %4[%6, %c0_0]
for %kTT = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<? x ? x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
// CHECK-NEXT: for %i0 = 1 to 16 {
// CHECK-NEXT: affine.for %i0 = 1 to 16 {
// CHECK: %7 = affine.apply [[MOD_2]](%i0)
// CHECK: %8 = affine.apply [[MOD_2]](%i0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%7, %c0_0, %c0_0], %c512, %4[%8, %c0_0]
@ -73,8 +73,8 @@
// CHECK-LABEL: func @test_gaussian_elimination_empty_set0() {
func @test_gaussian_elimination_empty_set0() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: [[SET_EMPTY_2D]](%i0, %i1)
if (d0, d1) : (2 == 0)(%i0, %i1) {
@ -85,8 +85,8 @@ func @test_gaussian_elimination_empty_set0() {
// CHECK-LABEL: func @test_gaussian_elimination_empty_set1() {
func @test_gaussian_elimination_empty_set1() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: [[SET_EMPTY_2D]](%i0, %i1)
if (d0, d1) : (1 >= 0, -1 >= 0) (%i0, %i1) {
@ -97,8 +97,8 @@ func @test_gaussian_elimination_empty_set1() {
// CHECK-LABEL: func @test_gaussian_elimination_non_empty_set2() {
func @test_gaussian_elimination_non_empty_set2() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set1(%i0, %i1)
if #set2(%i0, %i1) {
@ -111,8 +111,8 @@ func @test_gaussian_elimination_non_empty_set2() {
func @test_gaussian_elimination_empty_set3() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set2(%i0, %i1)[%c7, %c11]
if #set3(%i0, %i1)[%c7, %c11] {
@ -125,8 +125,8 @@ func @test_gaussian_elimination_empty_set3() {
func @test_gaussian_elimination_non_empty_set4() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set3(%i0, %i1)[%c7, %c11]
if #set4(%i0, %i1)[%c7, %c11] {
@ -139,8 +139,8 @@ func @test_gaussian_elimination_non_empty_set4() {
func @test_gaussian_elimination_empty_set5() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set2(%i0, %i1)[%c7, %c11]
if #set5(%i0, %i1)[%c7, %c11] {
@ -151,8 +151,8 @@ func @test_gaussian_elimination_empty_set5() {
// CHECK-LABEL: func @test_fuzz_explosion
func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
if #set_fuzz_virus(%i0, %i1, %arg0, %arg1, %arg2, %arg3) {
@ -163,8 +163,8 @@ func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i
// CHECK-LABEL: func @test_empty_set(%arg0: index) {
func @test_empty_set(%N : index) {
for %i = 0 to 10 {
for %j = 0 to 10 {
affine.for %i = 0 to 10 {
affine.for %j = 0 to 10 {
// CHECK: if [[SET_EMPTY_2D]](%i0, %i1)
if (d0, d1) : (d0 - d1 >= 0, d1 - d0 - 1 >= 0)(%i, %j) {
"foo"() : () -> ()
@ -198,8 +198,8 @@ func @test_empty_set(%N : index) {
// The tests below test GCDTightenInequalities().
for %k = 0 to 10 {
for %l = 0 to 10 {
affine.for %k = 0 to 10 {
affine.for %l = 0 to 10 {
// Empty because no multiple of 8 lies between 4 and 7.
// CHECK: if [[SET_EMPTY_1D]](%i2)
if (d0) : (8*d0 - 4 >= 0, -8*d0 + 7 >= 0)(%k) {
@ -226,7 +226,7 @@ func @test_empty_set(%N : index) {
for %m = 0 to 10 {
affine.for %m = 0 to 10 {
// CHECK: if [[SET_EMPTY_1D]](%i{{[0-9]+}})
if (d0) : (d0 mod 2 - 3 == 0) (%m) {
"foo"() : () -> ()
@ -10,7 +10,7 @@ func @inline_notation() -> i32 loc("":10:8) {
%1 = "foo"() : () -> i32 loc("foo")
// CHECK: } loc(unknown)
for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
} loc(fused["foo", "":10:8])
// CHECK: } loc(unknown)
@ -7,13 +7,13 @@
// CHECK-LABEL: func @unroll_jam_imperfect_nest() {
func @unroll_jam_imperfect_nest() {
// CHECK: %c100 = constant 100 : index
// CHECK-NEXT: for %i0 = 0 to 99 step 2 {
for %i = 0 to 101 {
// CHECK-NEXT: affine.for %i0 = 0 to 99 step 2 {
affine.for %i = 0 to 101 {
// CHECK: %0 = "addi32"(%i0, %i0) : (index, index) -> i32
// CHECK-NEXT: %1 = affine.apply [[MAP_PLUS_1]](%i0)
// CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
%x = "addi32"(%i, %i) : (index, index) -> i32
for %j = 0 to 17 {
affine.for %j = 0 to 17 {
// CHECK: %3 = "addi32"(%i0, %i0) : (index, index) -> i32
// CHECK-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
// CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_1]](%i0)
@ -29,7 +29,7 @@ func @unroll_jam_imperfect_nest() {
} // CHECK: }
// cleanup loop (single iteration)
// CHECK: %11 = "addi32"(%c100, %c100) : (index, index) -> i32
// CHECK-NEXT: for %i2 = 0 to 17 {
// CHECK-NEXT: affine.for %i2 = 0 to 17 {
// CHECK-NEXT: %12 = "addi32"(%c100, %c100) : (index, index) -> i32
// CHECK-NEXT: %13 = "addi32"(%12, %12) : (i32, i32) -> i32
@ -39,8 +39,8 @@ func @unroll_jam_imperfect_nest() {
// UNROLL-BY-4-LABEL: func @loop_nest_unknown_count_1(%arg0: index) {
func @loop_nest_unknown_count_1(%N : index) {
// UNROLL-BY-4-NEXT: for %i0 = 1 to #map{{[0-9]+}}()[%arg0] step 4 {
// UNROLL-BY-4-NEXT: for %i1 = 1 to 100 {
// UNROLL-BY-4-NEXT: affine.for %i0 = 1 to #map{{[0-9]+}}()[%arg0] step 4 {
// UNROLL-BY-4-NEXT: affine.for %i1 = 1 to 100 {
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
@ -48,14 +48,14 @@ func @loop_nest_unknown_count_1(%N : index) {
// A cleanup loop should be generated here.
// UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
// UNROLL-BY-4-NEXT: for %i3 = 1 to 100 {
// UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
// UNROLL-BY-4-NEXT: affine.for %i3 = 1 to 100 {
// UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
// Specify the lower bound in a form so that both lb and ub operands match.
for %i = ()[s0] -> (1)()[%N] to %N {
for %j = 1 to 100 {
affine.for %i = ()[s0] -> (1)()[%N] to %N {
affine.for %j = 1 to 100 {
%x = "foo"() : () -> i32
@ -64,8 +64,8 @@ func @loop_nest_unknown_count_1(%N : index) {
// UNROLL-BY-4-LABEL: func @loop_nest_unknown_count_2(%arg0: index) {
func @loop_nest_unknown_count_2(%arg : index) {
// UNROLL-BY-4-NEXT: for %i0 = %arg0 to #map{{[0-9]+}}()[%arg0] step 4 {
// UNROLL-BY-4-NEXT: for %i1 = 1 to 100 {
// UNROLL-BY-4-NEXT: affine.for %i0 = %arg0 to #map{{[0-9]+}}()[%arg0] step 4 {
// UNROLL-BY-4-NEXT: affine.for %i1 = 1 to 100 {
// UNROLL-BY-4-NEXT: %0 = "foo"(%i0) : (index) -> i32
// UNROLL-BY-4-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
// UNROLL-BY-4-NEXT: %2 = "foo"(%1) : (index) -> i32
@ -77,12 +77,12 @@ func @loop_nest_unknown_count_2(%arg : index) {
// The cleanup loop is a single iteration one and is promoted.
// UNROLL-BY-4-NEXT: %7 = affine.apply [[M1:#map{{[0-9]+}}]]()[%arg0]
// UNROLL-BY-4-NEXT: for %i3 = 1 to 100 {
// UNROLL-BY-4-NEXT: affine.for %i3 = 1 to 100 {
// UNROLL-BY-4-NEXT: %8 = "foo"() : () -> i32
// Specify the lower bound in a form so that both lb and ub operands match.
for %i = ()[s0] -> (s0) ()[%arg] to ()[s0] -> (s0+8) ()[%arg] {
for %j = 1 to 100 {
affine.for %i = ()[s0] -> (s0) ()[%arg] to ()[s0] -> (s0+8) ()[%arg] {
affine.for %j = 1 to 100 {
%x = "foo"(%i) : (index) -> i32
@ -46,13 +46,13 @@
// CHECK-LABEL: func @loop_nest_simplest() {
func @loop_nest_simplest() {
// CHECK: for %i0 = 0 to 100 step 2 {
for %i = 0 to 100 step 2 {
// CHECK: affine.for %i0 = 0 to 100 step 2 {
affine.for %i = 0 to 100 step 2 {
// CHECK: %c1_i32 = constant 1 : i32
// CHECK-NEXT: %c1_i32_0 = constant 1 : i32
// CHECK-NEXT: %c1_i32_1 = constant 1 : i32
// CHECK-NEXT: %c1_i32_2 = constant 1 : i32
for %j = 0 to 4 {
affine.for %j = 0 to 4 {
%x = constant 1 : i32
} // CHECK: }
@ -62,8 +62,8 @@ func @loop_nest_simplest() {
// CHECK-LABEL: func @loop_nest_simple_iv_use() {
func @loop_nest_simple_iv_use() {
// CHECK: %c0 = constant 0 : index
// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
for %i = 0 to 100 step 2 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 {
affine.for %i = 0 to 100 step 2 {
// CHECK: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
// CHECK: %1 = affine.apply [[MAP0]](%c0)
// CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
@ -71,7 +71,7 @@ func @loop_nest_simple_iv_use() {
// CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
// CHECK: %5 = affine.apply [[MAP2]](%c0)
// CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
for %j = 0 to 4 {
affine.for %j = 0 to 4 {
%x = "addi32"(%j, %j) : (index, index) -> i32
} // CHECK: }
@ -82,8 +82,8 @@ func @loop_nest_simple_iv_use() {
// CHECK-LABEL: func @loop_nest_body_def_use() {
func @loop_nest_body_def_use() {
// CHECK: %c0 = constant 0 : index
// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
for %i = 0 to 100 step 2 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 {
affine.for %i = 0 to 100 step 2 {
// CHECK: %c0_0 = constant 0 : index
%c0 = constant 0 : index
// CHECK: %0 = affine.apply [[MAP0]](%c0)
@ -97,7 +97,7 @@ func @loop_nest_body_def_use() {
// CHECK-NEXT: %8 = affine.apply [[MAP2]](%c0)
// CHECK-NEXT: %9 = affine.apply [[MAP0]](%8)
// CHECK-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index
for %j = 0 to 4 {
affine.for %j = 0 to 4 {
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%y = "addi32"(%x, %c0) : (index, index) -> index
@ -110,14 +110,14 @@ func @loop_nest_body_def_use() {
func @loop_nest_strided() {
// CHECK: %c2 = constant 2 : index
// CHECK-NEXT: %c2_0 = constant 2 : index
// CHECK-NEXT: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// CHECK: %0 = affine.apply [[MAP0]](%c2_0)
// CHECK-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
// CHECK-NEXT: %2 = affine.apply [[MAP1]](%c2_0)
// CHECK-NEXT: %3 = affine.apply [[MAP0]](%2)
// CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index
for %j = 2 to 6 step 2 {
affine.for %j = 2 to 6 step 2 {
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%y = "addi32"(%x, %x) : (index, index) -> index
@ -130,7 +130,7 @@ func @loop_nest_strided() {
// CHECK-NEXT: %10 = affine.apply [[MAP3]](%c2)
// CHECK-NEXT: %11 = affine.apply [[MAP0]](%10)
// CHECK-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index
for %k = 2 to 7 step 2 {
affine.for %k = 2 to 7 step 2 {
%z = "affine.apply" (%k) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%w = "addi32"(%z, %z) : (index, index) -> index
@ -142,8 +142,8 @@ func @loop_nest_strided() {
// CHECK-LABEL: func @loop_nest_multiple_results() {
func @loop_nest_multiple_results() {
// CHECK: %c0 = constant 0 : index
// CHECK-NEXT: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// CHECK: %0 = affine.apply [[MAP4]](%i0, %c0)
// CHECK-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
// CHECK-NEXT: %2 = affine.apply #map{{.*}}(%i0, %c0)
@ -153,7 +153,7 @@ func @loop_nest_multiple_results() {
// CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
// CHECK-NEXT: %7 = affine.apply #map{{.*}}(%i0, %4)
// CHECK-NEXT: %8 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index)
for %j = 0 to 2 step 1 {
affine.for %j = 0 to 2 step 1 {
%x = affine.apply (d0, d1) -> (d0 + 1) (%i, %j)
%y = "addi32"(%x, %x) : (index, index) -> index
%z = affine.apply (d0, d1) -> (d0 + 3) (%i, %j)
@ -170,8 +170,8 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
// CHECK: %c0 = constant 0 : index
// CHECK-NEXT: %c128 = constant 128 : index
%c128 = constant 128 : index
// CHECK: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// CHECK: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// CHECK: %0 = "vld"(%i0) : (index) -> i32
%ld = "vld"(%i) : (index) -> i32
// CHECK: %1 = affine.apply [[MAP0]](%c0)
@ -189,7 +189,7 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
// CHECK-NEXT: %13 = affine.apply [[MAP0]](%12)
// CHECK-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index
// CHECK-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index
for %j = 0 to 4 {
affine.for %j = 0 to 4 {
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%y = "vmulf"(%j, %x) : (index, index) -> index
@ -218,7 +218,7 @@ func @loop_nest_seq_multiple() {
// CHECK-NEXT: %5 = affine.apply [[MAP2]](%c0_0)
// CHECK-NEXT: %6 = affine.apply [[MAP0]](%5)
// CHECK-NEXT: "mul"(%6, %6) : (index, index) -> ()
for %j = 0 to 4 {
affine.for %j = 0 to 4 {
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
"mul"(%x, %x) : (index, index) -> ()
@ -226,8 +226,8 @@ func @loop_nest_seq_multiple() {
// CHECK: %c99 = constant 99 : index
%k = "constant"(){value: 99} : () -> index
// CHECK: for %i0 = 0 to 100 step 2 {
for %m = 0 to 100 step 2 {
// CHECK: affine.for %i0 = 0 to 100 step 2 {
affine.for %m = 0 to 100 step 2 {
// CHECK: %7 = affine.apply [[MAP0]](%c0)
// CHECK-NEXT: %8 = affine.apply [[MAP6]](%c0)[%c99]
// CHECK-NEXT: %9 = affine.apply [[MAP0]](%c0)
@ -239,7 +239,7 @@ func @loop_nest_seq_multiple() {
// CHECK-NEXT: %15 = affine.apply [[MAP2]](%c0)
// CHECK-NEXT: %16 = affine.apply [[MAP0]](%15)
// CHECK-NEXT: %17 = affine.apply [[MAP6]](%15)[%c99]
for %n = 0 to 4 {
affine.for %n = 0 to 4 {
%y = "affine.apply" (%n) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%z = "affine.apply" (%n, %k) { map: (d0) [s0] -> (d0 + s0 + 1) } :
@ -251,16 +251,16 @@ func @loop_nest_seq_multiple() {
// SHORT-LABEL: func @loop_nest_outer_unroll() {
func @loop_nest_outer_unroll() {
// SHORT: for %i0 = 0 to 4 {
// SHORT: affine.for %i0 = 0 to 4 {
// SHORT-NEXT: %0 = affine.apply [[MAP0]](%i0)
// SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
// SHORT-NEXT: for %i1 = 0 to 4 {
// SHORT-NEXT: affine.for %i1 = 0 to 4 {
// SHORT-NEXT: %2 = affine.apply [[MAP0]](%i1)
// SHORT-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> index
for %i = 0 to 2 {
for %j = 0 to 4 {
affine.for %i = 0 to 2 {
affine.for %j = 0 to 4 {
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%y = "addi32"(%x, %x) : (index, index) -> index
@ -284,28 +284,28 @@ func @loop_nest_seq_long() -> i32 {
%zero_idx = constant 0 : index
for %n0 = 0 to 512 {
for %n1 = 0 to 8 {
affine.for %n0 = 0 to 512 {
affine.for %n1 = 0 to 8 {
store %one, %A[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
store %two, %B[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
for %i0 = 0 to 2 {
for %i1 = 0 to 2 {
for %i2 = 0 to 8 {
affine.for %i0 = 0 to 2 {
affine.for %i1 = 0 to 2 {
affine.for %i2 = 0 to 8 {
%b2 = "affine.apply" (%i1, %i2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
%x = load %B[%i0, %b2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
"op1"(%x) : (i32) -> ()
for %j1 = 0 to 8 {
for %j2 = 0 to 8 {
affine.for %j1 = 0 to 8 {
affine.for %j2 = 0 to 8 {
%a2 = "affine.apply" (%i1, %j2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
%v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
"op2"(%v203) : (i32) -> ()
for %k2 = 0 to 8 {
affine.for %k2 = 0 to 8 {
%s0 = "op3"() : () -> i32
%c2 = "affine.apply" (%i0, %k2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
%s1 = load %C[%j1, %c2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
@ -322,8 +322,8 @@ func @loop_nest_seq_long() -> i32 {
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_no_cleanup() {
func @unroll_unit_stride_no_cleanup() {
// UNROLL-BY-4: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-BY-4: affine.for [[L1:%i[0-9]+]] = 0 to 8 step 4 {
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
@ -337,13 +337,13 @@ func @unroll_unit_stride_no_cleanup() {
// UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
for %j = 0 to 8 {
affine.for %j = 0 to 8 {
%x = "addi32"(%j, %j) : (index, index) -> i32
%y = "addi32"(%x, %x) : (i32, i32) -> i32
// empty loop
// UNROLL-BY-4: for %i2 = 0 to 8 {
for %k = 0 to 8 {
// UNROLL-BY-4: affine.for %i2 = 0 to 8 {
affine.for %k = 0 to 8 {
@ -351,8 +351,8 @@ func @unroll_unit_stride_no_cleanup() {
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
func @unroll_unit_stride_cleanup() {
// UNROLL-BY-4: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-BY-4: affine.for [[L1:%i[0-9]+]] = 0 to 7 step 4 {
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
@ -370,7 +370,7 @@ func @unroll_unit_stride_cleanup() {
// UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
for %j = 0 to 10 {
affine.for %j = 0 to 10 {
%x = "addi32"(%j, %j) : (index, index) -> i32
%y = "addi32"(%x, %x) : (i32, i32) -> i32
@ -380,8 +380,8 @@ func @unroll_unit_stride_cleanup() {
// UNROLL-BY-4-LABEL: func @unroll_non_unit_stride_cleanup() {
func @unroll_non_unit_stride_cleanup() {
// UNROLL-BY-4: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-BY-4: affine.for [[L1:%i[0-9]+]] = 2 to 37 step 20 {
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
@ -399,7 +399,7 @@ func @unroll_non_unit_stride_cleanup() {
// UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
for %j = 2 to 48 step 5 {
affine.for %j = 2 to 48 step 5 {
%x = "addi32"(%j, %j) : (index, index) -> i32
%y = "addi32"(%x, %x) : (i32, i32) -> i32
@ -411,8 +411,8 @@ func @unroll_non_unit_stride_cleanup() {
func @loop_nest_single_iteration_after_unroll(%N: index) {
// UNROLL-BY-4: %c0 = constant 0 : index
// UNROLL-BY-4: %c4 = constant 4 : index
// UNROLL-BY-4: for %i0 = 0 to %arg0 {
for %i = 0 to %N {
// UNROLL-BY-4: affine.for %i0 = 0 to %arg0 {
affine.for %i = 0 to %N {
// UNROLL-BY-4: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %1 = affine.apply [[MAP0]](%c0)
// UNROLL-BY-4-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
@ -422,7 +422,7 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
// UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %7 = "addi32"(%c4, %c4) : (index, index) -> i32
// UNROLL-BY-4-NOT: for
for %j = 0 to 5 {
affine.for %j = 0 to 5 {
%x = "addi32"(%j, %j) : (index, index) -> i32
} // UNROLL-BY-4-NOT: }
} // UNROLL-BY-4: }
@ -434,8 +434,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
// No cleanup will be generated here.
// UNROLL-BY-4-LABEL: func @loop_nest_operand1() {
func @loop_nest_operand1() {
// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
// UNROLL-BY-4-NEXT: for %i1 = [[MAP10]](%i0) to #map{{[0-9]+}}(%i0) step 4
// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
// UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP10]](%i0) to #map{{[0-9]+}}(%i0) step 4
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
@ -443,8 +443,8 @@ func @loop_nest_operand1() {
// UNROLL-BY-4-NEXT: return
for %i = 0 to 100 step 2 {
for %j = (d0) -> (0) (%i) to (d0) -> (d0 - d0 mod 4) (%i) {
affine.for %i = 0 to 100 step 2 {
affine.for %j = (d0) -> (0) (%i) to (d0) -> (d0 - d0 mod 4) (%i) {
%x = "foo"() : () -> i32
@ -454,8 +454,8 @@ func @loop_nest_operand1() {
// No cleanup will be generated here.
// UNROLL-BY-4-LABEL: func @loop_nest_operand2() {
func @loop_nest_operand2() {
// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
// UNROLL-BY-4-NEXT: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
// UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
@ -463,8 +463,8 @@ func @loop_nest_operand2() {
// UNROLL-BY-4-NEXT: return
for %i = 0 to 100 step 2 {
for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
affine.for %i = 0 to 100 step 2 {
affine.for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
%x = "foo"() : () -> i32
@ -475,16 +475,16 @@ func @loop_nest_operand2() {
// factor. The cleanup loop happens to be a single iteration one and is promoted.
// UNROLL-BY-4-LABEL: func @loop_nest_operand3() {
func @loop_nest_operand3() {
// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
for %i = 0 to 100 step 2 {
// UNROLL-BY-4: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
affine.for %i = 0 to 100 step 2 {
// UNROLL-BY-4: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
affine.for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
%x = "foo"() : () -> i32
} // UNROLL-BY-4: }
@ -493,20 +493,20 @@ func @loop_nest_operand3() {
// UNROLL-BY-4-LABEL: func @loop_nest_operand4(%arg0: index) {
func @loop_nest_operand4(%N : index) {
// UNROLL-BY-4: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// UNROLL-BY-4: for %i1 = [[MAP12]]()[%arg0] to #map{{[0-9]+}}()[%arg0] step 4 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-BY-4: affine.for %i1 = [[MAP12]]()[%arg0] to #map{{[0-9]+}}()[%arg0] step 4 {
// UNROLL-BY-4: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
// A cleanup loop will be generated here.
// UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
// UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
// UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
// Specify the lower bound so that both lb and ub operands match.
for %j = ()[s0] -> (0)()[%N] to %N {
affine.for %j = ()[s0] -> (0)()[%N] to %N {
%x = "foo"() : () -> i32
@ -518,7 +518,7 @@ func @loop_nest_unroll_full() {
// CHECK-NEXT: %0 = "foo"() : () -> i32
// CHECK-NEXT: %1 = "bar"() : () -> i32
// CHECK-NEXT: return
for %i = 0 to 1 {
affine.for %i = 0 to 1 {
%x = "foo"() : () -> i32
%y = "bar"() : () -> i32
@ -527,7 +527,7 @@ func @loop_nest_unroll_full() {
// UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop()
func @unroll_by_one_should_promote_single_iteration_loop() {
for %i = 0 to 1 {
affine.for %i = 0 to 1 {
%x = "foo"(%i) : (index) -> i32
@ -42,7 +42,7 @@
;; Keywords
'(;; Toplevel entities
"br" "ceildiv" "cfgfunc" "cond_br" "else" "extfunc" "false" "floordiv" "for" "if" "mlfunc" "mod" "return" "size" "step" "to" "true" "??" ) 'symbols) . font-lock-keyword-face))
"br" "ceildiv" "func" "cond_br" "else" "extfunc" "false" "floordiv" "affine.for" "if" "mod" "return" "size" "step" "to" "true" "??" ) 'symbols) . font-lock-keyword-face))
"Syntax highlighting for MLIR.")
;; Emacs 23 compatibility.
@ -10,9 +10,9 @@ syn keyword mlirType index i1 i2 i4 i8 i13 i16 i32 i64
\ f16 f32 tf_control
syn keyword mlirType memref tensor vector
syntax keyword mlirKeywords extfunc cfgfunc mlfunc for to step return
syntax keyword mlirKeywords extfunc func to step return
syntax keyword mlirConditional if else
syntax keyword mlirCoreOps dim addf addi subf subi mulf muli cmpi select constant affine.apply call call_indirect extract_element getTensor memref_cast tensor_cast load store alloc dealloc dma_start dma_wait
syntax keyword mlirCoreOps dim addf addi subf subi mulf muli cmpi select constant affine.apply affine.for call call_indirect extract_element getTensor memref_cast tensor_cast load store alloc dealloc dma_start dma_wait
syn match mlirInt "-\=\<\d\+\>"
syn match mlirFloat "-\=\<\d\+\.\d\+\>"
Reference in New Issue