forked from OSchip/llvm-project

[mlir][NFC] Update dialect/op documentation to be consistent

Summary: This revision performs a number of cleanups on operation documentation
to make it consistent, e.g. using `mlir` code blocks, uniform formatting, etc.
It also pulls the auto-generated documentation into the hand-written
documentation for the dialects that have a dedicated top-level dialect file.
This updates the documentation for all dialects aside from SPIRV and STD; those
will be updated in a follow-up.

Differential Revision: https://reviews.llvm.org/D76734

commit 16f27b70a5 (parent f86104bb68)
@ -1,4 +1,4 @@
|
|||
# Affine Dialect
|
||||
# `affine` Dialect
|
||||
|
||||
This dialect provides a powerful abstraction for affine operations and analyses.
|
||||
|
||||
|
@ -295,140 +295,9 @@ affine.if #set42(%i, %j)[%M, %N] {
|
|||
|
||||
## Operations
|
||||
|
||||
#### 'affine.apply' operation
|
||||
[include "Dialects/AffineOps.md"]
|
||||
|
||||
Syntax:
|
||||
|
||||
```
|
||||
operation ::= ssa-id `=` `affine.apply` affine-map-attribute dim-and-symbol-use-list
|
||||
```
|
||||
|
||||
The `affine.apply` operation applies an
|
||||
[affine mapping](#affine-expressions) to a list of SSA values,
|
||||
yielding a single SSA value. The number of dimension and symbol arguments to
|
||||
affine.apply must be equal to the respective number of dimensional and symbolic
|
||||
inputs to the affine mapping; the `affine.apply` operation always returns one
|
||||
value. The input operands and result must all have 'index' type.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
#map10 = affine_map<(d0, d1) -> (d0 floordiv 8 + d1 floordiv 128)>
|
||||
...
|
||||
%1 = affine.apply #map10 (%s, %t)
|
||||
|
||||
// Inline example.
|
||||
%2 = affine.apply affine_map<(i)[s0] -> (i+s0)> (%42)[%n]
|
||||
```
|
||||
|
||||
#### 'affine.for' operation
|
||||
|
||||
Syntax:
|
||||
|
||||
```
|
||||
operation ::= `affine.for` ssa-id `=` lower-bound `to` upper-bound
|
||||
(`step` integer-literal)? `{` op* `}`
|
||||
|
||||
lower-bound ::= `max`? affine-map-attribute dim-and-symbol-use-list | shorthand-bound
|
||||
upper-bound ::= `min`? affine-map-attribute dim-and-symbol-use-list | shorthand-bound
|
||||
shorthand-bound ::= ssa-id | `-`? integer-literal
|
||||
```
|
||||
|
||||
The `affine.for` operation represents an affine loop nest. It has one region
|
||||
containing its body. This region must contain one block that terminates with
|
||||
[`affine.terminator`](#affineterminator-operation). *Note:* when `affine.for` is
|
||||
printed in custom format, the terminator is omitted. The block has one argument
|
||||
of [`index`](../LangRef.md#index-type) type that represents the induction
|
||||
variable of the loop.
|
||||
|
||||
The `affine.for` operation executes its body a number of times iterating from a
|
||||
lower bound to an upper bound by a stride. The stride, represented by `step`, is
|
||||
a positive constant integer which defaults to "1" if not present. The lower and
|
||||
upper bounds specify a half-open range: the range includes the lower bound but
|
||||
does not include the upper bound.
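
For instance, a minimal sketch (the loop body is elided) of a loop over the
half-open range [0, 10) with a step of 2, which visits 0, 2, 4, 6 and 8:

```mlir
affine.for %i = 0 to 10 step 2 {
  // The induction variable %i has 'index' type; the upper bound 10 is never reached.
}
```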
|
||||
|
||||
The lower and upper bounds of an `affine.for` operation are represented as an
|
||||
application of an affine mapping to a list of SSA values passed to the map. The
|
||||
[same restrictions](#restrictions-on-dimensions-and-symbols) hold for these SSA
|
||||
values as for all bindings of SSA values to dimensions and symbols.
|
||||
|
||||
The affine mappings for the bounds may return multiple results, in which case
|
||||
the `max`/`min` keywords are required (for the lower/upper bound respectively),
|
||||
and the bound is the maximum/minimum of the returned values. There is no
|
||||
semantic ambiguity, but MLIR syntax requires the use of these keywords to make
|
||||
things more obvious to human readers.
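
As an illustrative sketch (the map alias `#lb` below is hypothetical), a
two-result lower-bound map requires the `max` keyword:

```mlir
#lb = affine_map<()[s0] -> (0, s0 - 10)>

// Starts at whichever of 0 and %N - 10 is larger.
affine.for %i = max #lb()[%N] to %N {
}
```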
|
||||
|
||||
Many upper and lower bounds are simple, so MLIR accepts two custom form
|
||||
syntaxes: the form that accepts a single 'ssa-id' (e.g. `%N`) is shorthand for
|
||||
applying that SSA value to a function that maps a single symbol to itself, e.g.,
|
||||
`()[s]->(s)()[%N]`. The integer literal form (e.g. `-42`) is shorthand for a
|
||||
nullary mapping function that returns the constant value (e.g. `()->(-42)()`).
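
For instance, a sketch of the shorthand forms next to an expanded equivalent
(the alias `#ubN` is hypothetical; `%N` is assumed to hold an `index` value):

```mlir
#ubN = affine_map<()[s0] -> (s0)>

// Shorthand: integer-literal lower bound and ssa-id upper bound.
affine.for %i = 0 to %N {
}

// Roughly the same loop with the upper bound spelled as a symbol map.
affine.for %j = 0 to #ubN()[%N] {
}
```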
|
||||
|
||||
Example showing reverse iteration of the inner loop:
|
||||
|
||||
```mlir
|
||||
#map57 = affine_map<(d0)[s0] -> (s0 - d0 - 1)>
|
||||
|
||||
func @simple_example(%A: memref<?x?xf32>, %B: memref<?x?xf32>) {
|
||||
%N = dim %A, 0 : memref<?x?xf32>
|
||||
affine.for %i = 0 to %N step 1 {
|
||||
affine.for %j = 0 to %N { // implicitly steps by 1
|
||||
%0 = affine.apply #map57(%j)[%N]
|
||||
%tmp = call @F1(%A, %i, %0) : (memref<?x?xf32>, index, index)->(f32)
|
||||
call @F2(%tmp, %B, %i, %0) : (f32, memref<?x?xf32>, index, index)->()
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
```
|
||||
|
||||
#### 'affine.if' operation
|
||||
|
||||
Syntax:
|
||||
|
||||
```
|
||||
operation ::= `affine.if` if-op-cond `{` op* `}` (`else` `{` op* `}`)?
|
||||
if-op-cond ::= integer-set-attr dim-and-symbol-use-list
|
||||
```
|
||||
|
||||
The `affine.if` operation restricts execution to a subset of the loop iteration
|
||||
space defined by an integer set (a conjunction of affine constraints). A single
|
||||
`affine.if` may end with an optional `else` clause.
|
||||
|
||||
The condition of the `affine.if` is represented by an
|
||||
[integer set](#integer-sets) (a conjunction of affine constraints),
|
||||
and the SSA values bound to the dimensions and symbols in the integer set. The
|
||||
[same restrictions](#restrictions-on-dimensions-and-symbols) hold for these SSA
|
||||
values as for all bindings of SSA values to dimensions and symbols.
|
||||
|
||||
The `affine.if` operation contains two regions for the "then" and "else"
|
||||
clauses. The latter may be empty (i.e. contain no blocks), meaning the absence
|
||||
of the else clause. When non-empty, both regions must contain exactly one block
|
||||
terminating with [`affine.terminator`](#affineterminator-operation). *Note:*
|
||||
when `affine.if` is printed in custom format, the terminator is omitted. These
|
||||
blocks must not have any arguments.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
#set = affine_set<(d0, d1)[s0]: (d0 - 10 >= 0, s0 - d0 - 9 >= 0,
|
||||
d1 - 10 >= 0, s0 - d1 - 9 >= 0)>
|
||||
func @reduced_domain_example(%A, %X, %N) : (memref<10xi32>, i32, i32) {
|
||||
affine.for %i = 0 to %N {
|
||||
affine.for %j = 0 to %N {
|
||||
%0 = affine.apply #map42(%j)
|
||||
%tmp = call @S1(%X, %i, %0)
|
||||
affine.if #set(%i, %j)[%N] {
|
||||
%1 = affine.apply #map43(%i, %j)
|
||||
call @S2(%tmp, %A, %i, %1)
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
```
|
||||
|
||||
#### 'affine.load' operation
|
||||
### 'affine.load' operation
|
||||
|
||||
Syntax:
|
||||
|
||||
|
@ -458,7 +327,7 @@ Example:
|
|||
|
||||
```
|
||||
|
||||
#### 'affine.store' operation
|
||||
### 'affine.store' operation
|
||||
|
||||
Syntax:
|
||||
|
||||
|
@ -488,7 +357,7 @@ Example:
|
|||
|
||||
```
|
||||
|
||||
#### 'affine.dma_start' operation
|
||||
### 'affine.dma_start' operation
|
||||
|
||||
Syntax:
|
||||
|
||||
|
@ -519,7 +388,6 @@ specified. The value of 'num_elements' must be a multiple of
|
|||
Example:
|
||||
|
||||
```mlir
|
||||
|
||||
For example, a DmaStartOp operation that transfers 256 elements of a memref
|
||||
'%src' in memory space 0 at indices [%i + 3, %j] to memref '%dst' in memory
|
||||
space 1 at indices [%k + 7, %l], would be specified as follows:
|
||||
|
@ -537,10 +405,9 @@ space 1 at indices [%k + 7, %l], would be specified as follows:
|
|||
|
||||
affine.dma_start %src[%i, %j], %dst[%k, %l], %tag[%idx], %num_elements,
|
||||
%stride, %num_elt_per_stride : ...
|
||||
|
||||
```
|
||||
|
||||
#### 'affine.dma_wait' operation
|
||||
### 'affine.dma_wait' operation
|
||||
|
||||
Syntax:
|
||||
|
||||
|
@ -558,54 +425,9 @@ associated with the DMA operation. For example:
|
|||
Example:
|
||||
|
||||
```mlir
|
||||
|
||||
affine.dma_start %src[%i, %j], %dst[%k, %l], %tag[%index], %num_elements :
|
||||
memref<2048xf32, 0>, memref<256xf32, 1>, memref<1xi32, 2>
|
||||
...
|
||||
...
|
||||
affine.dma_wait %tag[%index], %num_elements : memref<1xi32, 2>
|
||||
|
||||
affine.dma_start %src[%i, %j], %dst[%k, %l], %tag[%index], %num_elements :
|
||||
memref<2048xf32, 0>, memref<256xf32, 1>, memref<1xi32, 2>
|
||||
...
|
||||
...
|
||||
affine.dma_wait %tag[%index], %num_elements : memref<1xi32, 2>
|
||||
```
|
||||
|
||||
#### 'affine.min' operation
|
||||
|
||||
Syntax:
|
||||
|
||||
```
|
||||
operation ::= ssa-id `=` `affine.min` affine-map-attribute dim-and-symbol-use-list
|
||||
```
|
||||
|
||||
The `affine.min` operation applies an
|
||||
[affine mapping](#affine-expressions) to a list of SSA values, and returns the
|
||||
minimum value of all result expressions. The number of dimension and symbol
|
||||
arguments to affine.min must be equal to the respective number of dimensional
|
||||
and symbolic inputs to the affine mapping; the `affine.min` operation always
|
||||
returns one value. The input operands and result must all have 'index' type.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
|
||||
%0 = affine.min affine_map<(d0)[s0] -> (1000, d0 + 512, s0)> (%arg0)[%arg1]
|
||||
|
||||
```
|
||||
|
||||
#### `affine.terminator` operation
|
||||
|
||||
Syntax:
|
||||
|
||||
```
|
||||
operation ::= `"affine.terminator"() : () -> ()`
|
||||
```
|
||||
|
||||
Affine terminator is a special terminator operation for blocks inside affine
|
||||
loops ([`affine.for`](#affinefor-operation)) and branches
|
||||
([`affine.if`](#affineif-operation)). It unconditionally transmits the control
|
||||
flow to the successor of the operation enclosing the region.
|
||||
|
||||
*Rationale*: bodies of affine operations are [blocks](../LangRef.md#blocks) that
|
||||
must have terminators. Loops and branches represent structured control flow and
|
||||
should not accept arbitrary branches as terminators.
|
||||
|
||||
This operation does _not_ have a custom syntax. However, affine control
|
||||
operations omit the terminator in their custom syntax for brevity.
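
For instance, a sketch of a loop whose terminator is written explicitly in its
generic form (normally it would simply be omitted):

```mlir
affine.for %i = 0 to 10 {
  // Implicitly inserted when absent from the custom syntax.
  "affine.terminator"() : () -> ()
}
```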
@ -1,4 +1,4 @@
|
|||
# GPU Dialect
|
||||
# `gpu` Dialect
|
||||
|
||||
Note: this dialect is more likely to change than others in the near future; use
|
||||
with caution.
|
||||
|
@ -36,97 +36,4 @@ structure and representing analysis results in the IR.
|
|||
|
||||
## Operations
|
||||
|
||||
### `gpu.block_dim`
|
||||
|
||||
Returns the number of threads in the thread block (aka the block size) along the
|
||||
x, y, or z `dimension`.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
|
||||
```
|
||||
|
||||
### `gpu.block_id`
|
||||
|
||||
Returns the block id, i.e. the index of the current block within the grid along
|
||||
the x, y, or z `dimension`.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
|
||||
```
|
||||
|
||||
### `gpu.grid_dim`
|
||||
|
||||
Returns the number of thread blocks in the grid along the x, y, or z
|
||||
`dimension`.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
|
||||
```
|
||||
|
||||
### `gpu.thread_id`
|
||||
|
||||
Returns the thread id, i.e. the index of the current thread within the block
|
||||
along the x, y, or z `dimension`.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
|
||||
```
|
||||
|
||||
### `gpu.yield`
|
||||
|
||||
`gpu.yield` is a special terminator operation for blocks inside regions in gpu ops. It
|
||||
returns values to the immediately enclosing gpu op.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
gpu.yield %f0, %f1 : f32, f32
|
||||
```
|
||||
|
||||
### `gpu.all_reduce`
|
||||
|
||||
The "all_reduce" op reduces the value of every work item across a local
|
||||
workgroup. The result is equal for all work items of a workgroup.
|
||||
|
||||
For example, both
|
||||
|
||||
```mlir
|
||||
%1 = "gpu.all_reduce"(%0) ({}) { op = "add" } : (f32) -> (f32)
|
||||
%2 = "gpu.all_reduce"(%0) ({
|
||||
^bb(%lhs : f32, %rhs : f32):
|
||||
%sum = addf %lhs, %rhs : f32
|
||||
"gpu.yield"(%sum) : (f32) -> ()
|
||||
}) : (f32) -> (f32)
|
||||
```
|
||||
|
||||
compute the sum of each work item's %0 value. The first version specifies the
|
||||
accumulation as an operation, whereas the second version specifies the accumulation
as a code region. The accumulation operation must be either `add` or `mul`.
|
||||
|
||||
Either none or all work items of a workgroup need to execute this op
|
||||
in convergence.
|
||||
|
||||
### `gpu.barrier`
|
||||
|
||||
The "barrier" op synchronizes all work items of a workgroup. It is used
|
||||
to coordinate communication between the work items of the workgroup.
|
||||
|
||||
```mlir
|
||||
gpu.barrier
|
||||
```
|
||||
|
||||
waits until all work items in the workgroup have reached this point and all
|
||||
memory accesses made by these work items prior to the op are visible to all work
|
||||
items in the workgroup. Data hazards between work items accessing the same
|
||||
memory can be avoided by synchronizing work items in-between these accesses.
|
||||
|
||||
Either none or all work items of a workgroup need to execute this op
|
||||
in convergence.
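
As an illustrative sketch (the buffer, the index names and the workgroup memory
space `3` are assumptions, not part of the op definition), the barrier can be
used to make one work item's store visible before another work item loads it:

```mlir
store %val, %buf[%tid] : memref<256xf32, 3>
gpu.barrier
// Safe to read a slot written by a different work item after the barrier.
%other = load %buf[%other_tid] : memref<256xf32, 3>
```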
|
||||
[include "Dialects/GPUOps.md"]
@ -1,4 +1,4 @@
|
|||
# LLVM IR Dialect
|
||||
# `llvm` Dialect
|
||||
|
||||
This dialect wraps the LLVM IR types and instructions into MLIR types and
|
||||
operations. It provides several additional operations that are necessary to
@ -1,4 +1,4 @@
|
|||
# Linalg Dialect
|
||||
# `linalg` Dialect
|
||||
|
||||
[TOC]
|
||||
|
||||
|
@ -469,3 +469,7 @@ These key questions (and much more) should be really thought of in the general
|
|||
context of MLIR in which different levels of IR interoperate seamlessly. In
|
||||
practice, it is not necessary (or beneficial) to try and solve all problems in the
|
||||
same IR.
|
||||
|
||||
## Operations
|
||||
|
||||
[include "Dialects/LinalgOps.md"]
@ -44,22 +44,23 @@ def ImplicitAffineTerminator
|
|||
def AffineApplyOp : Affine_Op<"apply", [NoSideEffect]> {
|
||||
let summary = "affine apply operation";
|
||||
let description = [{
|
||||
The affine.apply operation applies an affine mapping to a list of SSA
|
||||
values, yielding a single SSA value. The number of dimension and symbol
|
||||
arguments to affine.apply must be equal to the respective number of
|
||||
dimensional and symbolic inputs to the affine mapping; the affine mapping
|
||||
has to be one-dimensional, and so the affine.apply operation always returns
|
||||
one value. The input operands and result must all have ‘index’ type.
|
||||
The affine.apply operation applies an [affine mapping](#affine-expressions)
|
||||
to a list of SSA values, yielding a single SSA value. The number of
|
||||
dimension and symbol arguments to `affine.apply` must be equal to the
|
||||
respective number of dimensional and symbolic inputs to the affine mapping;
|
||||
the affine mapping has to be one-dimensional, and so the `affine.apply`
|
||||
operation always returns one value. The input operands and result must all
|
||||
have 'index' type.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
#map10 = affine_map<(d0, d1) -> (d0 floordiv 8 + d1 floordiv 128)>
|
||||
...
|
||||
%1 = affine.apply #map10 (%s, %t)
|
||||
#map10 = affine_map<(d0, d1) -> (d0 floordiv 8 + d1 floordiv 128)>
|
||||
...
|
||||
%1 = affine.apply #map10 (%s, %t)
|
||||
|
||||
// Inline example.
|
||||
%2 = affine.apply affine_map<(i)[s0] -> (i+s0)> (%42)[%n]
|
||||
// Inline example.
|
||||
%2 = affine.apply affine_map<(i)[s0] -> (i+s0)> (%42)[%n]
|
||||
```
|
||||
}];
|
||||
let arguments = (ins AffineMapAttr:$map, Variadic<Index>:$mapOperands);
|
||||
|
@ -100,33 +101,66 @@ def AffineForOp : Affine_Op<"for",
|
|||
DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
|
||||
let summary = "for operation";
|
||||
let description = [{
|
||||
The "affine.for" operation represents an affine loop nest, defining an SSA
|
||||
value for its induction variable. It has one region capturing the loop body.
|
||||
The induction variable is represented as a argument of this region. This SSA
|
||||
value always has type index, which is the size of the machine word. The
|
||||
stride, represented by step, is a positive constant integer which defaults
|
||||
to "1" if not present. The lower and upper bounds specify a half-open range:
|
||||
the range includes the lower bound but does not include the upper bound.
|
||||
Syntax:
|
||||
|
||||
The body region must contain exactly one block that terminates with
|
||||
"affine.terminator". Calling AffineForOp::build will create such region
|
||||
and insert the terminator, so will the parsing even in cases if it is absent
|
||||
from the custom format.
|
||||
```
|
||||
operation ::= `affine.for` ssa-id `=` lower-bound `to` upper-bound
|
||||
(`step` integer-literal)? `{` op* `}`
|
||||
|
||||
The lower and upper bounds of a for operation are represented as an
|
||||
lower-bound ::= `max`? affine-map-attribute dim-and-symbol-use-list | shorthand-bound
|
||||
upper-bound ::= `min`? affine-map-attribute dim-and-symbol-use-list | shorthand-bound
|
||||
shorthand-bound ::= ssa-id | `-`? integer-literal
|
||||
```
|
||||
|
||||
The `affine.for` operation represents an affine loop nest. It has one region
|
||||
containing its body. This region must contain one block that terminates with
|
||||
[`affine.terminator`](#affineterminator-operation). *Note:* when
|
||||
`affine.for` is printed in custom format, the terminator is omitted. The
|
||||
block has one argument of [`index`](../LangRef.md#index-type) type that
|
||||
represents the induction variable of the loop.
|
||||
|
||||
The `affine.for` operation executes its body a number of times iterating
|
||||
from a lower bound to an upper bound by a stride. The stride, represented by
|
||||
`step`, is a positive constant integer which defaults to "1" if not present.
|
||||
The lower and upper bounds specify a half-open range: the range includes the
|
||||
lower bound but does not include the upper bound.
|
||||
|
||||
The lower and upper bounds of an `affine.for` operation are represented as an
|
||||
application of an affine mapping to a list of SSA values passed to the map.
|
||||
The same restrictions hold for these SSA values as for all bindings of SSA
|
||||
values to dimensions and symbols. The affine mappings for the bounds may
|
||||
return multiple results, in which case the max/min keywords are required
|
||||
(for the lower/upper bound respectively), and the bound is the
|
||||
maximum/minimum of the returned values.
|
||||
The [same restrictions](#restrictions-on-dimensions-and-symbols) hold for
|
||||
these SSA values as for all bindings of SSA values to dimensions and
|
||||
symbols.
|
||||
|
||||
Example:
|
||||
The affine mappings for the bounds may return multiple results, in which
|
||||
case the `max`/`min` keywords are required (for the lower/upper bound
|
||||
respectively), and the bound is the maximum/minimum of the returned values.
|
||||
There is no semantic ambiguity, but MLIR syntax requires the use of these
|
||||
keywords to make things more obvious to human readers.
|
||||
|
||||
affine.for %i = 1 to 10 {
|
||||
...
|
||||
Many upper and lower bounds are simple, so MLIR accepts two custom form
|
||||
syntaxes: the form that accepts a single 'ssa-id' (e.g. `%N`) is shorthand
|
||||
for applying that SSA value to a function that maps a single symbol to
|
||||
itself, e.g., `()[s]->(s)()[%N]`. The integer literal form (e.g. `-42`) is
|
||||
shorthand for a nullary mapping function that returns the constant value
|
||||
(e.g. `()->(-42)()`).
|
||||
|
||||
Example showing reverse iteration of the inner loop:
|
||||
|
||||
```mlir
|
||||
#map57 = affine_map<(d0)[s0] -> (s0 - d0 - 1)>
|
||||
|
||||
func @simple_example(%A: memref<?x?xf32>, %B: memref<?x?xf32>) {
|
||||
%N = dim %A, 0 : memref<?x?xf32>
|
||||
affine.for %i = 0 to %N step 1 {
|
||||
affine.for %j = 0 to %N { // implicitly steps by 1
|
||||
%0 = affine.apply #map57(%j)[%N]
|
||||
%tmp = call @F1(%A, %i, %0) : (memref<?x?xf32>, index, index)->(f32)
|
||||
call @F2(%tmp, %B, %i, %0) : (f32, memref<?x?xf32>, index, index)->()
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
```
|
||||
}];
|
||||
let arguments = (ins Variadic<AnyType>);
|
||||
let regions = (region SizedRegion<1>:$region);
|
||||
|
@ -236,23 +270,51 @@ def AffineIfOp : Affine_Op<"if",
|
|||
[ImplicitAffineTerminator, RecursiveSideEffects]> {
|
||||
let summary = "if-then-else operation";
|
||||
let description = [{
|
||||
The "if" operation represents an if-then-else construct for conditionally
|
||||
executing two regions of code. The operands to an if operation are an
|
||||
IntegerSet condition and a set of symbol/dimension operands to the
|
||||
condition set. The operation produces no results. For example:
|
||||
Syntax:
|
||||
|
||||
affine.if #set(%i) {
|
||||
...
|
||||
} else {
|
||||
...
|
||||
}
|
||||
```
|
||||
operation ::= `affine.if` if-op-cond `{` op* `}` (`else` `{` op* `}`)?
|
||||
if-op-cond ::= integer-set-attr dim-and-symbol-use-list
|
||||
```
|
||||
|
||||
The 'else' blocks to the if operation are optional, and may be omitted. For
|
||||
example:
|
||||
The `affine.if` operation restricts execution to a subset of the loop
|
||||
iteration space defined by an integer set (a conjunction of affine
|
||||
constraints). A single `affine.if` may end with an optional `else` clause.
|
||||
|
||||
affine.if #set(%i) {
|
||||
...
|
||||
}
|
||||
The condition of the `affine.if` is represented by an
|
||||
[integer set](#integer-sets) (a conjunction of affine constraints),
|
||||
and the SSA values bound to the dimensions and symbols in the integer set.
|
||||
The [same restrictions](#restrictions-on-dimensions-and-symbols) hold for
|
||||
these SSA values as for all bindings of SSA values to dimensions and
|
||||
symbols.
|
||||
|
||||
The `affine.if` operation contains two regions for the "then" and "else"
|
||||
clauses. The latter may be empty (i.e. contain no blocks), meaning the
|
||||
absence of the else clause. When non-empty, both regions must contain
|
||||
exactly one block terminating with
|
||||
[`affine.terminator`](#affineterminator-operation). *Note:* when `affine.if`
|
||||
is printed in custom format, the terminator is omitted. These blocks must
|
||||
not have any arguments.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
#set = affine_set<(d0, d1)[s0]: (d0 - 10 >= 0, s0 - d0 - 9 >= 0,
|
||||
d1 - 10 >= 0, s0 - d1 - 9 >= 0)>
|
||||
func @reduced_domain_example(%A, %X, %N) : (memref<10xi32>, i32, i32) {
|
||||
affine.for %i = 0 to %N {
|
||||
affine.for %j = 0 to %N {
|
||||
%0 = affine.apply #map42(%j)
|
||||
%tmp = call @S1(%X, %i, %0)
|
||||
affine.if #set(%i, %j)[%N] {
|
||||
%1 = affine.apply #map43(%i, %j)
|
||||
call @S2(%tmp, %A, %i, %1)
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
```
|
||||
}];
|
||||
let arguments = (ins Variadic<AnyType>);
|
||||
let regions = (region SizedRegion<1>:$thenRegion, AnyRegion:$elseRegion);
|
||||
|
@ -328,12 +390,24 @@ class AffineMinMaxOpBase<string mnemonic, list<OpTrait> traits = []> :
|
|||
def AffineMinOp : AffineMinMaxOpBase<"min", [NoSideEffect]> {
|
||||
let summary = "min operation";
|
||||
let description = [{
|
||||
The "min" operation computes the minimum value result from a multi-result
|
||||
affine map.
|
||||
Syntax:
|
||||
|
||||
```
|
||||
operation ::= ssa-id `=` `affine.min` affine-map-attribute dim-and-symbol-use-list
|
||||
```
|
||||
|
||||
The `affine.min` operation applies an [affine mapping](#affine-expressions)
|
||||
to a list of SSA values, and returns the minimum value of all result
|
||||
expressions. The number of dimension and symbol arguments to `affine.min`
|
||||
must be equal to the respective number of dimensional and symbolic inputs to
|
||||
the affine mapping; the `affine.min` operation always returns one value. The
|
||||
input operands and result must all have 'index' type.
|
||||
|
||||
Example:
|
||||
|
||||
%0 = affine.min (d0) -> (1000, d0 + 512) (%i0) : index
|
||||
```mlir
|
||||
%0 = affine.min affine_map<(d0)[s0] -> (1000, d0 + 512, s0)> (%arg0)[%arg1]
|
||||
```
|
||||
}];
|
||||
}
|
||||
|
||||
|
@ -345,7 +419,9 @@ def AffineMaxOp : AffineMinMaxOpBase<"max", [NoSideEffect]> {
|
|||
|
||||
Example:
|
||||
|
||||
%0 = affine.max (d0) -> (1000, d0 + 512) (%i0) : index
|
||||
```mlir
|
||||
%0 = affine.max (d0) -> (1000, d0 + 512) (%i0) : index
|
||||
```
|
||||
}];
|
||||
}
|
||||
|
||||
|
@ -375,9 +451,9 @@ def AffineParallelOp : Affine_Op<"parallel", [ImplicitAffineTerminator]> {
|
|||
Example:
|
||||
|
||||
```mlir
|
||||
affine.parallel (%i, %j) = (0, 0) to (10, 10) step (1, 1) {
|
||||
...
|
||||
}
|
||||
affine.parallel (%i, %j) = (0, 0) to (10, 10) step (1, 1) {
|
||||
...
|
||||
}
|
||||
```
|
||||
}];
|
||||
|
||||
|
@ -430,8 +506,9 @@ def AffinePrefetchOp : Affine_Op<"prefetch"> {
|
|||
a read/write specifier, a locality hint, and a cache type specifier as shown
|
||||
below:
|
||||
|
||||
affine.prefetch %0[%i, %j + 5], read, locality<3>, data
|
||||
: memref<400x400xi32>
|
||||
```mlir
|
||||
affine.prefetch %0[%i, %j + 5], read, locality<3>, data : memref<400x400xi32>
|
||||
```
|
||||
|
||||
The read/write specifier is either 'read' or 'write', the locality hint
|
||||
specifier ranges from locality<0> (no locality) to locality<3> (extremely
|
||||
|
@ -501,9 +578,20 @@ def AffineTerminatorOp :
|
|||
Affine_Op<"terminator", [NoSideEffect, Terminator]> {
|
||||
let summary = "affine terminator operation";
|
||||
let description = [{
|
||||
Syntax:
|
||||
|
||||
```
|
||||
operation ::= `"affine.terminator"() : () -> ()`
|
||||
```
|
||||
|
||||
Affine terminator is a special terminator operation for blocks inside affine
|
||||
loops and branches. It unconditionally transmits the control flow to the
|
||||
successor of the operation enclosing the region.
|
||||
loops ([`affine.for`](#affinefor-operation)) and branches
|
||||
([`affine.if`](#affineif-operation)). It unconditionally transmits the
|
||||
control flow to the successor of the operation enclosing the region.
|
||||
|
||||
*Rationale*: bodies of affine operations are [blocks](../LangRef.md#blocks)
|
||||
that must have terminators. Loops and branches represent structured control
|
||||
flow and should not accept arbitrary branches as terminators.
|
||||
|
||||
This operation does _not_ have a custom syntax. However, affine control
|
||||
operations omit the terminator in their custom syntax for brevity.
@ -1,2 +1,2 @@
|
|||
add_mlir_dialect(AffineOps affine)
|
||||
add_mlir_doc(AffineOps -gen-dialect-doc AffineDialect Dialects/)
|
||||
add_mlir_doc(AffineOps -gen-op-doc AffineOps Dialects/)
@ -1,5 +1,5 @@
|
|||
add_mlir_dialect(GPUOps gpu GPUOps)
|
||||
add_mlir_doc(GPUOps -gen-dialect-doc GPUDialect Dialects/)
|
||||
add_mlir_dialect(GPUOps gpu)
|
||||
add_mlir_doc(GPUOps -gen-op-doc GPUOps Dialects/)
|
||||
|
||||
set(LLVM_TARGET_DEFINITIONS ParallelLoopMapperAttr.td)
|
||||
mlir_tablegen(ParallelLoopMapperAttr.h.inc -gen-struct-attr-decls)
@ -36,10 +36,54 @@ class GPU_IndexOp<string mnemonic, list<OpTrait> traits = []> :
|
|||
let verifier = [{ return ::verifyIndexOp(*this); }];
|
||||
}
|
||||
|
||||
def GPU_BlockDimOp : GPU_IndexOp<"block_dim">;
|
||||
def GPU_BlockIdOp : GPU_IndexOp<"block_id">;
|
||||
def GPU_GridDimOp : GPU_IndexOp<"grid_dim">;
|
||||
def GPU_ThreadIdOp : GPU_IndexOp<"thread_id">;
|
||||
def GPU_BlockDimOp : GPU_IndexOp<"block_dim"> {
|
||||
let description = [{
|
||||
Returns the number of threads in the thread block (aka the block size) along
|
||||
the x, y, or z `dimension`.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
|
||||
```
|
||||
}];
|
||||
}
|
||||
def GPU_BlockIdOp : GPU_IndexOp<"block_id"> {
|
||||
let description = [{
|
||||
Returns the block id, i.e. the index of the current block within the grid
|
||||
along the x, y, or z `dimension`.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
|
||||
```
|
||||
}];
|
||||
}
|
||||
def GPU_GridDimOp : GPU_IndexOp<"grid_dim"> {
|
||||
let description = [{
|
||||
Returns the number of thread blocks in the grid along the x, y, or z
|
||||
`dimension`.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
|
||||
```
|
||||
}];
|
||||
}
|
||||
def GPU_ThreadIdOp : GPU_IndexOp<"thread_id"> {
|
||||
let description = [{
|
||||
Returns the thread id, i.e. the index of the current thread within the block
|
||||
along the x, y, or z `dimension`.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
|
||||
```
|
||||
}];
|
||||
}
|
||||
|
||||
def GPU_GPUFuncOp : GPU_Op<"func", [FunctionLike, IsolatedFromAbove, Symbol]> {
|
||||
let summary = "Function executable on a GPU";
|
||||
|
@ -471,13 +515,14 @@ def GPU_YieldOp : GPU_Op<"yield", [NoSideEffect, Terminator]>,
|
|||
Arguments<(ins Variadic<AnyType>:$values)> {
|
||||
let summary = "GPU yield operation";
|
||||
let description = [{
|
||||
"gpu.yield" is a special terminator operation for blocks inside regions
|
||||
`gpu.yield` is a special terminator operation for blocks inside regions
|
||||
in gpu ops. It returns values to the immediately enclosing gpu op.
|
||||
|
||||
Example:
|
||||
|
||||
```gpu.yield %f0, %f1 : f32, f32
|
||||
```
|
||||
```mlir
|
||||
gpu.yield %f0, %f1 : f32, f32
|
||||
```
|
||||
}];
|
||||
}
|
||||
|
||||
|
@ -509,18 +554,20 @@ def GPU_AllReduceOp : GPU_Op<"all_reduce",
|
|||
Results<(outs AnyType)> {
|
||||
let summary = "Reduce values among workgroup.";
|
||||
let description = [{
|
||||
The "all_reduce" op reduces the value of every work item across a local
|
||||
The `all_reduce` op reduces the value of every work item across a local
|
||||
workgroup. The result is equal for all work items of a workgroup.
|
||||
|
||||
For example, both
|
||||
|
||||
```mlir
|
||||
%1 = "gpu.all_reduce"(%0) ({}) { op = "add" } : (f32) -> (f32)
|
||||
%2 = "gpu.all_reduce"(%0) ({
|
||||
^bb(%lhs : f32, %rhs : f32):
|
||||
%sum = addf %lhs, %rhs : f32
|
||||
"gpu.yield"(%sum) : (f32) -> ()
|
||||
}) : (f32) -> (f32)
|
||||
```
|
||||
%1 = "gpu.all_reduce"(%0) ({}) { op = "add" } : (f32) -> (f32)
|
||||
%2 = "gpu.all_reduce"(%0) ({
|
||||
^bb(%lhs : f32, %rhs : f32):
|
||||
%sum = addf %lhs, %rhs : f32
|
||||
"gpu.yield"(%sum) : (f32) -> ()
|
||||
}) : (f32) -> (f32)
|
||||
```
|
||||
|
||||
compute the sum of each work item's %0 value. The first version specifies
|
||||
the accumulation as an operation, whereas the second version specifies the
accumulation as a code region. The accumulation operation must be one of:
|
||||
|
@ -550,11 +597,13 @@ def GPU_ShuffleOp : GPU_Op<"shuffle", [NoSideEffect]>,
|
|||
The "shuffle" op moves values to a different invocation within the same
|
||||
subgroup.
|
||||
|
||||
For example
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%1, %2 = gpu.shuffle %0, %offset, %width xor : f32
|
||||
```
|
||||
%1, %2 = gpu.shuffle %0, %offset, %width xor : f32
|
||||
```
|
||||
for lane k returns the value from lane `k ^ offset` and `true` if that lane
|
||||
|
||||
For lane k returns the value from lane `k ^ offset` and `true` if that lane
|
||||
is smaller than %width. Otherwise it returns an unspecified value and
|
||||
`false`. A lane is the index of an invocation relative to its subgroup.
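
For instance (an illustrative sketch; the operand names are hypothetical and
`%c1`/`%c4` are assumed to be i32 constants), with an offset of 1 in a
subgroup of width 4, lanes 0/1 and lanes 2/3 exchange their values pairwise:

```mlir
%shfl, %valid = gpu.shuffle %val, %c1, %c4 xor : f32
```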
|
||||
|
||||
|
@ -574,9 +623,10 @@ def GPU_BarrierOp : GPU_Op<"barrier"> {
|
|||
The "barrier" op synchronizes all work items of a workgroup. It is used
|
||||
to coordinate communication between the work items of the workgroup.
|
||||
|
||||
```mlir
|
||||
gpu.barrier
|
||||
```
|
||||
gpu.barrier
|
||||
```
|
||||
|
||||
waits until all work items in the workgroup have reached this point
|
||||
and all memory accesses made by these work items prior to the op are
|
||||
visible to all work items in the workgroup. Data hazards between work items
@ -1,5 +1,6 @@
|
|||
add_mlir_dialect(LinalgOps linalg)
|
||||
add_mlir_doc(LinalgDoc -gen-dialect-doc LinalgDialect Dialects/)
|
||||
add_mlir_doc(LinalgDoc -gen-op-doc LinalgOps Dialects/)
|
||||
|
||||
set(LLVM_TARGET_DEFINITIONS LinalgStructuredOps.td)
|
||||
mlir_tablegen(LinalgStructuredOps.h.inc -gen-op-decls)
|
||||
mlir_tablegen(LinalgStructuredOps.cpp.inc -gen-op-defs)
@ -44,7 +44,7 @@ def Linalg_RangeOp :
|
|||
Example:
|
||||
|
||||
```mlir
|
||||
%3 = linalg.range %0:%1:%2 : !linalg.range
|
||||
%3 = linalg.range %0:%1:%2 : !linalg.range
|
||||
```
|
||||
}];
|
||||
let builders = [OpBuilder<
|
||||
|
@ -91,15 +91,15 @@ def Linalg_ReshapeOp : Linalg_Op<"reshape", [NoSideEffect]>,
|
|||
Examples:
|
||||
|
||||
```mlir
|
||||
// Dimension collapse (i, j) -> i' and k -> k'
|
||||
%1 = linalg.reshape %0 [(i, j, k) -> (i, j), (i, j, k) -> (k)] :
|
||||
memref<?x?x?xf32, stride_spec> into memref<?x?xf32, stride_spec_2>
|
||||
// Dimension collapse (i, j) -> i' and k -> k'
|
||||
%1 = linalg.reshape %0 [(i, j, k) -> (i, j), (i, j, k) -> (k)] :
|
||||
memref<?x?x?xf32, stride_spec> into memref<?x?xf32, stride_spec_2>
|
||||
```
|
||||
|
||||
```mlir
|
||||
// Dimension expansion i -> (i', j') and (k) -> (k')
|
||||
%1 = linalg.reshape %0 [(i, j, k) -> (i, j), (i, j, k) -> (k)] :
|
||||
memref<?x?xf32, stride_spec> into memref<?x?x?xf32, stride_spec_2>
|
||||
// Dimension expansion i -> (i', j') and (k) -> (k')
|
||||
%1 = linalg.reshape %0 [(i, j, k) -> (i, j), (i, j, k) -> (k)] :
|
||||
memref<?x?xf32, stride_spec> into memref<?x?x?xf32, stride_spec_2>
|
||||
```
|
||||
}];
|
||||
|
||||
|
@ -151,22 +151,22 @@ def Linalg_SliceOp : Linalg_Op<"slice", [NoSideEffect]>,
|
|||
1. rank-preserving `slice`:
|
||||
|
||||
```mlir
|
||||
%4 = linalg.slice %0[%1, %2] : memref<?x?xf32, stride_spec>,
|
||||
!linalg.range, !linalg.range, memref<?x?xf32, stride_spec>
|
||||
```
|
||||
%4 = linalg.slice %0[%1, %2] : memref<?x?xf32, stride_spec>,
|
||||
!linalg.range, !linalg.range, memref<?x?xf32, stride_spec>
|
||||
```
|
||||
|
||||
2. rank-reducing `slice` (from 2-D to 1-D):
|
||||
|
||||
```mlir
|
||||
%4 = linalg.slice %0[%1, %2] : memref<?x?xf32, stride_spec>,
|
||||
index, !linalg.range, memref<?x?xf32, stride_spec>
|
||||
%4 = linalg.slice %0[%1, %2] : memref<?x?xf32, stride_spec>,
|
||||
index, !linalg.range, memref<?x?xf32, stride_spec>
|
||||
```
|
||||
|
||||
3. rank-reducing `slice` (from 2-D to 0-D):
|
||||
|
||||
```mlir
|
||||
%4 = linalg.slice %0[%1, %2] : memref<?x?xf32, stride_spec>,
|
||||
index, index, memref<?x?xf32, stride_spec>
|
||||
%4 = linalg.slice %0[%1, %2] : memref<?x?xf32, stride_spec>,
|
||||
index, index, memref<?x?xf32, stride_spec>
|
||||
```
|
||||
}];
|
||||
|
||||
|
@ -210,7 +210,7 @@ def Linalg_TransposeOp : Linalg_Op<"transpose", [NoSideEffect]>,
|
|||
Example:
|
||||
|
||||
```mlir
|
||||
%1 = linalg.transpose %0 (i, j) -> (j, i) : memref<?x?xf32, stride_spec>
|
||||
%1 = linalg.transpose %0 (i, j) -> (j, i) : memref<?x?xf32, stride_spec>
|
||||
```
|
||||
}];
|
||||
|
||||
|
@ -245,7 +245,7 @@ def Linalg_YieldOp : Linalg_Op<"yield", [NoSideEffect, Terminator]>,
|
|||
Example:
|
||||
|
||||
```mlir
|
||||
linalg.yield %f0, %f1 : f32, f32
|
||||
linalg.yield %f0, %f1 : f32, f32
|
||||
```
|
||||
}];
|
||||
}
@ -61,44 +61,48 @@ def CopyOp : LinalgStructured_Op<"copy", [NInputs<1>, NOutputs<1>]> {
|
|||
Copies the data in the input view into the output view.
|
||||
|
||||
Usage:
|
||||
```mlir
|
||||
linalg.copy(%arg0, %arg1) : memref<?xf32, stride_specification>,
|
||||
memref<?xf32, stride_specification>
|
||||
```
|
||||
|
||||
```mlir
|
||||
linalg.copy(%arg0, %arg1) : memref<?xf32, stride_specification>,
|
||||
memref<?xf32, stride_specification>
|
||||
```
|
||||
|
||||
One possible lowering to loop form is:
|
||||
```mlir
|
||||
%0 = linalg.dim %arg0, 0 : index
|
||||
loop.for %i0 = %c0 to %0 step %c1 {
|
||||
%1 = load %arg0[%i0] : memref<?xf32, stride_specification>
|
||||
store %1, %arg1[%i0] : memref<?xf32, stride_specification>
|
||||
}
|
||||
```
|
||||
|
||||
```mlir
|
||||
%0 = linalg.dim %arg0, 0 : index
|
||||
loop.for %i0 = %c0 to %0 step %c1 {
|
||||
%1 = load %arg0[%i0] : memref<?xf32, stride_specification>
|
||||
store %1, %arg1[%i0] : memref<?xf32, stride_specification>
|
||||
}
|
||||
```
|
||||
|
||||
Optionally, can take `input_permutation` and `output_permutation` attributes
|
||||
to reorder the dimensions of the input and output views.
|
||||
|
||||
Usage:
|
||||
```mlir
|
||||
linalg.copy(%arg0, %arg1) {inputPermutation : (i, j, k) -> (i, k, j),
|
||||
outputPermutation : (i, j, k) -> (k, j, i)} :
|
||||
memref<?x?x?xf32, stride_specification>,
|
||||
memref<?x?x?xf32, stride_specification>
|
||||
```
|
||||
|
||||
```mlir
|
||||
linalg.copy(%arg0, %arg1) {inputPermutation : (i, j, k) -> (i, k, j),
|
||||
outputPermutation : (i, j, k) -> (k, j, i)} :
|
||||
memref<?x?x?xf32, stride_specification>,
|
||||
memref<?x?x?xf32, stride_specification>
|
||||
```
|
||||
|
||||
One possible lowering to loop form is:
|
||||
```mlir
|
||||
%0 = linalg.dim %arg0, 0
|
||||
%1 = linalg.dim %arg0, 1
|
||||
%2 = linalg.dim %arg0, 2
|
||||
loop.for %i0 = %c0 to %{{.*}} step %c1 {
|
||||
loop.for %i1 = %c0 to %{{.*}} step %c1 {
|
||||
loop.for %i2 = %c0 to %{{.*}} step %c1 {
|
||||
%3 = load %arg0[%i0, %i2, %i1] :
|
||||
memref<?x?x?xf32, stride_specification>
|
||||
store %3, %arg1[%i2, %i1, %i0] :
|
||||
memref<?x?x?xf32, stride_specification>
|
||||
```
|
||||
|
||||
```mlir
|
||||
%0 = linalg.dim %arg0, 0
|
||||
%1 = linalg.dim %arg0, 1
|
||||
%2 = linalg.dim %arg0, 2
|
||||
loop.for %i0 = %c0 to %{{.*}} step %c1 {
|
||||
loop.for %i1 = %c0 to %{{.*}} step %c1 {
|
||||
loop.for %i2 = %c0 to %{{.*}} step %c1 {
|
||||
%3 = load %arg0[%i0, %i2, %i1] :
|
||||
memref<?x?x?xf32, stride_specification>
|
||||
store %3, %arg1[%i2, %i1, %i0] :
|
||||
memref<?x?x?xf32, stride_specification>
|
||||
```
|
||||
|
||||
The views are expected to be compatible for correctness but this is not
|
||||
enforced at the moment.
|
||||
|
@ -441,10 +445,10 @@ def GenericOp : GenericOpBase<"generic"> {
|
|||
specified as attributes. In pretty form, a linalg.generic op is written as:
|
||||
|
||||
```mlir
|
||||
linalg.generic #trait_attribute %A, %B, %C {other-attributes} :
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
linalg.generic #trait_attribute %A, %B, %C {other-attributes} :
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
```
|
||||
|
||||
Where #trait_attributes is an alias of a dictionary attribute containing:
|
||||
|
@ -474,41 +478,41 @@ def GenericOp : GenericOpBase<"generic"> {
|
|||
Example:
|
||||
Defining a #matmul_trait attribute in MLIR can be done as follows:
|
||||
```mlir
|
||||
func @fma(%a: f32, %b: f32, %c: f32) -> f32 {
|
||||
%d = mulf %a, %b: f32
|
||||
%e = addf %c, %d: f32
|
||||
return %e: f32
|
||||
}
|
||||
#matmul_accesses = [
|
||||
(m, n, k) -> (m, k),
|
||||
(m, n, k) -> (k, n),
|
||||
(m, n, k) -> (m, n)
|
||||
]
|
||||
#matmul_trait = {
|
||||
doc = "C(m, n) += A(m, k) * B(k, n)",
|
||||
fun = @fma,
|
||||
indexing_maps = #matmul_accesses,
|
||||
library_call = "linalg_matmul",
|
||||
n_views = [2, 1],
|
||||
iterator_types = ["parallel", "parallel", "reduction"]
|
||||
}
|
||||
func @fma(%a: f32, %b: f32, %c: f32) -> f32 {
|
||||
%d = mulf %a, %b: f32
|
||||
%e = addf %c, %d: f32
|
||||
return %e: f32
|
||||
}
|
||||
#matmul_accesses = [
|
||||
(m, n, k) -> (m, k),
|
||||
(m, n, k) -> (k, n),
|
||||
(m, n, k) -> (m, n)
|
||||
]
|
||||
#matmul_trait = {
|
||||
doc = "C(m, n) += A(m, k) * B(k, n)",
|
||||
fun = @fma,
|
||||
indexing_maps = #matmul_accesses,
|
||||
library_call = "linalg_matmul",
|
||||
n_views = [2, 1],
|
||||
iterator_types = ["parallel", "parallel", "reduction"]
|
||||
}
|
||||
```
|
||||
|
||||
And can be reused in multiple places as:
|
||||
```mlir
|
||||
linalg.generic #matmul_trait %A, %B, %C [other-attributes] :
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
linalg.generic #matmul_trait %A, %B, %C [other-attributes] :
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
```
|
||||
|
||||
This may lower to either:
|
||||
```mlir
|
||||
call @linalg_matmul(%A, %B, %C) :
|
||||
(memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>)
|
||||
-> ()
|
||||
call @linalg_matmul(%A, %B, %C) :
|
||||
(memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>)
|
||||
-> ()
|
||||
```
|
||||
|
||||
or IR resembling:
|
||||
|
@ -532,10 +536,10 @@ def GenericOp : GenericOpBase<"generic"> {
|
|||
mixing input and output ranked tensor values with input and output memrefs.
|
||||
|
||||
```mlir
|
||||
%C = linalg.generic #trait_attribute %A, %B {other-attributes} :
|
||||
tensor<?x?xf32>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
-> (tensor<?x?xf32>)
|
||||
%C = linalg.generic #trait_attribute %A, %B {other-attributes} :
|
||||
tensor<?x?xf32>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
-> (tensor<?x?xf32>)
|
||||
```
|
||||
|
||||
In this case, the number of outputs (args_out) must match the sum of (1) the
|
||||
|
@ -568,10 +572,10 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> {
|
|||
written as:
|
||||
|
||||
```mlir
|
||||
linalg.indexed_generic #trait_attribute %A, %B, %C {other-attributes} :
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
linalg.indexed_generic #trait_attribute %A, %B, %C {other-attributes} :
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
```
|
||||
|
||||
Where #trait_attributes is an alias of a dictionary attribute containing:
|
||||
|
@ -600,49 +604,53 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> {
|
|||
|
||||
Example:
|
||||
Defining a #matmul_trait attribute in MLIR can be done as follows:
|
||||
```mlir
|
||||
func @fma(%offset_m: index, %offset_n: index, %offset_k: index,
|
||||
%a: f32, %b: f32, %c: f32)
|
||||
-> f32
|
||||
{
|
||||
"some_optional_condition"(%offset_m, %offset_n, %offset_k)
|
||||
%d = mulf %a, %b: f32
|
||||
%e = addf %c, %d: f32
|
||||
return %e: f32
|
||||
}
|
||||
#matmul_accesses = [
|
||||
(m, n, k) -> (m, k),
|
||||
(m, n, k) -> (k, n),
|
||||
(m, n, k) -> (m, n)
|
||||
]
|
||||
#matmul_trait = {
|
||||
doc = "C(m, n) += A(m, k) * B(k, n)",
|
||||
fun = @fma,
|
||||
indexing_maps = #matmul_accesses,
|
||||
library_call = "linalg_matmul",
|
||||
n_views = [2, 1],
|
||||
iterator_types = ["parallel", "parallel", "reduction"]
|
||||
}
|
||||
```
|
||||
|
||||
```mlir
|
||||
func @fma(%offset_m: index, %offset_n: index, %offset_k: index,
|
||||
%a: f32, %b: f32, %c: f32)
|
||||
-> f32
|
||||
{
|
||||
"some_optional_condition"(%offset_m, %offset_n, %offset_k)
|
||||
%d = mulf %a, %b: f32
|
||||
%e = addf %c, %d: f32
|
||||
return %e: f32
|
||||
}
|
||||
#matmul_accesses = [
|
||||
(m, n, k) -> (m, k),
|
||||
(m, n, k) -> (k, n),
|
||||
(m, n, k) -> (m, n)
|
||||
]
|
||||
#matmul_trait = {
|
||||
doc = "C(m, n) += A(m, k) * B(k, n)",
|
||||
fun = @fma,
|
||||
indexing_maps = #matmul_accesses,
|
||||
library_call = "linalg_matmul",
|
||||
n_views = [2, 1],
|
||||
iterator_types = ["parallel", "parallel", "reduction"]
|
||||
}
|
||||
```
|
||||
|
||||
And can be reused in multiple places as:
|
||||
```mlir
|
||||
linalg.indexed_generic #matmul_trait %A, %B, %C [other-attributes] :
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
```
|
||||
|
||||
```mlir
|
||||
linalg.indexed_generic #matmul_trait %A, %B, %C [other-attributes] :
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
```
|
||||
|
||||
This may lower to either:
|
||||
```mlir
|
||||
call @linalg_matmul(%offset_m, %offset_n, %offset_k, %A, %B, %C) :
|
||||
(memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>)
|
||||
-> ()
|
||||
```
|
||||
|
||||
```mlir
|
||||
call @linalg_matmul(%offset_m, %offset_n, %offset_k, %A, %B, %C) :
|
||||
(memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>,
|
||||
memref<?x?xf32, stride_specification>)
|
||||
-> ()
|
||||
```
|
||||
|
||||
or IR resembling:
|
||||
|
||||
```mlir
|
||||
loop.for %m = %c0 to %M step %c1 {
|
||||
loop.for %n = %c0 to %N step %c1 {
|
||||
|
@ -664,10 +672,10 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> {
|
|||
memrefs.
|
||||
|
||||
```mlir
|
||||
%C = linalg.indexed_generic #trait_attribute %A, %B {other-attributes}
|
||||
: tensor<?x?xf32>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
-> (tensor<?x?xf32>)
|
||||
%C = linalg.indexed_generic #trait_attribute %A, %B {other-attributes}
|
||||
: tensor<?x?xf32>,
|
||||
memref<?x?xf32, stride_specification>
|
||||
-> (tensor<?x?xf32>)
|
||||
```
|
||||
|
||||
In this case, the number of outputs (args_out) must match the sum of (1) the
|
||||
|
|
|
@ -57,12 +57,12 @@ def ForOp : Loop_Op<"for",
|
|||
cases when it is absent from the custom format. For example:
|
||||
|
||||
```mlir
|
||||
loop.for %iv = %lb to %ub step %step {
|
||||
... // body
|
||||
}
|
||||
loop.for %iv = %lb to %ub step %step {
|
||||
... // body
|
||||
}
|
||||
```
|
||||
|
||||
"loop.for" can also operate on loop-carried variables and returns the final
|
||||
`loop.for` can also operate on loop-carried variables and returns the final
|
||||
values after loop termination. The initial values of the variables are
|
||||
passed as additional SSA operands to the "loop.for" following the 3 loop
|
||||
control SSA values mentioned above (lower bound, upper bound and step). The
|
||||
|
@ -120,7 +120,7 @@ def ForOp : Loop_Op<"for",
|
|||
}
|
||||
return %sum : f32
|
||||
}
|
||||
```
|
||||
```
|
||||
}];
|
||||
let arguments = (ins Index:$lowerBound,
|
||||
Index:$upperBound,
|
||||
|
@ -174,44 +174,47 @@ def IfOp : Loop_Op<"if",
|
|||
[SingleBlockImplicitTerminator<"YieldOp">, RecursiveSideEffects]> {
|
||||
let summary = "if-then-else operation";
|
||||
let description = [{
|
||||
The "loop.if" operation represents an if-then-else construct for
|
||||
The `loop.if` operation represents an if-then-else construct for
|
||||
conditionally executing two regions of code. The operand to an if operation
|
||||
is a boolean value. For example:
|
||||
|
||||
```mlir
|
||||
loop.if %b {
|
||||
...
|
||||
} else {
|
||||
...
|
||||
}
|
||||
loop.if %b {
|
||||
...
|
||||
} else {
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
"loop.if" may also return results that are defined in its regions. The
|
||||
`loop.if` may also return results that are defined in its regions. The
|
||||
values defined are determined by which execution path is taken.
|
||||
For example:
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%x, %y = loop.if %b -> (f32, f32) {
|
||||
%x_true = ...
|
||||
%y_true = ...
|
||||
loop.yield %x_true, %y_true : f32, f32
|
||||
} else {
|
||||
%x_false = ...
|
||||
%y_false = ...
|
||||
loop.yield %x_false, %y_false : f32, f32
|
||||
}
|
||||
%x, %y = loop.if %b -> (f32, f32) {
|
||||
%x_true = ...
|
||||
%y_true = ...
|
||||
loop.yield %x_true, %y_true : f32, f32
|
||||
} else {
|
||||
%x_false = ...
|
||||
%y_false = ...
|
||||
loop.yield %x_false, %y_false : f32, f32
|
||||
}
|
||||
```
|
||||
|
||||
"loop.if" regions are always terminated with "loop.yield". If "loop.if"
|
||||
`loop.if` regions are always terminated with `loop.yield`. If `loop.if`
|
||||
defines no values, the "loop.yield" can be left out, and will be inserted
|
||||
implicitly. Otherwise, it must be explicit.
|
||||
Also, if "loop.if" defines one or more values, the 'else' block cannot be
|
||||
omitted.
|
||||
|
||||
For example:
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
loop.if %b {
|
||||
...
|
||||
}
|
||||
loop.if %b {
|
||||
...
|
||||
}
|
||||
```
|
||||
}];
|
||||
let arguments = (ins I1:$condition);
|
||||
|
@ -256,7 +259,7 @@ def ParallelOp : Loop_Op<"parallel",
|
|||
The lower and upper bounds specify a half-open range: the range includes
|
||||
the lower bound but does not include the upper bound. The initial values
|
||||
have the same types as results of "loop.parallel". If there are no results,
|
||||
the keyword `init` can be omitted.
|
||||
the keyword `init` can be omitted.
|
||||
|
||||
Semantically we require that the iteration space can be iterated in any
|
||||
order, and the loop body can be executed in parallel. If there are data
|
||||
|
@ -274,19 +277,20 @@ def ParallelOp : Loop_Op<"parallel",
|
|||
The body region must contain exactly one block that terminates with
|
||||
"loop.yield" without operands. Parsing ParallelOp will create such a region
|
||||
and insert the terminator when it is absent from the custom format.
|
||||
For example:
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
loop.parallel (%iv) = (%lb) to (%ub) step (%step) -> f32 {
|
||||
%zero = constant 0.0 : f32
|
||||
loop.reduce(%zero) : f32 {
|
||||
^bb0(%lhs : f32, %rhs: f32):
|
||||
%res = addf %lhs, %rhs : f32
|
||||
loop.reduce.return %res : f32
|
||||
}
|
||||
}
|
||||
loop.parallel (%iv) = (%lb) to (%ub) step (%step) -> f32 {
|
||||
%zero = constant 0.0 : f32
|
||||
loop.reduce(%zero) : f32 {
|
||||
^bb0(%lhs : f32, %rhs: f32):
|
||||
%res = addf %lhs, %rhs : f32
|
||||
loop.reduce.return %res : f32
|
||||
}
|
||||
}
|
||||
```
|
||||
}];
|
||||
}];
|
||||
|
||||
let arguments = (ins Variadic<Index>:$lowerBound,
|
||||
Variadic<Index>:$upperBound,
|
||||
|
@ -343,14 +347,13 @@ def ReduceOp : Loop_Op<"reduce", [HasParent<"ParallelOp">]> {
|
|||
Example:
|
||||
|
||||
```mlir
|
||||
%operand = constant 1.0 : f32
|
||||
loop.reduce(%operand) : f32 {
|
||||
^bb0(%lhs : f32, %rhs: f32):
|
||||
%res = addf %lhs, %rhs : f32
|
||||
loop.reduce.return %res : f32
|
||||
}
|
||||
%operand = constant 1.0 : f32
|
||||
loop.reduce(%operand) : f32 {
|
||||
^bb0(%lhs : f32, %rhs: f32):
|
||||
%res = addf %lhs, %rhs : f32
|
||||
loop.reduce.return %res : f32
|
||||
}
|
||||
```
|
||||
|
||||
}];
|
||||
|
||||
let skipDefaultBuilders = 1;
|
||||
|
@ -373,7 +376,7 @@ def ReduceReturnOp :
|
|||
the operand of "loop.reduce". Example for the custom format:
|
||||
|
||||
```mlir
|
||||
loop.reduce.return %res : f32
|
||||
loop.reduce.return %res : f32
|
||||
```
|
||||
}];
|
||||
@ -92,7 +92,7 @@ def quant_QuantizeRegionOp : quant_Op<"region", [
|
|||
IsolatedFromAbove,
|
||||
SingleBlockImplicitTerminator<"ReturnOp">]> {
|
||||
let summary = [{
|
||||
The `region operation wraps high-precision ops as a logical low-precision
|
||||
The `region` operation wraps high-precision ops as a logical low-precision
|
||||
quantized kernel.
|
||||
}];
|
||||
|
||||
|
@ -119,8 +119,9 @@ def quant_ReturnOp : quant_Op<"return", [Terminator]> {
|
|||
|
||||
def quant_ConstFakeQuant : quant_Op<"const_fake_quant",
|
||||
[SameOperandsAndResultType, NoSideEffect]> {
|
||||
let summary =
|
||||
"Simulates the effect of uniform quantization with const range.";
|
||||
let summary = [{
|
||||
Simulates the effect of uniform quantization with const range.
|
||||
}];
|
||||
|
||||
let description = [{
|
||||
Given a const min, max, num_bits and narrow_range attribute, applies the
|
||||
|
@ -148,8 +149,9 @@ def quant_ConstFakeQuant : quant_Op<"const_fake_quant",
|
|||
|
||||
def quant_ConstFakeQuantPerAxis : quant_Op<"const_fake_quant_per_axis",
|
||||
[SameOperandsAndResultType, NoSideEffect]> {
|
||||
let summary =
|
||||
"Simulates the effect of per axis uniform quantization with const range.";
|
||||
let summary = [{
|
||||
Simulates the effect of per axis uniform quantization with const range.
|
||||
}];
|
||||
|
||||
let description = [{
|
||||
Given a const min, max, num_bits and narrow_range attribute, applies the
|
||||
|
@ -179,8 +181,7 @@ def quant_ConstFakeQuantPerAxis : quant_Op<"const_fake_quant_per_axis",
|
|||
}
|
||||
|
||||
def quant_StatisticsRefOp : quant_Op<"stats_ref", [SameOperandsAndResultType]> {
|
||||
let summary =
|
||||
"Indicates that statistics are resolved by reference.";
|
||||
let summary = "Indicates that statistics are resolved by reference.";
|
||||
|
||||
let description = [{
|
||||
This op acts as an identity that, when encountered at runtime, should result
|
||||
|
@ -198,8 +199,7 @@ def quant_StatisticsRefOp : quant_Op<"stats_ref", [SameOperandsAndResultType]> {
|
|||
}
|
||||
|
||||
def quant_StatisticsOp : quant_Op<"stats", [SameOperandsAndResultType]> {
|
||||
let summary =
|
||||
"Identity op which associates statistics with the value.";
|
||||
let summary = "Identity op which associates statistics with the value.";
|
||||
|
||||
let description = [{
|
||||
Associates statistics about the runtime ranges of values observed for
|
||||
|
@ -213,8 +213,11 @@ def quant_StatisticsOp : quant_Op<"stats", [SameOperandsAndResultType]> {
|
|||
`layerStats` must be a rank 1 tensor: [2]
|
||||
`axisStats` must be a rank 2 tensor: [N, 2], where N=the slice size
|
||||
split by the `axis` dimension. For example:
|
||||
<?x?x3x2>, axis=3 => N=2
|
||||
<?x?x3x2>, axis=2 => N=6
|
||||
|
||||
```
|
||||
<?x?x3x2>, axis=3 => N=2
|
||||
<?x?x3x2>, axis=2 => N=6
|
||||
```
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
|
@ -263,8 +266,9 @@ def quant_StatisticsOp : quant_Op<"stats", [SameOperandsAndResultType]> {
|
|||
}
|
||||
|
||||
def quant_CoupledRefOp : quant_Op<"coupled_ref", [SameOperandsAndResultType]> {
|
||||
let summary =
|
||||
"Indicates that one point of the computation is coupled to another.";
|
||||
let summary = [{
|
||||
Indicates that one point of the computation is coupled to another.
|
||||
}];
|
||||
|
||||
let description = [{
|
||||
Ordinarily, relationships between ops for the purposes of determining
@ -1,5 +1,2 @@
|
|||
set(LLVM_TARGET_DEFINITIONS ShapeOps.td)
|
||||
mlir_tablegen(ShapeOps.h.inc -gen-op-decls)
|
||||
mlir_tablegen(ShapeOps.cpp.inc -gen-op-defs)
|
||||
mlir_tablegen(ShapeOpsDialect.h.inc -gen-dialect-decls)
|
||||
add_public_tablegen_target(MLIRShapeOpsIncGen)
|
||||
add_mlir_dialect(ShapeOps shape)
|
||||
add_mlir_doc(ShapeOps -gen-dialect-doc ShapeDialect Dialects/)
@ -87,58 +87,59 @@ def Vector_ContractionOp :
and acc arguments. An indexing map attribute specifies a mapping from each
iterator in the iterator type list, to each dimension of an N-D vector.

Examples:
Example:

```mlir
// Simple dot product (K = 0).
#contraction_accesses = [
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,
  affine_map<(i) -> ()>
]
#contraction_trait = {
  indexing_maps = #contraction_accesses,
  iterator_types = ["reduction"]
}
%3 = vector.contract #contraction_trait %0, %1, %2
  : vector<10xf32>, vector<10xf32> into f32

// 2D vector contraction with one contracting dimension (matmul, K = 2).
#contraction_accesses = [
  affine_map<(i, j, k) -> (i, k)>,
  affine_map<(i, j, k) -> (k, j)>,
  affine_map<(i, j, k) -> (i, j)>
]
#contraction_trait = {
  indexing_maps = #contraction_accesses,
  iterator_types = ["parallel", "parallel", "reduction"]
}

%3 = vector.contract #contraction_trait %0, %1, %2
  : vector<4x3xf32>, vector<3x7xf32> into vector<4x7xf32>

// 4D to 3D vector contraction with two contracting dimensions and
// one batch dimension (K = 3).
#contraction_accesses = [
  affine_map<(b0, f0, f1, c0, c1) -> (c0, b0, c1, f0)>,
  affine_map<(b0, f0, f1, c0, c1) -> (b0, c1, c0, f1)>,
  affine_map<(b0, f0, f1, c0, c1) -> (b0, f0, f1)>
]
#contraction_trait = {
  indexing_maps = #contraction_accesses,
  iterator_types = ["parallel", "parallel", "parallel",
                    "reduction", "reduction"]
}

%4 = vector.contract #contraction_trait %0, %1, %2
  : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32>

// 4D vector contraction with two contracting dimensions and optional
// vector mask arguments.
%lhs_mask = vector.constant_mask [7, 8, 16, 15] : vector<7x8x16x15xi1>
%rhs_mask = vector.constant_mask [8, 16, 7, 5] : vector<8x16x7x5xi1>

%5 = vector.contract #contraction_trait %0, %1, %2, %lhs_mask, %rhs_mask
  : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x8x5xf32>
```
}];
let builders = [OpBuilder<
@ -203,13 +204,14 @@ def Vector_ReductionOp :

http://llvm.org/docs/LangRef.html#experimental-vector-reduction-intrinsics

Examples:
Example:

```mlir
%1 = vector.reduction "add", %0 : vector<16xf32> into f32

%3 = vector.reduction "xor", %2 : vector<4xi32> into i32

%4 = vector.reduction "mul", %0, %1 : vector<16xf32> into f32
```
}];
let extraClassDeclaration = [{
@ -247,11 +249,12 @@ def Vector_BroadcastOp :
dimension of 1. These rules imply that any scalar broadcast (k=0) to any
shaped vector with the same element type is always legal.

Examples:
Example:

```mlir
%0 = constant 0.0 : f32
%1 = vector.broadcast %0 : f32 to vector<16xf32>
%2 = vector.broadcast %1 : vector<16xf32> to vector<4x16xf32>
```
}];
let extraClassDeclaration = [{
@ -290,7 +293,8 @@ def Vector_ShuffleOp :
mask values must be within range, viz. given two k-D operands v1 and v2
above, all mask values are in the range [0,s_1+t_1)

Examples:
Example:

```mlir
%0 = vector.shuffle %a, %b[0, 3]
  : vector<2xf32>, vector<2xf32>       ; yields vector<2xf32>

@ -298,7 +302,6 @@ def Vector_ShuffleOp :
  : vector<2x16xf32>, vector<1x16xf32> ; yields vector<3x16xf32>
%2 = vector.shuffle %a, %b[3, 2, 1, 0]
  : vector<2xf32>, vector<2xf32>       ; yields vector<4xf32>
```
}];
let builders = [OpBuilder<"Builder *builder, OperationState &result,"
@ -333,9 +336,10 @@ def Vector_ExtractElementOp :
https://llvm.org/docs/LangRef.html#extractelement-instruction

Example:

```mlir
%c = constant 15 : i32
%1 = vector.extractelement %0[%c : i32]: vector<16xf32>
```
}];
let extraClassDeclaration = [{
@ -360,10 +364,11 @@ def Vector_ExtractOp :
Takes an n-D vector and a k-D position and extracts the (n-k)-D vector at
the proper position. Degenerates to an element type in the 0-D case.

Examples:
Example:

```mlir
%1 = vector.extract %0[3]: vector<4x8x16xf32>
%2 = vector.extract %0[3, 3, 3]: vector<4x8x16xf32>
```
}];
let builders = [OpBuilder<
@ -396,19 +401,20 @@ def Vector_ExtractSlicesOp :
linear index of the slice w.r.t the unrolling scheme represented by 'sizes'.
Currently, only unit strides are supported.

Examples:
Example:

```mlir
%0 = vector.transfer_read ...: vector<4x2xf32>

%1 = vector.extract_slices %0, [2, 2], [1, 1]
  : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>>

// Example with partial slices at dimension boundaries.
%2 = vector.transfer_read ...: vector<4x3xf32>

%3 = vector.extract_slices %2, [2, 2], [1, 1]
  : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>,
                               vector<2x2xf32>, vector<2x1xf32>>
```
}];
let builders = [OpBuilder<
@ -449,8 +455,9 @@ def Vector_FMAOp :
to the `llvm.fma.*` intrinsic.

Example:

```mlir
%3 = vector.fma %0, %1, %2: vector<8x16xf32>
```
}];
// Fully specified by traits.
@ -483,10 +490,11 @@ def Vector_InsertElementOp :
https://llvm.org/docs/LangRef.html#insertelement-instruction

Example:

```mlir
%c = constant 15 : i32
%f = constant 0.0f : f32
%1 = vector.insertelement %f, %0[%c : i32]: vector<16xf32>
```
}];
let extraClassDeclaration = [{
@ -515,12 +523,11 @@ def Vector_InsertOp :
and inserts the n-D source into the (n+k)-D destination at the proper
position. Degenerates to a scalar source type when n = 0.

Examples:
Example:

```mlir
%2 = vector.insert %0, %1[3]:
  vector<8x16xf32> into vector<4x8x16xf32>
%5 = vector.insert %3, %4[3, 3, 3]:
  f32 into vector<4x8x16xf32>
%2 = vector.insert %0, %1[3] : vector<8x16xf32> into vector<4x8x16xf32>
%5 = vector.insert %3, %4[3, 3, 3] : f32 into vector<4x8x16xf32>
```
}];
let assemblyFormat = [{
@ -558,22 +565,23 @@ def Vector_InsertSlicesOp :
linear index of the slice w.r.t the unrolling scheme represented by 'sizes'.
Currently, only unit strides are supported.

Examples:
Example:

```mlir
%0 = vector.extract_slices %0, [2, 2], [1, 1]
  : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>>

%1 = vector.insert_slices %0, [2, 2], [1, 1]
  : tuple<vector<2x2xf32>, vector<2x2xf32>> into vector<4x2xf32>

// Example with partial slices at dimension boundaries.
%3 = vector.extract_slices %2, [2, 2], [1, 1]
  : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>,
                               vector<2x2xf32>, vector<2x1xf32>>

%4 = vector.insert_slices %3, [2, 2], [1, 1]
  : tuple<vector<2x2xf32>, vector<2x1xf32>,
          vector<2x2xf32>, vector<2x1xf32>> into vector<4x3xf32>
```
}];
@ -616,11 +624,12 @@ def Vector_InsertStridedSliceOp :
the last k-D dimensions contain the k-D source vector elements strided at
the proper location as specified by the offsets.

Examples:
Example:

```mlir
%2 = vector.insert_strided_slice %0, %1
  {offsets = [0, 0, 2], strides = [1, 1]}:
  vector<2x4xf32> into vector<16x4x8xf32>
```
}];
@ -658,14 +667,15 @@ def Vector_OuterProductOp :
the LLVMIR dialect, this form emits `llvm.intr.fma`, which is guaranteed to
lower to actual `fma` instructions on x86.

Examples:
Example:

```mlir
%2 = vector.outerproduct %0, %1: vector<4xf32>, vector<8xf32>
return %2: vector<4x8xf32>

%3 = vector.outerproduct %0, %1, %2:
  vector<4xf32>, vector<8xf32>, vector<4x8xf32>
return %3: vector<4x8xf32>
```
}];
let extraClassDeclaration = [{
@ -708,8 +718,8 @@ def Vector_ReshapeOp :
In the examples below, valid data elements are represented by an alphabetic
character, and undefined data elements are represented by '-'.

Example:

```mlir
vector<1x8xf32> with valid data shape [6], fixed vector sizes [8]

input: [a, b, c, d, e, f]

@ -718,9 +728,8 @@ def Vector_ReshapeOp :

vector layout: [a, b, c, d, e, f, -, -]
```

Example:

```mlir
vector<2x8xf32> with valid data shape [10], fixed vector sizes [8]

input: [a, b, c, d, e, f, g, h, i, j]

@ -729,9 +738,9 @@ def Vector_ReshapeOp :

vector layout: [[a, b, c, d, e, f, g, h],
                [i, j, -, -, -, -, -, -]]
```

Example:

```mlir
vector<2x2x2x3xf32> with valid data shape [3, 5], fixed vector sizes
[2, 3]

@ -750,9 +759,9 @@ def Vector_ReshapeOp :

     [-, -, -]]
    [[n, o, -],
     [-, -, -]]]]
```

Example:

```mlir
%1 = vector.reshape %0, [%c3, %c6], [%c2, %c9], [4]
  : vector<3x2x4xf32> to vector<2x3x4xf32>

@ -776,7 +785,6 @@ def Vector_ReshapeOp :

 [[j, k, l, m],
  [n, o, p, q],
  [r, -, -, -]]]
```
}];

let extraClassDeclaration = [{
@ -828,16 +836,17 @@ def Vector_StridedSliceOp :
attribute. The returned subvector contains the elements starting at offset
`offsets` and ending at `offsets + sizes`.

Examples:
Example:

```mlir
%1 = vector.strided_slice %0
  {offsets = [0, 2], sizes = [2, 4], strides = [1, 1]}:
  vector<4x8x16xf32> to vector<2x4x16xf32>

// TODO(ntv) Evolve to a range form syntax similar to:
%1 = vector.strided_slice %0[0:2:1][2:4:1]
  vector<4x8x16xf32> to vector<2x4x16xf32>
```
}];
let builders = [OpBuilder<
"Builder *builder, OperationState &result, Value source, " #
@ -948,12 +957,13 @@ def Vector_TransferReadOp :
implemented using a warp-shuffle if loop `j` were mapped to `threadIdx.x`.

Syntax
```
operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list
  `{` attribute-entry `} :` memref-type `,` vector-type
```

Examples:
Example:

```mlir
// Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32>
// and pad with %f0 to handle the boundary case:
```
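A hedged sketch of what such a read could look like (this is not the diff's own example; the operand names, the padding value, and the permutation map are illustrative):

```mlir
// %f0 pads out-of-bounds elements; the permutation map selects the memref
// dimensions that feed the 32x256 result vector.
%v = vector.transfer_read %A[%i0, %i1, %i2], %f0
  {permutation_map = affine_map<(d0, d1, d2) -> (d2, d1)>}
  : memref<?x?x?xf32>, vector<32x256xf32>
```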
@ -1026,14 +1036,7 @@ def Vector_TransferWriteOp :
valid. Different lowerings may be pertinent depending on the hardware
support.

Syntax:

```mlir
operation ::= `vector.transfer_write` ssa-use-list `{` attribute-entry `} :
  ` vector-type ', ' memref-type '
```

Examples:
Example:

```mlir
// write vector<16x32x64xf32> into the slice
```
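A hedged sketch of the corresponding write (again not the diff's own example; the operand names and the permutation map are illustrative assumptions):

```mlir
// Write the vector<16x32x64xf32> value %val back into a 4-D memref; the
// permutation map says which memref dimensions the three vector dimensions
// cover.
vector.transfer_write %val, %A[%i0, %i1, %i2, %i3]
  {permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d1, d2)>}
  : vector<16x32x64xf32>, memref<?x?x?x?xf32>
```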
@ -1099,7 +1102,7 @@ def Vector_ShapeCastOp :
2-D MLIR vector to a 1-D flattened LLVM vector.shape_cast lowering to LLVM
is supported in that particular case, for now.

Examples:
Example:

```mlir
// Example casting to a lower vector rank.
```
@ -1139,7 +1142,7 @@ def Vector_TypeCastOp :

Syntax:

```
operation ::= `vector.type_cast` ssa-use : memref-type to memref-type
```
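Since only the grammar is shown here, a minimal sketch of a concrete use may help (the shapes and value names are illustrative): the op reinterprets a memref of scalars as a memref holding a single vector of the same shape.

```mlir
// View an 8x8 memref of f32 as a memref containing one vector<8x8xf32>.
%VA = vector.type_cast %A : memref<8x8xf32> to memref<vector<8x8xf32>>
```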
@ -1184,20 +1187,20 @@ def Vector_ConstantMaskOp :
(otherwise element values are set to 0).

Example:

```mlir
// create a constant vector mask of size 4x3xi1 with elements in range
// 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
%1 = vector.constant_mask [3, 2] : vector<4x3xi1>

print %1
              columns
            0    1    2
          |------------
        0 | 1    1    0
  rows  1 | 1    1    0
        2 | 1    1    0
        3 | 0    0    0
```
}];
@ -1221,20 +1224,20 @@ def Vector_CreateMaskOp :
(otherwise element values are set to 0).

Example:

```mlir
// create a vector mask of size 4x3xi1 where elements in range
// 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
%1 = vector.create_mask %c3, %c2 : vector<4x3xi1>

print %1
              columns
            0    1    2
          |------------
        0 | 1    1    0
  rows  1 | 1    1    0
        2 | 1    1    0
        3 | 0    0    0
```
}];
@ -1254,16 +1257,17 @@ def Vector_TupleOp :
transformation and should be removed before lowering to lower-level
dialects.

Examples:
Example:

```mlir
%0 = vector.transfer_read ... : vector<2x2xf32>
%1 = vector.transfer_read ... : vector<2x1xf32>
%2 = vector.transfer_read ... : vector<2x2xf32>
%3 = vector.transfer_read ... : vector<2x1xf32>

%4 = vector.tuple %0, %1, %2, %3
  : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32>
```
}];
@ -1285,14 +1289,17 @@ def Vector_TransposeOp :
Takes a n-D vector and returns the transposed n-D vector defined by
the permutation of ranks in the n-sized integer array attribute.
In the operation

```mlir
%1 = vector.transpose %0, [i_1, .., i_n]
  : vector<d_1 x .. x d_n x f32>
  to vector<d_trans[0] x .. x d_trans[n-1] x f32>
```

the transp array [i_1, .., i_n] must be a permutation of [0, .., n-1].

Example:

```mlir
%1 = vector.transpose %0, [1, 0] : vector<2x3xf32> to vector<3x2xf32>
@ -1326,14 +1333,15 @@ def Vector_TupleGetOp :
transformation and should be removed before lowering to lower-level
dialects.

Examples:
Example:

```mlir
%4 = vector.tuple %0, %1, %2, %3
  : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32>>

%5 = vector.tuple_get %4, 1
  : tuple<vector<2x2xf32>, vector<2x1xf32>,
          vector<2x2xf32>, vector<2x1xf32>>
```
}];
@ -1356,21 +1364,22 @@ def Vector_PrintOp :
Prints the source vector (or scalar) to stdout in human readable
format (for testing and debugging). No return value.

Examples:
Example:

```mlir
%0 = constant 0.0 : f32
%1 = vector.broadcast %0 : f32 to vector<4xf32>
vector.print %1 : vector<4xf32>

when lowered to LLVM, the vector print is unrolled into
elementary printing method calls that at runtime will yield

( 0.0, 0.0, 0.0, 0.0 )

on stdout when linked with a small runtime support library,
which only needs to provide a few printing methods (single
value for all data types, opening/closing bracket, comma,
newline).
```
}];
let verifier = ?;
@ -1421,9 +1430,9 @@ def Vector_MatmulOp : Vector_Op<"matrix_multiply", [NoSideEffect,
Example:

```mlir
%C = vector.matrix_multiply %A, %B
  { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32 } :
  (vector<64xf64>, vector<48xf64>) -> vector<12xf64>
```
}];
let builders = [