Update / cleanup pass documentation + Langref alloc examples

PiperOrigin-RevId: 234866323
Author: Uday Bondhugula (2019-02-20 14:12:21 -08:00), committed by jpienaar
parent 5162c58c78
commit 4056b98e22
2 changed files with 27 additions and 35 deletions


@@ -64,10 +64,10 @@ func @mul(%A: tensor<100x?xf32>, %B: tensor<?x50xf32>) -> (tensor<100x50xf32>) {
%n = dim %A, 1 : tensor<100x?xf32>
// Allocate addressable "buffers" and copy tensors %A and %B into them.
-%A_m = alloc memref<100x?xf32>(%n)
+%A_m = alloc(%n) : memref<100x?xf32>
tensor_store %A to %A_m : memref<100x?xf32>
-%B_m = alloc memref<?x50xf32>(%n)
+%B_m = alloc(%n) : memref<?x50xf32>
tensor_store %B to %B_m : memref<?x50xf32>
// Call function @multiply passing memrefs as arguments,
@@ -96,7 +96,7 @@ func @multiply(%A: memref<100x?xf32>, %B: memref<?x50xf32>)
%n = dim %A, 1 : memref<100x?xf32>
// Allocate memory for the multiplication result.
-%C = alloc memref<100x50xf32>()
+%C = alloc() : memref<100x50xf32>
// Multiplication loop nest.
for %i = 0 to 100 {
@@ -370,11 +370,11 @@ Examples:
// Use an affine mapping definition in an alloc instruction, binding the
// SSA value %N to the symbol s0.
-%a = alloc memref<4x4xf32, #affine_map42> () [%N]
+%a = alloc()[%N] : memref<4x4xf32, #affine_map42>
// Same thing with an inline affine mapping definition.
-%b = alloc memref<4x4xf32, (d0, d1)[s0] -> (d0, d0 + d1 + floordiv(s0,2))
-  size (10, s0)> () [%N]
+%b = alloc()[%N] : memref<4x4xf32, (d0, d1)[s0] -> (d0, d0 + d1 + floordiv(s0,2))
+  size (10, s0)>
```
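One combination the examples above do not show: a hypothetical line (not in the commit) binding both operand kinds at once, with dynamic dimension sizes in the parentheses and affine-map symbols in the brackets:

```mlir
// Hypothetical: %M sizes the dynamic (?) dimension; %N binds the
// layout map's symbol s0.
%c = alloc(%M)[%N] : memref<?x4xf32, (d0, d1)[s0] -> (d0 + s0, d1)>
```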
### Semi-affine maps {#semi-affine-maps}
@@ -1415,7 +1415,7 @@ Example:
```mlir {.mlir}
// Allocate base memref with dynamic 16x?xf32.
#lmapD = (i, j)[S0] -> (i, j) size (16, S0)
-%D = alloc <16x?xf32, #lmapD, hbm>(%N)[%N]
+%D = alloc(%N)[%N] : memref<16x?xf32, #lmapD, hbm>
// Create memref which reshapes from 16x?xf32 to 16x4x?xf32.
#imapDR = (i, j, k)[S0] -> (i, j * S0 + k) size (16, 4 * S0)


@@ -149,9 +149,8 @@ nests.
## Loop unroll (`-loop-unroll`) {#loop-unroll}
-This pass implements unrolling for loops ('for' instructions). It is able to
-perform unrolling for loops with arbitrary bounds, and generate a cleanup loop
-when necessary.
+This pass implements loop unrolling. It is able to unroll loops with arbitrary
+bounds, and generate a cleanup loop when necessary.
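For illustration only, a sketch in the same syntax the docs' examples use (`"foo"` is a placeholder op, not from the commit): unrolling a trip-count-10 loop by four yields a main loop plus a two-iteration cleanup loop.

```mlir
// Input: loop with trip count 10.
for %i = 0 to 10 {
  %x = "foo"(%i) : (index) -> f32
}

// Output: main loop unrolled by 4, plus a cleanup loop for the
// remaining 10 mod 4 = 2 iterations.
for %i = 0 to 8 step 4 {
  %i1 = affine.apply (d0) -> (d0 + 1) (%i)
  %i2 = affine.apply (d0) -> (d0 + 2) (%i)
  %i3 = affine.apply (d0) -> (d0 + 3) (%i)
  %x0 = "foo"(%i)  : (index) -> f32
  %x1 = "foo"(%i1) : (index) -> f32
  %x2 = "foo"(%i2) : (index) -> f32
  %x3 = "foo"(%i3) : (index) -> f32
}
for %i = 8 to 10 {
  %x = "foo"(%i) : (index) -> f32
}
```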
## Loop unroll and jam (`-loop-unroll-jam`) {#loop-unroll-jam}
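The section body is elided by this hunk; purely as a sketch (hypothetical `"compute"` op), unroll-and-jam by a factor of two unrolls the outer loop and jams the copies of the inner loop body together:

```mlir
// Input.
for %i = 0 to 8 {
  for %j = 0 to 16 {
    "compute"(%i, %j) : (index, index) -> ()
  }
}

// Output: %i advances by 2; both copies of the body share one inner loop.
for %i = 0 to 8 step 2 {
  %ip1 = affine.apply (d0) -> (d0 + 1) (%i)
  for %j = 0 to 16 {
    "compute"(%i, %j)   : (index, index) -> ()
    "compute"(%ip1, %j) : (index, index) -> ()
  }
}
```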
@@ -163,11 +162,11 @@ imperfect loop nests.
Performs fusion of loop nests using a slicing-based approach. The fused loop
nests, when possible, are rewritten to access significantly smaller local
buffers instead of the original memref's, and the latter are often
-either completely optimized or contracted. This transformation leads to enhanced
-locality and lower memory footprint through the elimination or contraction of
-temporaries / intermediate memref's. These benefits are sometimes achieved at
-the expense of redundant computation through a cost model that evaluates
-available choices such as the depth at which a source slice should be
+either completely optimized away or contracted. This transformation leads to
+enhanced locality and lower memory footprint through the elimination or
+contraction of temporaries / intermediate memref's. These benefits are sometimes
+achieved at the expense of redundant computation through a cost model that
+evaluates available choices such as the depth at which a source slice should be
materialized in the destination slice.
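A minimal sketch of the described effect, with hypothetical `"producer"`/`"consumer"` ops rather than the pass's literal output: the producer-consumer pair is fused, and the intermediate memref is contracted to a single element.

```mlir
// Input: %tmp carries values from the first nest to the second.
%tmp = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
  %v0 = "producer"(%i0) : (index) -> f32
  store %v0, %tmp[%i0] : memref<10xf32>
}
for %i1 = 0 to 10 {
  %v1 = load %tmp[%i1] : memref<10xf32>
  "consumer"(%v1) : (f32) -> ()
}

// Output: one fused nest; %tmp is contracted to a 1-element local buffer.
%buf = alloc() : memref<1xf32>
%c0 = constant 0 : index
for %i0 = 0 to 10 {
  %v0 = "producer"(%i0) : (index) -> f32
  store %v0, %buf[%c0] : memref<1xf32>
  %v1 = load %buf[%c0] : memref<1xf32>
  "consumer"(%v1) : (f32) -> ()
}
```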
## Memref bound checking (`-memref-bound-check`) {#memref-bound-check}
@@ -186,8 +185,8 @@ test/Transforms/memref-bound-check.mlir:19:13: error: 'load' op memref out of lo
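For flavor, a hypothetical input the pass would flag with a diagnostic like the one above:

```mlir
// %i ranges over [0, 12); indices 10 and 11 are out of bounds for %A,
// so -memref-bound-check reports an out-of-bounds access on the load.
%A = alloc() : memref<10xf32>
for %i = 0 to 12 {
  %v = load %A[%i] : memref<10xf32>
}
```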
## Memref dataflow optimization (`-memref-dataflow-opt`) {#memref-dataflow-opt}
-Performs store to load forwarding for memref's to eliminate memory accesses and
-potentially the entire memref if all its accesses are forwarded.
+This pass performs store to load forwarding for memref's to eliminate memory
+accesses and potentially the entire memref if all its accesses are forwarded.
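A minimal sketch of the forwarding, using a hypothetical `"use"` op:

```mlir
// Input: the load reads back exactly what the store just wrote.
%m = alloc() : memref<10xf32>
%c0 = constant 0 : index
%cf7 = constant 7.0 : f32
store %cf7, %m[%c0] : memref<10xf32>
%v = load %m[%c0] : memref<10xf32>
"use"(%v) : (f32) -> ()

// Output: the load is forwarded from the store; with all accesses
// forwarded, %m and its store become dead and can be removed entirely.
"use"(%cf7) : (f32) -> ()
```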
Input
@@ -232,8 +231,8 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
## Memref dependence analysis (`-memref-dependence-check`) {#memref-dependence-check}
-Performs dependence analysis to determine dependences between pairs of memory
-operations (load's and store's) on memref's. Dependence analysis exploits
+This pass performs dependence analysis to determine dependences between pairs of
+memory operations (load's and store's) on memref's. Dependence analysis exploits
polyhedral information available (affine maps, expressions, and affine.apply
operations) to precisely represent dependences using affine constraints, while
also computing dependence vectors from them, where each component of the
@@ -247,18 +246,13 @@ test/Transforms/memref-dataflow-opt.mlir:232:7: note: dependence from 2 to 1 at
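To make the dependence notes concrete, a hypothetical loop (not the cited test case) with a store-to-load dependence of distance 1 along %i:

```mlir
// Iteration %i stores %A[%i]; iteration %i + 1 loads it back, so the
// store -> load dependence vector along %i is [1, 1].
%A = alloc() : memref<10xf32>
for %i = 1 to 10 {
  %im1 = affine.apply (d0) -> (d0 - 1) (%i)
  %v = load %A[%im1] : memref<10xf32>
  %w = "compute"(%v) : (f32) -> f32
  store %w, %A[%i] : memref<10xf32>
}
```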
## Pipeline data transfer (`-pipeline-data-transfer`) {#pipeline-data-transfer}
-Performs a transformation to overlap non-blocking DMA operations in a loop with
-computations through double buffering and advancing dma_start operations with
-respect to other operations.
+This pass performs a transformation to overlap non-blocking DMA operations in a
+loop with computations through double buffering. This is achieved by advancing
+dma_start operations with respect to other operations.
Input
```mlir
-#map1 = () -> (8)
-#map2 = () -> (128)
-#map3 = () -> (512)
-#map4 = (d0) -> (d0 * 64)
-#map5 = (d0, d1) -> ((d0 * 2048 + d1 * 256) floordiv 32)
-#map6 = () -> (4)
func @loop_nest_dma() {
%0 = alloc() : memref<256xf32>
%1 = alloc() : memref<32xf32, 1>
%2 = alloc() : memref<1xf32>
@@ -271,15 +265,15 @@ func @loop_nest_dma() {
%4 = "compute"(%3) : (f32) -> f32
store %4, %1[%i0] : memref<32xf32, 1>
}
return
}
```
Output
```mlir
#map2 = (d0) -> (d0 mod 2)
#map3 = (d0) -> (d0 - 1)
-#map4 = (d0) -> (d0 - ((d0 - 1) floordiv 2) * 2 - 1)
+#map4 = (d0) -> ((d0 - 1) mod 2)
func @loop_nest_dma() {
%c128 = constant 128 : index
%c0 = constant 0 : index
%c7 = constant 7 : index
@@ -306,6 +300,4 @@ func @loop_nest_dma() {
store %11, %1[%c1, %c7] : memref<2x32xf32, 1>
dealloc %2 : memref<2x1xf32>
dealloc %1 : memref<2x32xf32, 1>
-return
-}
```