[MLIR] Make SuperVectorization use normalized AffineApplyOp

Supervectorization does not plan on handling multi-result AffineMaps or
non-canonical chains of more than one AffineApplyOp.
This CL uses the simpler single-result unbounded AffineApplyOp in the
MaterializeVectors pass.

PiperOrigin-RevId: 228469085
This commit is contained in:
Nicolas Vasilache 2019-01-09 00:41:54 -08:00 committed by jpienaar
parent 3e5ee82b81
commit 1f78d63f05
5 changed files with 266 additions and 209 deletions

View File

@ -373,12 +373,16 @@ reindexAffineIndices(FuncBuilder *b, VectorType hwVectorType,
auto stride = vectorShape[i - numMemRefIndices - numSuperVectorIndices];
affineExprs.push_back(d_i + offset * stride);
}
auto affineMap = AffineMap::get(numIndices, 0, affineExprs, {});
// TODO(ntv): support a concrete map and composition.
auto app = b->create<AffineApplyOp>(b->getInsertionPoint()->getLoc(),
affineMap, memrefIndices);
return SmallVector<mlir::Value *, 8>{app->getResults()};
// Create a bunch of single result maps.
return functional::map(
[b, numIndices, memrefIndices](AffineExpr expr) {
auto map = AffineMap::get(numIndices, 0, expr, {});
auto app = makeNormalizedAffineApply(
b, b->getInsertionPoint()->getLoc(), map, memrefIndices);
return app->getResult(0);
},
affineExprs);
}
/// Returns attributes with the following substitutions applied:
@ -553,11 +557,17 @@ static bool instantiateMaterialization(Instruction *inst,
// Create a builder here for unroll-and-jam effects.
FuncBuilder b(inst);
auto *opInst = cast<OperationInst>(inst);
// AffineApplyOp are ignored: instantiating the proper vector op will take
// care of AffineApplyOps by composing them properly.
if (opInst->isa<AffineApplyOp>()) {
return false;
}
if (auto write = opInst->dyn_cast<VectorTransferWriteOp>()) {
auto *clone = instantiate(&b, write, state->hwVectorType,
state->hwVectorInstance, state->substitutionsMap);
return clone == nullptr;
} else if (auto read = opInst->dyn_cast<VectorTransferReadOp>()) {
}
if (auto read = opInst->dyn_cast<VectorTransferReadOp>()) {
auto *clone = instantiate(&b, read, state->hwVectorType,
state->hwVectorInstance, state->substitutionsMap);
if (!clone) {
@ -570,10 +580,12 @@ static bool instantiateMaterialization(Instruction *inst,
// The only op with 0 results reaching this point must, by construction, be
// VectorTransferWriteOps and have been caught above. Ops with >= 2 results
// are not yet supported. So just support 1 result.
if (opInst->getNumResults() != 1)
if (opInst->getNumResults() != 1) {
return inst->emitError("NYI: ops with != 1 results");
if (opInst->getResult(0)->getType() != state->superVectorType)
}
if (opInst->getResult(0)->getType() != state->superVectorType) {
return inst->emitError("Op does not return a supervector.");
}
auto *clone =
instantiate(&b, opInst, state->hwVectorType, state->substitutionsMap);
if (!clone) {

View File

@ -1,26 +1,33 @@
// RUN: mlir-opt %s -materialize-vectors -vector-size=4 -vector-size=4 | FileCheck %s
// CHECK-DAG: #[[map_instance_0:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1, d2, d3)
// CHECK-DAG: #[[map_instance_1:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 1, d2, d3)
// CHECK-DAG: #[[map_instance_2:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 2, d2, d3)
// CHECK-DAG: #[[map_instance_3:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 3, d2, d3)
// CHECK-DAG: #[[map_proj_d0d1d2d3d4_d1d0:map[0-9]+]] = (d0, d1, d2, d3) -> (d1, d0)
// CHECK-DAG: #[[D0D1D2D3TOD0:map[0-9]+]] = (d0, d1, d2, d3) -> (d0)
// CHECK-DAG: #[[D0D1D2D3TOD1:map[0-9]+]] = (d0, d1, d2, d3) -> (d1)
// CHECK-DAG: #[[D0D1D2D3TOD2:map[0-9]+]] = (d0, d1, d2, d3) -> (d2)
// CHECK-DAG: #[[D0D1D2D3TOD3:map[0-9]+]] = (d0, d1, d2, d3) -> (d3)
// CHECK-DAG: #[[D0D1D2D3TOD1D0:map[0-9]+]] = (d0, d1, d2, d3) -> (d1, d0)
// CHECK-DAG: #[[D0D1D2D3TOD1P1:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 1)
// CHECK-DAG: #[[D0D1D2D3TOD1P2:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 2)
// CHECK-DAG: #[[D0D1D2D3TOD1P3:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 3)
// CHECK-LABEL: func @materialize
func @materialize(%M : index, %N : index, %O : index, %P : index) {
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
%f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
// CHECK: for %i0 = 0 to %arg0 step 4 {
// CHECK: for %i1 = 0 to %arg1 step 4 {
// CHECK: for %i2 = 0 to %arg2 {
// CHECK: for %i3 = 0 to %arg3 step 4 {
// CHECK: %1 = affine_apply #[[map_instance_0]](%i0, %i1, %i2, %i3)
// CHECK: vector_transfer_write {{.*}}, %0, %1#0, %1#1, %1#2, %1#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
// CHECK: %2 = affine_apply #[[map_instance_1]](%i0, %i1, %i2, %i3)
// CHECK: vector_transfer_write {{.*}}, %0, %2#0, %2#1, %2#2, %2#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
// CHECK: %3 = affine_apply #[[map_instance_2]](%i0, %i1, %i2, %i3)
// CHECK: vector_transfer_write {{.*}}, %0, %3#0, %3#1, %3#2, %3#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
// CHECK: %4 = affine_apply #[[map_instance_3]](%i0, %i1, %i2, %i3)
// CHECK: vector_transfer_write {{.*}}, %0, %4#0, %4#1, %4#2, %4#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
// CHECK-NEXT: for %i1 = 0 to %arg1 step 4 {
// CHECK-NEXT: for %i2 = 0 to %arg2 {
// CHECK-NEXT: for %i3 = 0 to %arg3 step 4 {
// CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[D0D1D2D3TOD0]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[D0D1D2D3TOD2]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %[[d:[0-9]+]] = {{.*}}[[D0D1D2D3TOD3]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: vector_transfer_write {{.*}}, %0, %[[a]], %[[b]], %[[c]], %[[d]] {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
// CHECK: %[[b1:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P1]](%i0, %i1, %i2, %i3)
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b1]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
// CHECK: %[[b2:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P2]](%i0, %i1, %i2, %i3)
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
// CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P3]](%i0, %i1, %i2, %i3)
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
for %i0 = 0 to %M step 4 {
for %i1 = 0 to %N step 4 {
for %i2 = 0 to %O {

View File

@ -1,13 +1,13 @@
// RUN: mlir-opt %s -vectorize -virtual-vector-size 32 --test-fastest-varying=0 -materialize-vectors -vector-size=8 | FileCheck %s
// Capture permutation maps used in vectorization.
// CHECK-DAG: #[[map_proj_d0d1_d1:map[0-9]+]] = (d0, d1) -> (d1)
// vector<32xf32> -> vector<8xf32>
// CHECK-DAG: [[MAP0:#.*]] = (d0, d1) -> (d0, d1)
// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 8)
// CHECK-DAG: [[MAP2:#.*]] = (d0, d1) -> (d0, d1 + 16)
// CHECK-DAG: [[MAP3:#.*]] = (d0, d1) -> (d0, d1 + 24)
// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0)
// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1)
// CHECK-DAG: [[D0D1TOD1P8:#.*]] = (d0, d1) -> (d1 + 8)
// CHECK-DAG: [[D0D1TOD1P16:#.*]] = (d0, d1) -> (d1 + 16)
// CHECK-DAG: [[D0D1TOD1P24:#.*]] = (d0, d1) -> (d1 + 24)
// CHECK-LABEL: func @vector_add_2d
func @vector_add_2d(%M : index, %N : index) -> f32 {
%A = alloc (%M, %N) : memref<?x?xf32, 0>
%B = alloc (%M, %N) : memref<?x?xf32, 0>
@ -16,19 +16,23 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f2 = constant 2.0 : f32
// 4x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 {
// CHECK: for %i1 = 0 to %arg1 step 32 {
// CHECK: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK: [[CST3:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK: [[VAL0:%.*]] = affine_apply [[MAP0]]{{.*}}
// CHECK: vector_transfer_write [[CST0]], {{.*}}, [[VAL0]]#0, [[VAL0]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
// CHECK: [[VAL1:%.*]] = affine_apply [[MAP1]]{{.*}}
// CHECK: vector_transfer_write [[CST1]], {{.*}}, [[VAL1]]#0, [[VAL1]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
// CHECK: [[VAL2:%.*]] = affine_apply [[MAP2]]{{.*}}
// CHECK: vector_transfer_write [[CST2]], {{.*}}, [[VAL2]]#0, [[VAL2]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
// CHECK: [[VAL3:%.*]] = affine_apply [[MAP3]]{{.*}}
// CHECK: vector_transfer_write [[CST3]], {{.*}}, [[VAL3]]#0, [[VAL3]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST3:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
// CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST0]], {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
// CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
// CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST1]], {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
// CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
// CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1P16]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST2]], {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
// CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
// CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P24]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
//
for %i0 = 0 to %M {
for %i1 = 0 to %N {
@ -38,19 +42,23 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
// 4x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 {
// CHECK: for %i3 = 0 to %arg1 step 32 {
// CHECK: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK: [[CST3:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK: [[VAL0:%.*]] = affine_apply [[MAP0]]{{.*}}
// CHECK: vector_transfer_write [[CST0]], {{.*}}, [[VAL0]]#0, [[VAL0]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
// CHECK: [[VAL1:%.*]] = affine_apply [[MAP1]]{{.*}}
// CHECK: vector_transfer_write [[CST1]], {{.*}}, [[VAL1]]#0, [[VAL1]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
// CHECK: [[VAL2:%.*]] = affine_apply [[MAP2]]{{.*}}
// CHECK: vector_transfer_write [[CST2]], {{.*}}, [[VAL2]]#0, [[VAL2]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
// CHECK: [[VAL3:%.*]] = affine_apply [[MAP3]]{{.*}}
// CHECK: vector_transfer_write [[CST3]], {{.*}}, [[VAL3]]#0, [[VAL3]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST3:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
// CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST0]], {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
// CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
// CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST1]], {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
// CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
// CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1P16]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST2]], {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
// CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
// CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P24]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
//
for %i2 = 0 to %M {
for %i3 = 0 to %N {
@ -60,35 +68,47 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
// 4x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 {
// CHECK: for %i5 = 0 to %arg1 step 32 {
// CHECK: %11 = affine_apply #map0(%i4, %i5)
// CHECK: %12 = vector_transfer_read %0, %11#0, %11#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %13 = affine_apply #map2(%i4, %i5)
// CHECK: %14 = vector_transfer_read %0, %13#0, %13#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %15 = affine_apply #map3(%i4, %i5)
// CHECK: %16 = vector_transfer_read %0, %15#0, %15#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %17 = affine_apply #map4(%i4, %i5)
// CHECK: %18 = vector_transfer_read %0, %17#0, %17#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %19 = affine_apply #map0(%i4, %i5)
// CHECK: %20 = vector_transfer_read %1, %19#0, %19#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %21 = affine_apply #map2(%i4, %i5)
// CHECK: %22 = vector_transfer_read %1, %21#0, %21#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %23 = affine_apply #map3(%i4, %i5)
// CHECK: %24 = vector_transfer_read %1, %23#0, %23#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %25 = affine_apply #map4(%i4, %i5)
// CHECK: %26 = vector_transfer_read %1, %25#0, %25#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %27 = addf %12, %20 : vector<8xf32>
// CHECK: %28 = addf %14, %22 : vector<8xf32>
// CHECK: %29 = addf %16, %24 : vector<8xf32>
// CHECK: %30 = addf %18, %26 : vector<8xf32>
// CHECK: %31 = affine_apply #map0(%i4, %i5)
// CHECK: vector_transfer_write %27, %2, %31#0, %31#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %32 = affine_apply #map2(%i4, %i5)
// CHECK: vector_transfer_write %28, %2, %32#0, %32#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %33 = affine_apply #map3(%i4, %i5)
// CHECK: vector_transfer_write %29, %2, %33#0, %33#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %34 = affine_apply #map4(%i4, %i5)
// CHECK: vector_transfer_write %30, %2, %34#0, %34#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {

View File

@ -1,15 +1,13 @@
// RUN: mlir-opt %s -vectorize -virtual-vector-size 3 -virtual-vector-size 16 --test-fastest-varying=1 --test-fastest-varying=0 -materialize-vectors -vector-size=8 | FileCheck %s
// Capture permutation maps used in vectorization.
// CHECK-DAG: #[[map_proj_d0d1_d1:map[0-9]+]] = (d0, d1) -> (d1)
// vector<3x16xf32> -> vector<8xf32>
// CHECK-DAG: [[MAP0:#.*]] = (d0, d1) -> (d0, d1)
// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 8)
// CHECK-DAG: [[MAP2:#.*]] = (d0, d1) -> (d0 + 1, d1)
// CHECK-DAG: [[MAP3:#.*]] = (d0, d1) -> (d0 + 1, d1 + 8)
// CHECK-DAG: [[MAP4:#.*]] = (d0, d1) -> (d0 + 2, d1)
// CHECK-DAG: [[MAP5:#.*]] = (d0, d1) -> (d0 + 2, d1 + 8)
// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0)
// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1)
// CHECK-DAG: [[D0D1TOD1P8:#.*]] = (d0, d1) -> (d1 + 8)
// CHECK-DAG: [[D0D1TOD0P1:#.*]] = (d0, d1) -> (d0 + 1)
// CHECK-DAG: [[D0D1TOD0P2:#.*]] = (d0, d1) -> (d0 + 2)
// CHECK-LABEL: func @vector_add_2d
func @vector_add_2d(%M : index, %N : index) -> f32 {
%A = alloc (%M, %N) : memref<?x?xf32, 0>
%B = alloc (%M, %N) : memref<?x?xf32, 0>
@ -18,25 +16,31 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f2 = constant 2.0 : f32
// (3x2)x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
// CHECK: for %i1 = 0 to %arg1 step 16 {
// CHECK: %cst_1 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK: %cst_2 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK: %cst_3 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK: %cst_4 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK: %cst_5 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK: %cst_6 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK: %3 = affine_apply #map0(%i0, %i1)
// CHECK: vector_transfer_write %cst_1, %0, %3#0, %3#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %4 = affine_apply #map2(%i0, %i1)
// CHECK: vector_transfer_write %cst_2, %0, %4#0, %4#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %5 = affine_apply #map3(%i0, %i1)
// CHECK: vector_transfer_write %cst_3, %0, %5#0, %5#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %6 = affine_apply #map4(%i0, %i1)
// CHECK: vector_transfer_write %cst_4, %0, %6#0, %6#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %7 = affine_apply #map5(%i0, %i1)
// CHECK: vector_transfer_write %cst_5, %0, %7#0, %7#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %8 = affine_apply #map6(%i0, %i1)
// CHECK: vector_transfer_write %cst_6, %0, %8#0, %8#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK-NEXT: for %i1 = 0 to %arg1 step 16 {
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
// CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
// CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
// CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
// CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0P1]](%i0, %i1)
// CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
// CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0P1]](%i0, %i1)
// CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
// CHECK-NEXT: [[VAL40:%.*]] = affine_apply [[D0D1TOD0P2]](%i0, %i1)
// CHECK-NEXT: [[VAL41:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL40]], [[VAL41]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
// CHECK-NEXT: [[VAL50:%.*]] = affine_apply [[D0D1TOD0P2]](%i0, %i1)
// CHECK-NEXT: [[VAL51:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
// non-scoped %f1
@ -45,25 +49,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
// (3x2)x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 step 3 {
// CHECK: for %i3 = 0 to %arg1 step 16 {
// CHECK: %cst_7 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK: %cst_8 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK: %cst_9 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK: %cst_10 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK: %cst_11 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK: %cst_12 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK: %9 = affine_apply #map0(%i2, %i3)
// CHECK: vector_transfer_write %cst_7, %1, %9#0, %9#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %10 = affine_apply #map2(%i2, %i3)
// CHECK: vector_transfer_write %cst_8, %1, %10#0, %10#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %11 = affine_apply #map3(%i2, %i3)
// CHECK: vector_transfer_write %cst_9, %1, %11#0, %11#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %12 = affine_apply #map4(%i2, %i3)
// CHECK: vector_transfer_write %cst_10, %1, %12#0, %12#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %13 = affine_apply #map5(%i2, %i3)
// CHECK: vector_transfer_write %cst_11, %1, %13#0, %13#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %14 = affine_apply #map6(%i2, %i3)
// CHECK: vector_transfer_write %cst_12, %1, %14#0, %14#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK-NEXT: for %i3 = 0 to %arg1 step 16 {
// .....
for %i2 = 0 to %M {
for %i3 = 0 to %N {
// non-scoped %f2
@ -73,49 +60,68 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
// (3x2)x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 step 3 {
// CHECK: for %i5 = 0 to %arg1 step 16 {
// CHECK: %15 = affine_apply #map0(%i4, %i5)
// CHECK: %16 = vector_transfer_read %0, %15#0, %15#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %17 = affine_apply #map2(%i4, %i5)
// CHECK: %18 = vector_transfer_read %0, %17#0, %17#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %19 = affine_apply #map3(%i4, %i5)
// CHECK: %20 = vector_transfer_read %0, %19#0, %19#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %21 = affine_apply #map4(%i4, %i5)
// CHECK: %22 = vector_transfer_read %0, %21#0, %21#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %23 = affine_apply #map5(%i4, %i5)
// CHECK: %24 = vector_transfer_read %0, %23#0, %23#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %25 = affine_apply #map6(%i4, %i5)
// CHECK: %26 = vector_transfer_read %0, %25#0, %25#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %27 = affine_apply #map0(%i4, %i5)
// CHECK: %28 = vector_transfer_read %1, %27#0, %27#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %29 = affine_apply #map2(%i4, %i5)
// CHECK: %30 = vector_transfer_read %1, %29#0, %29#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %31 = affine_apply #map3(%i4, %i5)
// CHECK: %32 = vector_transfer_read %1, %31#0, %31#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %33 = affine_apply #map4(%i4, %i5)
// CHECK: %34 = vector_transfer_read %1, %33#0, %33#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %35 = affine_apply #map5(%i4, %i5)
// CHECK: %36 = vector_transfer_read %1, %35#0, %35#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %37 = affine_apply #map6(%i4, %i5)
// CHECK: %38 = vector_transfer_read %1, %37#0, %37#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
// CHECK: %39 = addf %16, %28 : vector<8xf32>
// CHECK: %40 = addf %18, %30 : vector<8xf32>
// CHECK: %41 = addf %20, %32 : vector<8xf32>
// CHECK: %42 = addf %22, %34 : vector<8xf32>
// CHECK: %43 = addf %24, %36 : vector<8xf32>
// CHECK: %44 = addf %26, %38 : vector<8xf32>
// CHECK: %45 = affine_apply #map0(%i4, %i5)
// CHECK: vector_transfer_write %39, %2, %45#0, %45#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %46 = affine_apply #map2(%i4, %i5)
// CHECK: vector_transfer_write %40, %2, %46#0, %46#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %47 = affine_apply #map3(%i4, %i5)
// CHECK: vector_transfer_write %41, %2, %47#0, %47#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %48 = affine_apply #map4(%i4, %i5)
// CHECK: vector_transfer_write %42, %2, %48#0, %48#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %49 = affine_apply #map5(%i4, %i5)
// CHECK: vector_transfer_write %43, %2, %49#0, %49#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK: %50 = affine_apply #map6(%i4, %i5)
// CHECK: vector_transfer_write %44, %2, %50#0, %50#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
// CHECK-NEXT: for %i5 = 0 to %arg1 step 16 {
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>

View File

@ -1,10 +1,12 @@
// RUN: mlir-opt %s -vectorize -virtual-vector-size 3 -virtual-vector-size 32 --test-fastest-varying=1 --test-fastest-varying=0 -materialize-vectors -vector-size=3 -vector-size=16 | FileCheck %s
// Capture permutation maps used in vectorization.
// CHECK-DAG: #[[map_proj_d0d1_d0d1:map[0-9]+]] = (d0, d1) -> (d0, d1)
// vector<3x32xf32> -> vector<3x16xf32>
// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 16)
// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0)
// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1)
// CHECK-DAG: [[D0D1TOD0D1:#.*]] = (d0, d1) -> (d0, d1)
// CHECK-DAG: [[D0D1TOD1P16:#.*]] = (d0, d1) -> (d1 + 16)
// CHECK-LABEL: func @vector_add_2d
func @vector_add_2d(%M : index, %N : index) -> f32 {
%A = alloc (%M, %N) : memref<?x?xf32, 0>
%B = alloc (%M, %N) : memref<?x?xf32, 0>
@ -13,13 +15,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f2 = constant 2.0 : f32
// 2x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
// CHECK: for %i1 = 0 to %arg1 step 32 {
// CHECK: %cst_1 = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK: %cst_2 = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK: %3 = affine_apply #map0(%i0, %i1)
// CHECK: vector_transfer_write %cst_1, %0, %3#0, %3#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
// CHECK: %4 = affine_apply #map1(%i0, %i1)
// CHECK: vector_transfer_write %cst_2, %0, %4#0, %4#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
// CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
// CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
// CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P16]](%i0, %i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
//
for %i0 = 0 to %M {
for %i1 = 0 to %N {
@ -29,13 +33,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
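// 2x unroll (jammed by construction): every vector<3x32xf32> operation is
// emitted as two vector<3x16xf32> operations within the same loop body.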
// CHECK: for %i2 = 0 to %arg0 step 3 {
// CHECK: for %i3 = 0 to %arg1 step 32 {
// CHECK: %cst_3 = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK: %cst_4 = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK: %5 = affine_apply #map0(%i2, %i3)
// CHECK: vector_transfer_write %cst_3, %1, %5#0, %5#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
// CHECK: %6 = affine_apply #map1(%i2, %i3)
// CHECK: vector_transfer_write %cst_4, %1, %6#0, %6#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i2, %i3)
// CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i2, %i3)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
// CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i2, %i3)
// CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P16]](%i2, %i3)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
//
for %i2 = 0 to %M {
for %i3 = 0 to %N {
@ -45,21 +51,27 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
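// 2x unroll (jammed by construction): every vector<3x32xf32> operation is
// emitted as two vector<3x16xf32> operations within the same loop body.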
// CHECK: for %i4 = 0 to %arg0 step 3 {
// CHECK: for %i5 = 0 to %arg1 step 32 {
// CHECK: %7 = affine_apply #map0(%i4, %i5)
// CHECK: %8 = vector_transfer_read %0, %7#0, %7#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
// CHECK: %9 = affine_apply #map1(%i4, %i5)
// CHECK: %10 = vector_transfer_read %0, %9#0, %9#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
// CHECK: %11 = affine_apply #map0(%i4, %i5)
// CHECK: %12 = vector_transfer_read %1, %11#0, %11#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
// CHECK: %13 = affine_apply #map1(%i4, %i5)
// CHECK: %14 = vector_transfer_read %1, %13#0, %13#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
// CHECK: %15 = addf %8, %12 : vector<3x16xf32>
// CHECK: %16 = addf %10, %14 : vector<3x16xf32>
// CHECK: %17 = affine_apply #map0(%i4, %i5)
// CHECK: vector_transfer_write %15, %2, %17#0, %17#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
// CHECK: %18 = affine_apply #map1(%i4, %i5)
// CHECK: vector_transfer_write %16, %2, %18#0, %18#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: {{.*}} = affine_apply
// CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {