forked from OSchip/llvm-project
[MLIR] Make SuperVectorization use normalized AffineApplyOp
SuperVectorization does not plan on handling multi-result AffineMaps or non-canonical chains of more than one AffineApplyOp. This CL uses the simpler single-result unbounded AffineApplyOp in the MaterializeVectors pass. PiperOrigin-RevId: 228469085
This commit is contained in:
parent
3e5ee82b81
commit
1f78d63f05
|
@ -373,12 +373,16 @@ reindexAffineIndices(FuncBuilder *b, VectorType hwVectorType,
|
|||
auto stride = vectorShape[i - numMemRefIndices - numSuperVectorIndices];
|
||||
affineExprs.push_back(d_i + offset * stride);
|
||||
}
|
||||
auto affineMap = AffineMap::get(numIndices, 0, affineExprs, {});
|
||||
|
||||
// TODO(ntv): support a concrete map and composition.
|
||||
auto app = b->create<AffineApplyOp>(b->getInsertionPoint()->getLoc(),
|
||||
affineMap, memrefIndices);
|
||||
return SmallVector<mlir::Value *, 8>{app->getResults()};
|
||||
// Create a bunch of single result maps.
|
||||
return functional::map(
|
||||
[b, numIndices, memrefIndices](AffineExpr expr) {
|
||||
auto map = AffineMap::get(numIndices, 0, expr, {});
|
||||
auto app = makeNormalizedAffineApply(
|
||||
b, b->getInsertionPoint()->getLoc(), map, memrefIndices);
|
||||
return app->getResult(0);
|
||||
},
|
||||
affineExprs);
|
||||
}
|
||||
|
||||
/// Returns attributes with the following substitutions applied:
|
||||
|
@ -553,11 +557,17 @@ static bool instantiateMaterialization(Instruction *inst,
|
|||
// Create a builder here for unroll-and-jam effects.
|
||||
FuncBuilder b(inst);
|
||||
auto *opInst = cast<OperationInst>(inst);
|
||||
// AffineApplyOp are ignored: instantiating the proper vector op will take
|
||||
// care of AffineApplyOps by composing them properly.
|
||||
if (opInst->isa<AffineApplyOp>()) {
|
||||
return false;
|
||||
}
|
||||
if (auto write = opInst->dyn_cast<VectorTransferWriteOp>()) {
|
||||
auto *clone = instantiate(&b, write, state->hwVectorType,
|
||||
state->hwVectorInstance, state->substitutionsMap);
|
||||
return clone == nullptr;
|
||||
} else if (auto read = opInst->dyn_cast<VectorTransferReadOp>()) {
|
||||
}
|
||||
if (auto read = opInst->dyn_cast<VectorTransferReadOp>()) {
|
||||
auto *clone = instantiate(&b, read, state->hwVectorType,
|
||||
state->hwVectorInstance, state->substitutionsMap);
|
||||
if (!clone) {
|
||||
|
@ -570,10 +580,12 @@ static bool instantiateMaterialization(Instruction *inst,
|
|||
// The only op with 0 results reaching this point must, by construction, be
|
||||
// VectorTransferWriteOps and have been caught above. Ops with >= 2 results
|
||||
// are not yet supported. So just support 1 result.
|
||||
if (opInst->getNumResults() != 1)
|
||||
if (opInst->getNumResults() != 1) {
|
||||
return inst->emitError("NYI: ops with != 1 results");
|
||||
if (opInst->getResult(0)->getType() != state->superVectorType)
|
||||
}
|
||||
if (opInst->getResult(0)->getType() != state->superVectorType) {
|
||||
return inst->emitError("Op does not return a supervector.");
|
||||
}
|
||||
auto *clone =
|
||||
instantiate(&b, opInst, state->hwVectorType, state->substitutionsMap);
|
||||
if (!clone) {
|
||||
|
|
|
@ -1,26 +1,33 @@
|
|||
// RUN: mlir-opt %s -materialize-vectors -vector-size=4 -vector-size=4 | FileCheck %s
|
||||
|
||||
// CHECK-DAG: #[[map_instance_0:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1, d2, d3)
|
||||
// CHECK-DAG: #[[map_instance_1:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 1, d2, d3)
|
||||
// CHECK-DAG: #[[map_instance_2:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 2, d2, d3)
|
||||
// CHECK-DAG: #[[map_instance_3:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 3, d2, d3)
|
||||
// CHECK-DAG: #[[map_proj_d0d1d2d3d4_d1d0:map[0-9]+]] = (d0, d1, d2, d3) -> (d1, d0)
|
||||
// CHECK-DAG: #[[D0D1D2D3TOD0:map[0-9]+]] = (d0, d1, d2, d3) -> (d0)
|
||||
// CHECK-DAG: #[[D0D1D2D3TOD1:map[0-9]+]] = (d0, d1, d2, d3) -> (d1)
|
||||
// CHECK-DAG: #[[D0D1D2D3TOD2:map[0-9]+]] = (d0, d1, d2, d3) -> (d2)
|
||||
// CHECK-DAG: #[[D0D1D2D3TOD3:map[0-9]+]] = (d0, d1, d2, d3) -> (d3)
|
||||
// CHECK-DAG: #[[D0D1D2D3TOD1D0:map[0-9]+]] = (d0, d1, d2, d3) -> (d1, d0)
|
||||
// CHECK-DAG: #[[D0D1D2D3TOD1P1:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 1)
|
||||
// CHECK-DAG: #[[D0D1D2D3TOD1P2:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 2)
|
||||
// CHECK-DAG: #[[D0D1D2D3TOD1P3:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 3)
|
||||
|
||||
// CHECK-LABEL: func @materialize
|
||||
func @materialize(%M : index, %N : index, %O : index, %P : index) {
|
||||
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
|
||||
%f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
|
||||
// CHECK: for %i0 = 0 to %arg0 step 4 {
|
||||
// CHECK: for %i1 = 0 to %arg1 step 4 {
|
||||
// CHECK: for %i2 = 0 to %arg2 {
|
||||
// CHECK: for %i3 = 0 to %arg3 step 4 {
|
||||
// CHECK: %1 = affine_apply #[[map_instance_0]](%i0, %i1, %i2, %i3)
|
||||
// CHECK: vector_transfer_write {{.*}}, %0, %1#0, %1#1, %1#2, %1#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
// CHECK: %2 = affine_apply #[[map_instance_1]](%i0, %i1, %i2, %i3)
|
||||
// CHECK: vector_transfer_write {{.*}}, %0, %2#0, %2#1, %2#2, %2#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
// CHECK: %3 = affine_apply #[[map_instance_2]](%i0, %i1, %i2, %i3)
|
||||
// CHECK: vector_transfer_write {{.*}}, %0, %3#0, %3#1, %3#2, %3#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
// CHECK: %4 = affine_apply #[[map_instance_3]](%i0, %i1, %i2, %i3)
|
||||
// CHECK: vector_transfer_write {{.*}}, %0, %4#0, %4#1, %4#2, %4#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
// CHECK-NEXT: for %i1 = 0 to %arg1 step 4 {
|
||||
// CHECK-NEXT: for %i2 = 0 to %arg2 {
|
||||
// CHECK-NEXT: for %i3 = 0 to %arg3 step 4 {
|
||||
// CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[D0D1D2D3TOD0]](%i0, %i1, %i2, %i3)
|
||||
// CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1]](%i0, %i1, %i2, %i3)
|
||||
// CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[D0D1D2D3TOD2]](%i0, %i1, %i2, %i3)
|
||||
// CHECK-NEXT: %[[d:[0-9]+]] = {{.*}}[[D0D1D2D3TOD3]](%i0, %i1, %i2, %i3)
|
||||
// CHECK-NEXT: vector_transfer_write {{.*}}, %0, %[[a]], %[[b]], %[[c]], %[[d]] {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
// CHECK: %[[b1:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P1]](%i0, %i1, %i2, %i3)
|
||||
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b1]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
// CHECK: %[[b2:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P2]](%i0, %i1, %i2, %i3)
|
||||
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
// CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P3]](%i0, %i1, %i2, %i3)
|
||||
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
for %i0 = 0 to %M step 4 {
|
||||
for %i1 = 0 to %N step 4 {
|
||||
for %i2 = 0 to %O {
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
// RUN: mlir-opt %s -vectorize -virtual-vector-size 32 --test-fastest-varying=0 -materialize-vectors -vector-size=8 | FileCheck %s
|
||||
|
||||
// Capture permutation maps used in vectorization.
|
||||
// CHECK-DAG: #[[map_proj_d0d1_d1:map[0-9]+]] = (d0, d1) -> (d1)
|
||||
|
||||
// vector<32xf32> -> vector<8xf32>
|
||||
// CHECK-DAG: [[MAP0:#.*]] = (d0, d1) -> (d0, d1)
|
||||
// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 8)
|
||||
// CHECK-DAG: [[MAP2:#.*]] = (d0, d1) -> (d0, d1 + 16)
|
||||
// CHECK-DAG: [[MAP3:#.*]] = (d0, d1) -> (d0, d1 + 24)
|
||||
// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0)
|
||||
// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1)
|
||||
// CHECK-DAG: [[D0D1TOD1P8:#.*]] = (d0, d1) -> (d1 + 8)
|
||||
// CHECK-DAG: [[D0D1TOD1P16:#.*]] = (d0, d1) -> (d1 + 16)
|
||||
// CHECK-DAG: [[D0D1TOD1P24:#.*]] = (d0, d1) -> (d1 + 24)
|
||||
|
||||
// CHECK-LABEL: func @vector_add_2d
|
||||
func @vector_add_2d(%M : index, %N : index) -> f32 {
|
||||
%A = alloc (%M, %N) : memref<?x?xf32, 0>
|
||||
%B = alloc (%M, %N) : memref<?x?xf32, 0>
|
||||
|
@ -16,19 +16,23 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
|||
%f2 = constant 2.0 : f32
|
||||
// 4x unroll (jammed by construction).
|
||||
// CHECK: for %i0 = 0 to %arg0 {
|
||||
// CHECK: for %i1 = 0 to %arg1 step 32 {
|
||||
// CHECK: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK: [[CST3:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK: [[VAL0:%.*]] = affine_apply [[MAP0]]{{.*}}
|
||||
// CHECK: vector_transfer_write [[CST0]], {{.*}}, [[VAL0]]#0, [[VAL0]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
|
||||
// CHECK: [[VAL1:%.*]] = affine_apply [[MAP1]]{{.*}}
|
||||
// CHECK: vector_transfer_write [[CST1]], {{.*}}, [[VAL1]]#0, [[VAL1]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
|
||||
// CHECK: [[VAL2:%.*]] = affine_apply [[MAP2]]{{.*}}
|
||||
// CHECK: vector_transfer_write [[CST2]], {{.*}}, [[VAL2]]#0, [[VAL2]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
|
||||
// CHECK: [[VAL3:%.*]] = affine_apply [[MAP3]]{{.*}}
|
||||
// CHECK: vector_transfer_write [[CST3]], {{.*}}, [[VAL3]]#0, [[VAL3]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST3:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
|
||||
// CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST0]], {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
|
||||
// CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST1]], {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
|
||||
// CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1P16]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST2]], {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
|
||||
// CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P24]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
//
|
||||
for %i0 = 0 to %M {
|
||||
for %i1 = 0 to %N {
|
||||
|
@ -38,19 +42,23 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
|||
}
|
||||
// 4x unroll (jammed by construction).
|
||||
// CHECK: for %i2 = 0 to %arg0 {
|
||||
// CHECK: for %i3 = 0 to %arg1 step 32 {
|
||||
// CHECK: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK: [[CST3:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK: [[VAL0:%.*]] = affine_apply [[MAP0]]{{.*}}
|
||||
// CHECK: vector_transfer_write [[CST0]], {{.*}}, [[VAL0]]#0, [[VAL0]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
|
||||
// CHECK: [[VAL1:%.*]] = affine_apply [[MAP1]]{{.*}}
|
||||
// CHECK: vector_transfer_write [[CST1]], {{.*}}, [[VAL1]]#0, [[VAL1]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
|
||||
// CHECK: [[VAL2:%.*]] = affine_apply [[MAP2]]{{.*}}
|
||||
// CHECK: vector_transfer_write [[CST2]], {{.*}}, [[VAL2]]#0, [[VAL2]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
|
||||
// CHECK: [[VAL3:%.*]] = affine_apply [[MAP3]]{{.*}}
|
||||
// CHECK: vector_transfer_write [[CST3]], {{.*}}, [[VAL3]]#0, [[VAL3]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST3:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
|
||||
// CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST0]], {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
|
||||
// CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST1]], {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
|
||||
// CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1P16]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST2]], {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
|
||||
// CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P24]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
//
|
||||
for %i2 = 0 to %M {
|
||||
for %i3 = 0 to %N {
|
||||
|
@ -60,35 +68,47 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
|||
}
|
||||
// 4x unroll (jammed by construction).
|
||||
// CHECK: for %i4 = 0 to %arg0 {
|
||||
// CHECK: for %i5 = 0 to %arg1 step 32 {
|
||||
// CHECK: %11 = affine_apply #map0(%i4, %i5)
|
||||
// CHECK: %12 = vector_transfer_read %0, %11#0, %11#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %13 = affine_apply #map2(%i4, %i5)
|
||||
// CHECK: %14 = vector_transfer_read %0, %13#0, %13#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %15 = affine_apply #map3(%i4, %i5)
|
||||
// CHECK: %16 = vector_transfer_read %0, %15#0, %15#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %17 = affine_apply #map4(%i4, %i5)
|
||||
// CHECK: %18 = vector_transfer_read %0, %17#0, %17#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %19 = affine_apply #map0(%i4, %i5)
|
||||
// CHECK: %20 = vector_transfer_read %1, %19#0, %19#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %21 = affine_apply #map2(%i4, %i5)
|
||||
// CHECK: %22 = vector_transfer_read %1, %21#0, %21#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %23 = affine_apply #map3(%i4, %i5)
|
||||
// CHECK: %24 = vector_transfer_read %1, %23#0, %23#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %25 = affine_apply #map4(%i4, %i5)
|
||||
// CHECK: %26 = vector_transfer_read %1, %25#0, %25#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %27 = addf %12, %20 : vector<8xf32>
|
||||
// CHECK: %28 = addf %14, %22 : vector<8xf32>
|
||||
// CHECK: %29 = addf %16, %24 : vector<8xf32>
|
||||
// CHECK: %30 = addf %18, %26 : vector<8xf32>
|
||||
// CHECK: %31 = affine_apply #map0(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %27, %2, %31#0, %31#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %32 = affine_apply #map2(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %28, %2, %32#0, %32#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %33 = affine_apply #map3(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %29, %2, %33#0, %33#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %34 = affine_apply #map4(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %30, %2, %34#0, %34#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
//
|
||||
for %i4 = 0 to %M {
|
||||
for %i5 = 0 to %N {
|
||||
|
|
|
@ -1,15 +1,13 @@
|
|||
// RUN: mlir-opt %s -vectorize -virtual-vector-size 3 -virtual-vector-size 16 --test-fastest-varying=1 --test-fastest-varying=0 -materialize-vectors -vector-size=8 | FileCheck %s
|
||||
|
||||
// Capture permutation maps used in vectorization.
|
||||
// CHECK-DAG: #[[map_proj_d0d1_d1:map[0-9]+]] = (d0, d1) -> (d1)
|
||||
|
||||
// vector<3x16xf32> -> vector<8xf32>
|
||||
// CHECK-DAG: [[MAP0:#.*]] = (d0, d1) -> (d0, d1)
|
||||
// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 8)
|
||||
// CHECK-DAG: [[MAP2:#.*]] = (d0, d1) -> (d0 + 1, d1)
|
||||
// CHECK-DAG: [[MAP3:#.*]] = (d0, d1) -> (d0 + 1, d1 + 8)
|
||||
// CHECK-DAG: [[MAP4:#.*]] = (d0, d1) -> (d0 + 2, d1)
|
||||
// CHECK-DAG: [[MAP5:#.*]] = (d0, d1) -> (d0 + 2, d1 + 8)
|
||||
// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0)
|
||||
// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1)
|
||||
// CHECK-DAG: [[D0D1TOD1P8:#.*]] = (d0, d1) -> (d1 + 8)
|
||||
// CHECK-DAG: [[D0D1TOD0P1:#.*]] = (d0, d1) -> (d0 + 1)
|
||||
// CHECK-DAG: [[D0D1TOD0P2:#.*]] = (d0, d1) -> (d0 + 2)
|
||||
|
||||
// CHECK-LABEL: func @vector_add_2d
|
||||
func @vector_add_2d(%M : index, %N : index) -> f32 {
|
||||
%A = alloc (%M, %N) : memref<?x?xf32, 0>
|
||||
%B = alloc (%M, %N) : memref<?x?xf32, 0>
|
||||
|
@ -18,25 +16,31 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
|||
%f2 = constant 2.0 : f32
|
||||
// (3x2)x unroll (jammed by construction).
|
||||
// CHECK: for %i0 = 0 to %arg0 step 3 {
|
||||
// CHECK: for %i1 = 0 to %arg1 step 16 {
|
||||
// CHECK: %cst_1 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK: %cst_2 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK: %cst_3 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK: %cst_4 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK: %cst_5 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK: %cst_6 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK: %3 = affine_apply #map0(%i0, %i1)
|
||||
// CHECK: vector_transfer_write %cst_1, %0, %3#0, %3#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %4 = affine_apply #map2(%i0, %i1)
|
||||
// CHECK: vector_transfer_write %cst_2, %0, %4#0, %4#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %5 = affine_apply #map3(%i0, %i1)
|
||||
// CHECK: vector_transfer_write %cst_3, %0, %5#0, %5#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %6 = affine_apply #map4(%i0, %i1)
|
||||
// CHECK: vector_transfer_write %cst_4, %0, %6#0, %6#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %7 = affine_apply #map5(%i0, %i1)
|
||||
// CHECK: vector_transfer_write %cst_5, %0, %7#0, %7#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %8 = affine_apply #map6(%i0, %i1)
|
||||
// CHECK: vector_transfer_write %cst_6, %0, %8#0, %8#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK-NEXT: for %i1 = 0 to %arg1 step 16 {
|
||||
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
|
||||
// CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
|
||||
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
|
||||
// CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1)
|
||||
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0P1]](%i0, %i1)
|
||||
// CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
|
||||
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0P1]](%i0, %i1)
|
||||
// CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1)
|
||||
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL40:%.*]] = affine_apply [[D0D1TOD0P2]](%i0, %i1)
|
||||
// CHECK-NEXT: [[VAL41:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
|
||||
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL40]], [[VAL41]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
// CHECK-NEXT: [[VAL50:%.*]] = affine_apply [[D0D1TOD0P2]](%i0, %i1)
|
||||
// CHECK-NEXT: [[VAL51:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1)
|
||||
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
for %i0 = 0 to %M {
|
||||
for %i1 = 0 to %N {
|
||||
// non-scoped %f1
|
||||
|
@ -45,25 +49,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
|||
}
|
||||
// (3x2)x unroll (jammed by construction).
|
||||
// CHECK: for %i2 = 0 to %arg0 step 3 {
|
||||
// CHECK: for %i3 = 0 to %arg1 step 16 {
|
||||
// CHECK: %cst_7 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK: %cst_8 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK: %cst_9 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK: %cst_10 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK: %cst_11 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK: %cst_12 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK: %9 = affine_apply #map0(%i2, %i3)
|
||||
// CHECK: vector_transfer_write %cst_7, %1, %9#0, %9#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %10 = affine_apply #map2(%i2, %i3)
|
||||
// CHECK: vector_transfer_write %cst_8, %1, %10#0, %10#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %11 = affine_apply #map3(%i2, %i3)
|
||||
// CHECK: vector_transfer_write %cst_9, %1, %11#0, %11#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %12 = affine_apply #map4(%i2, %i3)
|
||||
// CHECK: vector_transfer_write %cst_10, %1, %12#0, %12#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %13 = affine_apply #map5(%i2, %i3)
|
||||
// CHECK: vector_transfer_write %cst_11, %1, %13#0, %13#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %14 = affine_apply #map6(%i2, %i3)
|
||||
// CHECK: vector_transfer_write %cst_12, %1, %14#0, %14#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK-NEXT: for %i3 = 0 to %arg1 step 16 {
|
||||
// .....
|
||||
for %i2 = 0 to %M {
|
||||
for %i3 = 0 to %N {
|
||||
// non-scoped %f2
|
||||
|
@ -73,49 +60,68 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
|||
}
|
||||
// (3x2)x unroll (jammed by construction).
|
||||
// CHECK: for %i4 = 0 to %arg0 step 3 {
|
||||
// CHECK: for %i5 = 0 to %arg1 step 16 {
|
||||
// CHECK: %15 = affine_apply #map0(%i4, %i5)
|
||||
// CHECK: %16 = vector_transfer_read %0, %15#0, %15#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %17 = affine_apply #map2(%i4, %i5)
|
||||
// CHECK: %18 = vector_transfer_read %0, %17#0, %17#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %19 = affine_apply #map3(%i4, %i5)
|
||||
// CHECK: %20 = vector_transfer_read %0, %19#0, %19#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %21 = affine_apply #map4(%i4, %i5)
|
||||
// CHECK: %22 = vector_transfer_read %0, %21#0, %21#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %23 = affine_apply #map5(%i4, %i5)
|
||||
// CHECK: %24 = vector_transfer_read %0, %23#0, %23#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %25 = affine_apply #map6(%i4, %i5)
|
||||
// CHECK: %26 = vector_transfer_read %0, %25#0, %25#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %27 = affine_apply #map0(%i4, %i5)
|
||||
// CHECK: %28 = vector_transfer_read %1, %27#0, %27#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %29 = affine_apply #map2(%i4, %i5)
|
||||
// CHECK: %30 = vector_transfer_read %1, %29#0, %29#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %31 = affine_apply #map3(%i4, %i5)
|
||||
// CHECK: %32 = vector_transfer_read %1, %31#0, %31#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %33 = affine_apply #map4(%i4, %i5)
|
||||
// CHECK: %34 = vector_transfer_read %1, %33#0, %33#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %35 = affine_apply #map5(%i4, %i5)
|
||||
// CHECK: %36 = vector_transfer_read %1, %35#0, %35#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %37 = affine_apply #map6(%i4, %i5)
|
||||
// CHECK: %38 = vector_transfer_read %1, %37#0, %37#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
|
||||
// CHECK: %39 = addf %16, %28 : vector<8xf32>
|
||||
// CHECK: %40 = addf %18, %30 : vector<8xf32>
|
||||
// CHECK: %41 = addf %20, %32 : vector<8xf32>
|
||||
// CHECK: %42 = addf %22, %34 : vector<8xf32>
|
||||
// CHECK: %43 = addf %24, %36 : vector<8xf32>
|
||||
// CHECK: %44 = addf %26, %38 : vector<8xf32>
|
||||
// CHECK: %45 = affine_apply #map0(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %39, %2, %45#0, %45#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %46 = affine_apply #map2(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %40, %2, %46#0, %46#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %47 = affine_apply #map3(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %41, %2, %47#0, %47#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %48 = affine_apply #map4(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %42, %2, %48#0, %48#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %49 = affine_apply #map5(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %43, %2, %49#0, %49#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %50 = affine_apply #map6(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %44, %2, %50#0, %50#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK-NEXT: for %i5 = 0 to %arg1 step 16 {
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
//
|
||||
for %i4 = 0 to %M {
|
||||
for %i5 = 0 to %N {
|
||||
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
// RUN: mlir-opt %s -vectorize -virtual-vector-size 3 -virtual-vector-size 32 --test-fastest-varying=1 --test-fastest-varying=0 -materialize-vectors -vector-size=3 -vector-size=16 | FileCheck %s
|
||||
|
||||
// Capture permutation maps used in vectorization.
|
||||
// CHECK-DAG: #[[map_proj_d0d1_d0d1:map[0-9]+]] = (d0, d1) -> (d0, d1)
|
||||
|
||||
// vector<3x32xf32> -> vector<3x16xf32>
|
||||
// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 16)
|
||||
// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0)
|
||||
// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1)
|
||||
// CHECK-DAG: [[D0D1TOD0D1:#.*]] = (d0, d1) -> (d0, d1)
|
||||
// CHECK-DAG: [[D0D1TOD1P16:#.*]] = (d0, d1) -> (d1 + 16)
|
||||
|
||||
// CHECK-LABEL: func @vector_add_2d
|
||||
func @vector_add_2d(%M : index, %N : index) -> f32 {
|
||||
%A = alloc (%M, %N) : memref<?x?xf32, 0>
|
||||
%B = alloc (%M, %N) : memref<?x?xf32, 0>
|
||||
|
@ -13,13 +15,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
|||
%f2 = constant 2.0 : f32
|
||||
// 2x unroll (jammed by construction).
|
||||
// CHECK: for %i0 = 0 to %arg0 step 3 {
|
||||
// CHECK: for %i1 = 0 to %arg1 step 32 {
|
||||
// CHECK: %cst_1 = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
|
||||
// CHECK: %cst_2 = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
|
||||
// CHECK: %3 = affine_apply #map0(%i0, %i1)
|
||||
// CHECK: vector_transfer_write %cst_1, %0, %3#0, %3#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %4 = affine_apply #map1(%i0, %i1)
|
||||
// CHECK: vector_transfer_write %cst_2, %0, %4#0, %4#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
|
||||
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
|
||||
// CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
|
||||
// CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
|
||||
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
|
||||
// CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
|
||||
// CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P16]](%i0, %i1)
|
||||
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
|
||||
//
|
||||
for %i0 = 0 to %M {
|
||||
for %i1 = 0 to %N {
|
||||
|
@ -29,13 +33,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
|||
}
|
||||
// 2x unroll (jammed by construction).
|
||||
// CHECK: for %i2 = 0 to %arg0 step 3 {
|
||||
// CHECK: for %i3 = 0 to %arg1 step 32 {
|
||||
// CHECK: %cst_3 = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
|
||||
// CHECK: %cst_4 = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
|
||||
// CHECK: %5 = affine_apply #map0(%i2, %i3)
|
||||
// CHECK: vector_transfer_write %cst_3, %1, %5#0, %5#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %6 = affine_apply #map1(%i2, %i3)
|
||||
// CHECK: vector_transfer_write %cst_4, %1, %6#0, %6#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
|
||||
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
|
||||
// CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i2, %i3)
|
||||
// CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i2, %i3)
|
||||
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
|
||||
// CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i2, %i3)
|
||||
// CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P16]](%i2, %i3)
|
||||
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
|
||||
//
|
||||
for %i2 = 0 to %M {
|
||||
for %i3 = 0 to %N {
|
||||
|
@ -45,21 +51,27 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
|||
}
|
||||
// 2x unroll (jammed by construction).
|
||||
// CHECK: for %i4 = 0 to %arg0 step 3 {
|
||||
// CHECK: for %i5 = 0 to %arg1 step 32 {
|
||||
// CHECK: %7 = affine_apply #map0(%i4, %i5)
|
||||
// CHECK: %8 = vector_transfer_read %0, %7#0, %7#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
|
||||
// CHECK: %9 = affine_apply #map1(%i4, %i5)
|
||||
// CHECK: %10 = vector_transfer_read %0, %9#0, %9#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
|
||||
// CHECK: %11 = affine_apply #map0(%i4, %i5)
|
||||
// CHECK: %12 = vector_transfer_read %1, %11#0, %11#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
|
||||
// CHECK: %13 = affine_apply #map1(%i4, %i5)
|
||||
// CHECK: %14 = vector_transfer_read %1, %13#0, %13#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
|
||||
// CHECK: %15 = addf %8, %12 : vector<3x16xf32>
|
||||
// CHECK: %16 = addf %10, %14 : vector<3x16xf32>
|
||||
// CHECK: %17 = affine_apply #map0(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %15, %2, %17#0, %17#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK: %18 = affine_apply #map1(%i4, %i5)
|
||||
// CHECK: vector_transfer_write %16, %2, %18#0, %18#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
|
||||
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<3x16xf32>
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}} : vector<3x16xf32>
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: {{.*}} = affine_apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
//
|
||||
for %i4 = 0 to %M {
|
||||
for %i5 = 0 to %N {
|
||||
|
|
Loading…
Reference in New Issue