[mlir] Set insertion point of vector constant to the top of the vectorized loop body

When we vectorize a scalar constant that is defined outside the loops to be
vectorized, the vector constant is inserted before the first user of the scalar
constant. If that first user is nested more deeply than a later user (e.g. the
first user is in an inner loop and another user follows that loop), the vector
constant does not dominate all of its users. To fix the problem, we find the
innermost vectorized loop that encloses the first user and insert the vector
constant at the top of that loop's body.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D106609
This commit is contained in:
Amy Zhuang 2021-07-29 15:39:49 -07:00
parent 4c98e9455a
commit a8b7e56f65
3 changed files with 38 additions and 5 deletions

View File

@ -948,6 +948,16 @@ static ConstantOp vectorizeConstant(ConstantOp constOp,
auto vecTy = getVectorType(scalarTy, state.strategy);
auto vecAttr = DenseElementsAttr::get(vecTy, constOp.getValue());
OpBuilder::InsertionGuard guard(state.builder);
Operation *parentOp = state.builder.getInsertionBlock()->getParentOp();
// Find the innermost vectorized ancestor loop to insert the vector constant.
while (parentOp && !state.vecLoopToVecDim.count(parentOp))
parentOp = parentOp->getParentOp();
assert(parentOp && state.vecLoopToVecDim.count(parentOp) &&
isa<AffineForOp>(parentOp) && "Expected a vectorized for op");
auto vecForOp = cast<AffineForOp>(parentOp);
state.builder.setInsertionPointToStart(vecForOp.getBody());
auto newConstOp = state.builder.create<ConstantOp>(constOp.getLoc(), vecAttr);
// Register vector replacement for future uses in the scope.

View File

@ -113,12 +113,12 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
// CHECK: %[[SPLAT2:.*]] = constant dense<2.000000e+00> : vector<128xf32>
// CHECK: %[[SPLAT1:.*]] = constant dense<1.000000e+00> : vector<128xf32>
// CHECK: %[[A5:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{[a-zA-Z0-9_]*}} : memref<?x?xf32>, vector<128xf32>
// CHECK: %[[B5:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{[a-zA-Z0-9_]*}} : memref<?x?xf32>, vector<128xf32>
// CHECK: %[[S5:.*]] = addf %[[A5]], %[[B5]] : vector<128xf32>
// CHECK: %[[SPLAT1:.*]] = constant dense<1.000000e+00> : vector<128xf32>
// CHECK: %[[S6:.*]] = addf %[[S5]], %[[SPLAT1]] : vector<128xf32>
// CHECK: %[[SPLAT2:.*]] = constant dense<2.000000e+00> : vector<128xf32>
// CHECK: %[[S7:.*]] = addf %[[S5]], %[[SPLAT2]] : vector<128xf32>
// CHECK: %[[S8:.*]] = addf %[[S7]], %[[S6]] : vector<128xf32>
// CHECK: vector.transfer_write %[[S8]], {{.*}} : vector<128xf32>, memref<?x?xf32>
@ -142,6 +142,29 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// -----
// CHECK-LABEL: func @vec_constant_with_two_users
// Regression test: a scalar constant defined outside the loop nest is used
// twice inside the vectorized loop, once in a nested loop and once directly in
// the outer loop body. A single vector constant must feed both vectorized
// stores, so it has to be created ahead of the nested loop.
func @vec_constant_with_two_users(%M : index, %N : index) -> (f32, f32) {
%A = memref.alloc (%M, %N) : memref<?x?xf32, 0>
%B = memref.alloc (%M) : memref<?xf32, 0>
// Scalar constant shared by both stores below.
%f1 = constant 1.0 : f32
affine.for %i0 = 0 to %M { // vectorized
// CHECK: %[[C1:.*]] = constant dense<1.000000e+00> : vector<128xf32>
// CHECK-NEXT: affine.for
// CHECK-NEXT: vector.transfer_write %[[C1]], {{.*}} : vector<128xf32>, memref<?x?xf32>
// First user of %f1: a store nested one loop deeper than the second user.
affine.for %i1 = 0 to %N {
affine.store %f1, %A[%i1, %i0] : memref<?x?xf32, 0>
}
// Second user of %f1: a store in the outer loop body, after the nested loop.
// CHECK: vector.transfer_write %[[C1]], {{.*}} : vector<128xf32>, memref<?xf32>
affine.store %f1, %B[%i0] : memref<?xf32, 0>
}
// Load one element from each buffer and return both values.
%c12 = constant 12 : index
%res1 = affine.load %A[%c12, %c12] : memref<?x?xf32, 0>
%res2 = affine.load %B[%c12] : memref<?xf32, 0>
return %res1, %res2 : f32, f32
}
// -----
// CHECK-LABEL: func @vec_rejected_1
func @vec_rejected_1(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
@ -551,8 +574,8 @@ func @vec_non_vecdim_reductions(%in0: memref<128x256xf32>, %in1: memref<128x256x
// CHECK-LABEL: @vec_non_vecdim_reductions
// CHECK: affine.for %{{.*}} = 0 to 256 step 128 {
// CHECK: %[[vzero:.*]] = constant dense<0.000000e+00> : vector<128xf32>
// CHECK: %[[vone:.*]] = constant dense<1> : vector<128xi32>
// CHECK: %[[vzero:.*]] = constant dense<0.000000e+00> : vector<128xf32>
// CHECK: %[[reds:.*]]:2 = affine.for %{{.*}} = 0 to 128
// CHECK-SAME: iter_args(%[[red_iter0:.*]] = %[[vzero]], %[[red_iter1:.*]] = %[[vone]]) -> (vector<128xf32>, vector<128xi32>) {
// CHECK: %[[ld0:.*]] = vector.transfer_read %{{.*}} : memref<128x256xf32>, vector<128xf32>

View File

@ -70,12 +70,12 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
// CHECK: [[SPLAT2:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32>
// CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32>
// CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref<?x?xf32>, vector<32x256xf32>
// CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref<?x?xf32>, vector<32x256xf32>
// CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32>
// CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32>
// CHECK: [[S6:%.*]] = addf [[S5]], [[SPLAT1]] : vector<32x256xf32>
// CHECK: [[SPLAT2:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32>
// CHECK: [[S7:%.*]] = addf [[S5]], [[SPLAT2]] : vector<32x256xf32>
// CHECK: [[S8:%.*]] = addf [[S7]], [[S6]] : vector<32x256xf32>
// CHECK: vector.transfer_write [[S8]], {{.*}} : vector<32x256xf32>, memref<?x?xf32>