[mlir][Affine] Fix vectorizability check for multiple load/stores
This patch fixes a bug that allowed vectorizing loops whose loads and stores
have indexing functions that vary along different memory dimensions.

Reviewed By: aartbik, dcaballe

Differential Revision: https://reviews.llvm.org/D92702
commit 2d3b9fdc19
parent fe3b244ef7
@@ -327,11 +327,23 @@ isVectorizableLoopBodyWithOpCond(AffineForOp loop,
 
 bool mlir::isVectorizableLoopBody(AffineForOp loop, int *memRefDim,
                                   NestedPattern &vectorTransferMatcher) {
+  *memRefDim = -1;
   VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
     auto load = dyn_cast<AffineLoadOp>(op);
     auto store = dyn_cast<AffineStoreOp>(op);
-    return load ? isContiguousAccess(loop.getInductionVar(), load, memRefDim)
-                : isContiguousAccess(loop.getInductionVar(), store, memRefDim);
+    int thisOpMemRefDim = -1;
+    bool isContiguous = load ? isContiguousAccess(loop.getInductionVar(), load,
+                                                  &thisOpMemRefDim)
+                             : isContiguousAccess(loop.getInductionVar(), store,
+                                                  &thisOpMemRefDim);
+    if (thisOpMemRefDim != -1) {
+      // If memory accesses vary across different dimensions then the loop is
+      // not vectorizable.
+      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
+        return false;
+      *memRefDim = thisOpMemRefDim;
+    }
+    return isContiguous;
   });
   return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
 }
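Why this fixes the bug: the old code handed *memRefDim directly to each
isContiguousAccess call, so every load/store overwrote the dimension recorded
by the previous one and no cross-operation consistency was enforced. A loop
whose accesses were each contiguous, but along different memref dimensions,
therefore passed the check. With the patch, each operation reports into a local
thisOpMemRefDim, and the loop is rejected as soon as two operations disagree.
The standalone C++ sketch below isolates that consistency check; the function
name accessesAgreeOnOneDim and the perOpDims vector are illustrative
inventions, not MLIR APIs.

#include <optional>
#include <vector>

// Each access reports the memref dimension (or -1 if none) along which its
// index varies with the loop induction variable; the loop remains a
// vectorization candidate only if all accesses agree on a single dimension.
bool accessesAgreeOnOneDim(const std::vector<int> &perOpDims) {
  std::optional<int> loopDim; // dimension shared by all accesses so far
  for (int dim : perOpDims) {
    if (dim == -1)
      continue; // access is loop-invariant: imposes no constraint
    if (loopDim && *loopDim != dim)
      return false; // two accesses vary along different dimensions
    loopDim = dim;
  }
  return true;
}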
@@ -397,6 +397,33 @@ func @vec_rejected_10(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
   return
 }
 
+// CHECK-LABEL: func @vec_rejected_11
+func @vec_rejected_11(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
+  // CHECK-DAG: %[[C0:.*]] = constant 0 : index
+  // CHECK-DAG: %[[C1:.*]] = constant 1 : index
+  // CHECK-DAG: %[[C2:.*]] = constant 2 : index
+  // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref<?x?xf32>
+  // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref<?x?xf32>
+  // CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref<?x?x?xf32>
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c2 = constant 2 : index
+  %M = dim %A, %c0 : memref<?x?xf32>
+  %N = dim %A, %c1 : memref<?x?xf32>
+  %P = dim %B, %c2 : memref<?x?x?xf32>
+
+  // CHECK: for [[IV10:%[arg0-9]*]] = 0 to %{{[0-9]*}} {
+  // CHECK:   for [[IV11:%[arg0-9]*]] = 0 to %{{[0-9]*}} {
+  // This is similar to vec_rejected_5, but the order of indices is different.
+  affine.for %i10 = 0 to %M { // not vectorized
+    affine.for %i11 = 0 to %N { // not vectorized
+      %a11 = affine.load %A[%i11, %i10] : memref<?x?xf32>
+      affine.store %a11, %A[%i10, %i11] : memref<?x?xf32>
+    }
+  }
+  return
+}
+
 // This should not vectorize due to the sequential dependence in the scf.
 // CHECK-LABEL: @vec_rejected_sequential
 func @vec_rejected_sequential(%A : memref<?xf32>) {
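The new vec_rejected_11 test is precisely such a case: the load %A[%i11, %i10]
and the store %A[%i10, %i11] use transposed indices, so with respect to either
induction variable the two accesses vary along different memref dimensions and
the loop nest is rejected. In terms of the hypothetical sketch above (w.r.t.
%i10, the load varies along dimension 1, the store along dimension 0):

#include <cassert>

int main() {
  // vec_rejected_11 w.r.t. %i10: the load varies along dim 1, the store
  // along dim 0, so the loop must be rejected.
  assert(!accessesAgreeOnOneDim({1, 0}));
  // If both accesses varied along the same dimension, it would be accepted.
  assert(accessesAgreeOnOneDim({1, 1}));
  return 0;
}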