From 2d3b9fdc193fa835313f09f526b51bf6de3a54ef Mon Sep 17 00:00:00 2001 From: Sergei Grechanik Date: Wed, 9 Dec 2020 11:03:56 -0800 Subject: [PATCH] [mlir][Affine] Fix vectorizability check for multiple load/stores This patch fixes a bug that allowed vectorizing of loops with loads and stores having indexing functions varying along different memory dimensions. Reviewed By: aartbik, dcaballe Differential Revision: https://reviews.llvm.org/D92702 --- mlir/lib/Analysis/LoopAnalysis.cpp | 16 +++++++++-- .../Affine/SuperVectorize/vectorize_1d.mlir | 27 +++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Analysis/LoopAnalysis.cpp b/mlir/lib/Analysis/LoopAnalysis.cpp index 210b68eae3cb..932559cac197 100644 --- a/mlir/lib/Analysis/LoopAnalysis.cpp +++ b/mlir/lib/Analysis/LoopAnalysis.cpp @@ -327,11 +327,23 @@ isVectorizableLoopBodyWithOpCond(AffineForOp loop, bool mlir::isVectorizableLoopBody(AffineForOp loop, int *memRefDim, NestedPattern &vectorTransferMatcher) { + *memRefDim = -1; VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) { auto load = dyn_cast(op); auto store = dyn_cast(op); - return load ? isContiguousAccess(loop.getInductionVar(), load, memRefDim) - : isContiguousAccess(loop.getInductionVar(), store, memRefDim); + int thisOpMemRefDim = -1; + bool isContiguous = load ? isContiguousAccess(loop.getInductionVar(), load, + &thisOpMemRefDim) + : isContiguousAccess(loop.getInductionVar(), store, + &thisOpMemRefDim); + if (thisOpMemRefDim != -1) { + // If memory accesses vary across different dimensions then the loop is + // not vectorizable. + if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim) + return false; + *memRefDim = thisOpMemRefDim; + } + return isContiguous; }); return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher); } diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir index ca496b75432c..86749e2c7bab 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir @@ -397,6 +397,33 @@ func @vec_rejected_10(%A : memref, %B : memref) { return } +// CHECK-LABEL: func @vec_rejected_11 +func @vec_rejected_11(%A : memref, %B : memref) { + // CHECK-DAG: %[[C0:.*]] = constant 0 : index + // CHECK-DAG: %[[C1:.*]] = constant 1 : index + // CHECK-DAG: %[[C2:.*]] = constant 2 : index + // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref + // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref + // CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index + %M = dim %A, %c0 : memref + %N = dim %A, %c1 : memref + %P = dim %B, %c2 : memref + + // CHECK: for [[IV10:%[arg0-9]*]] = 0 to %{{[0-9]*}} { + // CHECK: for [[IV11:%[arg0-9]*]] = 0 to %{{[0-9]*}} { + // This is similar to vec_rejected_5, but the order of indices is different. + affine.for %i10 = 0 to %M { // not vectorized + affine.for %i11 = 0 to %N { // not vectorized + %a11 = affine.load %A[%i11, %i10] : memref + affine.store %a11, %A[%i10, %i11] : memref + } + } + return +} + // This should not vectorize due to the sequential dependence in the scf. // CHECK-LABEL: @vec_rejected_sequential func @vec_rejected_sequential(%A : memref) {