[LoopVectorize] Fix bug where predicated loads/stores were dropped
This commit fixes a bug where the loop vectoriser fails to predicate
loads/stores when interleaving for targets that support masked loads and
stores. Code such as:

    void foo(int *restrict data1, int *restrict data2)
    {
      int counter = 1024;
      while (counter--)
        if (data1[counter] > data2[counter])
          data1[counter] = data2[counter];
    }

... could previously be transformed in such a way that the predicated store
implied by:

    if (data1[counter] > data2[counter])
      data1[counter] = data2[counter];

... was lost, resulting in miscompiles.

This bug was causing some tests in llvm-test-suite to fail when built for SVE.

Differential Revision: https://reviews.llvm.org/D99569
commit 2c551aedcf
parent d4f5f23bbb
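To illustrate the failure mode (a minimal sketch, not taken from the patch;
the value and block names below are hypothetical): with -force-vector-width=1
-force-vector-interleave=2, the scalarised, interleaved loop body must keep
each lane's store behind that lane's own guard, roughly:

    vector.body:
      ...
      ; lane 0: branch on this lane's own condition
      br i1 %cmp.0, label %pred.store.if, label %pred.store.continue
    pred.store.if:
      ; this store executes only when lane 0's condition holds
      store i32 %val.0, i32* %addr.0, align 4
      br label %pred.store.continue
    pred.store.continue:
      ; lane 1 is guarded the same way
      br i1 %cmp.1, label %pred.store.if2, label %pred.store.continue3

Before this fix, on targets where masked loads/stores are legal, these guards
could be dropped during interleaving, leaving both lanes' stores unconditional.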
@@ -1514,14 +1514,14 @@ public:
   /// Returns true if \p I is an instruction that will be predicated either
   /// through scalar predication or masked load/store or masked gather/scatter.
   /// Superset of instructions that return true for isScalarWithPredication.
-  bool isPredicatedInst(Instruction *I) {
+  bool isPredicatedInst(Instruction *I, ElementCount VF) {
     if (!blockNeedsPredication(I->getParent()))
       return false;
     // Loads and stores that need some form of masked operation are predicated
     // instructions.
     if (isa<LoadInst>(I) || isa<StoreInst>(I))
       return Legal->isMaskRequired(I);
-    return isScalarWithPredication(I);
+    return isScalarWithPredication(I, VF);
   }
 
   /// Returns true if \p I is a memory instruction with consecutive memory
@@ -6552,7 +6552,8 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){
   // from moving "masked load/store" check from legality to cost model.
   // Masked Load/Gather emulation was previously never allowed.
   // Limited number of Masked Store/Scatter emulation was allowed.
-  assert(isPredicatedInst(I) && "Expecting a scalar emulated instruction");
+  assert(isPredicatedInst(I, ElementCount::getFixed(1)) &&
+         "Expecting a scalar emulated instruction");
   return isa<LoadInst>(I) ||
          (isa<StoreInst>(I) &&
           NumPredStores > NumberOfStoresToPredicate);
@@ -6824,7 +6825,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // If we have a predicated load/store, it will need extra i1 extracts and
   // conditional branches, but may not be executed for each vector lane. Scale
   // the cost by the probability of executing the predicated block.
-  if (isPredicatedInst(I)) {
+  if (isPredicatedInst(I, ElementCount::getFixed(1))) {
     Cost /= getReciprocalPredBlockProb();
 
     // Add the cost of an i1 extract and a branch
@@ -8677,8 +8678,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
       Range);
 
   bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
-      [&](ElementCount VF) { return CM.isScalarWithPredication(I, VF); },
-      Range);
+      [&](ElementCount VF) { return CM.isPredicatedInst(I, VF); }, Range);
 
   auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
                                        IsUniform, IsPredicated);
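For readers unfamiliar with the planner helper used above, here is a
standalone C++ sketch of what getDecisionAndClampRange does, with simplified
stand-ins for LLVM's ElementCount and VFRange (this is not the LLVM source):
the predicate is evaluated at the start of the VF range, and the range is
clamped so the decision is uniform across it. This is why isPredicatedInst
now takes a VF argument: the answer can differ per VF.

    #include <cassert>
    #include <functional>

    // Simplified stand-in for llvm::VFRange: [Start, End), powers of two.
    struct VFRange {
      unsigned Start; // inclusive
      unsigned End;   // exclusive
    };

    // Returns the predicate's decision at Range.Start and shrinks Range.End
    // to the first VF whose decision differs, so one decision covers the
    // whole (clamped) range.
    static bool getDecisionAndClampRange(
        const std::function<bool(unsigned)> &Predicate, VFRange &Range) {
      assert(Range.Start < Range.End && "expected a non-empty VF range");
      bool DecisionAtRangeStart = Predicate(Range.Start);
      for (unsigned VF = Range.Start * 2; VF < Range.End; VF *= 2)
        if (Predicate(VF) != DecisionAtRangeStart) {
          Range.End = VF; // clamp: decision changes at this VF
          break;
        }
      return DecisionAtRangeStart;
    }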
@@ -0,0 +1,52 @@
+; RUN: opt -loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S -o - < %s | FileCheck %s
+; RUN: opt -mattr=+sve -loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S -o - < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test is defending against a bug that appeared when we have a target
+; configuration where masked loads/stores are legal -- e.g. AArch64 with SVE.
+; Predication would not be applied during interleaving, enabling the
+; possibility of superfluous loads/stores which could result in miscompiles.
+; This test checks that, when we disable vectorisation and force interleaving,
+; stores are predicated properly.
+;
+; This is _not_ an SVE-specific test. The same bug could manifest on any
+; architecture with masked loads/stores, but we use SVE for testing purposes
+; here.
+
+define void @foo(i32* %data1, i32* %data2) {
+; CHECK-LABEL: @foo(
+; CHECK: vector.body:
+; CHECK: br i1 {{%.*}}, label %pred.store.if, label %pred.store.continue
+; CHECK: pred.store.if:
+; CHECK-NEXT: store i32 {{%.*}}, i32* {{%.*}}
+; CHECK-NEXT: br label %pred.store.continue
+; CHECK: pred.store.continue:
+; CHECK-NEXT: br i1 {{%.*}}, label %pred.store.if2, label %pred.store.continue3
+; CHECK: pred.store.if2:
+; CHECK-NEXT: store i32 {{%.*}}, i32* {{%.*}}
+; CHECK-NEXT: br label %pred.store.continue3
+; CHECK: pred.store.continue3:
+
+entry:
+  br label %while.body
+
+while.body:
+  %i = phi i64 [ 1023, %entry ], [ %i.next, %if.end ]
+  %arrayidx = getelementptr inbounds i32, i32* %data1, i64 %i
+  %ld = load i32, i32* %arrayidx, align 4
+  %cmp = icmp sgt i32 %ld, %ld
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %ld, i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:
+  %i.next = add nsw i64 %i, -1
+  %tobool.not = icmp eq i64 %i, 0
+  br i1 %tobool.not, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
@@ -65,9 +65,12 @@ for.end:
 ; sink-scalar-operands optimization for predicated instructions.
 ;
 ; SINK-GATHER: vector.body:
-; SINK-GATHER: pred.udiv.if:
+; SINK-GATHER: pred.load.if:
 ; SINK-GATHER: %[[T0:.+]] = load i32, i32* %{{.*}}, align 4
-; SINK-GATHER: %{{.*}} = udiv i32 %[[T0]], %{{.*}}
+; SINK-GATHER: pred.load.continue:
+; SINK-GATHER: %[[T1:.+]] = phi i32 [ poison, %vector.body ], [ %[[T0]], %pred.load.if ]
+; SINK-GATHER: pred.udiv.if:
+; SINK-GATHER: %{{.*}} = udiv i32 %[[T1]], %{{.*}}
 ; SINK-GATHER: pred.udiv.continue:
 define i32 @scalarize_and_sink_gather(i32* %a, i1 %c, i32 %x, i64 %n) {
 entry: