forked from OSchip/llvm-project
Recommit "[LAA] Support pointer phis in loop by analyzing each incoming pointer."
SCEV does not look through non-header PHIs inside the loop. Such phis can be analyzed by adding separate accesses for each incoming pointer value. This results in 2 more loops vectorized in SPEC2000/186.crafty and avoids regressions when sinking instructions before vectorizing. Fixes PR50296, PR50288. Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D102266
This commit is contained in:
parent
5a6dfbb8cd
commit
e248d69036
|
@ -177,21 +177,11 @@ public:
|
|||
|
||||
/// Register the location (instructions are given increasing numbers)
|
||||
/// of a write access.
|
||||
void addAccess(StoreInst *SI) {
|
||||
Value *Ptr = SI->getPointerOperand();
|
||||
Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
|
||||
InstMap.push_back(SI);
|
||||
++AccessIdx;
|
||||
}
|
||||
void addAccess(StoreInst *SI);
|
||||
|
||||
/// Register the location (instructions are given increasing numbers)
|
||||
/// of a read access.
|
||||
void addAccess(LoadInst *LI) {
|
||||
Value *Ptr = LI->getPointerOperand();
|
||||
Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
|
||||
InstMap.push_back(LI);
|
||||
++AccessIdx;
|
||||
}
|
||||
void addAccess(LoadInst *LI);
|
||||
|
||||
/// Check whether the dependencies between the accesses are safe.
|
||||
///
|
||||
|
|
|
@ -1263,6 +1263,47 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
|
|||
return Diff && *Diff == 1;
|
||||
}
|
||||
|
||||
/// Enumerate the pointers that \p StartPtr may stand for, looking through
/// pointer phis that live inside \p InnermostLoop but not in its header.
///
/// Uses an explicit worklist seeded with \p StartPtr. A value that is a
/// PHINode whose parent block is contained in \p InnermostLoop and is not the
/// loop header is not reported itself; its incoming values are queued
/// instead. Every other value reached (including \p StartPtr when it is not
/// such a phi) is passed to \p AddPointer.
///
/// Each distinct value is processed at most once, so \p AddPointer is invoked
/// at most once per value even when several phis share an incoming value.
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
|
||||
function_ref<void(Value *)> AddPointer) {
|
||||
SmallPtrSet<Value *, 8> Visited;
|
||||
SmallVector<Value *> WorkList;
|
||||
WorkList.push_back(StartPtr);
|
||||
|
||||
while (!WorkList.empty()) {
|
||||
Value *Ptr = WorkList.pop_back_val();
|
||||
// Skip values already handled; this also guarantees termination if the
|
||||
// phis being looked through reference each other.
if (!Visited.insert(Ptr).second)
|
||||
continue;
|
||||
auto *PN = dyn_cast<PHINode>(Ptr);
|
||||
// SCEV does not look through non-header PHIs inside the loop. Such phis
|
||||
// can be analyzed by adding separate accesses for each incoming pointer
|
||||
// value.
|
||||
if (PN && InnermostLoop.contains(PN->getParent()) &&
|
||||
PN->getParent() != InnermostLoop.getHeader()) {
|
||||
for (const Use &Inc : PN->incoming_values())
|
||||
WorkList.push_back(Inc);
|
||||
} else
|
||||
AddPointer(Ptr);
|
||||
}
|
||||
}
|
||||
|
||||
/// Register the store \p SI with the dependence checker.
///
/// The store's pointer operand is expanded via visitPointers(), so a store
/// through a non-header pointer phi inside the loop yields one entry per
/// incoming pointer. For every reported pointer the current AccessIdx is
/// appended to Accesses under MemAccessInfo(Ptr, true) (flag set for a
/// write), \p SI is appended to InstMap, and AccessIdx is incremented. A
/// single store may therefore occupy several access indices, each mapping
/// back to the same instruction in InstMap.
void MemoryDepChecker::addAccess(StoreInst *SI) {
|
||||
visitPointers(SI->getPointerOperand(), *InnermostLoop,
|
||||
[this, SI](Value *Ptr) {
|
||||
Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
|
||||
InstMap.push_back(SI);
|
||||
++AccessIdx;
|
||||
});
|
||||
}
|
||||
|
||||
/// Register the load \p LI with the dependence checker.
///
/// The load's pointer operand is expanded via visitPointers(), so a load
/// through a non-header pointer phi inside the loop yields one entry per
/// incoming pointer. For every reported pointer the current AccessIdx is
/// appended to Accesses under MemAccessInfo(Ptr, false) (flag cleared, i.e.
/// not a write), \p LI is appended to InstMap, and AccessIdx is incremented.
/// A single load may therefore occupy several access indices, each mapping
/// back to the same instruction in InstMap.
void MemoryDepChecker::addAccess(LoadInst *LI) {
|
||||
visitPointers(LI->getPointerOperand(), *InnermostLoop,
|
||||
[this, LI](Value *Ptr) {
|
||||
Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
|
||||
InstMap.push_back(LI);
|
||||
++AccessIdx;
|
||||
});
|
||||
}
|
||||
|
||||
MemoryDepChecker::VectorizationSafetyStatus
|
||||
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
|
||||
switch (Type) {
|
||||
|
@ -1962,7 +2003,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
|
|||
if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
|
||||
Loc.AATags.TBAA = nullptr;
|
||||
|
||||
Accesses.addStore(Loc);
|
||||
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
|
||||
[&Accesses, Loc](Value *Ptr) {
|
||||
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
|
||||
Accesses.addStore(NewLoc);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2006,7 +2051,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
|
|||
if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
|
||||
Loc.AATags.TBAA = nullptr;
|
||||
|
||||
Accesses.addLoad(Loc, IsReadOnlyPtr);
|
||||
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
|
||||
[&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
|
||||
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
|
||||
Accesses.addLoad(NewLoc, IsReadOnlyPtr);
|
||||
});
|
||||
}
|
||||
|
||||
// If we write (or read-write) to a single destination and there are no
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
define i32 @load_with_pointer_phi_no_runtime_checks(%s1* %data) {
|
||||
; CHECK-LABEL: load_with_pointer_phi_no_runtime_checks
|
||||
; CHECK-NEXT: loop.header:
|
||||
; CHECK-NEXT: Report: cannot identify array bounds
|
||||
; CHECK-NEXT: Memory dependences are safe
|
||||
;
|
||||
entry:
|
||||
br label %loop.header
|
||||
|
@ -40,7 +40,7 @@ exit: ; preds = %loop.latch
|
|||
define i32 @store_with_pointer_phi_no_runtime_checks(%s1* %data) {
|
||||
; CHECK-LABEL: 'store_with_pointer_phi_no_runtime_checks'
|
||||
; CHECK-NEXT: loop.header:
|
||||
; CHECK-NEXT: Report: cannot identify array bounds
|
||||
; CHECK-NEXT: Memory dependences are safe
|
||||
;
|
||||
entry:
|
||||
br label %loop.header
|
||||
|
@ -75,7 +75,23 @@ exit: ; preds = %loop.latch
|
|||
define i32 @store_with_pointer_phi_runtime_checks(double* %A, double* %B, double* %C) {
|
||||
; CHECK-LABEL: 'store_with_pointer_phi_runtime_checks'
|
||||
; CHECK-NEXT: loop.header:
|
||||
; CHECK-NEXT: Report: cannot identify array bounds
|
||||
; CHECK-NEXT: Memory dependences are safe with run-time checks
|
||||
; CHECK: Run-time memory checks:
|
||||
; CHECK-NEXT: Check 0:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_B:.+]]):
|
||||
; CHECK-NEXT: %gep.1 = getelementptr inbounds double, double* %B, i64 %iv
|
||||
; CHECK-NEXT: Against group ([[GROUP_C:.+]]):
|
||||
; CHECK-NEXT: %gep.2 = getelementptr inbounds double, double* %C, i64 %iv
|
||||
; CHECK-NEXT: Check 1:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_B]]):
|
||||
; CHECK-NEXT: %gep.1 = getelementptr inbounds double, double* %B, i64 %iv
|
||||
; CHECK-NEXT: Against group ([[GROUP_A:.+]]):
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
|
||||
; CHECK-NEXT: Check 2:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_C]]):
|
||||
; CHECK-NEXT: %gep.2 = getelementptr inbounds double, double* %C, i64 %iv
|
||||
; CHECK-NEXT: Against group ([[GROUP_A]]):
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
|
||||
;
|
||||
entry:
|
||||
br label %loop.header
|
||||
|
@ -184,10 +200,41 @@ exit: ; preds = %loop.latch
|
|||
define i32 @store_with_pointer_phi_incoming_phi(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) {
|
||||
; CHECK-LABEL: 'store_with_pointer_phi_incoming_phi'
|
||||
; CHECK-NEXT: loop.header:
|
||||
; CHECK-NEXT: Report: cannot identify array bounds
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Unknown:
|
||||
; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 ->
|
||||
; CHECK-NEXT: store double %mul16, double* %ptr.2, align 8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Check 0:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_C:.+]]):
|
||||
; CHECK-NEXT: double* %C
|
||||
; CHECK-NEXT: Against group ([[GROUP_B:.+]]):
|
||||
; CHECK-NEXT: double* %B
|
||||
; CHECK-NEXT: Check 1:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_C]]):
|
||||
; CHECK-NEXT: double* %C
|
||||
; CHECK-NEXT: Against group ([[GROUP_A:.+]]):
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
|
||||
; CHECK-NEXT: double* %A
|
||||
; CHECK-NEXT: Check 2:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_B]]):
|
||||
; CHECK-NEXT: double* %B
|
||||
; CHECK-NEXT: Against group ([[GROUP_A]]):
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
|
||||
; CHECK-NEXT: double* %A
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Group [[GROUP_C]]:
|
||||
; CHECK-NEXT: (Low: %C High: (8 + %C))
|
||||
; CHECK-NEXT: Member: %C
|
||||
; CHECK-NEXT: Group [[GROUP_B]]:
|
||||
; CHECK-NEXT: (Low: %B High: (8 + %B))
|
||||
; CHECK-NEXT: Member: %B
|
||||
; CHECK-NEXT: Group [[GROUP_A]]:
|
||||
; CHECK-NEXT: (Low: %A High: (256000 + %A))
|
||||
; CHECK-NEXT: Member: {%A,+,8}<nuw><%loop.header>
|
||||
; CHECK-NEXT: Member: %A
|
||||
; CHECK-EMPTY:
|
||||
entry:
|
||||
br label %loop.header
|
||||
|
@ -229,10 +276,41 @@ exit: ; preds = %loop.latch
|
|||
define i32 @store_with_pointer_phi_incoming_phi_irreducible_cycle(double* %A, double* %B, double* %C, i1 %c.0, i1 %c.1) {
|
||||
; CHECK-LABEL: 'store_with_pointer_phi_incoming_phi_irreducible_cycle'
|
||||
; CHECK-NEXT: loop.header:
|
||||
; CHECK-NEXT: Report: cannot identify array bounds
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Unknown:
|
||||
; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 ->
|
||||
; CHECK-NEXT: store double %mul16, double* %ptr.3, align 8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Check 0:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_C:.+]]):
|
||||
; CHECK-NEXT: double* %C
|
||||
; CHECK-NEXT: Against group ([[GROUP_B:.+]]):
|
||||
; CHECK-NEXT: double* %B
|
||||
; CHECK-NEXT: Check 1:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_C]]):
|
||||
; CHECK-NEXT: double* %C
|
||||
; CHECK-NEXT: Against group ([[GROUP_A:.+]]):
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
|
||||
; CHECK-NEXT: double* %A
|
||||
; CHECK-NEXT: Check 2:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_B]]):
|
||||
; CHECK-NEXT: double* %B
|
||||
; CHECK-NEXT: Against group ([[GROUP_A]]):
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %A, i64 %iv
|
||||
; CHECK-NEXT: double* %A
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Group [[GROUP_C]]
|
||||
; CHECK-NEXT: (Low: %C High: (8 + %C))
|
||||
; CHECK-NEXT: Member: %C
|
||||
; CHECK-NEXT: Group [[GROUP_B]]
|
||||
; CHECK-NEXT: (Low: %B High: (8 + %B))
|
||||
; CHECK-NEXT: Member: %B
|
||||
; CHECK-NEXT: Group [[GROUP_A]]
|
||||
; CHECK-NEXT: (Low: %A High: (256000 + %A))
|
||||
; CHECK-NEXT: Member: {%A,+,8}<nuw><%loop.header>
|
||||
; CHECK-NEXT: Member: %A
|
||||
; CHECK-EMPTY:
|
||||
entry:
|
||||
br label %loop.header
|
||||
|
@ -334,10 +412,59 @@ exit: ; preds = %loop.latch
|
|||
define void @phi_load_store_memdep_check(i1 %c, i16* %A, i16* %B, i16* %C) {
|
||||
; CHECK-LABEL: Loop access info in function 'phi_load_store_memdep_check':
|
||||
; CHECK-NEXT: for.body:
|
||||
; CHECK-NEXT: Report: cannot identify array bounds
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Unknown:
|
||||
; CHECK-NEXT: %lv3 = load i16, i16* %c.sink, align 2 ->
|
||||
; CHECK-NEXT: store i16 %add, i16* %c.sink, align 1
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Unknown:
|
||||
; CHECK-NEXT: %lv3 = load i16, i16* %c.sink, align 2 ->
|
||||
; CHECK-NEXT: store i16 %add, i16* %c.sink, align 1
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Unknown:
|
||||
; CHECK-NEXT: %lv = load i16, i16* %A, align 1 ->
|
||||
; CHECK-NEXT: store i16 %lv, i16* %A, align 1
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Unknown:
|
||||
; CHECK-NEXT: store i16 %lv, i16* %A, align 1 ->
|
||||
; CHECK-NEXT: %lv2 = load i16, i16* %A, align 1
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Check 0:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_A:.+]]):
|
||||
; CHECK-NEXT: i16* %A
|
||||
; CHECK-NEXT: i16* %A
|
||||
; CHECK-NEXT: Against group ([[GROUP_C:.+]]):
|
||||
; CHECK-NEXT: i16* %C
|
||||
; CHECK-NEXT: i16* %C
|
||||
; CHECK-NEXT: Check 1:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_A]]):
|
||||
; CHECK-NEXT: i16* %A
|
||||
; CHECK-NEXT: i16* %A
|
||||
; CHECK-NEXT: Against group ([[GROUP_B:.+]]):
|
||||
; CHECK-NEXT: i16* %B
|
||||
; CHECK-NEXT: i16* %B
|
||||
; CHECK-NEXT: Check 2:
|
||||
; CHECK-NEXT: Comparing group ([[GROUP_C]]):
|
||||
; CHECK-NEXT: i16* %C
|
||||
; CHECK-NEXT: i16* %C
|
||||
; CHECK-NEXT: Against group ([[GROUP_B]]):
|
||||
; CHECK-NEXT: i16* %B
|
||||
; CHECK-NEXT: i16* %B
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Group [[GROUP_A]]
|
||||
; CHECK-NEXT: (Low: %A High: (2 + %A))
|
||||
; CHECK-NEXT: Member: %A
|
||||
; CHECK-NEXT: Member: %A
|
||||
; CHECK-NEXT: Group [[GROUP_C]]
|
||||
; CHECK-NEXT: (Low: %C High: (2 + %C))
|
||||
; CHECK-NEXT: Member: %C
|
||||
; CHECK-NEXT: Member: %C
|
||||
; CHECK-NEXT: Group [[GROUP_B]]
|
||||
; CHECK-NEXT: (Low: %B High: (2 + %B))
|
||||
; CHECK-NEXT: Member: %B
|
||||
; CHECK-NEXT: Member: %B
|
||||
; CHECK-EMPTY:
|
||||
;
|
||||
entry:
|
||||
|
|
|
@ -55,21 +55,6 @@ for.end.loopexit: ; preds = %if.end
|
|||
define void @phi_load_distribute(i1 %c, i16* %A, i16* %B, i16* %C) {
|
||||
; CHECK-LABEL: @phi_load_distribute(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[IF_END:%.*]] ]
|
||||
; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[A:%.*]], align 1
|
||||
; CHECK-NEXT: store i16 [[LV]], i16* [[A]], align 1
|
||||
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_END]]
|
||||
; CHECK: if.then:
|
||||
; CHECK-NEXT: [[LV2:%.*]] = load i16, i16* [[A]], align 1
|
||||
; CHECK-NEXT: br label [[IF_END]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: [[C_SINK:%.*]] = phi i16* [ [[B:%.*]], [[IF_THEN]] ], [ [[C:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[LV3:%.*]] = load i16, i16* [[C_SINK]], align 2
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i16 [[IV]], 1
|
||||
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i16 [[IV_NEXT]], 1000
|
||||
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
|
||||
; CHECK: for.end.loopexit:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
|
|
|
@ -4,7 +4,8 @@
|
|||
|
||||
define i32 @load_with_pointer_phi_no_runtime_checks(%s1* %data) {
|
||||
; CHECK-LABEL: @load_with_pointer_phi_no_runtime_checks
|
||||
; CHECK-NOT: vector.body
|
||||
; CHECK-NOT: memcheck
|
||||
; CHECK: vector.body:
|
||||
;
|
||||
entry:
|
||||
br label %loop.header
|
||||
|
@ -38,7 +39,8 @@ exit: ; preds = %loop.latch
|
|||
|
||||
define i32 @store_with_pointer_phi_no_runtime_checks(%s1* %data) {
|
||||
; CHECK-LABEL: @store_with_pointer_phi_no_runtime_checks
|
||||
; CHECK-NOT: vector.body
|
||||
; CHECK-NOT: memcheck
|
||||
; CHECK: vector.body
|
||||
;
|
||||
entry:
|
||||
br label %loop.header
|
||||
|
@ -72,7 +74,8 @@ exit: ; preds = %loop.latch
|
|||
|
||||
define i32 @store_with_pointer_phi_runtime_checks(double* %A, double* %B, double* %C) {
|
||||
; CHECK-LABEL: @store_with_pointer_phi_runtime_checks
|
||||
; CHECK-NOT: vector.body
|
||||
; CHECK: memcheck
|
||||
; CHECK: vector.body
|
||||
;
|
||||
entry:
|
||||
br label %loop.header
|
||||
|
|
Loading…
Reference in New Issue