[PowerPC] Do not emit HW loop when TLS var accessed in PHI of loop exit

If any PHI nodes in loop exit blocks have incoming values from the
loop that are accesses of TLS variables with local dynamic or general
dynamic TLS model, the address will be computed inside the loop. Since
this includes a call to __tls_get_addr, this will in turn cause the
CTR loops verifier to complain.
Disable CTR loops in such cases.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=48527
This commit is contained in:
Nemanja Ivanovic 2020-12-28 20:31:46 -06:00
parent 0e9abcfc19
commit 4f568fbd21
2 changed files with 112 additions and 26 deletions

View File

@ -335,6 +335,29 @@ PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
return BaseT::getUserCost(U, Operands, CostKind);
}
// Determining the address of a TLS variable results in a function call in
// certain TLS models.
static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
SmallPtrSetImpl<const Value *> &Visited) {
// No need to traverse again if we already checked this operand.
if (!Visited.insert(MemAddr).second)
return false;
const auto *GV = dyn_cast<GlobalValue>(MemAddr);
if (!GV) {
// Recurse to check for constants that refer to TLS global variables.
if (const auto *CV = dyn_cast<Constant>(MemAddr))
for (const auto &CO : CV->operands())
if (memAddrUsesCTR(CO, TM, Visited))
return true;
return false;
}
if (!GV->isThreadLocal())
return false;
TLSModel::Model Model = TM.getTLSModel(GV);
return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}
bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
SmallPtrSetImpl<const Value *> &Visited) {
const PPCTargetMachine &TM = ST->getTargetMachine();
@ -353,31 +376,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
return false;
};
// Determining the address of a TLS variable results in a function call in
// certain TLS models.
std::function<bool(const Value *)> memAddrUsesCTR =
[&memAddrUsesCTR, &TM, &Visited](const Value *MemAddr) -> bool {
// No need to traverse again if we already checked this operand.
if (!Visited.insert(MemAddr).second)
return false;
const auto *GV = dyn_cast<GlobalValue>(MemAddr);
if (!GV) {
// Recurse to check for constants that refer to TLS global variables.
if (const auto *CV = dyn_cast<Constant>(MemAddr))
for (const auto &CO : CV->operands())
if (memAddrUsesCTR(CO))
return true;
return false;
}
if (!GV->isThreadLocal())
return false;
TLSModel::Model Model = TM.getTLSModel(GV);
return Model == TLSModel::GeneralDynamic ||
Model == TLSModel::LocalDynamic;
};
auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
@ -676,7 +674,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
}
for (Value *Operand : J->operands())
if (memAddrUsesCTR(Operand))
if (memAddrUsesCTR(Operand, TM, Visited))
return true;
}
@ -736,6 +734,24 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
}
}
// If an exit block has a PHI that accesses a TLS variable as one of the
// incoming values from the loop, we cannot produce a CTR loop because the
// address for that value will be computed in the loop.
SmallVector<BasicBlock *, 4> ExitBlocks;
L->getExitBlocks(ExitBlocks);
for (auto &BB : ExitBlocks) {
for (auto &PHI : BB->phis()) {
for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
Idx++) {
const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
const Value *IncomingValue = PHI.getIncomingValue(Idx);
if (L->contains(IncomingBB) &&
memAddrUsesCTR(IncomingValue, TM, Visited))
return false;
}
}
}
LLVMContext &C = L->getHeader()->getContext();
HWLoopInfo.CountType = TM.isPPC64() ?
Type::getInt64Ty(C) : Type::getInt32Ty(C);

View File

@ -0,0 +1,70 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -relocation-model=pic -verify-machineinstrs < %s \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
%struct.e.0.12.28.44.104.108.112.188 = type { i32 }
%struct.t.1.13.29.45.105.109.113.189 = type { i64, i64 }
@g = external local_unnamed_addr global %struct.e.0.12.28.44.104.108.112.188, align 4
@aj = external thread_local local_unnamed_addr global %struct.t.1.13.29.45.105.109.113.189, align 8
; Regression input for a loop whose exit block (%if.end) has a PHI with
; incoming values from inside the loop that are addresses within the
; thread_local global @aj. The expected assembly below calls __tls_get_addr
; once, before the loop label .LBB0_1, and closes the loop by decrementing and
; comparing a general-purpose register (addi/cmplwi/bne).
define void @_ZNK1q1rEv() local_unnamed_addr #0 align 2 {
; CHECK-LABEL: _ZNK1q1rEv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr 0
; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT: std 0, 16(1)
; CHECK-NEXT: stdu 1, -64(1)
; CHECK-NEXT: lwz 30, 0(3)
; CHECK-NEXT: addis 3, 2, .LC0@toc@ha
; CHECK-NEXT: ld 29, .LC0@toc@l(3)
; CHECK-NEXT: addis 3, 2, aj@got@tlsgd@ha
; CHECK-NEXT: addi 3, 3, aj@got@tlsgd@l
; CHECK-NEXT: bl __tls_get_addr(aj@tlsgd)
; CHECK-NEXT: nop
; CHECK-NEXT: addi 4, 3, 8
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_1: # %monotonic.i
; CHECK-NEXT: #
; CHECK-NEXT: lwz 5, 0(29)
; CHECK-NEXT: andi. 5, 5, 255
; CHECK-NEXT: bne 0, .LBB0_4
; CHECK-NEXT: # %bb.2: # %for.cond.i
; CHECK-NEXT: #
; CHECK-NEXT: addi 30, 30, -1
; CHECK-NEXT: cmplwi 30, 0
; CHECK-NEXT: bne 0, .LBB0_1
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mr 4, 3
; CHECK-NEXT: .LBB0_4: # %if.end
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: std 3, 0(4)
; CHECK-NEXT: addi 1, 1, 64
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr 0
; CHECK-NEXT: blr
entry:
; %0 is the bound that the induction variable %inc.i is compared against.
%0 = load i32, i32* undef, align 4
br label %monotonic.i
; Loop latch: leave for %if.end once %inc.i equals %0, otherwise iterate again.
for.cond.i: ; preds = %monotonic.i
%exitcond.not = icmp eq i32 %inc.i, %0
br i1 %exitcond.not, label %if.end, label %monotonic.i
; Loop header: atomically load from @g and leave the loop early for %if.end
; when the low byte of the loaded value is non-zero.
monotonic.i: ; preds = %for.cond.i, %entry
%i.018.i = phi i32 [ %inc.i, %for.cond.i ], [ 0, %entry ]
%1 = load atomic i32, i32* getelementptr inbounds (%struct.e.0.12.28.44.104.108.112.188, %struct.e.0.12.28.44.104.108.112.188* @g, i64 0, i32 0) monotonic, align 4
%conv.i = trunc i32 %1 to i8
%tobool.not.i = icmp eq i8 %conv.i, 0
%inc.i = add nuw nsw i32 %i.018.i, 1
br i1 %tobool.not.i, label %for.cond.i, label %if.end
; Exit block: both PHI operands are constant GEPs into the thread_local @aj
; (field 1 from %monotonic.i, field 0 from %for.cond.i), and both predecessors
; are loop blocks.
if.end: ; preds = %monotonic.i, %for.cond.i
%.sink = phi i64* [ getelementptr inbounds (%struct.t.1.13.29.45.105.109.113.189, %struct.t.1.13.29.45.105.109.113.189* @aj, i64 0, i32 1), %monotonic.i ], [ getelementptr inbounds (%struct.t.1.13.29.45.105.109.113.189, %struct.t.1.13.29.45.105.109.113.189* @aj, i64 0, i32 0), %for.cond.i ]
store i64 1, i64* %.sink, align 8
ret void
}
attributes #0 = { nounwind }