forked from OSchip/llvm-project
[PowerPC] Do not emit HW loop when TLS var accessed in PHI of loop exit
If a PHI node in a loop exit block has an incoming value from the loop that is the address of a TLS variable using the local-dynamic or general-dynamic TLS model, that address will be computed inside the loop. Computing it requires a call to __tls_get_addr, which in turn causes the CTR loops verifier to complain. Disable CTR loops in such cases. Fixes: https://bugs.llvm.org/show_bug.cgi?id=48527
This commit is contained in:
parent
0e9abcfc19
commit
4f568fbd21
|
@ -335,6 +335,29 @@ PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
|
||||||
return BaseT::getUserCost(U, Operands, CostKind);
|
return BaseT::getUserCost(U, Operands, CostKind);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Determining the address of a TLS variable results in a function call in
|
||||||
|
// certain TLS models.
|
||||||
|
static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
|
||||||
|
SmallPtrSetImpl<const Value *> &Visited) {
|
||||||
|
// No need to traverse again if we already checked this operand.
|
||||||
|
if (!Visited.insert(MemAddr).second)
|
||||||
|
return false;
|
||||||
|
const auto *GV = dyn_cast<GlobalValue>(MemAddr);
|
||||||
|
if (!GV) {
|
||||||
|
// Recurse to check for constants that refer to TLS global variables.
|
||||||
|
if (const auto *CV = dyn_cast<Constant>(MemAddr))
|
||||||
|
for (const auto &CO : CV->operands())
|
||||||
|
if (memAddrUsesCTR(CO, TM, Visited))
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!GV->isThreadLocal())
|
||||||
|
return false;
|
||||||
|
TLSModel::Model Model = TM.getTLSModel(GV);
|
||||||
|
return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
|
||||||
|
}
|
||||||
|
|
||||||
bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
|
bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
|
||||||
SmallPtrSetImpl<const Value *> &Visited) {
|
SmallPtrSetImpl<const Value *> &Visited) {
|
||||||
const PPCTargetMachine &TM = ST->getTargetMachine();
|
const PPCTargetMachine &TM = ST->getTargetMachine();
|
||||||
|
@ -353,31 +376,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Determining the address of a TLS variable results in a function call in
|
|
||||||
// certain TLS models.
|
|
||||||
std::function<bool(const Value *)> memAddrUsesCTR =
|
|
||||||
[&memAddrUsesCTR, &TM, &Visited](const Value *MemAddr) -> bool {
|
|
||||||
// No need to traverse again if we already checked this operand.
|
|
||||||
if (!Visited.insert(MemAddr).second)
|
|
||||||
return false;
|
|
||||||
const auto *GV = dyn_cast<GlobalValue>(MemAddr);
|
|
||||||
if (!GV) {
|
|
||||||
// Recurse to check for constants that refer to TLS global variables.
|
|
||||||
if (const auto *CV = dyn_cast<Constant>(MemAddr))
|
|
||||||
for (const auto &CO : CV->operands())
|
|
||||||
if (memAddrUsesCTR(CO))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!GV->isThreadLocal())
|
|
||||||
return false;
|
|
||||||
TLSModel::Model Model = TM.getTLSModel(GV);
|
|
||||||
return Model == TLSModel::GeneralDynamic ||
|
|
||||||
Model == TLSModel::LocalDynamic;
|
|
||||||
};
|
|
||||||
|
|
||||||
auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
|
auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
|
||||||
if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
|
if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
|
||||||
return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
|
return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
|
||||||
|
@ -676,7 +674,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Value *Operand : J->operands())
|
for (Value *Operand : J->operands())
|
||||||
if (memAddrUsesCTR(Operand))
|
if (memAddrUsesCTR(Operand, TM, Visited))
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -736,6 +734,24 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If an exit block has a PHI that accesses a TLS variable as one of the
|
||||||
|
// incoming values from the loop, we cannot produce a CTR loop because the
|
||||||
|
// address for that value will be computed in the loop.
|
||||||
|
SmallVector<BasicBlock *, 4> ExitBlocks;
|
||||||
|
L->getExitBlocks(ExitBlocks);
|
||||||
|
for (auto &BB : ExitBlocks) {
|
||||||
|
for (auto &PHI : BB->phis()) {
|
||||||
|
for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
|
||||||
|
Idx++) {
|
||||||
|
const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
|
||||||
|
const Value *IncomingValue = PHI.getIncomingValue(Idx);
|
||||||
|
if (L->contains(IncomingBB) &&
|
||||||
|
memAddrUsesCTR(IncomingValue, TM, Visited))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
LLVMContext &C = L->getHeader()->getContext();
|
LLVMContext &C = L->getHeader()->getContext();
|
||||||
HWLoopInfo.CountType = TM.isPPC64() ?
|
HWLoopInfo.CountType = TM.isPPC64() ?
|
||||||
Type::getInt64Ty(C) : Type::getInt32Ty(C);
|
Type::getInt64Ty(C) : Type::getInt32Ty(C);
|
||||||
|
|
|
@ -0,0 +1,70 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
|
; RUN: llc -relocation-model=pic -verify-machineinstrs < %s \
|
||||||
|
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
|
||||||
|
%struct.e.0.12.28.44.104.108.112.188 = type { i32 }
|
||||||
|
%struct.t.1.13.29.45.105.109.113.189 = type { i64, i64 }
|
||||||
|
|
||||||
|
@g = external local_unnamed_addr global %struct.e.0.12.28.44.104.108.112.188, align 4
|
||||||
|
@aj = external thread_local local_unnamed_addr global %struct.t.1.13.29.45.105.109.113.189, align 8
|
||||||
|
|
||||||
|
define void @_ZNK1q1rEv() local_unnamed_addr #0 align 2 {
|
||||||
|
; CHECK-LABEL: _ZNK1q1rEv:
|
||||||
|
; CHECK: # %bb.0: # %entry
|
||||||
|
; CHECK-NEXT: mflr 0
|
||||||
|
; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
|
||||||
|
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
|
||||||
|
; CHECK-NEXT: std 0, 16(1)
|
||||||
|
; CHECK-NEXT: stdu 1, -64(1)
|
||||||
|
; CHECK-NEXT: lwz 30, 0(3)
|
||||||
|
; CHECK-NEXT: addis 3, 2, .LC0@toc@ha
|
||||||
|
; CHECK-NEXT: ld 29, .LC0@toc@l(3)
|
||||||
|
; CHECK-NEXT: addis 3, 2, aj@got@tlsgd@ha
|
||||||
|
; CHECK-NEXT: addi 3, 3, aj@got@tlsgd@l
|
||||||
|
; CHECK-NEXT: bl __tls_get_addr(aj@tlsgd)
|
||||||
|
; CHECK-NEXT: nop
|
||||||
|
; CHECK-NEXT: addi 4, 3, 8
|
||||||
|
; CHECK-NEXT: .p2align 5
|
||||||
|
; CHECK-NEXT: .LBB0_1: # %monotonic.i
|
||||||
|
; CHECK-NEXT: #
|
||||||
|
; CHECK-NEXT: lwz 5, 0(29)
|
||||||
|
; CHECK-NEXT: andi. 5, 5, 255
|
||||||
|
; CHECK-NEXT: bne 0, .LBB0_4
|
||||||
|
; CHECK-NEXT: # %bb.2: # %for.cond.i
|
||||||
|
; CHECK-NEXT: #
|
||||||
|
; CHECK-NEXT: addi 30, 30, -1
|
||||||
|
; CHECK-NEXT: cmplwi 30, 0
|
||||||
|
; CHECK-NEXT: bne 0, .LBB0_1
|
||||||
|
; CHECK-NEXT: # %bb.3:
|
||||||
|
; CHECK-NEXT: mr 4, 3
|
||||||
|
; CHECK-NEXT: .LBB0_4: # %if.end
|
||||||
|
; CHECK-NEXT: li 3, 1
|
||||||
|
; CHECK-NEXT: std 3, 0(4)
|
||||||
|
; CHECK-NEXT: addi 1, 1, 64
|
||||||
|
; CHECK-NEXT: ld 0, 16(1)
|
||||||
|
; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
|
||||||
|
; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload
|
||||||
|
; CHECK-NEXT: mtlr 0
|
||||||
|
; CHECK-NEXT: blr
|
||||||
|
entry:
|
||||||
|
%0 = load i32, i32* undef, align 4
|
||||||
|
br label %monotonic.i
|
||||||
|
|
||||||
|
for.cond.i: ; preds = %monotonic.i
|
||||||
|
%exitcond.not = icmp eq i32 %inc.i, %0
|
||||||
|
br i1 %exitcond.not, label %if.end, label %monotonic.i
|
||||||
|
|
||||||
|
monotonic.i: ; preds = %for.cond.i, %entry
|
||||||
|
%i.018.i = phi i32 [ %inc.i, %for.cond.i ], [ 0, %entry ]
|
||||||
|
%1 = load atomic i32, i32* getelementptr inbounds (%struct.e.0.12.28.44.104.108.112.188, %struct.e.0.12.28.44.104.108.112.188* @g, i64 0, i32 0) monotonic, align 4
|
||||||
|
%conv.i = trunc i32 %1 to i8
|
||||||
|
%tobool.not.i = icmp eq i8 %conv.i, 0
|
||||||
|
%inc.i = add nuw nsw i32 %i.018.i, 1
|
||||||
|
br i1 %tobool.not.i, label %for.cond.i, label %if.end
|
||||||
|
|
||||||
|
if.end: ; preds = %monotonic.i, %for.cond.i
|
||||||
|
%.sink = phi i64* [ getelementptr inbounds (%struct.t.1.13.29.45.105.109.113.189, %struct.t.1.13.29.45.105.109.113.189* @aj, i64 0, i32 1), %monotonic.i ], [ getelementptr inbounds (%struct.t.1.13.29.45.105.109.113.189, %struct.t.1.13.29.45.105.109.113.189* @aj, i64 0, i32 0), %for.cond.i ]
|
||||||
|
store i64 1, i64* %.sink, align 8
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
Loading…
Reference in New Issue