From 4f568fbd21636c7c8d071f1901084cc0ae87f3ee Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic
Date: Mon, 28 Dec 2020 20:31:46 -0600
Subject: [PATCH] [PowerPC] Do not emit HW loop when TLS var accessed in PHI of
 loop exit

If any PHI nodes in loop exit blocks have incoming values from the
loop that are accesses of TLS variables with local dynamic or general
dynamic TLS model, the address will be computed inside the loop. Since
this includes a call to __tls_get_addr, this will in turn cause the
CTR loops verifier to complain. Disable CTR loops in such cases.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=48527
---
 .../Target/PowerPC/PPCTargetTransformInfo.cpp | 68 +++++++++++------
 llvm/test/CodeGen/PowerPC/pr48527.ll          | 70 +++++++++++++++++++
 2 files changed, 112 insertions(+), 26 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/pr48527.ll

diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 011056c21b13..4de1f2aba416 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -335,6 +335,29 @@ PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
   return BaseT::getUserCost(U, Operands, CostKind);
 }
 
+// Determining the address of a TLS variable results in a function call in
+// certain TLS models.
+static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
+                           SmallPtrSetImpl<const Value *> &Visited) {
+  // No need to traverse again if we already checked this operand.
+  if (!Visited.insert(MemAddr).second)
+    return false;
+  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+  if (!GV) {
+    // Recurse to check for constants that refer to TLS global variables.
+    if (const auto *CV = dyn_cast<Constant>(MemAddr))
+      for (const auto &CO : CV->operands())
+        if (memAddrUsesCTR(CO, TM, Visited))
+          return true;
+    return false;
+  }
+
+  if (!GV->isThreadLocal())
+    return false;
+  TLSModel::Model Model = TM.getTLSModel(GV);
+  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
+}
+
 bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
                              SmallPtrSetImpl<const Value *> &Visited) {
   const PPCTargetMachine &TM = ST->getTargetMachine();
@@ -353,31 +376,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
     return false;
   };
 
-  // Determining the address of a TLS variable results in a function call in
-  // certain TLS models.
-  std::function<bool(const Value *)> memAddrUsesCTR =
-      [&memAddrUsesCTR, &TM, &Visited](const Value *MemAddr) -> bool {
-    // No need to traverse again if we already checked this operand.
-    if (!Visited.insert(MemAddr).second)
-      return false;
-    const auto *GV = dyn_cast<GlobalValue>(MemAddr);
-    if (!GV) {
-      // Recurse to check for constants that refer to TLS global variables.
-      if (const auto *CV = dyn_cast<Constant>(MemAddr))
-        for (const auto &CO : CV->operands())
-          if (memAddrUsesCTR(CO))
-            return true;
-
-      return false;
-    }
-
-    if (!GV->isThreadLocal())
-      return false;
-    TLSModel::Model Model = TM.getTLSModel(GV);
-    return Model == TLSModel::GeneralDynamic ||
-      Model == TLSModel::LocalDynamic;
-  };
-
   auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
     if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
       return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
@@ -676,7 +674,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
     }
 
     for (Value *Operand : J->operands())
-      if (memAddrUsesCTR(Operand))
+      if (memAddrUsesCTR(Operand, TM, Visited))
         return true;
   }
 
@@ -736,6 +734,24 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
     }
   }
 
+  // If an exit block has a PHI that accesses a TLS variable as one of the
+  // incoming values from the loop, we cannot produce a CTR loop because the
+  // address for that value will be computed in the loop.
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  for (auto &BB : ExitBlocks) {
+    for (auto &PHI : BB->phis()) {
+      for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
+           Idx++) {
+        const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
+        const Value *IncomingValue = PHI.getIncomingValue(Idx);
+        if (L->contains(IncomingBB) &&
+            memAddrUsesCTR(IncomingValue, TM, Visited))
+          return false;
+      }
+    }
+  }
+
   LLVMContext &C = L->getHeader()->getContext();
   HWLoopInfo.CountType = TM.isPPC64() ?
     Type::getInt64Ty(C) : Type::getInt32Ty(C);
diff --git a/llvm/test/CodeGen/PowerPC/pr48527.ll b/llvm/test/CodeGen/PowerPC/pr48527.ll
new file mode 100644
index 000000000000..eaff15ce071e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr48527.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -relocation-model=pic -verify-machineinstrs < %s \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
+%struct.e.0.12.28.44.104.108.112.188 = type { i32 }
+%struct.t.1.13.29.45.105.109.113.189 = type { i64, i64 }
+
+@g = external local_unnamed_addr global %struct.e.0.12.28.44.104.108.112.188, align 4
+@aj = external thread_local local_unnamed_addr global %struct.t.1.13.29.45.105.109.113.189, align 8
+
+define void @_ZNK1q1rEv() local_unnamed_addr #0 align 2 {
+; CHECK-LABEL: _ZNK1q1rEv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    lwz 30, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:    ld 29, .LC0@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, aj@got@tlsgd@ha
+; CHECK-NEXT:    addi 3, 3, aj@got@tlsgd@l
+; CHECK-NEXT:    bl __tls_get_addr(aj@tlsgd)
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi 4, 3, 8
+; CHECK-NEXT:    .p2align 5
+; CHECK-NEXT:  .LBB0_1: # %monotonic.i
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lwz 5, 0(29)
+; CHECK-NEXT:    andi. 5, 5, 255
+; CHECK-NEXT:    bne 0, .LBB0_4
+; CHECK-NEXT:  # %bb.2: # %for.cond.i
+; CHECK-NEXT:    #
+; CHECK-NEXT:    addi 30, 30, -1
+; CHECK-NEXT:    cmplwi 30, 0
+; CHECK-NEXT:    bne 0, .LBB0_1
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mr 4, 3
+; CHECK-NEXT:  .LBB0_4: # %if.end
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    std 3, 0(4)
+; CHECK-NEXT:    addi 1, 1, 64
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* undef, align 4
+  br label %monotonic.i
+
+for.cond.i:                                       ; preds = %monotonic.i
+  %exitcond.not = icmp eq i32 %inc.i, %0
+  br i1 %exitcond.not, label %if.end, label %monotonic.i
+
+monotonic.i:                                      ; preds = %for.cond.i, %entry
+  %i.018.i = phi i32 [ %inc.i, %for.cond.i ], [ 0, %entry ]
+  %1 = load atomic i32, i32* getelementptr inbounds (%struct.e.0.12.28.44.104.108.112.188, %struct.e.0.12.28.44.104.108.112.188* @g, i64 0, i32 0) monotonic, align 4
+  %conv.i = trunc i32 %1 to i8
+  %tobool.not.i = icmp eq i8 %conv.i, 0
+  %inc.i = add nuw nsw i32 %i.018.i, 1
+  br i1 %tobool.not.i, label %for.cond.i, label %if.end
+
+if.end:                                           ; preds = %monotonic.i, %for.cond.i
+  %.sink = phi i64* [ getelementptr inbounds (%struct.t.1.13.29.45.105.109.113.189, %struct.t.1.13.29.45.105.109.113.189* @aj, i64 0, i32 1), %monotonic.i ], [ getelementptr inbounds (%struct.t.1.13.29.45.105.109.113.189, %struct.t.1.13.29.45.105.109.113.189* @aj, i64 0, i32 0), %for.cond.i ]
+  store i64 1, i64* %.sink, align 8
+  ret void
+}
+
+attributes #0 = { nounwind }