From 9b3330546bcec321d9be6f45edef142ae953c7e0 Mon Sep 17 00:00:00 2001 From: "Arnaud A. de Grandmaison" Date: Wed, 22 Oct 2014 12:40:20 +0000 Subject: [PATCH] [AArch64] Cleanup A57PBQPConstraints And add a long awaited testcase. llvm-svn: 220381 --- .../Target/AArch64/AArch64PBQPRegAlloc.cpp | 87 ++++++++------- llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h | 6 +- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 5 +- llvm/test/CodeGen/AArch64/PBQP-chain.ll | 104 ++++++++++++++++++ 4 files changed, 154 insertions(+), 48 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/PBQP-chain.ll diff --git a/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index f0105b0a2622..05692ee09681 100644 --- a/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -156,19 +156,17 @@ bool haveSameParity(unsigned reg1, unsigned reg2) { } -bool A57PBQPConstraints::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, +bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra) { if (Rd == Ra) return false; - const TargetRegisterInfo &TRI = - *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo(); LiveIntervals &LIs = G.getMetadata().LIS; - if (TRI.isPhysicalRegister(Rd) || TRI.isPhysicalRegister(Ra)) { - DEBUG(dbgs() << "Rd is a physical reg:" << TRI.isPhysicalRegister(Rd) + if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) { + DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd) << '\n'); - DEBUG(dbgs() << "Ra is a physical reg:" << TRI.isPhysicalRegister(Ra) + DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra) << '\n'); return false; } @@ -196,7 +194,7 @@ bool A57PBQPConstraints::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned pRd = (*vRdAllowed)[i]; for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) { unsigned pRa = (*vRaAllowed)[j]; - if (livesOverlap && 
TRI.regsOverlap(pRd, pRa)) + if (livesOverlap && TRI->regsOverlap(pRd, pRa)) costs[i + 1][j + 1] = std::numeric_limits::infinity(); else costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0; @@ -242,23 +240,20 @@ bool A57PBQPConstraints::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, return true; } -void A57PBQPConstraints::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, +void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra) { - const TargetRegisterInfo &TRI = - *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo(); - (void)TRI; LiveIntervals &LIs = G.getMetadata().LIS; // Do some Chain management if (Chains.count(Ra)) { if (Rd != Ra) { - DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, &TRI) << " to " - << PrintReg(Rd, &TRI) << '\n';); + DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, TRI) << " to " + << PrintReg(Rd, TRI) << '\n';); Chains.remove(Ra); Chains.insert(Rd); } } else { - DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, &TRI) + DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, TRI) << '\n';); Chains.insert(Rd); } @@ -322,24 +317,41 @@ void A57PBQPConstraints::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, } } -void A57PBQPConstraints::apply(PBQPRAGraph &G) { - MachineFunction &MF = G.getMetadata().MF; +static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg, + const MachineInstr &MI) { + LiveInterval LI = LIs.getInterval(reg); + SlotIndex SI = LIs.getInstructionIndex(&MI); + return LI.expiredAt(SI); +} - const TargetRegisterInfo &TRI = - *MF.getTarget().getSubtargetImpl()->getRegisterInfo(); - (void)TRI; +void A57ChainingConstraint::apply(PBQPRAGraph &G) { + const MachineFunction &MF = G.getMetadata().MF; + LiveIntervals &LIs = G.getMetadata().LIS; + + TRI = MF.getTarget().getSubtargetImpl()->getRegisterInfo(); DEBUG(MF.dump()); - for (MachineFunction::const_iterator mbbItr = MF.begin(), mbbEnd = MF.end(); - mbbItr != 
mbbEnd; ++mbbItr) { - const MachineBasicBlock *MBB = &*mbbItr; + for (const auto &MBB: MF) { Chains.clear(); // FIXME: really needed ? Could not work at MF level ? - for (MachineBasicBlock::const_iterator miItr = MBB->begin(), - miEnd = MBB->end(); - miItr != miEnd; ++miItr) { - const MachineInstr *MI = &*miItr; - switch (MI->getOpcode()) { + for (const auto &MI: MBB) { + + // Forget Chains which have expired + for (auto r : Chains) { + SmallVector toDel; + if(regJustKilledBefore(LIs, r, MI)) { + DEBUG(dbgs() << "Killing chain " << PrintReg(r, TRI) << " at "; + MI.print(dbgs());); + toDel.push_back(r); + } + + while (!toDel.empty()) { + Chains.remove(toDel.back()); + toDel.pop_back(); + } + } + + switch (MI.getOpcode()) { case AArch64::FMSUBSrrr: case AArch64::FMADDSrrr: case AArch64::FNMSUBSrrr: @@ -348,8 +360,8 @@ void A57PBQPConstraints::apply(PBQPRAGraph &G) { case AArch64::FMADDDrrr: case AArch64::FNMSUBDrrr: case AArch64::FNMADDDrrr: { - unsigned Rd = MI->getOperand(0).getReg(); - unsigned Ra = MI->getOperand(3).getReg(); + unsigned Rd = MI.getOperand(0).getReg(); + unsigned Ra = MI.getOperand(3).getReg(); if (addIntraChainConstraint(G, Rd, Ra)) addInterChainConstraint(G, Rd, Ra); @@ -358,26 +370,13 @@ void A57PBQPConstraints::apply(PBQPRAGraph &G) { case AArch64::FMLAv2f32: case AArch64::FMLSv2f32: { - unsigned Rd = MI->getOperand(0).getReg(); + unsigned Rd = MI.getOperand(0).getReg(); addInterChainConstraint(G, Rd, Rd); break; } default: - // Forget Chains which have been killed - for (auto r : Chains) { - SmallVector toDel; - if (MI->killsRegister(r)) { - DEBUG(dbgs() << "Killing chain " << PrintReg(r, &TRI) << " at "; - MI->print(dbgs());); - toDel.push_back(r); - } - - while (!toDel.empty()) { - Chains.remove(toDel.back()); - toDel.pop_back(); - } - } + break; } } } diff --git a/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h index 4bd780cd0ebc..4f656f94ea12 100644 --- 
a/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h +++ b/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h @@ -15,14 +15,15 @@ namespace llvm { -class A57PBQPConstraints : public PBQPRAConstraint { +/// Add the accumulator chaining constraint to a PBQP graph +class A57ChainingConstraint : public PBQPRAConstraint { public: - // Add A57 specific constraints to the PBQP graph. void apply(PBQPRAGraph &G) override; private: SmallSetVector Chains; + const TargetRegisterInfo *TRI; // Add the accumulator chaining constraint, inside the chain, i.e. so that // parity(Rd) == parity(Ra). @@ -32,7 +33,6 @@ private: // Add constraints between existing chains void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra); }; - } #endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 22576dcd001b..4ccd57661cbe 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -137,5 +137,8 @@ bool AArch64Subtarget::enableEarlyIfConversion() const { std::unique_ptr<PBQPRAConstraint> AArch64Subtarget::getCustomPBQPConstraints() const { - return llvm::make_unique<A57PBQPConstraints>(); + if (!isCortexA57()) + return nullptr; + + return llvm::make_unique<A57ChainingConstraint>(); } diff --git a/llvm/test/CodeGen/AArch64/PBQP-chain.ll b/llvm/test/CodeGen/AArch64/PBQP-chain.ll new file mode 100644 index 000000000000..c4ba026ea428 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/PBQP-chain.ll @@ -0,0 +1,104 @@ +; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN +; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD +; +; Test PBQP is able to fulfill the accumulator chaining constraint. 
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +; CHECK-LABEL: fir +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +define void @fir(double* nocapture %rx, double* nocapture %ry, double* nocapture %c, double* nocapture %x, double* nocapture %y) { +entry: + %0 = load double* %c, align 8 + %1 = load double* %x, align 8 + %mul = fmul fast double %1, %0 + %2 = load double* %y, align 8 + %mul7 = fmul fast double %2, %0 + %arrayidx.1 = getelementptr inbounds double* %c, i64 1 + %3 = load double* %arrayidx.1, align 8 + %arrayidx2.1 = getelementptr inbounds double* %x, i64 1 + %4 = load double* %arrayidx2.1, align 8 + %mul.1 = fmul fast double %4, %3 + %add.1 = fadd fast double %mul.1, %mul + %arrayidx6.1 = getelementptr inbounds double* %y, i64 1 + %5 = load double* %arrayidx6.1, align 8 + %mul7.1 = fmul 
fast double %5, %3 + %add8.1 = fadd fast double %mul7.1, %mul7 + %arrayidx.2 = getelementptr inbounds double* %c, i64 2 + %6 = load double* %arrayidx.2, align 8 + %arrayidx2.2 = getelementptr inbounds double* %x, i64 2 + %7 = load double* %arrayidx2.2, align 8 + %mul.2 = fmul fast double %7, %6 + %add.2 = fadd fast double %mul.2, %add.1 + %arrayidx6.2 = getelementptr inbounds double* %y, i64 2 + %8 = load double* %arrayidx6.2, align 8 + %mul7.2 = fmul fast double %8, %6 + %add8.2 = fadd fast double %mul7.2, %add8.1 + %arrayidx.3 = getelementptr inbounds double* %c, i64 3 + %9 = load double* %arrayidx.3, align 8 + %arrayidx2.3 = getelementptr inbounds double* %x, i64 3 + %10 = load double* %arrayidx2.3, align 8 + %mul.3 = fmul fast double %10, %9 + %add.3 = fadd fast double %mul.3, %add.2 + %arrayidx6.3 = getelementptr inbounds double* %y, i64 3 + %11 = load double* %arrayidx6.3, align 8 + %mul7.3 = fmul fast double %11, %9 + %add8.3 = fadd fast double %mul7.3, %add8.2 + %arrayidx.4 = getelementptr inbounds double* %c, i64 4 + %12 = load double* %arrayidx.4, align 8 + %arrayidx2.4 = getelementptr inbounds double* %x, i64 4 + %13 = load double* %arrayidx2.4, align 8 + %mul.4 = fmul fast double %13, %12 + %add.4 = fadd fast double %mul.4, %add.3 + %arrayidx6.4 = getelementptr inbounds double* %y, i64 4 + %14 = load double* %arrayidx6.4, align 8 + %mul7.4 = fmul fast double %14, %12 + %add8.4 = fadd fast double %mul7.4, %add8.3 + %arrayidx.5 = getelementptr inbounds double* %c, i64 5 + %15 = load double* %arrayidx.5, align 8 + %arrayidx2.5 = getelementptr inbounds double* %x, i64 5 + %16 = load double* %arrayidx2.5, align 8 + %mul.5 = fmul fast double %16, %15 + %add.5 = fadd fast double %mul.5, %add.4 + %arrayidx6.5 = getelementptr inbounds double* %y, i64 5 + %17 = load double* %arrayidx6.5, align 8 + %mul7.5 = fmul fast double %17, %15 + %add8.5 = fadd fast double %mul7.5, %add8.4 + %arrayidx.6 = getelementptr inbounds double* %c, i64 6 + %18 = load double* 
%arrayidx.6, align 8 + %arrayidx2.6 = getelementptr inbounds double* %x, i64 6 + %19 = load double* %arrayidx2.6, align 8 + %mul.6 = fmul fast double %19, %18 + %add.6 = fadd fast double %mul.6, %add.5 + %arrayidx6.6 = getelementptr inbounds double* %y, i64 6 + %20 = load double* %arrayidx6.6, align 8 + %mul7.6 = fmul fast double %20, %18 + %add8.6 = fadd fast double %mul7.6, %add8.5 + %arrayidx.7 = getelementptr inbounds double* %c, i64 7 + %21 = load double* %arrayidx.7, align 8 + %arrayidx2.7 = getelementptr inbounds double* %x, i64 7 + %22 = load double* %arrayidx2.7, align 8 + %mul.7 = fmul fast double %22, %21 + %add.7 = fadd fast double %mul.7, %add.6 + %arrayidx6.7 = getelementptr inbounds double* %y, i64 7 + %23 = load double* %arrayidx6.7, align 8 + %mul7.7 = fmul fast double %23, %21 + %add8.7 = fadd fast double %mul7.7, %add8.6 + store double %add.7, double* %rx, align 8 + store double %add8.7, double* %ry, align 8 + ret void +} +