[AArch64] Cleanup A57PBQPConstraints

And add a long-awaited testcase.

llvm-svn: 220381
Arnaud A. de Grandmaison 2014-10-22 12:40:20 +00:00
parent c29520c5b3
commit 9b3330546b
4 changed files with 154 additions and 48 deletions


@@ -156,19 +156,17 @@ bool haveSameParity(unsigned reg1, unsigned reg2) {
 }
-bool A57PBQPConstraints::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
+bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
                                                     unsigned Ra) {
   if (Rd == Ra)
     return false;
-  const TargetRegisterInfo &TRI =
-      *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
   LiveIntervals &LIs = G.getMetadata().LIS;
-  if (TRI.isPhysicalRegister(Rd) || TRI.isPhysicalRegister(Ra)) {
-    DEBUG(dbgs() << "Rd is a physical reg:" << TRI.isPhysicalRegister(Rd)
+  if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) {
+    DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd)
                  << '\n');
-    DEBUG(dbgs() << "Ra is a physical reg:" << TRI.isPhysicalRegister(Ra)
+    DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra)
                  << '\n');
     return false;
   }
@@ -196,7 +194,7 @@ bool A57PBQPConstraints::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
     unsigned pRd = (*vRdAllowed)[i];
     for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
       unsigned pRa = (*vRaAllowed)[j];
-      if (livesOverlap && TRI.regsOverlap(pRd, pRa))
+      if (livesOverlap && TRI->regsOverlap(pRd, pRa))
         costs[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
       else
         costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0;
@@ -242,23 +240,20 @@ bool A57PBQPConstraints::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
   return true;
 }
-void A57PBQPConstraints::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
+void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
                                                     unsigned Ra) {
-  const TargetRegisterInfo &TRI =
-      *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
-  (void)TRI;
   LiveIntervals &LIs = G.getMetadata().LIS;
   // Do some Chain management
   if (Chains.count(Ra)) {
     if (Rd != Ra) {
-      DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, &TRI) << " to "
-                   << PrintReg(Rd, &TRI) << '\n';);
+      DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, TRI) << " to "
+                   << PrintReg(Rd, TRI) << '\n';);
       Chains.remove(Ra);
       Chains.insert(Rd);
     }
   } else {
-    DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, &TRI)
+    DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, TRI)
                  << '\n';);
     Chains.insert(Rd);
   }
@@ -322,24 +317,41 @@ void A57PBQPConstraints::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
   }
 }
-void A57PBQPConstraints::apply(PBQPRAGraph &G) {
-  MachineFunction &MF = G.getMetadata().MF;
+static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
+                                const MachineInstr &MI) {
+  LiveInterval LI = LIs.getInterval(reg);
+  SlotIndex SI = LIs.getInstructionIndex(&MI);
+  return LI.expiredAt(SI);
+}
-  const TargetRegisterInfo &TRI =
-      *MF.getTarget().getSubtargetImpl()->getRegisterInfo();
-  (void)TRI;
+void A57ChainingConstraint::apply(PBQPRAGraph &G) {
+  const MachineFunction &MF = G.getMetadata().MF;
   LiveIntervals &LIs = G.getMetadata().LIS;
+  TRI = MF.getTarget().getSubtargetImpl()->getRegisterInfo();
   DEBUG(MF.dump());
-  for (MachineFunction::const_iterator mbbItr = MF.begin(), mbbEnd = MF.end();
-       mbbItr != mbbEnd; ++mbbItr) {
-    const MachineBasicBlock *MBB = &*mbbItr;
+  for (const auto &MBB: MF) {
     Chains.clear(); // FIXME: really needed ? Could not work at MF level ?
-    for (MachineBasicBlock::const_iterator miItr = MBB->begin(),
-                                           miEnd = MBB->end();
-         miItr != miEnd; ++miItr) {
-      const MachineInstr *MI = &*miItr;
-      switch (MI->getOpcode()) {
+    for (const auto &MI: MBB) {
+      // Forget Chains which have expired
+      for (auto r : Chains) {
+        SmallVector<unsigned, 8> toDel;
+        if(regJustKilledBefore(LIs, r, MI)) {
+          DEBUG(dbgs() << "Killing chain " << PrintReg(r, TRI) << " at ";
+                MI.print(dbgs()););
+          toDel.push_back(r);
+        }
+        while (!toDel.empty()) {
+          Chains.remove(toDel.back());
+          toDel.pop_back();
+        }
+      }
+      switch (MI.getOpcode()) {
       case AArch64::FMSUBSrrr:
       case AArch64::FMADDSrrr:
      case AArch64::FNMSUBSrrr:
@@ -348,8 +360,8 @@ void A57PBQPConstraints::apply(PBQPRAGraph &G) {
       case AArch64::FMADDDrrr:
       case AArch64::FNMSUBDrrr:
       case AArch64::FNMADDDrrr: {
-        unsigned Rd = MI->getOperand(0).getReg();
-        unsigned Ra = MI->getOperand(3).getReg();
+        unsigned Rd = MI.getOperand(0).getReg();
+        unsigned Ra = MI.getOperand(3).getReg();
         if (addIntraChainConstraint(G, Rd, Ra))
           addInterChainConstraint(G, Rd, Ra);
@@ -358,26 +370,13 @@ void A57PBQPConstraints::apply(PBQPRAGraph &G) {
       case AArch64::FMLAv2f32:
       case AArch64::FMLSv2f32: {
-        unsigned Rd = MI->getOperand(0).getReg();
+        unsigned Rd = MI.getOperand(0).getReg();
         addInterChainConstraint(G, Rd, Rd);
         break;
       }
       default:
-        // Forget Chains which have been killed
-        for (auto r : Chains) {
-          SmallVector<unsigned, 8> toDel;
-          if (MI->killsRegister(r)) {
-            DEBUG(dbgs() << "Killing chain " << PrintReg(r, &TRI) << " at ";
-                  MI->print(dbgs()););
-            toDel.push_back(r);
-          }
-          while (!toDel.empty()) {
-            Chains.remove(toDel.back());
-            toDel.pop_back();
-          }
-        }
         break;
       }
     }
   }
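
A note on the cost model in addIntraChainConstraint above: the PBQP edge between the allowed-register sets of Rd and Ra gets a matrix in which row/column 0 stand for the spill option, same-parity register pairs cost nothing, cross-parity pairs cost 1, and pairs that would overlap while both values are live are forbidden with an infinite cost. The standalone sketch below illustrates only that shape and is not code from this commit: registers are plain integers and regsOverlap() is simplified to equality.

#include <iostream>
#include <limits>
#include <vector>

// Same idea as the haveSameParity() helper in the pass: two register
// encodings agree in parity when both are even or both are odd.
static bool haveSameParity(unsigned RegA, unsigned RegB) {
  return (RegA % 2) == (RegB % 2);
}

// Build the (|RdAllowed| + 1) x (|RaAllowed| + 1) matrix.  Row/column 0 are
// the "no register / spill" option and stay at cost 0; entry [i+1][j+1]
// penalises a parity switch along the accumulator chain and forbids giving
// both values the same register while their lives overlap (a simplification
// of TRI->regsOverlap()).
static std::vector<std::vector<double>>
buildChainCosts(const std::vector<unsigned> &RdAllowed,
                const std::vector<unsigned> &RaAllowed, bool LivesOverlap) {
  std::vector<std::vector<double>> Costs(
      RdAllowed.size() + 1, std::vector<double>(RaAllowed.size() + 1, 0.0));
  for (unsigned I = 0; I != RdAllowed.size(); ++I)
    for (unsigned J = 0; J != RaAllowed.size(); ++J) {
      if (LivesOverlap && RdAllowed[I] == RaAllowed[J])
        Costs[I + 1][J + 1] = std::numeric_limits<double>::infinity();
      else
        Costs[I + 1][J + 1] =
            haveSameParity(RdAllowed[I], RaAllowed[J]) ? 0.0 : 1.0;
    }
  return Costs;
}

int main() {
  // Hypothetical candidate sets, e.g. D8-D11 for Rd and D9-D12 for Ra.
  std::vector<unsigned> RdAllowed = {8, 9, 10, 11};
  std::vector<unsigned> RaAllowed = {9, 10, 11, 12};
  for (const auto &Row : buildChainCosts(RdAllowed, RaAllowed, true)) {
    for (double Cost : Row)
      std::cout << Cost << ' ';
    std::cout << '\n';
  }
  return 0;
}

With these costs the solver prefers assignments that keep parity(Rd) == parity(Ra) along the whole chain, which is exactly the constraint the header comment in the next file states.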


@@ -15,14 +15,15 @@
 namespace llvm {
-class A57PBQPConstraints : public PBQPRAConstraint {
+/// Add the accumulator chaining constraint to a PBQP graph
+class A57ChainingConstraint : public PBQPRAConstraint {
 public:
   // Add A57 specific constraints to the PBQP graph.
   void apply(PBQPRAGraph &G) override;
 private:
   SmallSetVector<unsigned, 32> Chains;
+  const TargetRegisterInfo *TRI;
   // Add the accumulator chaining constraint, inside the chain, i.e. so that
   // parity(Rd) == parity(Ra).
@@ -32,7 +33,6 @@ private:
   // Add constraints between existing chains
   void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
 };
 }
 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H


@@ -137,5 +137,8 @@ bool AArch64Subtarget::enableEarlyIfConversion() const {
 std::unique_ptr<PBQPRAConstraint>
 AArch64Subtarget::getCustomPBQPConstraints() const {
-  return llvm::make_unique<A57PBQPConstraints>();
+  if (!isCortexA57())
+    return nullptr;
+  return llvm::make_unique<A57ChainingConstraint>();
 }


@@ -0,0 +1,104 @@
; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
;
; Test PBQP is able to fulfill the accumulator chaining constraint.
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
; CHECK-LABEL: fir
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
define void @fir(double* nocapture %rx, double* nocapture %ry, double* nocapture %c, double* nocapture %x, double* nocapture %y) {
entry:
%0 = load double* %c, align 8
%1 = load double* %x, align 8
%mul = fmul fast double %1, %0
%2 = load double* %y, align 8
%mul7 = fmul fast double %2, %0
%arrayidx.1 = getelementptr inbounds double* %c, i64 1
%3 = load double* %arrayidx.1, align 8
%arrayidx2.1 = getelementptr inbounds double* %x, i64 1
%4 = load double* %arrayidx2.1, align 8
%mul.1 = fmul fast double %4, %3
%add.1 = fadd fast double %mul.1, %mul
%arrayidx6.1 = getelementptr inbounds double* %y, i64 1
%5 = load double* %arrayidx6.1, align 8
%mul7.1 = fmul fast double %5, %3
%add8.1 = fadd fast double %mul7.1, %mul7
%arrayidx.2 = getelementptr inbounds double* %c, i64 2
%6 = load double* %arrayidx.2, align 8
%arrayidx2.2 = getelementptr inbounds double* %x, i64 2
%7 = load double* %arrayidx2.2, align 8
%mul.2 = fmul fast double %7, %6
%add.2 = fadd fast double %mul.2, %add.1
%arrayidx6.2 = getelementptr inbounds double* %y, i64 2
%8 = load double* %arrayidx6.2, align 8
%mul7.2 = fmul fast double %8, %6
%add8.2 = fadd fast double %mul7.2, %add8.1
%arrayidx.3 = getelementptr inbounds double* %c, i64 3
%9 = load double* %arrayidx.3, align 8
%arrayidx2.3 = getelementptr inbounds double* %x, i64 3
%10 = load double* %arrayidx2.3, align 8
%mul.3 = fmul fast double %10, %9
%add.3 = fadd fast double %mul.3, %add.2
%arrayidx6.3 = getelementptr inbounds double* %y, i64 3
%11 = load double* %arrayidx6.3, align 8
%mul7.3 = fmul fast double %11, %9
%add8.3 = fadd fast double %mul7.3, %add8.2
%arrayidx.4 = getelementptr inbounds double* %c, i64 4
%12 = load double* %arrayidx.4, align 8
%arrayidx2.4 = getelementptr inbounds double* %x, i64 4
%13 = load double* %arrayidx2.4, align 8
%mul.4 = fmul fast double %13, %12
%add.4 = fadd fast double %mul.4, %add.3
%arrayidx6.4 = getelementptr inbounds double* %y, i64 4
%14 = load double* %arrayidx6.4, align 8
%mul7.4 = fmul fast double %14, %12
%add8.4 = fadd fast double %mul7.4, %add8.3
%arrayidx.5 = getelementptr inbounds double* %c, i64 5
%15 = load double* %arrayidx.5, align 8
%arrayidx2.5 = getelementptr inbounds double* %x, i64 5
%16 = load double* %arrayidx2.5, align 8
%mul.5 = fmul fast double %16, %15
%add.5 = fadd fast double %mul.5, %add.4
%arrayidx6.5 = getelementptr inbounds double* %y, i64 5
%17 = load double* %arrayidx6.5, align 8
%mul7.5 = fmul fast double %17, %15
%add8.5 = fadd fast double %mul7.5, %add8.4
%arrayidx.6 = getelementptr inbounds double* %c, i64 6
%18 = load double* %arrayidx.6, align 8
%arrayidx2.6 = getelementptr inbounds double* %x, i64 6
%19 = load double* %arrayidx2.6, align 8
%mul.6 = fmul fast double %19, %18
%add.6 = fadd fast double %mul.6, %add.5
%arrayidx6.6 = getelementptr inbounds double* %y, i64 6
%20 = load double* %arrayidx6.6, align 8
%mul7.6 = fmul fast double %20, %18
%add8.6 = fadd fast double %mul7.6, %add8.5
%arrayidx.7 = getelementptr inbounds double* %c, i64 7
%21 = load double* %arrayidx.7, align 8
%arrayidx2.7 = getelementptr inbounds double* %x, i64 7
%22 = load double* %arrayidx2.7, align 8
%mul.7 = fmul fast double %22, %21
%add.7 = fadd fast double %mul.7, %add.6
%arrayidx6.7 = getelementptr inbounds double* %y, i64 7
%23 = load double* %arrayidx6.7, align 8
%mul7.7 = fmul fast double %23, %21
%add8.7 = fadd fast double %mul7.7, %add8.6
store double %add.7, double* %rx, align 8
store double %add8.7, double* %ry, align 8
ret void
}
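
For readers who prefer source to unrolled IR: @fir above is an 8-tap FIR-style kernel with two independent multiply-accumulate chains, one feeding %rx (the add.N values) and one feeding %ry (the add8.N values). A rough C++ equivalent, reconstructed from the IR rather than taken from the commit, could look like the sketch below; the IR seeds each chain with its first product instead of 0.0, which is equivalent under fast-math.

// Hypothetical scalar source for the @fir test above (my reconstruction).
// The two independent accumulator chains are what give the register
// allocator two FMADD chains whose accumulator registers should keep a
// consistent parity, as matched by the CHECK-EVEN/CHECK-ODD lines.
void fir(double *rx, double *ry, const double *c, const double *x,
         const double *y) {
  double accX = 0.0; // becomes the add.N chain in the IR
  double accY = 0.0; // becomes the add8.N chain in the IR
  for (int i = 0; i < 8; ++i) {
    accX += c[i] * x[i];
    accY += c[i] * y[i];
  }
  *rx = accX;
  *ry = accY;
}

With -fp-contract=fast each multiply/add pair should contract to an fmadd, producing the chains of fmadd instructions that the CHECK lines match.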