forked from OSchip/llvm-project
[AArch64] Clean up A57PBQPConstraints
Also add a long-awaited test case. llvm-svn: 220381
This commit is contained in:
parent
c29520c5b3
commit
9b3330546b
|
@ -156,19 +156,17 @@ bool haveSameParity(unsigned reg1, unsigned reg2) {
|
|||
|
||||
}
|
||||
|
||||
bool A57PBQPConstraints::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
|
||||
bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
|
||||
unsigned Ra) {
|
||||
if (Rd == Ra)
|
||||
return false;
|
||||
|
||||
const TargetRegisterInfo &TRI =
|
||||
*G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
|
||||
LiveIntervals &LIs = G.getMetadata().LIS;
|
||||
|
||||
if (TRI.isPhysicalRegister(Rd) || TRI.isPhysicalRegister(Ra)) {
|
||||
DEBUG(dbgs() << "Rd is a physical reg:" << TRI.isPhysicalRegister(Rd)
|
||||
if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) {
|
||||
DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd)
|
||||
<< '\n');
|
||||
DEBUG(dbgs() << "Ra is a physical reg:" << TRI.isPhysicalRegister(Ra)
|
||||
DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra)
|
||||
<< '\n');
|
||||
return false;
|
||||
}
|
||||
|
@ -196,7 +194,7 @@ bool A57PBQPConstraints::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
|
|||
unsigned pRd = (*vRdAllowed)[i];
|
||||
for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
|
||||
unsigned pRa = (*vRaAllowed)[j];
|
||||
if (livesOverlap && TRI.regsOverlap(pRd, pRa))
|
||||
if (livesOverlap && TRI->regsOverlap(pRd, pRa))
|
||||
costs[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
|
||||
else
|
||||
costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0;
|
||||
|
@ -242,23 +240,20 @@ bool A57PBQPConstraints::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
|
|||
return true;
|
||||
}
|
||||
|
||||
void A57PBQPConstraints::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
|
||||
void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
|
||||
unsigned Ra) {
|
||||
const TargetRegisterInfo &TRI =
|
||||
*G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
|
||||
(void)TRI;
|
||||
LiveIntervals &LIs = G.getMetadata().LIS;
|
||||
|
||||
// Do some Chain management
|
||||
if (Chains.count(Ra)) {
|
||||
if (Rd != Ra) {
|
||||
DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, &TRI) << " to "
|
||||
<< PrintReg(Rd, &TRI) << '\n';);
|
||||
DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, TRI) << " to "
|
||||
<< PrintReg(Rd, TRI) << '\n';);
|
||||
Chains.remove(Ra);
|
||||
Chains.insert(Rd);
|
||||
}
|
||||
} else {
|
||||
DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, &TRI)
|
||||
DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, TRI)
|
||||
<< '\n';);
|
||||
Chains.insert(Rd);
|
||||
}
|
||||
|
@ -322,24 +317,41 @@ void A57PBQPConstraints::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
|
|||
}
|
||||
}
|
||||
|
||||
void A57PBQPConstraints::apply(PBQPRAGraph &G) {
|
||||
MachineFunction &MF = G.getMetadata().MF;
|
||||
static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
|
||||
const MachineInstr &MI) {
|
||||
LiveInterval LI = LIs.getInterval(reg);
|
||||
SlotIndex SI = LIs.getInstructionIndex(&MI);
|
||||
return LI.expiredAt(SI);
|
||||
}
|
||||
|
||||
const TargetRegisterInfo &TRI =
|
||||
*MF.getTarget().getSubtargetImpl()->getRegisterInfo();
|
||||
(void)TRI;
|
||||
void A57ChainingConstraint::apply(PBQPRAGraph &G) {
|
||||
const MachineFunction &MF = G.getMetadata().MF;
|
||||
LiveIntervals &LIs = G.getMetadata().LIS;
|
||||
|
||||
TRI = MF.getTarget().getSubtargetImpl()->getRegisterInfo();
|
||||
DEBUG(MF.dump());
|
||||
|
||||
for (MachineFunction::const_iterator mbbItr = MF.begin(), mbbEnd = MF.end();
|
||||
mbbItr != mbbEnd; ++mbbItr) {
|
||||
const MachineBasicBlock *MBB = &*mbbItr;
|
||||
for (const auto &MBB: MF) {
|
||||
Chains.clear(); // FIXME: really needed ? Could not work at MF level ?
|
||||
|
||||
for (MachineBasicBlock::const_iterator miItr = MBB->begin(),
|
||||
miEnd = MBB->end();
|
||||
miItr != miEnd; ++miItr) {
|
||||
const MachineInstr *MI = &*miItr;
|
||||
switch (MI->getOpcode()) {
|
||||
for (const auto &MI: MBB) {
|
||||
|
||||
// Forget Chains which have expired
|
||||
for (auto r : Chains) {
|
||||
SmallVector<unsigned, 8> toDel;
|
||||
if(regJustKilledBefore(LIs, r, MI)) {
|
||||
DEBUG(dbgs() << "Killing chain " << PrintReg(r, TRI) << " at ";
|
||||
MI.print(dbgs()););
|
||||
toDel.push_back(r);
|
||||
}
|
||||
|
||||
while (!toDel.empty()) {
|
||||
Chains.remove(toDel.back());
|
||||
toDel.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
case AArch64::FMSUBSrrr:
|
||||
case AArch64::FMADDSrrr:
|
||||
case AArch64::FNMSUBSrrr:
|
||||
|
@ -348,8 +360,8 @@ void A57PBQPConstraints::apply(PBQPRAGraph &G) {
|
|||
case AArch64::FMADDDrrr:
|
||||
case AArch64::FNMSUBDrrr:
|
||||
case AArch64::FNMADDDrrr: {
|
||||
unsigned Rd = MI->getOperand(0).getReg();
|
||||
unsigned Ra = MI->getOperand(3).getReg();
|
||||
unsigned Rd = MI.getOperand(0).getReg();
|
||||
unsigned Ra = MI.getOperand(3).getReg();
|
||||
|
||||
if (addIntraChainConstraint(G, Rd, Ra))
|
||||
addInterChainConstraint(G, Rd, Ra);
|
||||
|
@ -358,26 +370,13 @@ void A57PBQPConstraints::apply(PBQPRAGraph &G) {
|
|||
|
||||
case AArch64::FMLAv2f32:
|
||||
case AArch64::FMLSv2f32: {
|
||||
unsigned Rd = MI->getOperand(0).getReg();
|
||||
unsigned Rd = MI.getOperand(0).getReg();
|
||||
addInterChainConstraint(G, Rd, Rd);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
// Forget Chains which have been killed
|
||||
for (auto r : Chains) {
|
||||
SmallVector<unsigned, 8> toDel;
|
||||
if (MI->killsRegister(r)) {
|
||||
DEBUG(dbgs() << "Killing chain " << PrintReg(r, &TRI) << " at ";
|
||||
MI->print(dbgs()););
|
||||
toDel.push_back(r);
|
||||
}
|
||||
|
||||
while (!toDel.empty()) {
|
||||
Chains.remove(toDel.back());
|
||||
toDel.pop_back();
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,14 +15,15 @@
|
|||
|
||||
namespace llvm {
|
||||
|
||||
class A57PBQPConstraints : public PBQPRAConstraint {
|
||||
/// Add the accumulator chaining constraint to a PBQP graph
|
||||
class A57ChainingConstraint : public PBQPRAConstraint {
|
||||
public:
|
||||
|
||||
// Add A57 specific constraints to the PBQP graph.
|
||||
void apply(PBQPRAGraph &G) override;
|
||||
|
||||
private:
|
||||
SmallSetVector<unsigned, 32> Chains;
|
||||
const TargetRegisterInfo *TRI;
|
||||
|
||||
// Add the accumulator chaining constraint, inside the chain, i.e. so that
|
||||
// parity(Rd) == parity(Ra).
|
||||
|
@ -32,7 +33,6 @@ private:
|
|||
// Add constraints between existing chains
|
||||
void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H
|
||||
|
|
|
@ -137,5 +137,8 @@ bool AArch64Subtarget::enableEarlyIfConversion() const {
|
|||
|
||||
std::unique_ptr<PBQPRAConstraint>
|
||||
AArch64Subtarget::getCustomPBQPConstraints() const {
|
||||
return llvm::make_unique<A57PBQPConstraints>();
|
||||
if (!isCortexA57())
|
||||
return nullptr;
|
||||
|
||||
return llvm::make_unique<A57ChainingConstraint>();
|
||||
}
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
|
||||
; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
|
||||
;
|
||||
; Test PBQP is able to fulfill the accumulator chaining constraint.
|
||||
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64"
|
||||
|
||||
; CHECK-LABEL: fir
|
||||
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
|
||||
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
|
||||
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
|
||||
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
|
||||
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
|
||||
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
|
||||
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
|
||||
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
|
||||
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
|
||||
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
|
||||
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
|
||||
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
|
||||
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
|
||||
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
|
||||
define void @fir(double* nocapture %rx, double* nocapture %ry, double* nocapture %c, double* nocapture %x, double* nocapture %y) {
|
||||
entry:
|
||||
%0 = load double* %c, align 8
|
||||
%1 = load double* %x, align 8
|
||||
%mul = fmul fast double %1, %0
|
||||
%2 = load double* %y, align 8
|
||||
%mul7 = fmul fast double %2, %0
|
||||
%arrayidx.1 = getelementptr inbounds double* %c, i64 1
|
||||
%3 = load double* %arrayidx.1, align 8
|
||||
%arrayidx2.1 = getelementptr inbounds double* %x, i64 1
|
||||
%4 = load double* %arrayidx2.1, align 8
|
||||
%mul.1 = fmul fast double %4, %3
|
||||
%add.1 = fadd fast double %mul.1, %mul
|
||||
%arrayidx6.1 = getelementptr inbounds double* %y, i64 1
|
||||
%5 = load double* %arrayidx6.1, align 8
|
||||
%mul7.1 = fmul fast double %5, %3
|
||||
%add8.1 = fadd fast double %mul7.1, %mul7
|
||||
%arrayidx.2 = getelementptr inbounds double* %c, i64 2
|
||||
%6 = load double* %arrayidx.2, align 8
|
||||
%arrayidx2.2 = getelementptr inbounds double* %x, i64 2
|
||||
%7 = load double* %arrayidx2.2, align 8
|
||||
%mul.2 = fmul fast double %7, %6
|
||||
%add.2 = fadd fast double %mul.2, %add.1
|
||||
%arrayidx6.2 = getelementptr inbounds double* %y, i64 2
|
||||
%8 = load double* %arrayidx6.2, align 8
|
||||
%mul7.2 = fmul fast double %8, %6
|
||||
%add8.2 = fadd fast double %mul7.2, %add8.1
|
||||
%arrayidx.3 = getelementptr inbounds double* %c, i64 3
|
||||
%9 = load double* %arrayidx.3, align 8
|
||||
%arrayidx2.3 = getelementptr inbounds double* %x, i64 3
|
||||
%10 = load double* %arrayidx2.3, align 8
|
||||
%mul.3 = fmul fast double %10, %9
|
||||
%add.3 = fadd fast double %mul.3, %add.2
|
||||
%arrayidx6.3 = getelementptr inbounds double* %y, i64 3
|
||||
%11 = load double* %arrayidx6.3, align 8
|
||||
%mul7.3 = fmul fast double %11, %9
|
||||
%add8.3 = fadd fast double %mul7.3, %add8.2
|
||||
%arrayidx.4 = getelementptr inbounds double* %c, i64 4
|
||||
%12 = load double* %arrayidx.4, align 8
|
||||
%arrayidx2.4 = getelementptr inbounds double* %x, i64 4
|
||||
%13 = load double* %arrayidx2.4, align 8
|
||||
%mul.4 = fmul fast double %13, %12
|
||||
%add.4 = fadd fast double %mul.4, %add.3
|
||||
%arrayidx6.4 = getelementptr inbounds double* %y, i64 4
|
||||
%14 = load double* %arrayidx6.4, align 8
|
||||
%mul7.4 = fmul fast double %14, %12
|
||||
%add8.4 = fadd fast double %mul7.4, %add8.3
|
||||
%arrayidx.5 = getelementptr inbounds double* %c, i64 5
|
||||
%15 = load double* %arrayidx.5, align 8
|
||||
%arrayidx2.5 = getelementptr inbounds double* %x, i64 5
|
||||
%16 = load double* %arrayidx2.5, align 8
|
||||
%mul.5 = fmul fast double %16, %15
|
||||
%add.5 = fadd fast double %mul.5, %add.4
|
||||
%arrayidx6.5 = getelementptr inbounds double* %y, i64 5
|
||||
%17 = load double* %arrayidx6.5, align 8
|
||||
%mul7.5 = fmul fast double %17, %15
|
||||
%add8.5 = fadd fast double %mul7.5, %add8.4
|
||||
%arrayidx.6 = getelementptr inbounds double* %c, i64 6
|
||||
%18 = load double* %arrayidx.6, align 8
|
||||
%arrayidx2.6 = getelementptr inbounds double* %x, i64 6
|
||||
%19 = load double* %arrayidx2.6, align 8
|
||||
%mul.6 = fmul fast double %19, %18
|
||||
%add.6 = fadd fast double %mul.6, %add.5
|
||||
%arrayidx6.6 = getelementptr inbounds double* %y, i64 6
|
||||
%20 = load double* %arrayidx6.6, align 8
|
||||
%mul7.6 = fmul fast double %20, %18
|
||||
%add8.6 = fadd fast double %mul7.6, %add8.5
|
||||
%arrayidx.7 = getelementptr inbounds double* %c, i64 7
|
||||
%21 = load double* %arrayidx.7, align 8
|
||||
%arrayidx2.7 = getelementptr inbounds double* %x, i64 7
|
||||
%22 = load double* %arrayidx2.7, align 8
|
||||
%mul.7 = fmul fast double %22, %21
|
||||
%add.7 = fadd fast double %mul.7, %add.6
|
||||
%arrayidx6.7 = getelementptr inbounds double* %y, i64 7
|
||||
%23 = load double* %arrayidx6.7, align 8
|
||||
%mul7.7 = fmul fast double %23, %21
|
||||
%add8.7 = fadd fast double %mul7.7, %add8.6
|
||||
store double %add.7, double* %rx, align 8
|
||||
store double %add8.7, double* %ry, align 8
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue