[SystemZ] Move compare-and-branch generation even later
r187116 moved compare-and-branch generation from the instruction-selection pass to the peephole optimizer (via optimizeCompare). It turns out that even this is a bit too early. Fused compare-and-branch instructions don't interact well with predication, where a CC result is needed. They also make it harder to reuse the CC side-effects of earlier instructions (not yet implemented, but the subject of a later patch).

Another problem was that the AnalyzeBranch family of routines weren't handling compares and branches, so we weren't able to reverse the fused form in cases where we would reverse a separate branch. This could have been fixed by extending AnalyzeBranch, but given the other problems, I've instead moved the fusing to the long-branch pass, which is also responsible for the opposite transformation: splitting out-of-range compares and branches into separate compares and long branches.

I've added a test for the AnalyzeBranch problem. A test for the predication problem is included in the next patch, which fixes a bug in the choice of CC mask.

llvm-svn: 187494
commit 8a757bba10
parent b0a75431ad
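As a rough illustration of what the patch controls, the new test added below expects the separate form for f1 and the fused form for f2 (the mnemonics come from the test's CHECK lines; the label name is just a FileCheck placeholder):

    # f1: compare and branch kept separate; the branch can be analyzed,
    #     reversed and predicated on its own, and CC stays available
    clr     %r2, %r3
    jnhe    .Llabel

    # f2: compare and branch fused into a single compare-immediate-and-jump
    cijnlh  %r2, 0, .Llabel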
@@ -1997,17 +1997,10 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
   if (IsSubWord)
     BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
       .addReg(OldVal).addReg(BitShift).addImm(0);
-  unsigned FusedOpcode = TII->getCompareAndBranch(CompareOpcode);
-  if (FusedOpcode)
-    BuildMI(MBB, DL, TII->get(FusedOpcode))
-      .addReg(RotatedOldVal).addReg(Src2)
-      .addImm(KeepOldMask).addMBB(UpdateMBB);
-  else {
-    BuildMI(MBB, DL, TII->get(CompareOpcode))
-      .addReg(RotatedOldVal).addReg(Src2);
-    BuildMI(MBB, DL, TII->get(SystemZ::BRC))
-      .addImm(KeepOldMask).addMBB(UpdateMBB);
-  }
+  BuildMI(MBB, DL, TII->get(CompareOpcode))
+    .addReg(RotatedOldVal).addReg(Src2);
+  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+    .addImm(KeepOldMask).addMBB(UpdateMBB);
   MBB->addSuccessor(UpdateMBB);
   MBB->addSuccessor(UseAltMBB);
@@ -2109,7 +2102,8 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
   //   ^^ Replace the upper 32-BitSize bits of the
   //      comparison value with those that we loaded,
   //      so that we can use a full word comparison.
-  //   CRJNE %Dest, %RetryCmpVal, DoneMBB
+  //   CR %Dest, %RetryCmpVal
+  //   JNE DoneMBB
   //   # Fall through to SetMBB
   MBB = LoopMBB;
   BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
@@ -2125,8 +2119,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
       .addReg(OldVal).addReg(BitShift).addImm(BitSize);
   BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
     .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
-  BuildMI(MBB, DL, TII->get(SystemZ::CRJ))
-    .addReg(Dest).addReg(RetryCmpVal)
-    .addImm(MaskNE).addMBB(DoneMBB);
+  BuildMI(MBB, DL, TII->get(SystemZ::CR))
+    .addReg(Dest).addReg(RetryCmpVal);
+  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+    .addImm(MaskNE).addMBB(DoneMBB);
   MBB->addSuccessor(DoneMBB);
   MBB->addSuccessor(SetMBB);
@@ -282,109 +282,6 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   return Count;
 }
 
-bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI,
-                                      unsigned &SrcReg, unsigned &SrcReg2,
-                                      int &Mask, int &Value) const {
-  assert(MI->isCompare() && "Caller should check that this is a compare");
-
-  // Ignore comparisons involving memory for now.
-  if (MI->getNumExplicitOperands() != 2)
-    return false;
-
-  SrcReg = MI->getOperand(0).getReg();
-  if (MI->getOperand(1).isReg()) {
-    SrcReg2 = MI->getOperand(1).getReg();
-    Value = 0;
-    Mask = ~0;
-    return true;
-  } else if (MI->getOperand(1).isImm()) {
-    SrcReg2 = 0;
-    Value = MI->getOperand(1).getImm();
-    Mask = ~0;
-    return true;
-  }
-  return false;
-}
-
-// Return true if CC is live after MBBI.  We can't rely on kill information
-// because of the way InsertBranch is used.
-static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
-                          const TargetRegisterInfo *TRI) {
-  if (MBBI->killsRegister(SystemZ::CC, TRI))
-    return false;
-
-  MachineBasicBlock *MBB = MBBI->getParent();
-  MachineBasicBlock::iterator MBBE = MBB->end();
-  for (++MBBI; MBBI != MBBE; ++MBBI)
-    if (MBBI->readsRegister(SystemZ::CC, TRI))
-      return true;
-
-  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
-         SE = MBB->succ_end(); SI != SE; ++SI)
-    if ((*SI)->isLiveIn(SystemZ::CC))
-      return true;
-
-  return false;
-}
-
-bool
-SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
-                                       unsigned SrcReg, unsigned SrcReg2,
-                                       int Mask, int Value,
-                                       const MachineRegisterInfo *MRI) const {
-  MachineBasicBlock *MBB = Compare->getParent();
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
-
-  // Try to fold a comparison into a following branch, if it is only used once.
-  if (unsigned FusedOpcode = getCompareAndBranch(Compare->getOpcode(),
-                                                 Compare)) {
-    MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->end();
-    for (++MBBI; MBBI != MBBE; ++MBBI) {
-      if (MBBI->getOpcode() == SystemZ::BRC && !isCCLiveAfter(MBBI, TRI)) {
-        // Read the branch mask and target.
-        MachineOperand CCMask(MBBI->getOperand(0));
-        MachineOperand Target(MBBI->getOperand(1));
-
-        // Clear out all current operands.
-        int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
-        assert(CCUse >= 0 && "BRC must use CC");
-        MBBI->RemoveOperand(CCUse);
-        MBBI->RemoveOperand(1);
-        MBBI->RemoveOperand(0);
-
-        // Rebuild MBBI as a fused compare and branch.
-        MBBI->setDesc(get(FusedOpcode));
-        MachineInstrBuilder(*MBB->getParent(), MBBI)
-          .addOperand(Compare->getOperand(0))
-          .addOperand(Compare->getOperand(1))
-          .addOperand(CCMask)
-          .addOperand(Target);
-
-        // Clear any intervening kills of SrcReg and SrcReg2.
-        MBBI = Compare;
-        for (++MBBI; MBBI != MBBE; ++MBBI) {
-          MBBI->clearRegisterKills(SrcReg, TRI);
-          if (SrcReg2)
-            MBBI->clearRegisterKills(SrcReg2, TRI);
-        }
-        Compare->removeFromParent();
-        return true;
-      }
-
-      // Stop if we find another reference to CC before a branch.
-      if (MBBI->readsRegister(SystemZ::CC, TRI) ||
-          MBBI->modifiesRegister(SystemZ::CC, TRI))
-        break;
-
-      // Stop if we find another assignment to the registers before the branch.
-      if (MBBI->modifiesRegister(SrcReg, TRI) ||
-          (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
-        break;
-    }
-  }
-  return false;
-}
-
 // If Opcode is a move that has a conditional variant, return that variant,
 // otherwise return 0.
 static unsigned getConditionalMove(unsigned Opcode) {
@@ -104,14 +104,6 @@ public:
                                 MachineBasicBlock *FBB,
                                 const SmallVectorImpl<MachineOperand> &Cond,
                                 DebugLoc DL) const LLVM_OVERRIDE;
-  virtual bool analyzeCompare(const MachineInstr *MI,
-                              unsigned &SrcReg, unsigned &SrcReg2,
-                              int &Mask, int &Value) const LLVM_OVERRIDE;
-  virtual bool optimizeCompareInstr(MachineInstr *CmpInstr,
-                                    unsigned SrcReg, unsigned SrcReg2,
-                                    int Mask, int Value,
-                                    const MachineRegisterInfo *MRI) const
-    LLVM_OVERRIDE;
   virtual bool isPredicable(MachineInstr *MI) const LLVM_OVERRIDE;
   virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
                                    unsigned ExtraPredCycles,
@@ -7,16 +7,26 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass makes sure that all branches are in range. There are several ways
-// in which this could be done. One aggressive approach is to assume that all
-// branches are in range and successively replace those that turn out not
-// to be in range with a longer form (branch relaxation). A simple
-// implementation is to continually walk through the function relaxing
-// branches until no more changes are needed and a fixed point is reached.
-// However, in the pathological worst case, this implementation is
-// quadratic in the number of blocks; relaxing branch N can make branch N-1
-// go out of range, which in turn can make branch N-2 go out of range,
-// and so on.
+// This pass does two things:
+// (1) fuse compares and branches into COMPARE AND BRANCH instructions
+// (2) make sure that all branches are in range.
+//
+// We do (1) here rather than earlier because the fused form prevents
+// predication.
+//
+// Doing it so late makes it more likely that a register will be reused
+// between the compare and the branch, but it isn't clear whether preventing
+// that would be a win or not.
+//
+// There are several ways in which (2) could be done.  One aggressive
+// approach is to assume that all branches are in range and successively
+// replace those that turn out not to be in range with a longer form
+// (branch relaxation).  A simple implementation is to continually walk
+// through the function relaxing branches until no more changes are
+// needed and a fixed point is reached.  However, in the pathological
+// worst case, this implementation is quadratic in the number of blocks;
+// relaxing branch N can make branch N-1 go out of range, which in turn
+// can make branch N-2 go out of range, and so on.
 //
 // An alternative approach is to assume that all branches must be
 // converted to their long forms, then reinstate the short forms of
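The quadratic worst case described in that comment is easy to see on a toy model. The following self-contained sketch is not LLVM code: block sizes, the short-branch range and the two-byte long-form penalty are made-up illustrative numbers. It just runs the naive "relax until a fixed point" loop and shows one relaxation pushing another branch out of range.

// Toy model of naive fixed-point branch relaxation (illustrative only).
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

struct Branch {
  std::size_t FromBlock;   // block that ends with the branch
  std::size_t TargetBlock; // block whose start it jumps to
  bool Long;               // already relaxed to the long form?
};

// One sweep: recompute block start addresses, then relax any short branch
// whose displacement is out of range.  Returns true if something changed,
// so the caller keeps sweeping until a fixed point is reached.
static bool relaxOnce(std::vector<uint64_t> &BlockSizes,
                      std::vector<Branch> &Branches) {
  std::vector<uint64_t> Start(BlockSizes.size() + 1, 0);
  for (std::size_t I = 0; I < BlockSizes.size(); ++I)
    Start[I + 1] = Start[I] + BlockSizes[I];

  bool Changed = false;
  for (Branch &B : Branches) {
    if (B.Long)
      continue;
    int64_t Disp = int64_t(Start[B.TargetBlock]) -
                   int64_t(Start[B.FromBlock + 1]);
    if (Disp < -0x10000 || Disp > 0xFFFF) { // illustrative short-branch range
      B.Long = true;
      BlockSizes[B.FromBlock] += 2;         // long form costs two extra bytes
      Changed = true;                       // may knock other branches out of range
    }
  }
  return Changed;
}

int main() {
  // Block 1 branches back to block 0; block 0 branches forward to block 2.
  std::vector<uint64_t> BlockSizes = {0x8000, 0xFFFE, 0x8000};
  std::vector<Branch> Branches = {{1, 0, false}, {0, 2, false}};

  unsigned Sweeps = 0;
  while (relaxOnce(BlockSizes, Branches))
    ++Sweeps;
  // Relaxing the backward branch grows block 1, which then pushes the
  // forward branch out of range, so two sweeps are needed here.
  std::cout << "fixed point after " << Sweeps << " relaxation sweeps\n";
  return 0;
}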
@@ -146,6 +156,7 @@ namespace {
     void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
                         bool AssumeRelaxed);
     TerminatorInfo describeTerminator(MachineInstr *MI);
+    bool fuseCompareAndBranch(MachineInstr *Compare);
     uint64_t initMBBInfo();
     bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
     bool mustRelaxABranch();
@@ -243,6 +254,90 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
   return Terminator;
 }
 
+// Return true if CC is live after MBBI.
+static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
+                          const TargetRegisterInfo *TRI) {
+  if (MBBI->killsRegister(SystemZ::CC, TRI))
+    return false;
+
+  MachineBasicBlock *MBB = MBBI->getParent();
+  MachineBasicBlock::iterator MBBE = MBB->end();
+  for (++MBBI; MBBI != MBBE; ++MBBI) {
+    if (MBBI->readsRegister(SystemZ::CC, TRI))
+      return true;
+    if (MBBI->definesRegister(SystemZ::CC, TRI))
+      return false;
+  }
+
+  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+         SE = MBB->succ_end(); SI != SE; ++SI)
+    if ((*SI)->isLiveIn(SystemZ::CC))
+      return true;
+
+  return false;
+}
+
+// Try to fuse compare instruction Compare into a later branch.  Return
+// true on success and if Compare is therefore redundant.
+bool SystemZLongBranch::fuseCompareAndBranch(MachineInstr *Compare) {
+  if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+    return false;
+
+  unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(),
+                                                  Compare);
+  if (!FusedOpcode)
+    return false;
+
+  unsigned SrcReg = Compare->getOperand(0).getReg();
+  unsigned SrcReg2 = (Compare->getOperand(1).isReg() ?
+                      Compare->getOperand(1).getReg() : 0);
+  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+  MachineBasicBlock *MBB = Compare->getParent();
+  MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->end();
+  for (++MBBI; MBBI != MBBE; ++MBBI) {
+    if (MBBI->getOpcode() == SystemZ::BRC && !isCCLiveAfter(MBBI, TRI)) {
+      // Read the branch mask and target.
+      MachineOperand CCMask(MBBI->getOperand(0));
+      MachineOperand Target(MBBI->getOperand(1));
+
+      // Clear out all current operands.
+      int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
+      assert(CCUse >= 0 && "BRC must use CC");
+      MBBI->RemoveOperand(CCUse);
+      MBBI->RemoveOperand(1);
+      MBBI->RemoveOperand(0);
+
+      // Rebuild MBBI as a fused compare and branch.
+      MBBI->setDesc(TII->get(FusedOpcode));
+      MachineInstrBuilder(*MBB->getParent(), MBBI)
+        .addOperand(Compare->getOperand(0))
+        .addOperand(Compare->getOperand(1))
+        .addOperand(CCMask)
+        .addOperand(Target);
+
+      // Clear any intervening kills of SrcReg and SrcReg2.
+      MBBI = Compare;
+      for (++MBBI; MBBI != MBBE; ++MBBI) {
+        MBBI->clearRegisterKills(SrcReg, TRI);
+        if (SrcReg2)
+          MBBI->clearRegisterKills(SrcReg2, TRI);
+      }
+      return true;
+    }
+
+    // Stop if we find another reference to CC before a branch.
+    if (MBBI->readsRegister(SystemZ::CC, TRI) ||
+        MBBI->modifiesRegister(SystemZ::CC, TRI))
+      return false;
+
+    // Stop if we find another assignment to the registers before the branch.
+    if (MBBI->modifiesRegister(SrcReg, TRI) ||
+        (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
+      return false;
+  }
+  return false;
+}
+
 // Fill MBBs and Terminators, setting the addresses on the assumption
 // that no branches need relaxation.  Return the size of the function under
 // this assumption.
@@ -268,8 +363,12 @@ uint64_t SystemZLongBranch::initMBBInfo() {
     MachineBasicBlock::iterator MI = MBB->begin();
     MachineBasicBlock::iterator End = MBB->end();
     while (MI != End && !MI->isTerminator()) {
-      Block.Size += TII->getInstSizeInBytes(MI);
+      MachineInstr *Current = MI;
       ++MI;
+      if (Current->isCompare() && fuseCompareAndBranch(Current))
+        Current->removeFromParent();
+      else
+        Block.Size += TII->getInstSizeInBytes(Current);
     }
     skipNonTerminators(Position, Block);
@@ -0,0 +1,45 @@
+; Test SystemZInstrInfo::AnalyzeBranch and SystemZInstrInfo::InsertBranch.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @foo() noreturn
+
+; Check a case where a separate branch is needed and where the original
+; order should be reversed.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: clr %r2, %r3
+; CHECK: jnhe .L[[LABEL:.*]]
+; CHECK: br %r14
+; CHECK: .L[[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+entry:
+  %cmp = icmp ult i32 %a, %b
+  br i1 %cmp, label %callit, label %return
+
+callit:
+  call void @foo()
+  unreachable
+
+return:
+  ret i32 1
+}
+
+; Same again with a fused compare and branch.
+define i32 @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK: cijnlh %r2, 0, .L[[LABEL:.*]]
+; CHECK: br %r14
+; CHECK: .L[[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+entry:
+  %cmp = icmp eq i32 %a, 0
+  br i1 %cmp, label %callit, label %return
+
+callit:
+  call void @foo()
+  unreachable
+
+return:
+  ret i32 1
+}