forked from OSchip/llvm-project
X86: enable CSE between CMP and SUB
We perform the following: 1) Use SUB instead of CMP for i8, i16, i32 and i64 in ISel lowering. 2) Modify MachineCSE to correctly handle implicit defs. 3) Convert SUB back to CMP if possible at peephole. Removed pattern matching of (a>b) ? (a-b):0 and the like, since they are handled by peephole now. rdar://11873276 llvm-svn: 161462
This commit is contained in:
parent
3b9a442841
commit
1be131ba27
|
@ -417,6 +417,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
|
|||
bool Changed = false;
|
||||
|
||||
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
|
||||
SmallVector<unsigned, 2> ImplicitDefsToUpdate;
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
|
||||
MachineInstr *MI = &*I;
|
||||
++I;
|
||||
|
@ -486,15 +487,24 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
|
|||
|
||||
// Check if it's profitable to perform this CSE.
|
||||
bool DoCSE = true;
|
||||
unsigned NumDefs = MI->getDesc().getNumDefs();
|
||||
unsigned NumDefs = MI->getDesc().getNumDefs() +
|
||||
MI->getDesc().getNumImplicitDefs();
|
||||
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
|
||||
MachineOperand &MO = MI->getOperand(i);
|
||||
if (!MO.isReg() || !MO.isDef())
|
||||
continue;
|
||||
unsigned OldReg = MO.getReg();
|
||||
unsigned NewReg = CSMI->getOperand(i).getReg();
|
||||
if (OldReg == NewReg)
|
||||
|
||||
// Go through implicit defs of CSMI and MI, if a def is not dead at MI,
|
||||
// we should make sure it is not dead at CSMI.
|
||||
if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
|
||||
ImplicitDefsToUpdate.push_back(i);
|
||||
if (OldReg == NewReg) {
|
||||
--NumDefs;
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
|
||||
TargetRegisterInfo::isVirtualRegister(NewReg) &&
|
||||
|
@ -526,6 +536,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
|
|||
MRI->clearKillFlags(CSEPairs[i].second);
|
||||
}
|
||||
|
||||
// Go through implicit defs of CSMI and MI, if a def is not dead at MI,
|
||||
// we should make sure it is not dead at CSMI.
|
||||
for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
|
||||
CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
|
||||
|
||||
if (CrossMBBPhysDef) {
|
||||
// Add physical register defs now coming in from a predecessor to MBB
|
||||
// livein list.
|
||||
|
@ -549,6 +564,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
|
|||
Exps.push_back(MI);
|
||||
}
|
||||
CSEPairs.clear();
|
||||
ImplicitDefsToUpdate.clear();
|
||||
}
|
||||
|
||||
return Changed;
|
||||
|
|
|
@ -2438,7 +2438,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
case X86ISD::CMP: {
|
||||
case X86ISD::CMP:
|
||||
case X86ISD::SUB: {
|
||||
// Sometimes a SUB is used to perform comparison.
|
||||
if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
|
||||
// This node is not a CMP.
|
||||
break;
|
||||
SDValue N0 = Node->getOperand(0);
|
||||
SDValue N1 = Node->getOperand(1);
|
||||
|
||||
|
|
|
@ -8333,11 +8333,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
|
|||
switch (Op.getNode()->getOpcode()) {
|
||||
default: llvm_unreachable("unexpected operator!");
|
||||
case ISD::SUB:
|
||||
// If the only use of SUB is EFLAGS, use CMP instead.
|
||||
if (Op.hasOneUse())
|
||||
Opcode = X86ISD::CMP;
|
||||
else
|
||||
Opcode = X86ISD::SUB;
|
||||
Opcode = X86ISD::SUB;
|
||||
break;
|
||||
case ISD::OR: Opcode = X86ISD::OR; break;
|
||||
case ISD::XOR: Opcode = X86ISD::XOR; break;
|
||||
|
@ -8391,6 +8387,14 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
|
|||
return EmitTest(Op0, X86CC, DAG);
|
||||
|
||||
DebugLoc dl = Op0.getDebugLoc();
|
||||
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
|
||||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
|
||||
// Use SUB instead of CMP to enable CSE between SUB and CMP.
|
||||
SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
|
||||
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
|
||||
Op0, Op1);
|
||||
return SDValue(Sub.getNode(), 1);
|
||||
}
|
||||
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
|
||||
}
|
||||
|
||||
|
@ -8765,46 +8769,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
|||
Cond = NewCond;
|
||||
}
|
||||
|
||||
// Handle the following cases related to max and min:
|
||||
// (a > b) ? (a-b) : 0
|
||||
// (a >= b) ? (a-b) : 0
|
||||
// (b < a) ? (a-b) : 0
|
||||
// (b <= a) ? (a-b) : 0
|
||||
// Comparison is removed to use EFLAGS from SUB.
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2))
|
||||
if (Cond.getOpcode() == X86ISD::SETCC &&
|
||||
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
|
||||
(Op1.getOpcode() == ISD::SUB || Op1.getOpcode() == X86ISD::SUB) &&
|
||||
C->getAPIntValue() == 0) {
|
||||
SDValue Cmp = Cond.getOperand(1);
|
||||
unsigned CC = cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
|
||||
if ((DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(0)) &&
|
||||
DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(1)) &&
|
||||
(CC == X86::COND_G || CC == X86::COND_GE ||
|
||||
CC == X86::COND_A || CC == X86::COND_AE)) ||
|
||||
(DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(1)) &&
|
||||
DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(0)) &&
|
||||
(CC == X86::COND_L || CC == X86::COND_LE ||
|
||||
CC == X86::COND_B || CC == X86::COND_BE))) {
|
||||
|
||||
if (Op1.getOpcode() == ISD::SUB) {
|
||||
SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i32);
|
||||
SDValue New = DAG.getNode(X86ISD::SUB, DL, VTs,
|
||||
Op1.getOperand(0), Op1.getOperand(1));
|
||||
DAG.ReplaceAllUsesWith(Op1, New);
|
||||
Op1 = New;
|
||||
}
|
||||
|
||||
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
|
||||
unsigned NewCC = (CC == X86::COND_G || CC == X86::COND_GE ||
|
||||
CC == X86::COND_L ||
|
||||
CC == X86::COND_LE) ? X86::COND_GE : X86::COND_AE;
|
||||
SDValue Ops[] = { Op2, Op1, DAG.getConstant(NewCC, MVT::i8),
|
||||
SDValue(Op1.getNode(), 1) };
|
||||
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
|
||||
}
|
||||
}
|
||||
|
||||
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
|
||||
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
|
||||
// (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
|
||||
|
@ -8945,7 +8909,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
|||
// a < b ? 0 : -1 -> RES = setcc_carry
|
||||
// a >= b ? -1 : 0 -> RES = setcc_carry
|
||||
// a >= b ? 0 : -1 -> RES = ~setcc_carry
|
||||
if (Cond.getOpcode() == X86ISD::CMP) {
|
||||
if (Cond.getOpcode() == X86ISD::SUB) {
|
||||
Cond = ConvertCmpIfNecessary(Cond, DAG);
|
||||
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
|
||||
|
||||
|
|
|
@ -1132,8 +1132,10 @@ defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
|
|||
X86xor_flag, xor, 1, 0>;
|
||||
defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
|
||||
X86add_flag, add, 1, 1>;
|
||||
let isCompare = 1 in {
|
||||
defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
|
||||
X86sub_flag, sub, 0, 0>;
|
||||
}
|
||||
|
||||
// Arithmetic.
|
||||
let Uses = [EFLAGS] in {
|
||||
|
|
|
@ -3031,6 +3031,37 @@ analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
|
|||
CmpMask = ~0;
|
||||
CmpValue = MI->getOperand(1).getImm();
|
||||
return true;
|
||||
// A SUB can be used to perform comparison.
|
||||
case X86::SUB64rm:
|
||||
case X86::SUB32rm:
|
||||
case X86::SUB16rm:
|
||||
case X86::SUB8rm:
|
||||
SrcReg = MI->getOperand(1).getReg();
|
||||
SrcReg2 = 0;
|
||||
CmpMask = ~0;
|
||||
CmpValue = 0;
|
||||
return true;
|
||||
case X86::SUB64rr:
|
||||
case X86::SUB32rr:
|
||||
case X86::SUB16rr:
|
||||
case X86::SUB8rr:
|
||||
SrcReg = MI->getOperand(1).getReg();
|
||||
SrcReg2 = MI->getOperand(2).getReg();
|
||||
CmpMask = ~0;
|
||||
CmpValue = 0;
|
||||
return true;
|
||||
case X86::SUB64ri32:
|
||||
case X86::SUB64ri8:
|
||||
case X86::SUB32ri:
|
||||
case X86::SUB32ri8:
|
||||
case X86::SUB16ri:
|
||||
case X86::SUB16ri8:
|
||||
case X86::SUB8ri:
|
||||
SrcReg = MI->getOperand(1).getReg();
|
||||
SrcReg2 = 0;
|
||||
CmpMask = ~0;
|
||||
CmpValue = MI->getOperand(2).getImm();
|
||||
return true;
|
||||
case X86::CMP64rr:
|
||||
case X86::CMP32rr:
|
||||
case X86::CMP16rr:
|
||||
|
@ -3139,6 +3170,55 @@ bool X86InstrInfo::
|
|||
optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
|
||||
int CmpMask, int CmpValue,
|
||||
const MachineRegisterInfo *MRI) const {
|
||||
// Check whether we can replace SUB with CMP.
|
||||
unsigned NewOpcode = 0;
|
||||
switch (CmpInstr->getOpcode()) {
|
||||
default: break;
|
||||
case X86::SUB64ri32:
|
||||
case X86::SUB64ri8:
|
||||
case X86::SUB32ri:
|
||||
case X86::SUB32ri8:
|
||||
case X86::SUB16ri:
|
||||
case X86::SUB16ri8:
|
||||
case X86::SUB8ri:
|
||||
case X86::SUB64rm:
|
||||
case X86::SUB32rm:
|
||||
case X86::SUB16rm:
|
||||
case X86::SUB8rm:
|
||||
case X86::SUB64rr:
|
||||
case X86::SUB32rr:
|
||||
case X86::SUB16rr:
|
||||
case X86::SUB8rr: {
|
||||
if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
|
||||
return false;
|
||||
// There is no use of the destination register, we can replace SUB with CMP.
|
||||
switch (CmpInstr->getOpcode()) {
|
||||
default: llvm_unreachable(0);
|
||||
case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
|
||||
case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
|
||||
case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
|
||||
case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
|
||||
case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
|
||||
case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
|
||||
case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
|
||||
case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
|
||||
case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
|
||||
case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
|
||||
case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
|
||||
case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
|
||||
case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
|
||||
case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
|
||||
case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
|
||||
}
|
||||
CmpInstr->setDesc(get(NewOpcode));
|
||||
CmpInstr->RemoveOperand(0);
|
||||
// Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
|
||||
if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
|
||||
NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the unique definition of SrcReg.
|
||||
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
|
||||
if (!MI) return false;
|
||||
|
|
|
@ -230,3 +230,26 @@ define i32 @q(i32 %j.4, i32 %w, i32 %el) {
|
|||
%j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
|
||||
ret i32 %j.5
|
||||
}
|
||||
; rdar://11873276
|
||||
define i8* @r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
|
||||
entry:
|
||||
; CHECK: r:
|
||||
; CHECK: sub
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: j
|
||||
; CHECK-NOT: sub
|
||||
; CHECK: ret
|
||||
%0 = load i32* %offset, align 8
|
||||
%cmp = icmp slt i32 %0, %size
|
||||
br i1 %cmp, label %return, label %if.end
|
||||
|
||||
if.end:
|
||||
%sub = sub nsw i32 %0, %size
|
||||
store i32 %sub, i32* %offset, align 8
|
||||
%add.ptr = getelementptr inbounds i8* %base, i32 %sub
|
||||
br label %return
|
||||
|
||||
return:
|
||||
%retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
|
||||
ret i8* %retval.0
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue