add X86-specific DAG optimization to simplify boolean test

- if a boolean test (X86ISD::CMP or X86ISD::SUB) checks a boolean value
  generated from X86ISD::SETCC, try to simplify the boolean value
  generation and checking by reusing the original EFLAGS with the proper
  condition code
- add hooks to the X86-specific SETCC/BRCOND/CMOV, the three major places
  consuming EFLAGS

part of patches fixing PR12312

llvm-svn: 161687
This commit is contained in:
Michael Liao 2012-08-10 19:58:13 +00:00
parent a7020aee00
commit 5248e9913f
2 changed files with 155 additions and 4 deletions

View File

@ -13782,6 +13782,72 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
// Recognize a boolean test (a compare against 0 or 1) whose tested value was
// itself produced by X86ISD::SETCC, so the consumer can read the EFLAGS that
// fed that SETCC instead of re-materializing and re-testing the boolean.
//
// On a match, the flags operand of the SETCC is returned and CC is rewritten
// to the condition that must be tested on those flags. On a mismatch an empty
// SDValue is returned and CC is left unchanged.
//
// Rewrites enabled by a match:
//   (Op (CMP (SETCC Cond EFLAGS) 1) EQ)   or
//   (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)     ==>  (Op EFLAGS Cond)
//
//   (Op (CMP (SETCC Cond EFLAGS) 0) EQ)   or
//   (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)     ==>  (Op EFLAGS !Cond)
//
// where Op could be BRCOND or CMOV.
//
static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
  // The flag producer must be a CMP, or a SUB whose value result (result 0)
  // has no users.
  const unsigned CmpOpc = Cmp.getOpcode();
  const bool IsFlagOnlySub =
      CmpOpc == X86ISD::SUB && !Cmp.getNode()->hasAnyUseOfValue(0);
  if (CmpOpc != X86ISD::CMP && !IsFlagOnlySub)
    return SDValue();

  // The consumer has to be an equality / inequality test, otherwise the
  // compared value is not being used as a boolean.
  const bool TestsEqual = (CC == X86::COND_E);
  if (!TestsEqual && CC != X86::COND_NE)
    return SDValue();

  // Exactly one side supplies the constant; the other side is the candidate
  // boolean. Operand 0 is tried as the constant first.
  SDValue LHS = Cmp.getOperand(0);
  SDValue RHS = Cmp.getOperand(1);
  SDValue Candidate;
  const ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(LHS);
  if (Imm) {
    Candidate = RHS;
  } else {
    Imm = dyn_cast<ConstantSDNode>(RHS);
    if (!Imm)
      return SDValue(); // Neither operand is a constant.
    Candidate = LHS;
  }

  // "== 0" and "!= 1" test for a false boolean, so the SETCC condition has to
  // be inverted; "!= 0" and "== 1" keep it as is. Any other constant is not a
  // boolean test.
  bool Invert;
  switch (Imm->getZExtValue()) {
  case 0:
    Invert = TestsEqual;
    break;
  case 1:
    Invert = !TestsEqual;
    break;
  default:
    return SDValue();
  }

  // Look through a single 'zext' wrapped around the boolean.
  if (Candidate.getOpcode() == ISD::ZERO_EXTEND)
    Candidate = Candidate.getOperand(0);

  // FIXME: Only booleans produced by X86ISD::SETCC are recognized so far;
  // other ways of generating a boolean value would need handling here too.
  if (Candidate.getOpcode() != X86ISD::SETCC)
    return SDValue();

  // Publish the (possibly inverted) condition of the SETCC and hand back the
  // flags it was computed from.
  const X86::CondCode SetCCCond =
      X86::CondCode(Candidate.getConstantOperandVal(0));
  CC = Invert ? X86::GetOppositeBranchCondition(SetCCCond) : SetCCCond;
  return Candidate.getOperand(1);
}
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
@ -13795,6 +13861,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
SDValue TrueOp = N->getOperand(1);
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
if (CC == X86::COND_E || CC == X86::COND_NE) {
switch (Cond.getOpcode()) {
default: break;
@ -13806,6 +13873,16 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
}
}
SDValue Flags;
Flags = BoolTestSetCCCombine(Cond, CC);
if (Flags.getNode()) {
SDValue Ops[] = { FalseOp, TrueOp,
DAG.getConstant(CC, MVT::i8), Flags };
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
Ops, array_lengthof(Ops));
}
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
@ -15285,19 +15362,50 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
//
// NOTE(review): this listing appears to come from a diff view whose +/-
// markers are not shown, so replaced lines sit next to their replacements.
// The lines using the names X86CC and EFLAG look like the pre-change text and
// the lines using CC and EFLAGS like the post-change text. Read literally as
// one function, the block is not valid C++ (the return expression below has
// unbalanced parentheses). Confirm against the real file before relying on it.
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
unsigned X86CC = N->getConstantOperandVal(0);
SDValue EFLAG = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
// Operand 0 is read as the condition code, operand 1 as the flags input.
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
SDValue EFLAGS = N->getOperand(1);
// Materialize "setb reg" as "sbb reg,reg", since it can be extended without
// a zext and produces an all-ones bit which is more useful than 0/1 in some
// cases.
if (X86CC == X86::COND_B)
if (CC == X86::COND_B)
return DAG.getNode(ISD::AND, DL, MVT::i8,
DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), EFLAG),
DAG.getConstant(CC, MVT::i8), EFLAGS),
DAG.getConstant(1, MVT::i8));
// If the flags input is a boolean test of another X86ISD::SETCC, rebuild this
// SETCC on the flags value returned by BoolTestSetCCCombine, using the
// condition code that helper wrote back into CC.
SDValue Flags;
Flags = BoolTestSetCCCombine(EFLAGS, CC);
if (Flags.getNode()) {
SDValue Cond = DAG.getConstant(CC, MVT::i8);
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
}
// No simplification applies.
return SDValue();
}
// DAG combine for X86ISD::BRCOND (operands: chain, destination, condition
// code, flags). When the flags operand is a boolean test of a value produced
// by X86ISD::SETCC, re-emit the branch on the flags value returned by
// BoolTestSetCCCombine with the adjusted condition code; otherwise return an
// empty SDValue to signal that nothing changed.
//
static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget *Subtarget) {
  // CC is passed by reference and is only rewritten when the helper succeeds.
  X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
  SDValue NewFlags = BoolTestSetCCCombine(N->getOperand(3), CC);
  if (!NewFlags.getNode())
    return SDValue();

  // Same chain and destination as before; only the condition code and the
  // flags producer are replaced.
  DebugLoc DL = N->getDebugLoc();
  SDValue NewCond = DAG.getConstant(CC, MVT::i8);
  return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
                     N->getOperand(1), NewCond, NewFlags);
}
@ -15515,6 +15623,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGN:
case X86ISD::UNPCKH:

View File

@ -0,0 +1,42 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx | FileCheck %s
; Select case: the ptestz result is compared "ne 0" and the outcome picks
; between %a and %b. The directives at the end of the function expect a ptest
; followed by a cmov, with no testl emitted between them.
define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) {
%t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
%t2 = icmp ne i32 %t1, 0
%t3 = select i1 %t2, i32 %a, i32 %b
ret i32 %t3
; CHECK: foo
; CHECK: ptest
; CHECK-NOT: testl
; CHECK: cmov
; CHECK: ret
}
; Branch case: the ptestz result is compared "ne 0" and the outcome drives a
; conditional branch. The directives at the end of the function expect a ptest
; followed by a jne, with no testl emitted between them.
define i32 @bar(<2 x i64> %c) {
entry:
%0 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
%1 = icmp ne i32 %0, 0
br i1 %1, label %if-true-block, label %endif-block
if-true-block: ; preds = %entry
ret i32 0
endif-block: ; preds = %entry
ret i32 1
; CHECK: bar
; CHECK: ptest
; CHECK-NOT: testl
; CHECK: jne
; CHECK: ret
}
; Value case: the ptestz result is compared "eq 1" and the i1 outcome is
; zero-extended and returned. The directives at the end of the function expect
; a ptest and no cmpl before the ret.
define i32 @bax(<2 x i64> %c) {
%t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
%t2 = icmp eq i32 %t1, 1
%t3 = zext i1 %t2 to i32
ret i32 %t3
; CHECK: bax
; CHECK: ptest
; CHECK-NOT: cmpl
; CHECK: ret
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone