forked from OSchip/llvm-project
Improve the setcc -> setcc_carry optimization to happen more
consistently by moving it out of lowering into DAG combine. Add some missing patterns for matching away extended versions of setcc_c. llvm-svn: 122201
This commit is contained in:
parent
fff42e6241
commit
9edf3f50bf
|
@ -7053,17 +7053,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
|||
if (X86CC == X86::COND_INVALID)
|
||||
return SDValue();
|
||||
|
||||
SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
|
||||
|
||||
// Use sbb x, x to materialize carry bit into a GPR.
|
||||
if (X86CC == X86::COND_B)
|
||||
return DAG.getNode(ISD::AND, dl, MVT::i8,
|
||||
DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
|
||||
DAG.getConstant(X86CC, MVT::i8), Cond),
|
||||
DAG.getConstant(1, MVT::i8));
|
||||
|
||||
SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
|
||||
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
|
||||
DAG.getConstant(X86CC, MVT::i8), Cond);
|
||||
DAG.getConstant(X86CC, MVT::i8), EFLAGS);
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
@ -11430,13 +11422,31 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
|
||||
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
unsigned X86CC = N->getConstantOperandVal(0);
|
||||
SDValue EFLAG = N->getOperand(1);
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
|
||||
// Materialize "setb reg" as "sbb reg,reg", since it can be extended without
|
||||
// a zext and produces an all-ones bit which is more useful than 0/1 in some
|
||||
// cases.
|
||||
if (X86CC == X86::COND_B)
|
||||
return DAG.getNode(ISD::AND, DL, MVT::i8,
|
||||
DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
|
||||
DAG.getConstant(X86CC, MVT::i8), EFLAG),
|
||||
DAG.getConstant(1, MVT::i8));
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
switch (N->getOpcode()) {
|
||||
default: break;
|
||||
case ISD::EXTRACT_VECTOR_ELT:
|
||||
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
|
||||
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
|
||||
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
|
||||
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
|
||||
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
|
||||
|
@ -11452,6 +11462,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
|
||||
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
|
||||
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
|
||||
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
|
||||
case X86ISD::SHUFPS: // Handle all target specific shuffles
|
||||
case X86ISD::SHUFPD:
|
||||
case X86ISD::PALIGN:
|
||||
|
|
|
@ -92,7 +92,7 @@ namespace llvm {
|
|||
|
||||
// Same as SETCC except it's materialized with an sbb and the value is all
|
||||
// ones or all zeros.
|
||||
SETCC_CARRY,
|
||||
SETCC_CARRY, // R = carry_bit ? ~0 : 0
|
||||
|
||||
/// X86 conditional moves. Operand 0 and operand 1 are the two values
|
||||
/// to select from. Operand 2 is the condition code, and operand 3 is the
|
||||
|
|
|
@ -193,9 +193,20 @@ def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
|
|||
} // isCodeGenOnly
|
||||
|
||||
|
||||
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
||||
(SETB_C16r)>;
|
||||
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
||||
(SETB_C32r)>;
|
||||
def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
||||
(SETB_C64r)>;
|
||||
|
||||
def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
||||
(SETB_C16r)>;
|
||||
def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
||||
(SETB_C32r)>;
|
||||
def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
||||
(SETB_C64r)>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// String Pseudo Instructions
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
; RUN: llc < %s -march=x86 | FileCheck %s
|
||||
; <rdar://problem/8449754>
|
||||
|
||||
define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp {
|
||||
define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
|
||||
entry:
|
||||
; CHECK: test1:
|
||||
; CHECK: sbbl %ecx, %ecx
|
||||
; CHECK-NOT: addl
|
||||
; CHECK: subl %ecx, %eax
|
||||
|
@ -12,3 +13,22 @@ entry:
|
|||
%z.0 = add i32 %add4, %inc
|
||||
ret i32 %z.0
|
||||
}
|
||||
|
||||
; Instcombine transforms test1 into test2:
|
||||
; CHECK: test2:
|
||||
; CHECK: movl
|
||||
; CHECK-NEXT: addl
|
||||
; CHECK-NEXT: sbbl
|
||||
; CHECK-NEXT: subl
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
|
||||
entry:
|
||||
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
|
||||
%0 = extractvalue { i32, i1 } %uadd, 0
|
||||
%cmp = extractvalue { i32, i1 } %uadd, 1
|
||||
%inc = zext i1 %cmp to i32
|
||||
%z.0 = add i32 %0, %inc
|
||||
ret i32 %z.0
|
||||
}
|
||||
|
||||
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
|
||||
|
|
|
@ -114,8 +114,8 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno
|
|||
|
||||
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK: vcomisd
|
||||
; CHECK: setb
|
||||
; CHECK: movzbl
|
||||
; CHECK: sbbl %eax, %eax
|
||||
; CHECK: andl $1, %eax
|
||||
%res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
|
||||
ret i32 %res
|
||||
}
|
||||
|
@ -825,8 +825,7 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn
|
|||
|
||||
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK: vucomisd
|
||||
; CHECK: setb
|
||||
; CHECK: movzbl
|
||||
; CHECK: sbbl
|
||||
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
|
||||
ret i32 %res
|
||||
}
|
||||
|
@ -1183,8 +1182,7 @@ declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
|
|||
|
||||
define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK: vptest
|
||||
; CHECK: setb
|
||||
; CHECK: movzbl
|
||||
; CHECK: sbbl
|
||||
%res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
|
||||
ret i32 %res
|
||||
}
|
||||
|
@ -1455,8 +1453,7 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
|
|||
|
||||
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK: vcomiss
|
||||
; CHECK: setb
|
||||
; CHECK: movzbl
|
||||
; CHECK: sbb
|
||||
%res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
|
||||
ret i32 %res
|
||||
}
|
||||
|
@ -1697,8 +1694,7 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
|
|||
|
||||
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK: vucomiss
|
||||
; CHECK: setb
|
||||
; CHECK: movzbl
|
||||
; CHECK: sbbl
|
||||
%res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
|
||||
ret i32 %res
|
||||
}
|
||||
|
@ -2173,8 +2169,7 @@ declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
|
|||
|
||||
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK: vptest
|
||||
; CHECK: setb
|
||||
; CHECK: movzbl
|
||||
; CHECK: sbbl
|
||||
%res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
|
||||
ret i32 %res
|
||||
}
|
||||
|
@ -2451,8 +2446,7 @@ declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) noun
|
|||
|
||||
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK: vtestpd
|
||||
; CHECK: setb
|
||||
; CHECK: movzbl
|
||||
; CHECK: sbbl
|
||||
%res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
|
||||
ret i32 %res
|
||||
}
|
||||
|
@ -2461,8 +2455,7 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnon
|
|||
|
||||
define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
|
||||
; CHECK: vtestpd
|
||||
; CHECK: setb
|
||||
; CHECK: movzbl
|
||||
; CHECK: sbbl
|
||||
%res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
|
||||
ret i32 %res
|
||||
}
|
||||
|
@ -2471,8 +2464,7 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind rea
|
|||
|
||||
define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK: vtestps
|
||||
; CHECK: setb
|
||||
; CHECK: movzbl
|
||||
; CHECK: sbbl
|
||||
%res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
|
||||
ret i32 %res
|
||||
}
|
||||
|
@ -2481,8 +2473,7 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
|
|||
|
||||
define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
|
||||
; CHECK: vtestps
|
||||
; CHECK: setb
|
||||
; CHECK: movzbl
|
||||
; CHECK: sbbl
|
||||
%res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
|
||||
ret i32 %res
|
||||
}
|
||||
|
|
|
@ -200,11 +200,11 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
|
|||
ret i32 %tmp1
|
||||
; X32: _ptestz_2:
|
||||
; X32: ptest %xmm1, %xmm0
|
||||
; X32: setb %al
|
||||
; X32: sbbl %eax
|
||||
|
||||
; X64: _ptestz_2:
|
||||
; X64: ptest %xmm1, %xmm0
|
||||
; X64: setb %al
|
||||
; X64: sbbl %eax
|
||||
}
|
||||
|
||||
define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
|
||||
|
|
Loading…
Reference in New Issue