[X86] Add a DAG combine for (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry)) and remove isel patterns.

The same is done for any_extend, though we don't have test coverage for that.

The test changes are because the isel patterns didn't check that the
setcc_carry had only one use. So when it had multiple uses, isel would
end up with two different sized setcc_carry instructions. And since
the instruction clobbers the flags, we would need to recreate the
flags for the second instruction.

This code handles additional uses by truncating the new wide
setcc_carry back to the original size for those uses.
This commit is contained in:
Craig Topper 2020-02-03 23:35:16 -08:00
parent 491a4a7ac9
commit a3d489e87e
3 changed files with 53 additions and 45 deletions

View File

@ -43806,6 +43806,23 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
EVT InVT = N0.getValueType();
SDLoc DL(N);
// (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
if (!DCI.isBeforeLegalizeOps() &&
N0.getOpcode() == X86ISD::SETCC_CARRY) {
SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0),
N0->getOperand(1));
bool ReplaceOtherUses = !N0.hasOneUse();
DCI.CombineTo(N, Setcc);
// Replace other uses with a truncate of the widened setcc_carry.
if (ReplaceOtherUses) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), Setcc);
DCI.CombineTo(N0.getNode(), Trunc);
}
return SDValue(N, 0);
}
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
return NewCMov;
@ -43936,6 +43953,24 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// (i32 (aext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
// FIXME: Is this needed? We don't seem to have any tests for it.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND &&
N0.getOpcode() == X86ISD::SETCC_CARRY) {
SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0),
N0->getOperand(1));
bool ReplaceOtherUses = !N0.hasOneUse();
DCI.CombineTo(N, Setcc);
// Replace other uses with a truncate of the widened setcc_carry.
if (ReplaceOtherUses) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), Setcc);
DCI.CombineTo(N0.getNode(), Trunc);
}
return SDValue(N, 0);
}
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
return NewCMov;

View File

@ -323,20 +323,6 @@ def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
} // isCodeGenOnly
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C16r)>;
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C64r)>;
def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C16r)>;
def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C64r)>;
// Patterns to give priority when both inputs are zero so that we don't use
// an immediate for the RHS.
// TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out?
@ -1900,15 +1886,6 @@ defm : one_bit_patterns<GR16, i16, BTR16rr, BTS16rr, BTC16rr, shiftMask16>;
defm : one_bit_patterns<GR32, i32, BTR32rr, BTS32rr, BTC32rr, shiftMask32>;
defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>;
// (anyext (setcc_carry)) -> (setcc_carry)
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C16r)>;
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//

View File

@ -293,44 +293,40 @@ bb1:
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64 %arg5) nounwind {
; X32-LABEL: PR37431:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%ecx), %ecx
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: sarl $31, %edx
; X32-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
; X32-NEXT: sbbl %edx, %eax
; X32-NEXT: setb %cl
; X32-NEXT: sbbb %dl, %dl
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movb %dl, (%edi)
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: xorl %edi, %edi
; X32-NEXT: subl %ecx, %edi
; X32-NEXT: movl (%edi), %edi
; X32-NEXT: movl %edi, %ebx
; X32-NEXT: sarl $31, %ebx
; X32-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
; X32-NEXT: sbbl %ebx, %esi
; X32-NEXT: sbbl %ebx, %ebx
; X32-NEXT: movb %bl, (%edx)
; X32-NEXT: cltd
; X32-NEXT: idivl %edi
; X32-NEXT: movb %dl, (%esi)
; X32-NEXT: idivl %ebx
; X32-NEXT: movb %dl, (%ecx)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: PR37431:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: movq %rdx, %r9
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movslq (%rdi), %rdx
; X64-NEXT: cmpq %rdx, %r8
; X64-NEXT: sbbb %cl, %cl
; X64-NEXT: cmpq %rdx, %r8
; X64-NEXT: movb %cl, (%rsi)
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: sbbl %edi, %edi
; X64-NEXT: movb %dil, (%rsi)
; X64-NEXT: cltd
; X64-NEXT: idivl %ecx
; X64-NEXT: movb %dl, (%r9)
; X64-NEXT: idivl %edi
; X64-NEXT: movb %dl, (%rcx)
; X64-NEXT: retq
entry:
%tmp = load i32, i32* %arg1