forked from OSchip/llvm-project
[X86] Add a DAG combine for (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry)) and remove isel patterns.
The same is done for any_extend, though we don't have test coverage for that. The test changes occur because the isel patterns didn't check that the setcc_carry had only one use, so isel could end up with two differently sized setcc_carry instructions. And since the instruction clobbers the flags, we would need to recreate the flags for the second instruction. This code handles additional uses by truncating the new wide setcc_carry back to the original size for those uses.
This commit is contained in:
parent
491a4a7ac9
commit
a3d489e87e
|
@ -43806,6 +43806,23 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
|
||||||
EVT InVT = N0.getValueType();
|
EVT InVT = N0.getValueType();
|
||||||
SDLoc DL(N);
|
SDLoc DL(N);
|
||||||
|
|
||||||
|
// (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
|
||||||
|
if (!DCI.isBeforeLegalizeOps() &&
|
||||||
|
N0.getOpcode() == X86ISD::SETCC_CARRY) {
|
||||||
|
SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0),
|
||||||
|
N0->getOperand(1));
|
||||||
|
bool ReplaceOtherUses = !N0.hasOneUse();
|
||||||
|
DCI.CombineTo(N, Setcc);
|
||||||
|
// Replace other uses with a truncate of the widened setcc_carry.
|
||||||
|
if (ReplaceOtherUses) {
|
||||||
|
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
|
||||||
|
N0.getValueType(), Setcc);
|
||||||
|
DCI.CombineTo(N0.getNode(), Trunc);
|
||||||
|
}
|
||||||
|
|
||||||
|
return SDValue(N, 0);
|
||||||
|
}
|
||||||
|
|
||||||
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
|
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
|
||||||
return NewCMov;
|
return NewCMov;
|
||||||
|
|
||||||
|
@ -43936,6 +43953,24 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
|
||||||
SDValue N0 = N->getOperand(0);
|
SDValue N0 = N->getOperand(0);
|
||||||
EVT VT = N->getValueType(0);
|
EVT VT = N->getValueType(0);
|
||||||
|
|
||||||
|
// (i32 (aext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
|
||||||
|
// FIXME: Is this needed? We don't seem to have any tests for it.
|
||||||
|
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND &&
|
||||||
|
N0.getOpcode() == X86ISD::SETCC_CARRY) {
|
||||||
|
SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0),
|
||||||
|
N0->getOperand(1));
|
||||||
|
bool ReplaceOtherUses = !N0.hasOneUse();
|
||||||
|
DCI.CombineTo(N, Setcc);
|
||||||
|
// Replace other uses with a truncate of the widened setcc_carry.
|
||||||
|
if (ReplaceOtherUses) {
|
||||||
|
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
|
||||||
|
N0.getValueType(), Setcc);
|
||||||
|
DCI.CombineTo(N0.getNode(), Trunc);
|
||||||
|
}
|
||||||
|
|
||||||
|
return SDValue(N, 0);
|
||||||
|
}
|
||||||
|
|
||||||
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
|
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
|
||||||
return NewCMov;
|
return NewCMov;
|
||||||
|
|
||||||
|
|
|
@ -323,20 +323,6 @@ def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
|
||||||
} // isCodeGenOnly
|
} // isCodeGenOnly
|
||||||
|
|
||||||
|
|
||||||
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
|
||||||
(SETB_C16r)>;
|
|
||||||
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
|
||||||
(SETB_C32r)>;
|
|
||||||
def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
|
||||||
(SETB_C64r)>;
|
|
||||||
|
|
||||||
def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
|
||||||
(SETB_C16r)>;
|
|
||||||
def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
|
||||||
(SETB_C32r)>;
|
|
||||||
def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
|
||||||
(SETB_C64r)>;
|
|
||||||
|
|
||||||
// Patterns to give priority when both inputs are zero so that we don't use
|
// Patterns to give priority when both inputs are zero so that we don't use
|
||||||
// an immediate for the RHS.
|
// an immediate for the RHS.
|
||||||
// TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out?
|
// TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out?
|
||||||
|
@ -1900,15 +1886,6 @@ defm : one_bit_patterns<GR16, i16, BTR16rr, BTS16rr, BTC16rr, shiftMask16>;
|
||||||
defm : one_bit_patterns<GR32, i32, BTR32rr, BTS32rr, BTC32rr, shiftMask32>;
|
defm : one_bit_patterns<GR32, i32, BTR32rr, BTS32rr, BTC32rr, shiftMask32>;
|
||||||
defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>;
|
defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>;
|
||||||
|
|
||||||
|
|
||||||
// (anyext (setcc_carry)) -> (setcc_carry)
|
|
||||||
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
|
||||||
(SETB_C16r)>;
|
|
||||||
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
|
||||||
(SETB_C32r)>;
|
|
||||||
def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
|
|
||||||
(SETB_C32r)>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// EFLAGS-defining Patterns
|
// EFLAGS-defining Patterns
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
|
@ -293,44 +293,40 @@ bb1:
|
||||||
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64 %arg5) nounwind {
|
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64 %arg5) nounwind {
|
||||||
; X32-LABEL: PR37431:
|
; X32-LABEL: PR37431:
|
||||||
; X32: # %bb.0: # %entry
|
; X32: # %bb.0: # %entry
|
||||||
|
; X32-NEXT: pushl %ebx
|
||||||
; X32-NEXT: pushl %edi
|
; X32-NEXT: pushl %edi
|
||||||
; X32-NEXT: pushl %esi
|
; X32-NEXT: pushl %esi
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X32-NEXT: movl (%ecx), %ecx
|
|
||||||
; X32-NEXT: movl %ecx, %edx
|
|
||||||
; X32-NEXT: sarl $31, %edx
|
|
||||||
; X32-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
|
|
||||||
; X32-NEXT: sbbl %edx, %eax
|
|
||||||
; X32-NEXT: setb %cl
|
|
||||||
; X32-NEXT: sbbb %dl, %dl
|
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||||
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||||
; X32-NEXT: movb %dl, (%edi)
|
; X32-NEXT: movl (%edi), %edi
|
||||||
; X32-NEXT: movzbl %cl, %ecx
|
; X32-NEXT: movl %edi, %ebx
|
||||||
; X32-NEXT: xorl %edi, %edi
|
; X32-NEXT: sarl $31, %ebx
|
||||||
; X32-NEXT: subl %ecx, %edi
|
; X32-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
|
||||||
|
; X32-NEXT: sbbl %ebx, %esi
|
||||||
|
; X32-NEXT: sbbl %ebx, %ebx
|
||||||
|
; X32-NEXT: movb %bl, (%edx)
|
||||||
; X32-NEXT: cltd
|
; X32-NEXT: cltd
|
||||||
; X32-NEXT: idivl %edi
|
; X32-NEXT: idivl %ebx
|
||||||
; X32-NEXT: movb %dl, (%esi)
|
; X32-NEXT: movb %dl, (%ecx)
|
||||||
; X32-NEXT: popl %esi
|
; X32-NEXT: popl %esi
|
||||||
; X32-NEXT: popl %edi
|
; X32-NEXT: popl %edi
|
||||||
|
; X32-NEXT: popl %ebx
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: PR37431:
|
; X64-LABEL: PR37431:
|
||||||
; X64: # %bb.0: # %entry
|
; X64: # %bb.0: # %entry
|
||||||
; X64-NEXT: movl %ecx, %eax
|
; X64-NEXT: movl %ecx, %eax
|
||||||
; X64-NEXT: movq %rdx, %r9
|
; X64-NEXT: movq %rdx, %rcx
|
||||||
; X64-NEXT: movslq (%rdi), %rdx
|
; X64-NEXT: movslq (%rdi), %rdx
|
||||||
; X64-NEXT: cmpq %rdx, %r8
|
; X64-NEXT: cmpq %rdx, %r8
|
||||||
; X64-NEXT: sbbb %cl, %cl
|
; X64-NEXT: sbbl %edi, %edi
|
||||||
; X64-NEXT: cmpq %rdx, %r8
|
; X64-NEXT: movb %dil, (%rsi)
|
||||||
; X64-NEXT: movb %cl, (%rsi)
|
|
||||||
; X64-NEXT: sbbl %ecx, %ecx
|
|
||||||
; X64-NEXT: cltd
|
; X64-NEXT: cltd
|
||||||
; X64-NEXT: idivl %ecx
|
; X64-NEXT: idivl %edi
|
||||||
; X64-NEXT: movb %dl, (%r9)
|
; X64-NEXT: movb %dl, (%rcx)
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%tmp = load i32, i32* %arg1
|
%tmp = load i32, i32* %arg1
|
||||||
|
|
Loading…
Reference in New Issue