forked from OSchip/llvm-project
[X86] Prevent LowerSELECT from causing suboptimal codegen for __builtin_ffs(X) - 1.
LowerSELECT sees the CMP with 0 and wants to use a trick with SUB and SBB. But we can use the flags from the BSF/TZCNT. Fixes PR46203. Differential Revision: https://reviews.llvm.org/D81312
This commit is contained in:
parent
f3d8a93970
commit
2328cab16c
|
@ -22851,12 +22851,25 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||||
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
|
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
|
||||||
isNullConstant(Cond.getOperand(1).getOperand(1))) {
|
isNullConstant(Cond.getOperand(1).getOperand(1))) {
|
||||||
SDValue Cmp = Cond.getOperand(1);
|
SDValue Cmp = Cond.getOperand(1);
|
||||||
|
SDValue CmpOp0 = Cmp.getOperand(0);
|
||||||
unsigned CondCode = Cond.getConstantOperandVal(0);
|
unsigned CondCode = Cond.getConstantOperandVal(0);
|
||||||
|
|
||||||
if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
|
// Special handling for __builtin_ffs(X) - 1 pattern which looks like
|
||||||
|
// (select (seteq X, 0), -1, (cttz_zero_undef X)). Disable the special
|
||||||
|
// handling to keep the CMP with 0. This should be removed by
|
||||||
|
// optimizeCompareInst by using the flags from the BSF/TZCNT used for the
|
||||||
|
// cttz_zero_undef.
|
||||||
|
auto MatchFFSMinus1 = [&](SDValue Op1, SDValue Op2) {
|
||||||
|
return (Op1.getOpcode() == ISD::CTTZ_ZERO_UNDEF && Op1.hasOneUse() &&
|
||||||
|
Op1.getOperand(0) == CmpOp0 && isAllOnesConstant(Op2));
|
||||||
|
};
|
||||||
|
if (Subtarget.hasCMov() && (VT == MVT::i32 || VT == MVT::i64) &&
|
||||||
|
((CondCode == X86::COND_NE && MatchFFSMinus1(Op1, Op2)) ||
|
||||||
|
(CondCode == X86::COND_E && MatchFFSMinus1(Op2, Op1)))) {
|
||||||
|
// Keep Cmp.
|
||||||
|
} else if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
|
||||||
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
|
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
|
||||||
SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
|
SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
|
||||||
SDValue CmpOp0 = Cmp.getOperand(0);
|
|
||||||
|
|
||||||
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
|
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
|
||||||
SDVTList CmpVTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
|
SDVTList CmpVTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
|
||||||
|
@ -22886,7 +22899,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||||
} else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
|
} else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
|
||||||
Cmp.getOperand(0).getOpcode() == ISD::AND &&
|
Cmp.getOperand(0).getOpcode() == ISD::AND &&
|
||||||
isOneConstant(Cmp.getOperand(0).getOperand(1))) {
|
isOneConstant(Cmp.getOperand(0).getOperand(1))) {
|
||||||
SDValue CmpOp0 = Cmp.getOperand(0);
|
|
||||||
SDValue Src1, Src2;
|
SDValue Src1, Src2;
|
||||||
// true if Op2 is XOR or OR operator and one of its operands
|
// true if Op2 is XOR or OR operator and one of its operands
|
||||||
// is equal to Op1
|
// is equal to Op1
|
||||||
|
|
|
@ -436,19 +436,15 @@ define i32 @cttz_32_eq_select_ffs_m1(i32 %v) nounwind {
|
||||||
; NOBMI-LABEL: cttz_32_eq_select_ffs_m1:
|
; NOBMI-LABEL: cttz_32_eq_select_ffs_m1:
|
||||||
; NOBMI: # %bb.0:
|
; NOBMI: # %bb.0:
|
||||||
; NOBMI-NEXT: bsfl %edi, %ecx
|
; NOBMI-NEXT: bsfl %edi, %ecx
|
||||||
; NOBMI-NEXT: xorl %eax, %eax
|
; NOBMI-NEXT: movl $-1, %eax
|
||||||
; NOBMI-NEXT: cmpl $1, %edi
|
; NOBMI-NEXT: cmovnel %ecx, %eax
|
||||||
; NOBMI-NEXT: sbbl %eax, %eax
|
|
||||||
; NOBMI-NEXT: orl %ecx, %eax
|
|
||||||
; NOBMI-NEXT: retq
|
; NOBMI-NEXT: retq
|
||||||
;
|
;
|
||||||
; BMI-LABEL: cttz_32_eq_select_ffs_m1:
|
; BMI-LABEL: cttz_32_eq_select_ffs_m1:
|
||||||
; BMI: # %bb.0:
|
; BMI: # %bb.0:
|
||||||
; BMI-NEXT: tzcntl %edi, %ecx
|
; BMI-NEXT: tzcntl %edi, %ecx
|
||||||
; BMI-NEXT: xorl %eax, %eax
|
; BMI-NEXT: movl $-1, %eax
|
||||||
; BMI-NEXT: cmpl $1, %edi
|
; BMI-NEXT: cmovael %ecx, %eax
|
||||||
; BMI-NEXT: sbbl %eax, %eax
|
|
||||||
; BMI-NEXT: orl %ecx, %eax
|
|
||||||
; BMI-NEXT: retq
|
; BMI-NEXT: retq
|
||||||
|
|
||||||
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
|
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
|
||||||
|
@ -461,19 +457,15 @@ define i32 @cttz_32_ne_select_ffs_m1(i32 %v) nounwind {
|
||||||
; NOBMI-LABEL: cttz_32_ne_select_ffs_m1:
|
; NOBMI-LABEL: cttz_32_ne_select_ffs_m1:
|
||||||
; NOBMI: # %bb.0:
|
; NOBMI: # %bb.0:
|
||||||
; NOBMI-NEXT: bsfl %edi, %ecx
|
; NOBMI-NEXT: bsfl %edi, %ecx
|
||||||
; NOBMI-NEXT: xorl %eax, %eax
|
; NOBMI-NEXT: movl $-1, %eax
|
||||||
; NOBMI-NEXT: cmpl $1, %edi
|
; NOBMI-NEXT: cmovnel %ecx, %eax
|
||||||
; NOBMI-NEXT: sbbl %eax, %eax
|
|
||||||
; NOBMI-NEXT: orl %ecx, %eax
|
|
||||||
; NOBMI-NEXT: retq
|
; NOBMI-NEXT: retq
|
||||||
;
|
;
|
||||||
; BMI-LABEL: cttz_32_ne_select_ffs_m1:
|
; BMI-LABEL: cttz_32_ne_select_ffs_m1:
|
||||||
; BMI: # %bb.0:
|
; BMI: # %bb.0:
|
||||||
; BMI-NEXT: tzcntl %edi, %ecx
|
; BMI-NEXT: tzcntl %edi, %ecx
|
||||||
; BMI-NEXT: xorl %eax, %eax
|
; BMI-NEXT: movl $-1, %eax
|
||||||
; BMI-NEXT: cmpl $1, %edi
|
; BMI-NEXT: cmovael %ecx, %eax
|
||||||
; BMI-NEXT: sbbl %eax, %eax
|
|
||||||
; BMI-NEXT: orl %ecx, %eax
|
|
||||||
; BMI-NEXT: retq
|
; BMI-NEXT: retq
|
||||||
|
|
||||||
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
|
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
|
||||||
|
|
Loading…
Reference in New Issue