forked from OSchip/llvm-project
[TargetLowering] fix SETCC SETLT folding with FP types
The bug was introduced with https://reviews.llvm.org/rL294863 and manifests as a selection failure in x86, but that is actually a separate bug. This fix prevents wrong codegen with -0.0. In the more common case where we have NSZ and NNAN (-ffast-math), we should still be able to fold this setcc/compare; that is left as a FIXME. llvm-svn: 294924
This commit is contained in:
parent
31e1b8fe48
commit
0557a44287
|
@ -751,25 +751,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
||||||
KnownOne &= KnownOne2;
|
KnownOne &= KnownOne2;
|
||||||
KnownZero &= KnownZero2;
|
KnownZero &= KnownZero2;
|
||||||
break;
|
break;
|
||||||
case ISD::SETCC:
|
case ISD::SETCC: {
|
||||||
|
SDValue Op0 = Op.getOperand(0);
|
||||||
|
SDValue Op1 = Op.getOperand(1);
|
||||||
|
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
|
||||||
// If (1) we only need the sign-bit, (2) the setcc operands are the same
|
// If (1) we only need the sign-bit, (2) the setcc operands are the same
|
||||||
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
|
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
|
||||||
// -1, we may be able to bypass the setcc.
|
// -1, we may be able to bypass the setcc.
|
||||||
if (NewMask.isSignBit() &&
|
if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth &&
|
||||||
Op.getOperand(0).getScalarValueSizeInBits() == BitWidth &&
|
|
||||||
getBooleanContents(Op.getValueType()) ==
|
getBooleanContents(Op.getValueType()) ==
|
||||||
BooleanContent::ZeroOrNegativeOneBooleanContent) {
|
BooleanContent::ZeroOrNegativeOneBooleanContent) {
|
||||||
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
|
// If we're testing X < 0, then this compare isn't needed - just use X!
|
||||||
// If we're testing if X < 0, then this compare isn't needed - just use X!
|
// FIXME: We're limiting to integer types here, but this should also work
|
||||||
if (CC == ISD::SETLT &&
|
// if we don't care about FP signed-zero. The use of SETLT with FP means
|
||||||
(isNullConstant(Op.getOperand(1)) ||
|
// that we don't care about NaNs.
|
||||||
ISD::isBuildVectorAllZeros(Op.getOperand(1).getNode())))
|
if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
|
||||||
return TLO.CombineTo(Op, Op.getOperand(0));
|
(isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
|
||||||
|
return TLO.CombineTo(Op, Op0);
|
||||||
|
|
||||||
// TODO: Should we check for other forms of sign-bit comparisons?
|
// TODO: Should we check for other forms of sign-bit comparisons?
|
||||||
// Examples: X <= -1, X >= 0
|
// Examples: X <= -1, X >= 0
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case ISD::SHL:
|
case ISD::SHL:
|
||||||
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
||||||
unsigned ShAmt = SA->getZExtValue();
|
unsigned ShAmt = SA->getZExtValue();
|
||||||
|
|
|
@ -317,3 +317,27 @@ define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i6
|
||||||
ret <8 x double> %z
|
ret <8 x double> %z
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; If we have a floating-point compare:
|
||||||
|
; (1) Don't die.
|
||||||
|
; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded.
|
||||||
|
|
||||||
|
define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 {
|
||||||
|
; AVX12F-LABEL: signbit_sel_v4f32_fcmp:
|
||||||
|
; AVX12F: # BB#0:
|
||||||
|
; AVX12F-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||||
|
; AVX12F-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
|
||||||
|
; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX12F-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
|
||||||
|
; AVX512VL: # BB#0:
|
||||||
|
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||||
|
; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1
|
||||||
|
; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
|
||||||
|
; AVX512VL-NEXT: retq
|
||||||
|
%cmp = fcmp olt <4 x float> %x, zeroinitializer
|
||||||
|
%sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
|
||||||
|
ret <4 x float> %sel
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { "no-nans-fp-math"="true" }
|
||||||
|
|
Loading…
Reference in New Issue