forked from OSchip/llvm-project
Re-apply r237247 - [AArch64] Codegen VMAX/VMIN for safe math cases
No longer breaks SPEC2000/2006 llvm-svn: 237361
This commit is contained in:
parent
708fa8e899
commit
a70dfe18d3
|
@ -491,6 +491,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||||
|
|
||||||
setTargetDAGCombine(ISD::SELECT);
|
setTargetDAGCombine(ISD::SELECT);
|
||||||
setTargetDAGCombine(ISD::VSELECT);
|
setTargetDAGCombine(ISD::VSELECT);
|
||||||
|
setTargetDAGCombine(ISD::SELECT_CC);
|
||||||
|
|
||||||
setTargetDAGCombine(ISD::INTRINSIC_VOID);
|
setTargetDAGCombine(ISD::INTRINSIC_VOID);
|
||||||
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
|
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
|
||||||
|
@ -3567,7 +3568,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||||
/// operations would *not* be semantically equivalent.
|
/// operations would *not* be semantically equivalent.
|
||||||
static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
|
static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
|
||||||
if (Cmp == Result)
|
if (Cmp == Result)
|
||||||
return true;
|
return (Cmp.getValueType() == MVT::f32 ||
|
||||||
|
Cmp.getValueType() == MVT::f64);
|
||||||
|
|
||||||
ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
|
ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
|
||||||
ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
|
ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
|
||||||
|
@ -3701,46 +3703,6 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
|
||||||
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
|
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
|
||||||
assert(LHS.getValueType() == RHS.getValueType());
|
assert(LHS.getValueType() == RHS.getValueType());
|
||||||
EVT VT = TVal.getValueType();
|
EVT VT = TVal.getValueType();
|
||||||
|
|
||||||
// Try to match this select into a max/min operation, which have dedicated
|
|
||||||
// opcode in the instruction set.
|
|
||||||
// FIXME: This is not correct in the presence of NaNs, so we only enable this
|
|
||||||
// in no-NaNs mode.
|
|
||||||
if (getTargetMachine().Options.NoNaNsFPMath) {
|
|
||||||
SDValue MinMaxLHS = TVal, MinMaxRHS = FVal;
|
|
||||||
if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) &&
|
|
||||||
selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) {
|
|
||||||
CC = ISD::getSetCCSwappedOperands(CC);
|
|
||||||
std::swap(MinMaxLHS, MinMaxRHS);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) &&
|
|
||||||
selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) {
|
|
||||||
switch (CC) {
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
case ISD::SETGT:
|
|
||||||
case ISD::SETGE:
|
|
||||||
case ISD::SETUGT:
|
|
||||||
case ISD::SETUGE:
|
|
||||||
case ISD::SETOGT:
|
|
||||||
case ISD::SETOGE:
|
|
||||||
return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS);
|
|
||||||
break;
|
|
||||||
case ISD::SETLT:
|
|
||||||
case ISD::SETLE:
|
|
||||||
case ISD::SETULT:
|
|
||||||
case ISD::SETULE:
|
|
||||||
case ISD::SETOLT:
|
|
||||||
case ISD::SETOLE:
|
|
||||||
return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
|
|
||||||
// and do the comparison.
|
|
||||||
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
|
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
|
||||||
|
|
||||||
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
|
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
|
||||||
|
@ -8735,6 +8697,75 @@ static SDValue performSelectCombine(SDNode *N,
|
||||||
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
|
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC
|
||||||
|
/// to match FMIN/FMAX patterns.
|
||||||
|
static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
|
||||||
|
// Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
|
||||||
|
// Unless the NoNaNsFPMath option is set, be careful about NaNs:
|
||||||
|
// vmax/vmin return NaN if either operand is a NaN;
|
||||||
|
// only do the transformation when it matches that behavior.
|
||||||
|
|
||||||
|
SDValue CondLHS = N->getOperand(0);
|
||||||
|
SDValue CondRHS = N->getOperand(1);
|
||||||
|
SDValue LHS = N->getOperand(2);
|
||||||
|
SDValue RHS = N->getOperand(3);
|
||||||
|
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
|
||||||
|
|
||||||
|
unsigned Opcode;
|
||||||
|
bool IsReversed;
|
||||||
|
if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
|
||||||
|
selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
|
||||||
|
IsReversed = false; // x CC y ? x : y
|
||||||
|
} else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
|
||||||
|
selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
|
||||||
|
IsReversed = true ; // x CC y ? y : x
|
||||||
|
} else {
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsUnordered = false, IsOrEqual;
|
||||||
|
switch (CC) {
|
||||||
|
default:
|
||||||
|
return SDValue();
|
||||||
|
case ISD::SETULT:
|
||||||
|
case ISD::SETULE:
|
||||||
|
IsUnordered = true;
|
||||||
|
case ISD::SETOLT:
|
||||||
|
case ISD::SETOLE:
|
||||||
|
case ISD::SETLT:
|
||||||
|
case ISD::SETLE:
|
||||||
|
IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
|
||||||
|
Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ISD::SETUGT:
|
||||||
|
case ISD::SETUGE:
|
||||||
|
IsUnordered = true;
|
||||||
|
case ISD::SETOGT:
|
||||||
|
case ISD::SETOGE:
|
||||||
|
case ISD::SETGT:
|
||||||
|
case ISD::SETGE:
|
||||||
|
IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
|
||||||
|
Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If LHS is NaN, an ordered comparison will be false and the result will be
|
||||||
|
// the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
|
||||||
|
// that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
|
||||||
|
if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
|
||||||
|
// but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
|
||||||
|
// used for unsafe math or if one of the operands is known to be nonzero.
|
||||||
|
if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
|
||||||
|
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
|
||||||
|
}
|
||||||
|
|
||||||
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
|
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
|
||||||
DAGCombinerInfo &DCI) const {
|
DAGCombinerInfo &DCI) const {
|
||||||
SelectionDAG &DAG = DCI.DAG;
|
SelectionDAG &DAG = DCI.DAG;
|
||||||
|
@ -8767,6 +8798,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
|
||||||
return performSelectCombine(N, DCI);
|
return performSelectCombine(N, DCI);
|
||||||
case ISD::VSELECT:
|
case ISD::VSELECT:
|
||||||
return performVSelectCombine(N, DCI.DAG);
|
return performVSelectCombine(N, DCI.DAG);
|
||||||
|
case ISD::SELECT_CC:
|
||||||
|
return performSelectCCCombine(N, DCI.DAG);
|
||||||
case ISD::STORE:
|
case ISD::STORE:
|
||||||
return performSTORECombine(N, DCI, DAG, Subtarget);
|
return performSTORECombine(N, DCI, DAG, Subtarget);
|
||||||
case AArch64ISD::BRCOND:
|
case AArch64ISD::BRCOND:
|
||||||
|
|
|
@ -3521,9 +3521,6 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||||
// c = fcmp [?gt, ?ge, ?lt, ?le] a, b
|
// c = fcmp [?gt, ?ge, ?lt, ?le] a, b
|
||||||
// select c, a, b
|
// select c, a, b
|
||||||
// In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
|
// In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
|
||||||
// FIXME: There is similar code that allows some extensions in
|
|
||||||
// AArch64TargetLowering::LowerSELECT_CC that should be shared with this
|
|
||||||
// code.
|
|
||||||
bool swapSides = false;
|
bool swapSides = false;
|
||||||
if (!getTargetMachine().Options.NoNaNsFPMath) {
|
if (!getTargetMachine().Options.NoNaNsFPMath) {
|
||||||
// transformability may depend on which way around we compare
|
// transformability may depend on which way around we compare
|
||||||
|
|
|
@ -1,29 +1,49 @@
|
||||||
; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
|
; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
|
||||||
|
; RUN: llc -march=arm64 < %s | FileCheck %s --check-prefix=CHECK-SAFE
|
||||||
|
|
||||||
define double @test_direct(float %in) #1 {
|
define double @test_direct(float %in) {
|
||||||
; CHECK-LABEL: test_direct:
|
; CHECK-LABEL: test_direct:
|
||||||
|
; CHECK-SAFE-LABEL: test_direct:
|
||||||
%cmp = fcmp olt float %in, 0.000000e+00
|
%cmp = fcmp olt float %in, 0.000000e+00
|
||||||
%longer = fpext float %in to double
|
%longer = fpext float %in to double
|
||||||
%val = select i1 %cmp, double 0.000000e+00, double %longer
|
%val = select i1 %cmp, double 0.000000e+00, double %longer
|
||||||
ret double %val
|
ret double %val
|
||||||
|
|
||||||
; CHECK: fmax
|
; CHECK: fmax
|
||||||
|
; CHECK-SAFE: fmax
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @test_cross(float %in) #1 {
|
define double @test_cross(float %in) {
|
||||||
; CHECK-LABEL: test_cross:
|
; CHECK-LABEL: test_cross:
|
||||||
|
; CHECK-SAFE-LABEL: test_cross:
|
||||||
|
%cmp = fcmp ult float %in, 0.000000e+00
|
||||||
|
%longer = fpext float %in to double
|
||||||
|
%val = select i1 %cmp, double %longer, double 0.000000e+00
|
||||||
|
ret double %val
|
||||||
|
|
||||||
|
; CHECK: fmin
|
||||||
|
; CHECK-SAFE: fmin
|
||||||
|
}
|
||||||
|
|
||||||
|
; Same as previous, but with ordered comparison;
|
||||||
|
; can't be converted in safe-math mode.
|
||||||
|
define double @test_cross_fail_nan(float %in) {
|
||||||
|
; CHECK-LABEL: test_cross_fail_nan:
|
||||||
|
; CHECK-SAFE-LABEL: test_cross_fail_nan:
|
||||||
%cmp = fcmp olt float %in, 0.000000e+00
|
%cmp = fcmp olt float %in, 0.000000e+00
|
||||||
%longer = fpext float %in to double
|
%longer = fpext float %in to double
|
||||||
%val = select i1 %cmp, double %longer, double 0.000000e+00
|
%val = select i1 %cmp, double %longer, double 0.000000e+00
|
||||||
ret double %val
|
ret double %val
|
||||||
|
|
||||||
; CHECK: fmin
|
; CHECK: fmin
|
||||||
|
; CHECK-SAFE: fcsel d0, d1, d0, mi
|
||||||
}
|
}
|
||||||
|
|
||||||
; This isn't a min or a max, but passes the first condition for swapping the
|
; This isn't a min or a max, but passes the first condition for swapping the
|
||||||
; results. Make sure they're put back before we resort to the normal fcsel.
|
; results. Make sure they're put back before we resort to the normal fcsel.
|
||||||
define float @test_cross_fail(float %lhs, float %rhs) {
|
define float @test_cross_fail(float %lhs, float %rhs) {
|
||||||
; CHECK-LABEL: test_cross_fail:
|
; CHECK-LABEL: test_cross_fail:
|
||||||
|
; CHECK-SAFE-LABEL: test_cross_fail:
|
||||||
%tst = fcmp une float %lhs, %rhs
|
%tst = fcmp une float %lhs, %rhs
|
||||||
%res = select i1 %tst, float %rhs, float %lhs
|
%res = select i1 %tst, float %rhs, float %lhs
|
||||||
ret float %res
|
ret float %res
|
||||||
|
@ -31,4 +51,12 @@ define float @test_cross_fail(float %lhs, float %rhs) {
|
||||||
; The register allocator would have to decide to be deliberately obtuse before
|
; The register allocator would have to decide to be deliberately obtuse before
|
||||||
; other register were used.
|
; other register were used.
|
||||||
; CHECK: fcsel s0, s1, s0, ne
|
; CHECK: fcsel s0, s1, s0, ne
|
||||||
|
; CHECK-SAFE: fcsel s0, s1, s0, ne
|
||||||
|
}
|
||||||
|
|
||||||
|
; Make sure the transformation isn't triggered for integers
|
||||||
|
define i64 @test_integer(i64 %in) {
|
||||||
|
%cmp = icmp slt i64 %in, 0
|
||||||
|
%val = select i1 %cmp, i64 0, i64 %in
|
||||||
|
ret i64 %val
|
||||||
}
|
}
|
Loading…
Reference in New Issue