Re-apply r237247 - [AArch64] Codegen VMAX/VMIN for safe math cases

No longer breaks SPEC2000/2006 llvm-svn: 237361
2015-05-14 12:59:46 +00:00 · 2015-05-14 12:59:46 +00:00 · a70dfe18d3
parent 708fa8e899
commit a70dfe18d3
3 changed files with 105 additions and 47 deletions
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -491,6 +491,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::VSELECT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
@ -3567,7 +3568,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
 /// operations would *not* be semantically equivalent.
 static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
  if (Cmp == Result)
-    return true;
+    return (Cmp.getValueType() == MVT::f32 ||
            Cmp.getValueType() == MVT::f64);
  ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
  ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
@ -3701,46 +3703,6 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
  assert(LHS.getValueType() == RHS.getValueType());
  EVT VT = TVal.getValueType();
  // Try to match this select into a max/min operation, which have dedicated
  // opcode in the instruction set.
  // FIXME: This is not correct in the presence of NaNs, so we only enable this
  // in no-NaNs mode.
  if (getTargetMachine().Options.NoNaNsFPMath) {
    SDValue MinMaxLHS = TVal, MinMaxRHS = FVal;
    if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) &&
        selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) {
      CC = ISD::getSetCCSwappedOperands(CC);
      std::swap(MinMaxLHS, MinMaxRHS);
    }
    if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) &&
        selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) {
      switch (CC) {
      default:
        break;
      case ISD::SETGT:
      case ISD::SETGE:
      case ISD::SETUGT:
      case ISD::SETUGE:
      case ISD::SETOGT:
      case ISD::SETOGE:
        return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS);
        break;
      case ISD::SETLT:
      case ISD::SETLE:
      case ISD::SETULT:
      case ISD::SETULE:
      case ISD::SETOLT:
      case ISD::SETOLE:
        return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS);
        break;
      }
    }
  }
  // If that fails, we'll need to perform an FCMP + CSEL sequence.  Go ahead
  // and do the comparison.
  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
@ -8735,6 +8697,75 @@ static SDValue performSelectCombine(SDNode *N,
  return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
 }
 /// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC
 /// to match FMIN/FMAX patterns.
 static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
  // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
  // Unless the NoNaNsFPMath option is set, be careful about NaNs:
  // vmax/vmin return NaN if either operand is a NaN;
  // only do the transformation when it matches that behavior.
  SDValue CondLHS = N->getOperand(0);
  SDValue CondRHS = N->getOperand(1);
  SDValue LHS = N->getOperand(2);
  SDValue RHS = N->getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
  unsigned Opcode;
  bool IsReversed;
  if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
      selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
    IsReversed = false; // x CC y ? x : y
  } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
             selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
    IsReversed = true ; // x CC y ? y : x
  } else {
    return SDValue();
  }
  bool IsUnordered = false, IsOrEqual;
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETULT:
  case ISD::SETULE:
    IsUnordered = true;
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
    IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
    Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
    break;
  case ISD::SETUGT:
  case ISD::SETUGE:
    IsUnordered = true;
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
    IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
    Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
    break;
  }
  // If LHS is NaN, an ordered comparison will be false and the result will be
  // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
  // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
  if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
    return SDValue();
  // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
  // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
  // used for unsafe math or if one of the operands is known to be nonzero.
  if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
      !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
    return SDValue();
  return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
 }
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
@ -8767,6 +8798,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
    return performSelectCombine(N, DCI);
  case ISD::VSELECT:
    return performVSelectCombine(N, DCI.DAG);
  case ISD::SELECT_CC:
    return performSelectCCCombine(N, DCI.DAG);
  case ISD::STORE:
    return performSTORECombine(N, DCI, DAG, Subtarget);
  case AArch64ISD::BRCOND:
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@ -3521,9 +3521,6 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
    //   c = fcmp [?gt, ?ge, ?lt, ?le] a, b
    //   select c, a, b
    // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
    // FIXME: There is similar code that allows some extensions in
    // AArch64TargetLowering::LowerSELECT_CC that should be shared with this
    // code.
    bool swapSides = false;
    if (!getTargetMachine().Options.NoNaNsFPMath) {
      // transformability may depend on which way around we compare
--- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
@ -1,29 +1,49 @@
 ; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
 ; RUN: llc -march=arm64 < %s | FileCheck %s --check-prefix=CHECK-SAFE
-define double @test_direct(float %in) #1 {
+define double @test_direct(float %in) {
 ; CHECK-LABEL: test_direct:
 ; CHECK-SAFE-LABEL: test_direct:
  %cmp = fcmp olt float %in, 0.000000e+00
  %longer = fpext float %in to double
  %val = select i1 %cmp, double 0.000000e+00, double %longer
  ret double %val
 ; CHECK: fmax
 ; CHECK-SAFE: fmax
 }
-define double @test_cross(float %in) #1 {
+define double @test_cross(float %in) {
 ; CHECK-LABEL: test_cross:
 ; CHECK-SAFE-LABEL: test_cross:
  %cmp = fcmp ult float %in, 0.000000e+00
  %longer = fpext float %in to double
  %val = select i1 %cmp, double %longer, double 0.000000e+00
  ret double %val
 ; CHECK: fmin
 ; CHECK-SAFE: fmin
 }
 ; Same as previous, but with ordered comparison;
 ; can't be converted in safe-math mode.
 define double @test_cross_fail_nan(float %in) {
 ; CHECK-LABEL: test_cross_fail_nan:
 ; CHECK-SAFE-LABEL: test_cross_fail_nan:
  %cmp = fcmp olt float %in, 0.000000e+00
  %longer = fpext float %in to double
  %val = select i1 %cmp, double %longer, double 0.000000e+00
  ret double %val
 ; CHECK: fmin
 ; CHECK-SAFE: fcsel d0, d1, d0, mi
 }
 ; This isn't a min or a max, but passes the first condition for swapping the
 ; results. Make sure they're put back before we resort to the normal fcsel.
 define float @test_cross_fail(float %lhs, float %rhs) {
 ; CHECK-LABEL: test_cross_fail:
 ; CHECK-SAFE-LABEL: test_cross_fail:
  %tst = fcmp une float %lhs, %rhs
  %res = select i1 %tst, float %rhs, float %lhs
  ret float %res
@ -31,4 +51,12 @@ define float @test_cross_fail(float %lhs, float %rhs) {
  ; The register allocator would have to decide to be deliberately obtuse before
  ; other register were used.
 ; CHECK: fcsel s0, s1, s0, ne
 ; CHECK-SAFE: fcsel s0, s1, s0, ne
 }
 ; Make sure the transformation isn't triggered for integers
 define i64 @test_integer(i64  %in) {
  %cmp = icmp slt i64 %in, 0
  %val = select i1 %cmp, i64 0, i64 %in
  ret i64 %val
 }