diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6d2a95129c3c..4b93f0780e37 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -491,6 +491,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   setTargetDAGCombine(ISD::SELECT);
   setTargetDAGCombine(ISD::VSELECT);
+  setTargetDAGCombine(ISD::SELECT_CC);
 
   setTargetDAGCombine(ISD::INTRINSIC_VOID);
   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
@@ -3567,7 +3568,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
 /// operations would *not* be semantically equivalent.
 static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
   if (Cmp == Result)
-    return true;
+    return (Cmp.getValueType() == MVT::f32 ||
+            Cmp.getValueType() == MVT::f64);
 
   ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
   ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
@@ -3701,46 +3703,6 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
   assert(LHS.getValueType() == RHS.getValueType());
   EVT VT = TVal.getValueType();
-
-  // Try to match this select into a max/min operation, which have dedicated
-  // opcode in the instruction set.
-  // FIXME: This is not correct in the presence of NaNs, so we only enable this
-  // in no-NaNs mode.
-  if (getTargetMachine().Options.NoNaNsFPMath) {
-    SDValue MinMaxLHS = TVal, MinMaxRHS = FVal;
-    if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) &&
-        selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) {
-      CC = ISD::getSetCCSwappedOperands(CC);
-      std::swap(MinMaxLHS, MinMaxRHS);
-    }
-
-    if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) &&
-        selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) {
-      switch (CC) {
-      default:
-        break;
-      case ISD::SETGT:
-      case ISD::SETGE:
-      case ISD::SETUGT:
-      case ISD::SETUGE:
-      case ISD::SETOGT:
-      case ISD::SETOGE:
-        return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS);
-        break;
-      case ISD::SETLT:
-      case ISD::SETLE:
-      case ISD::SETULT:
-      case ISD::SETULE:
-      case ISD::SETOLT:
-      case ISD::SETOLE:
-        return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS);
-        break;
-      }
-    }
-  }
-
-  // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
-  // and do the comparison.
   SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
 
   // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
@@ -8735,6 +8697,75 @@ static SDValue performSelectCombine(SDNode *N,
   return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
 }
 
+/// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match FMIN/FMAX patterns.
+static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
+  // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
+  // Unless the NoNaNsFPMath option is set, be careful about NaNs:
+  // FMAX/FMIN return NaN if either operand is a NaN;
+  // only do the transformation when it matches that behavior.
+
+  SDValue CondLHS = N->getOperand(0);
+  SDValue CondRHS = N->getOperand(1);
+  SDValue LHS = N->getOperand(2);
+  SDValue RHS = N->getOperand(3);
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+  unsigned Opcode;
+  bool IsReversed;
+  if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
+      selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
+    IsReversed = false; // x CC y ? x : y
+  } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
+             selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
+    IsReversed = true; // x CC y ? y : x
+  } else {
+    return SDValue();
+  }
+
+  bool IsUnordered = false, IsOrEqual;
+  switch (CC) {
+  default:
+    return SDValue();
+  case ISD::SETULT:
+  case ISD::SETULE:
+    IsUnordered = true;
+  case ISD::SETOLT:
+  case ISD::SETOLE:
+  case ISD::SETLT:
+  case ISD::SETLE:
+    IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
+    Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
+    break;
+
+  case ISD::SETUGT:
+  case ISD::SETUGE:
+    IsUnordered = true;
+  case ISD::SETOGT:
+  case ISD::SETOGE:
+  case ISD::SETGT:
+  case ISD::SETGE:
+    IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
+    Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
+    break;
+  }
+
+  // If LHS is NaN, an ordered comparison will be false and the result will be
+  // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
+  // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+  if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+    return SDValue();
+
+  // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
+  // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
+  // used for unsafe math or if one of the operands is known to be nonzero.
+  if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
+      !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+    return SDValue();
+
+  return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
+}
+
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -8767,6 +8798,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performSelectCombine(N, DCI);
   case ISD::VSELECT:
    return performVSelectCombine(N, DCI.DAG);
+  case ISD::SELECT_CC:
+    return performSelectCCCombine(N, DCI.DAG);
  case ISD::STORE:
    return performSTORECombine(N, DCI, DAG, Subtarget);
  case AArch64ISD::BRCOND:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 36b24ef99df3..5218234abeb8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -3521,9 +3521,6 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   // c = fcmp [?gt, ?ge, ?lt, ?le] a, b
   // select c, a, b
   // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
-  // FIXME: There is similar code that allows some extensions in
-  // AArch64TargetLowering::LowerSELECT_CC that should be shared with this
-  // code.
   bool swapSides = false;
   if (!getTargetMachine().Options.NoNaNsFPMath) {
     // transformability may depend on which way around we compare
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
index 94b745437bd3..ea281528b84c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
@@ -1,29 +1,49 @@
 ; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
+; RUN: llc -march=arm64 < %s | FileCheck %s --check-prefix=CHECK-SAFE
 
-define double @test_direct(float %in) #1 {
+define double @test_direct(float %in) {
 ; CHECK-LABEL: test_direct:
+; CHECK-SAFE-LABEL: test_direct:
   %cmp = fcmp olt float %in, 0.000000e+00
   %longer = fpext float %in to double
   %val = select i1 %cmp, double 0.000000e+00, double %longer
   ret double %val
 
 ; CHECK: fmax
+; CHECK-SAFE: fmax
 }
 
-define double @test_cross(float %in) #1 {
+define double @test_cross(float %in) {
 ; CHECK-LABEL: test_cross:
+; CHECK-SAFE-LABEL: test_cross:
+  %cmp = fcmp ult float %in, 0.000000e+00
+  %longer = fpext float %in to double
+  %val = select i1 %cmp, double %longer, double 0.000000e+00
+  ret double %val
+
+; CHECK: fmin
+; CHECK-SAFE: fmin
+}
+
+; Same as previous, but with ordered comparison;
+; can't be converted in safe-math mode.
+define double @test_cross_fail_nan(float %in) {
+; CHECK-LABEL: test_cross_fail_nan:
+; CHECK-SAFE-LABEL: test_cross_fail_nan:
   %cmp = fcmp olt float %in, 0.000000e+00
   %longer = fpext float %in to double
   %val = select i1 %cmp, double %longer, double 0.000000e+00
   ret double %val
 
 ; CHECK: fmin
+; CHECK-SAFE: fcsel d0, d1, d0, mi
 }
 
 ; This isn't a min or a max, but passes the first condition for swapping the
 ; results. Make sure they're put back before we resort to the normal fcsel.
 define float @test_cross_fail(float %lhs, float %rhs) {
 ; CHECK-LABEL: test_cross_fail:
+; CHECK-SAFE-LABEL: test_cross_fail:
   %tst = fcmp une float %lhs, %rhs
   %res = select i1 %tst, float %rhs, float %lhs
   ret float %res
@@ -31,4 +51,12 @@ define float @test_cross_fail(float %lhs, float %rhs) {
 ; The register allocator would have to decide to be deliberately obtuse before
 ; other registers were used.
 ; CHECK: fcsel s0, s1, s0, ne
-}
\ No newline at end of file
+; CHECK-SAFE: fcsel s0, s1, s0, ne
+}
+
+; Make sure the transformation isn't triggered for integers
+define i64 @test_integer(i64 %in) {
+  %cmp = icmp slt i64 %in, 0
+  %val = select i1 %cmp, i64 0, i64 %in
+  ret i64 %val
+}
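
Note on the semantics the new combine guards against (illustration only, not part of the patch): below is a minimal standalone C++ sketch, using made-up helper names selectLT/selectLE, of why "compare + select" cannot be blindly mapped to FMIN/FMAX. It mirrors the two checks in performSelectCCCombine: the NaN hazard covered by isKnownNeverNaN, and the signed-zero hazard covered by the UnsafeFPMath/isKnownNeverZero guard for the or-equal condition codes.

#include <cmath>
#include <cstdio>
#include <limits>

// "x < y ? x : y" exactly as the IR select computes it.
static double selectLT(double x, double y) { return x < y ? x : y; }
// "x <= y ? x : y" -- the or-equal form that is sensitive to signed zero.
static double selectLE(double x, double y) { return x <= y ? x : y; }

int main() {
  const double NaN = std::numeric_limits<double>::quiet_NaN();

  // NaN hazard: an ordered "<" is false when an operand is NaN, so the select
  // yields the other operand (1.0 here), whereas AArch64 FMIN propagates NaN.
  std::printf("select(NaN < 1.0 ? NaN : 1.0) = %f\n", selectLT(NaN, 1.0));

  // Signed-zero hazard: "+0 <= -0" is true, so the select yields +0, whereas
  // FMIN(+0, -0) yields -0.
  std::printf("select(+0 <= -0 ? +0 : -0) has signbit %d\n",
              (int)std::signbit(selectLE(0.0, -0.0)));
  return 0;
}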