From d3bf06bc81e38202ab2684b17f6bdb23784173d7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 24 Jan 2020 13:28:11 -0800 Subject: [PATCH] [DAGCombiner] Add combine for (not (strict_fsetcc)) to create a strict_fsetcc with the opposite condition. Unlike the existing code that I modified here, I only handle the case where the strict_fsetcc has a single use. Not sure exactly how to handle multiple uses. Testing this on X86 is hard because we already have other combines that get rid of the lowered version of the integer setcc that this xor will eventually become. So this combine really just saves a bunch of extra nodes being created. Not sure about other targets. Differential Revision: https://reviews.llvm.org/D71816 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 37 ++++++++- llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll | 78 +++++++++++++++++++ 2 files changed, 111 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1d3c5fd6ed22..383d536e2603 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -522,7 +522,7 @@ namespace { SDValue rebuildSetCC(SDValue N); bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, - SDValue &CC) const; + SDValue &CC, bool MatchStrict = false) const; bool isOneUseSetCC(SDValue N) const; bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); @@ -814,7 +814,7 @@ static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { // the appropriate nodes based on the type of node we are checking. This // simplifies life a bit for the callers. 
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, - SDValue &CC) const { + SDValue &CC, bool MatchStrict) const { if (N.getOpcode() == ISD::SETCC) { LHS = N.getOperand(0); RHS = N.getOperand(1); @@ -822,6 +822,15 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, return true; } + if (MatchStrict && + (N.getOpcode() == ISD::STRICT_FSETCC || + N.getOpcode() == ISD::STRICT_FSETCCS)) { + LHS = N.getOperand(1); + RHS = N.getOperand(2); + CC = N.getOperand(3); + return true; + } + if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2).getNode()) || !TLI.isConstFalseVal(N.getOperand(3).getNode())) @@ -7058,7 +7067,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold !(x cc y) -> (x !cc y) unsigned N0Opcode = N0.getOpcode(); SDValue LHS, RHS, CC; - if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { + if (TLI.isConstTrueVal(N1.getNode()) && + isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) { ISD::CondCode NotCC = ISD::getSetCCInverse(cast(CC)->get(), LHS.getValueType()); if (!LegalOperations || @@ -7071,6 +7081,21 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { case ISD::SELECT_CC: return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2), N0.getOperand(3), NotCC); + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + if (N0.hasOneUse()) { + // FIXME Can we handle multiple uses? Could we token factor the chain + // results from the new/old setcc? + SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, + N0.getOperand(0), + N0Opcode == ISD::STRICT_FSETCCS); + CombineTo(N, SetCC); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1)); + recursivelyDeleteUnusedNodes(N0.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + break; + } } } } @@ -13541,8 +13566,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { } if (N1.hasOneUse()) { + // rebuildSetCC calls visitXor which may change the Chain when there is a + // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes. + HandleSDNode ChainHandle(Chain); if (SDValue NewN1 = rebuildSetCC(N1)) - return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2); + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, + ChainHandle.getValue(), NewN1, N2); } return SDValue(); diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll index 115e16583bf4..7bee1340a774 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll @@ -4136,6 +4136,84 @@ define i32 @test_f64_uno_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ret i32 %res } +define void @foo(float %0, float %1) #0 { +; SSE-32-LABEL: foo: +; SSE-32: # %bb.0: +; SSE-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-32-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0 +; SSE-32-NEXT: jbe .LBB56_1 +; SSE-32-NEXT: # %bb.2: +; SSE-32-NEXT: jmp bar # TAILCALL +; SSE-32-NEXT: .LBB56_1: +; SSE-32-NEXT: retl +; +; SSE-64-LABEL: foo: +; SSE-64: # %bb.0: +; SSE-64-NEXT: ucomiss %xmm1, %xmm0 +; SSE-64-NEXT: jbe .LBB56_1 +; SSE-64-NEXT: # %bb.2: +; SSE-64-NEXT: jmp bar # TAILCALL +; SSE-64-NEXT: .LBB56_1: +; SSE-64-NEXT: retq +; +; AVX-32-LABEL: foo: +; AVX-32: # %bb.0: +; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-32-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0 +; AVX-32-NEXT: jbe .LBB56_1 +; AVX-32-NEXT: # %bb.2: +; AVX-32-NEXT: jmp bar # TAILCALL +; AVX-32-NEXT: .LBB56_1: +; AVX-32-NEXT: retl +; +; AVX-64-LABEL: foo: +; AVX-64: # %bb.0: +; AVX-64-NEXT: vucomiss %xmm1, %xmm0 +; AVX-64-NEXT: jbe .LBB56_1 +; AVX-64-NEXT: # %bb.2: +; AVX-64-NEXT: jmp bar # TAILCALL +; AVX-64-NEXT: .LBB56_1: +; AVX-64-NEXT: retq +; +; X87-LABEL: foo: +; X87: # %bb.0: +; 
X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: wait +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jbe .LBB56_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: jmp bar # TAILCALL +; X87-NEXT: .LBB56_1: +; X87-NEXT: retl +; +; X87-CMOV-LABEL: foo: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait +; X87-CMOV-NEXT: jbe .LBB56_1 +; X87-CMOV-NEXT: # %bb.2: +; X87-CMOV-NEXT: jmp bar # TAILCALL +; X87-CMOV-NEXT: .LBB56_1: +; X87-CMOV-NEXT: retl + %3 = call i1 @llvm.experimental.constrained.fcmp.f32( float %0, float %1, metadata !"ogt", metadata !"fpexcept.strict") #0 + br i1 %3, label %4, label %5 + +4: ; preds = %2 + tail call void @bar() + br label %5 + +5: ; preds = %4, %2 + ret void +} +declare void @bar() + attributes #0 = { strictfp } declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)