From 3f739f736b8fed6f4d63569f56c985ef04b21cd1 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Wed, 23 Sep 2020 14:10:33 -0700 Subject: [PATCH] [SelectionDAG][GISel] Make LegalizeDAG lower FNEG using integer ops. Previously, if a floating-point type was legal, but FNEG wasn't legal, we would use FSUB. Instead, we should use integer ops, to preserve the semantics. (Alternatively, there's a compiler-rt call we could use, but there isn't much reason to use that.) It turns out we actually are still using this obscure codepath in a few cases: on some targets, we have "legal" floating-point types that don't actually support any floating-point operations. In particular, ARM and AArch64 are using this path. The implementation for SelectionDAG is pretty simple because we can reuse the infrastructure from FCOPYSIGN. See also 9a3dc3e, the corresponding change to type legalization. Also includes a "bonus" change to STRICT_FSUB legalization, so we can lower a STRICT_FSUB to a float libcall. Includes the changes to both LegalizeDAG and GlobalISel so we don't have inconsistent results in the future. Fixes https://bugs.llvm.org/show_bug.cgi?id=46792 . Differential Revision: https://reviews.llvm.org/D84287 --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 12 +- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 32 ++++-- llvm/lib/Target/ARM/ARMLegalizerInfo.cpp | 2 +- llvm/test/CodeGen/AArch64/arm64-fp128.ll | 20 ++-- .../ARM/GlobalISel/arm-legalize-binops.mir | 108 ++++++++++++++++++ .../ARM/GlobalISel/arm-legalize-fp.mir | 32 ++---- llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll | 39 +++---- .../CodeGen/X86/GlobalISel/legalize-fneg.mir | 12 +- 8 files changed, 173 insertions(+), 84 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f4c8acc9e82a..196dbf2dd7af 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2881,16 +2881,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { // represent them. if (Ty.isVector()) return UnableToLegalize; - LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty); - if (!ZeroTy) - return UnableToLegalize; - ConstantFP &ZeroForNegation = - *cast(ConstantFP::getZeroValueForNegation(ZeroTy)); - auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); + auto SignMask = + MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits())); Register SubByReg = MI.getOperand(1).getReg(); - Register ZeroReg = Zero.getReg(0); - MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags()); + MIRBuilder.buildXor(Res, SubByReg, SignMask); MI.eraseFromParent(); return Legalized; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9a718480aee8..83ade2d2fdca 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -173,6 +173,7 @@ private: SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; + SDValue ExpandFNEG(SDNode *Node) const; SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain); void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl, SmallVectorImpl &Results); @@ -1573,6 +1574,22 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { return modifySignAsInt(MagAsInt, DL, CopiedSign); } +SDValue SelectionDAGLegalize::ExpandFNEG(SDNode *Node) const { + // Get the sign bit as an integer. + SDLoc DL(Node); + FloatSignAsInt SignAsInt; + getSignAsIntValue(SignAsInt, DL, Node->getOperand(0)); + EVT IntVT = SignAsInt.IntValue.getValueType(); + + // Flip the sign. + SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); + SDValue SignFlip = + DAG.getNode(ISD::XOR, DL, IntVT, SignAsInt.IntValue, SignMask); + + // Convert back to float. + return modifySignAsInt(SignAsInt, DL, SignFlip); +} + SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { SDLoc DL(Node); SDValue Value = Node->getOperand(0); @@ -3252,12 +3269,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandFCOPYSIGN(Node)); break; case ISD::FNEG: - // Expand Y = FNEG(X) -> Y = SUB -0.0, X - Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); - // TODO: If FNEG has fast-math-flags, propagate them to the FSUB. - Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, - Node->getOperand(0)); - Results.push_back(Tmp1); + Results.push_back(ExpandFNEG(Node)); break; case ISD::FABS: Results.push_back(ExpandFABS(Node)); @@ -3942,10 +3954,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { return true; break; case ISD::STRICT_FSUB: { - if (TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)) - == TargetLowering::Legal) + if (TLI.getStrictFPOperationAction( + ISD::STRICT_FSUB, Node->getValueType(0)) == TargetLowering::Legal) return true; + if (TLI.getStrictFPOperationAction( + ISD::STRICT_FADD, Node->getValueType(0)) != TargetLowering::Legal) + break; EVT VT = Node->getValueType(0); const SDNodeFlags Flags = Node->getFlags(); diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index f3657155f47e..d9b60f4c4eba 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -88,7 +88,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR}) .legalFor({s32}) - .minScalar(0, s32); + .clampScalar(0, s32, s32); if (ST.hasNEON()) getActionDefinitionsBuilder({G_ADD, G_SUB}) diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll index 3561d8fcdff9..ad5366320cdb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll @@ -262,19 +262,17 @@ define void @test_extend() { } define fp128 @test_neg(fp128 %in) { -; CHECK: [[$MINUS0:.LCPI[0-9]+_0]]: -; Make sure the weird hex constant below *is* -0.0 -; CHECK-NEXT: fp128 -0 - ; CHECK-LABEL: test_neg: - ; Could in principle be optimized to fneg which we can't select, this makes - ; sure that doesn't happen. +;; We convert this to fneg, and target-independent code expands it with +;; integer operations. %ret = fsub fp128 0xL00000000000000008000000000000000, %in -; CHECK: mov v1.16b, v0.16b -; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:[[$MINUS0]]] -; CHECK: bl __subtf3 - ret fp128 %ret -; CHECK: ret + +; CHECK: str q0, [sp, #-16]! +; CHECK-NEXT: ldrb w8, [sp, #15] +; CHECK-NEXT: eor w8, w8, #0x80 +; CHECK-NEXT: strb w8, [sp, #15] +; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: ret } diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir index dfbbdce4fb48..199b72a9bb84 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir @@ -16,14 +16,17 @@ define void @test_and_s8() { ret void } define void @test_and_s16() { ret void } define void @test_and_s32() { ret void } + define void @test_and_s64() { ret void } define void @test_or_s8() { ret void } define void @test_or_s16() { ret void } define void @test_or_s32() { ret void } + define void @test_or_s64() { ret void } define void @test_xor_s8() { ret void } define void @test_xor_s16() { ret void } define void @test_xor_s32() { ret void } + define void @test_xor_s64() { ret void } define void @test_lshr_s32() { ret void } define void @test_ashr_s32() { ret void } @@ -389,6 +392,41 @@ body: | $r0 = COPY %2(s32) BX_RET 14, $noreg, implicit $r0 +... +--- +name: test_and_s64 +# CHECK-LABEL: name: test_and_s64 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3 + + %0(s32) = COPY $r0 + %1(s32) = COPY $r1 + %2(s32) = COPY $r2 + %3(s32) = COPY $r3 + %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %6(s64) = G_AND %4, %5 + %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64) + $r0 = COPY %7(s32) + $r1 = COPY %8(s32) + BX_RET 14, $noreg, implicit $r0, implicit $r1 + ... --- name: test_or_s8 @@ -478,6 +516,41 @@ body: | $r0 = COPY %2(s32) BX_RET 14, $noreg, implicit $r0 +... +--- +name: test_or_s64 +# CHECK-LABEL: name: test_or_s64 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3 + + %0(s32) = COPY $r0 + %1(s32) = COPY $r1 + %2(s32) = COPY $r2 + %3(s32) = COPY $r3 + %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %6(s64) = G_OR %4, %5 + %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64) + $r0 = COPY %7(s32) + $r1 = COPY %8(s32) + BX_RET 14, $noreg, implicit $r0, implicit $r1 + ... --- name: test_xor_s8 @@ -567,6 +640,41 @@ body: | $r0 = COPY %2(s32) BX_RET 14, $noreg, implicit $r0 +... +--- +name: test_xor_s64 +# CHECK-LABEL: name: test_xor_s64 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3 + + %0(s32) = COPY $r0 + %1(s32) = COPY $r1 + %2(s32) = COPY $r2 + %3(s32) = COPY $r3 + %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %6(s64) = G_XOR %4, %5 + %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64) + $r0 = COPY %7(s32) + $r1 = COPY %8(s32) + BX_RET 14, $noreg, implicit $r0, implicit $r1 + ... --- name: test_lshr_s32 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir index 8b85b45d3f81..8038d73ddb7f 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir @@ -689,16 +689,8 @@ body: | ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY $r0 %0(s32) = COPY $r0 ; HARD: [[R:%[0-9]+]]:_(s32) = G_FNEG [[X]] - ; SOFT-NOT: G_FNEG - ; SOFT-DAG: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SOFT: ADJCALLSTACKDOWN - ; SOFT-DAG: $r0 = COPY [[ZERO]] - ; SOFT-DAG: $r1 = COPY [[X]] - ; SOFT-AEABI: BL{{.*}} &__aeabi_fsub, {{.*}}, implicit $r0, implicit $r1, implicit-def $r0 - ; SOFT-DEFAULT: BL{{.*}} &__subsf3, {{.*}}, implicit $r0, implicit $r1, implicit-def $r0 - ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY $r0 - ; SOFT: ADJCALLSTACKUP - ; SOFT-NOT: G_FNEG + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; SOFT: [[R:%[0-9]+]]:_(s32) = G_XOR [[X]], [[ZERO]] %1(s32) = G_FNEG %0 ; CHECK: $r0 = COPY [[R]] $r0 = COPY %1(s32) @@ -730,20 +722,14 @@ body: | ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]] %2(s64) = G_MERGE_VALUES %0(s32), %1(s32) ; HARD: [[R:%[0-9]+]]:_(s64) = G_FNEG [[X]] - ; SOFT-NOT: G_FNEG - ; SOFT-DAG: [[NEGATIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; SOFT-DAG: [[POSITIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SOFT: ADJCALLSTACKDOWN - ; SOFT-DAG: $r{{[0-1]}} = COPY [[NEGATIVE_ZERO]] - ; SOFT-DAG: $r{{[0-1]}} = COPY [[POSITIVE_ZERO]] - ; SOFT-DAG: $r{{[2-3]}} = COPY [[X0]] - ; SOFT-DAG: $r{{[2-3]}} = COPY [[X1]] - ; SOFT-AEABI: BL{{.*}} &__aeabi_dsub, {{.*}}, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1 - ; SOFT-DEFAULT: BL{{.*}} &__subdf3, {{.*}}, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1 - ; SOFT: ADJCALLSTACKUP - ; SOFT-NOT: G_FNEG + ; HARD: G_UNMERGE_VALUES [[R]](s64) + ; SOFT: [[POSITIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT: [[NEGATIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; SOFT: [[LOWR:%[0-9]+]]:_(s32) = G_XOR [[X0]], [[POSITIVE_ZERO]] + ; SOFT: [[HIGHR:%[0-9]+]]:_(s32) = G_XOR [[X1]], [[NEGATIVE_ZERO]] + ; SOFT: $r0 = COPY [[LOWR]] + ; SOFT: $r1 = COPY [[HIGHR]] %3(s64) = G_FNEG %2 - ; HARD-DAG: G_UNMERGE_VALUES [[R]](s64) %4(s32),%5(s32) = G_UNMERGE_VALUES %3(s64) $r0 = COPY %4(s32) $r1 = COPY %5(s32) diff --git a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll index 50d8752176a1..f88242fb108c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll @@ -67,31 +67,20 @@ entry: define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) { ; CHECK-LABEL: fneg_float64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vldr d0, .LCPI2_0 -; CHECK-NEXT: vmov r2, r3, d9 -; CHECK-NEXT: vmov r4, r5, d0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dsub -; CHECK-NEXT: vmov r2, r3, d8 -; CHECK-NEXT: vmov d9, r0, r1 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dsub -; CHECK-NEXT: vmov d8, r0, r1 -; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r7, pc} -; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI2_0: -; CHECK-NEXT: .long 0 @ double -0 -; CHECK-NEXT: .long 2147483648 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: vstr d1, [sp] +; CHECK-NEXT: ldrb.w r0, [sp, #7] +; CHECK-NEXT: vstr d0, [sp, #8] +; CHECK-NEXT: ldrb.w r1, [sp, #15] +; CHECK-NEXT: eor r0, r0, #128 +; CHECK-NEXT: strb.w r0, [sp, #7] +; CHECK-NEXT: vldr d1, [sp] +; CHECK-NEXT: eor r0, r1, #128 +; CHECK-NEXT: strb.w r0, [sp, #15] +; CHECK-NEXT: vldr d0, [sp, #8] +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: bx lr entry: %0 = fsub nnan ninf nsz <2 x double> , %src ret <2 x double> %0 diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-fneg.mir index dabe3acc93b4..0f7a59b5cdba 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-fneg.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-fneg.mir @@ -22,9 +22,9 @@ body: | liveins: ; CHECK-LABEL: name: test_fneg_f32 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -0.000000e+00 - ; CHECK: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[C]], [[DEF]] - ; CHECK: $edi = COPY [[FSUB]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[C]] + ; CHECK: $edi = COPY [[XOR]](s32) %0(s32) = IMPLICIT_DEF %1(s32) = G_FNEG %0 $edi = COPY %1 @@ -39,9 +39,9 @@ body: | liveins: ; CHECK-LABEL: name: test_fneg_f64 ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00 - ; CHECK: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[C]], [[DEF]] - ; CHECK: $rdi = COPY [[FSUB]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[C]] + ; CHECK: $rdi = COPY [[XOR]](s64) %0(s64) = G_IMPLICIT_DEF %1(s64) = G_FNEG %0 $rdi = COPY %1