[SelectionDAG][GISel] Make LegalizeDAG lower FNEG using integer ops.

Previously, if a floating-point type was legal, but FNEG wasn't legal, we would use FSUB. Instead, we should use integer ops, to preserve the semantics: FNEG only flips the sign bit, whereas FSUB is a real arithmetic operation that is not guaranteed to behave identically (e.g. for NaN inputs). (Alternatively, there's a compiler-rt call we could use, but there isn't much reason to use that.)

It turns out we actually are still using this obscure codepath in a few cases: on some targets, we have "legal" floating-point types that don't actually support any floating-point operations. In particular, ARM and AArch64 are using this path.

The implementation for SelectionDAG is pretty simple because we can reuse the infrastructure from FCOPYSIGN.

See also 9a3dc3e, the corresponding change to type legalization.

Also includes a "bonus" change to STRICT_FSUB legalization, so we can lower a STRICT_FSUB to a float libcall.

Includes the changes to both LegalizeDAG and GlobalISel so we don't have inconsistent results in the future.

Fixes https://bugs.llvm.org/show_bug.cgi?id=46792

Differential Revision: https://reviews.llvm.org/D84287
2020-09-23 14:10:33 -07:00 · 2020-09-23 14:10:33 -07:00 · 3f739f736b
parent e8413ac97f
commit 3f739f736b
8 changed files with 173 additions and 84 deletions
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@ -2881,16 +2881,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
-    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
-    Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty);
-    if (!ZeroTy)
-      return UnableToLegalize;
-    ConstantFP &ZeroForNegation =
-        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
-    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
+    auto SignMask =
+        MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
    Register SubByReg = MI.getOperand(1).getReg();
-    Register ZeroReg = Zero.getReg(0);
-    MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
+    MIRBuilder.buildXor(Res, SubByReg, SignMask);
    MI.eraseFromParent();
    return Legalized;
  }
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -173,6 +173,7 @@ private:
                          SDValue NewIntValue) const;
  SDValue ExpandFCOPYSIGN(SDNode *Node) const;
  SDValue ExpandFABS(SDNode *Node) const;
+  SDValue ExpandFNEG(SDNode *Node) const;
  SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
  void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
                             SmallVectorImpl<SDValue> &Results);
@ -1573,6 +1574,22 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
  return modifySignAsInt(MagAsInt, DL, CopiedSign);
 }

+SDValue SelectionDAGLegalize::ExpandFNEG(SDNode *Node) const {
+  // Get the sign bit as an integer.
+  SDLoc DL(Node);
+  FloatSignAsInt SignAsInt;
+  getSignAsIntValue(SignAsInt, DL, Node->getOperand(0));
+  EVT IntVT = SignAsInt.IntValue.getValueType();
+
+  // Flip the sign.
+  SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+  SDValue SignFlip =
+      DAG.getNode(ISD::XOR, DL, IntVT, SignAsInt.IntValue, SignMask);
+
+  // Convert back to float.
+  return modifySignAsInt(SignAsInt, DL, SignFlip);
+}
+
 SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
  SDLoc DL(Node);
  SDValue Value = Node->getOperand(0);
@ -3252,12 +3269,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
    Results.push_back(ExpandFCOPYSIGN(Node));
    break;
  case ISD::FNEG:
-    // Expand Y = FNEG(X) ->  Y = SUB -0.0, X
-    Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
-    // TODO: If FNEG has fast-math-flags, propagate them to the FSUB.
-    Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
-                       Node->getOperand(0));
-    Results.push_back(Tmp1);
+    Results.push_back(ExpandFNEG(Node));
    break;
  case ISD::FABS:
    Results.push_back(ExpandFABS(Node));
@ -3942,10 +3954,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
        return true;
      break;
    case ISD::STRICT_FSUB: {
-      if (TLI.getStrictFPOperationAction(Node->getOpcode(),
-                                         Node->getValueType(0))
-          == TargetLowering::Legal)
+      if (TLI.getStrictFPOperationAction(
+              ISD::STRICT_FSUB, Node->getValueType(0)) == TargetLowering::Legal)
        return true;
+      if (TLI.getStrictFPOperationAction(
+              ISD::STRICT_FADD, Node->getValueType(0)) != TargetLowering::Legal)
+        break;

      EVT VT = Node->getValueType(0);
      const SDNodeFlags Flags = Node->getFlags();
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@ -88,7 +88,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {

  getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32})
-      .minScalar(0, s32);
+      .clampScalar(0, s32, s32);

  if (ST.hasNEON())
    getActionDefinitionsBuilder({G_ADD, G_SUB})
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@ -262,19 +262,17 @@ define void @test_extend() {
 }

 define fp128 @test_neg(fp128 %in) {
-; CHECK: [[$MINUS0:.LCPI[0-9]+_0]]:
-; Make sure the weird hex constant below *is* -0.0
-; CHECK-NEXT: fp128 -0
-
 ; CHECK-LABEL: test_neg:

-  ; Could in principle be optimized to fneg which we can't select, this makes
-  ; sure that doesn't happen.
+;; We convert this to fneg, and target-independent code expands it with
+;; integer operations.
  %ret = fsub fp128 0xL00000000000000008000000000000000, %in
-; CHECK: mov v1.16b, v0.16b
-; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:[[$MINUS0]]]
-; CHECK: bl __subtf3
-
  ret fp128 %ret
-; CHECK: ret
+
+; CHECK:      str q0, [sp, #-16]!
+; CHECK-NEXT: ldrb w8, [sp, #15]
+; CHECK-NEXT: eor w8, w8, #0x80
+; CHECK-NEXT: strb w8, [sp, #15]
+; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: ret
 }
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir
@ -16,14 +16,17 @@
  define void @test_and_s8() { ret void }
  define void @test_and_s16() { ret void }
  define void @test_and_s32() { ret void }
+  define void @test_and_s64() { ret void }

  define void @test_or_s8() { ret void }
  define void @test_or_s16() { ret void }
  define void @test_or_s32() { ret void }
+  define void @test_or_s64() { ret void }

  define void @test_xor_s8() { ret void }
  define void @test_xor_s16() { ret void }
  define void @test_xor_s32() { ret void }
+  define void @test_xor_s64() { ret void }

  define void @test_lshr_s32() { ret void }
  define void @test_ashr_s32() { ret void }
@ -389,6 +392,41 @@ body:             |
    $r0 = COPY %2(s32)
    BX_RET 14, $noreg, implicit $r0

+...
+---
+name:            test_and_s64
+# CHECK-LABEL: name: test_and_s64
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+body:             |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+
+    %0(s32) = COPY $r0
+    %1(s32) = COPY $r1
+    %2(s32) = COPY $r2
+    %3(s32) = COPY $r3
+    %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %6(s64) = G_AND %4, %5
+    %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64)
+    $r0 = COPY %7(s32)
+    $r1 = COPY %8(s32)
+    BX_RET 14, $noreg, implicit $r0, implicit $r1
+
 ...
 ---
 name:            test_or_s8
@ -478,6 +516,41 @@ body:             |
    $r0 = COPY %2(s32)
    BX_RET 14, $noreg, implicit $r0

+...
+---
+name:            test_or_s64
+# CHECK-LABEL: name: test_or_s64
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+body:             |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+
+    %0(s32) = COPY $r0
+    %1(s32) = COPY $r1
+    %2(s32) = COPY $r2
+    %3(s32) = COPY $r3
+    %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %6(s64) = G_OR %4, %5
+    %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64)
+    $r0 = COPY %7(s32)
+    $r1 = COPY %8(s32)
+    BX_RET 14, $noreg, implicit $r0, implicit $r1
+
 ...
 ---
 name:            test_xor_s8
@ -567,6 +640,41 @@ body:             |
    $r0 = COPY %2(s32)
    BX_RET 14, $noreg, implicit $r0

+...
+---
+name:            test_xor_s64
+# CHECK-LABEL: name: test_xor_s64
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+body:             |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+
+    %0(s32) = COPY $r0
+    %1(s32) = COPY $r1
+    %2(s32) = COPY $r2
+    %3(s32) = COPY $r3
+    %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %6(s64) = G_XOR %4, %5
+    %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64)
+    $r0 = COPY %7(s32)
+    $r1 = COPY %8(s32)
+    BX_RET 14, $noreg, implicit $r0, implicit $r1
+
 ...
 ---
 name:            test_lshr_s32
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
@ -689,16 +689,8 @@ body:             |
    ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY $r0
    %0(s32) = COPY $r0
    ; HARD: [[R:%[0-9]+]]:_(s32) = G_FNEG [[X]]
-    ; SOFT-NOT: G_FNEG
-    ; SOFT-DAG: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
-    ; SOFT: ADJCALLSTACKDOWN
-    ; SOFT-DAG: $r0 = COPY [[ZERO]]
-    ; SOFT-DAG: $r1 = COPY [[X]]
-    ; SOFT-AEABI: BL{{.*}} &__aeabi_fsub, {{.*}}, implicit $r0, implicit $r1, implicit-def $r0
-    ; SOFT-DEFAULT: BL{{.*}} &__subsf3, {{.*}}, implicit $r0, implicit $r1, implicit-def $r0
-    ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY $r0
-    ; SOFT: ADJCALLSTACKUP
-    ; SOFT-NOT: G_FNEG
+    ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; SOFT: [[R:%[0-9]+]]:_(s32) = G_XOR [[X]], [[ZERO]]
    %1(s32) = G_FNEG %0
    ; CHECK: $r0 = COPY [[R]]
    $r0 = COPY %1(s32)
@ -730,20 +722,14 @@ body:             |
    ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]]
    %2(s64) = G_MERGE_VALUES %0(s32), %1(s32)
    ; HARD: [[R:%[0-9]+]]:_(s64) = G_FNEG [[X]]
-    ; SOFT-NOT: G_FNEG
-    ; SOFT-DAG: [[NEGATIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
-    ; SOFT-DAG: [[POSITIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SOFT: ADJCALLSTACKDOWN
-    ; SOFT-DAG: $r{{[0-1]}} = COPY [[NEGATIVE_ZERO]]
-    ; SOFT-DAG: $r{{[0-1]}} = COPY [[POSITIVE_ZERO]]
-    ; SOFT-DAG: $r{{[2-3]}} = COPY [[X0]]
-    ; SOFT-DAG: $r{{[2-3]}} = COPY [[X1]]
-    ; SOFT-AEABI: BL{{.*}} &__aeabi_dsub, {{.*}}, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1
-    ; SOFT-DEFAULT: BL{{.*}} &__subdf3, {{.*}}, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1
-    ; SOFT: ADJCALLSTACKUP
-    ; SOFT-NOT: G_FNEG
+    ; HARD: G_UNMERGE_VALUES [[R]](s64)
+    ; SOFT: [[POSITIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SOFT: [[NEGATIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; SOFT: [[LOWR:%[0-9]+]]:_(s32) = G_XOR [[X0]], [[POSITIVE_ZERO]]
+    ; SOFT: [[HIGHR:%[0-9]+]]:_(s32) = G_XOR [[X1]], [[NEGATIVE_ZERO]]
+    ; SOFT: $r0 = COPY [[LOWR]]
+    ; SOFT: $r1 = COPY [[HIGHR]]
    %3(s64) = G_FNEG %2
-    ; HARD-DAG: G_UNMERGE_VALUES [[R]](s64)
    %4(s32),%5(s32) = G_UNMERGE_VALUES %3(s64)
    $r0 = COPY %4(s32)
    $r1 = COPY %5(s32)
--- a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
@ -67,31 +67,20 @@ entry:
 define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) {
 ; CHECK-LABEL: fneg_float64_t:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    vldr d0, .LCPI2_0
-; CHECK-NEXT:    vmov r2, r3, d9
-; CHECK-NEXT:    vmov r4, r5, d0
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_dsub
-; CHECK-NEXT:    vmov r2, r3, d8
-; CHECK-NEXT:    vmov d9, r0, r1
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_dsub
-; CHECK-NEXT:    vmov d8, r0, r1
-; CHECK-NEXT:    vmov q0, q4
-; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
-; CHECK-NEXT:    .p2align 3
-; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI2_0:
-; CHECK-NEXT:    .long 0 @ double -0
-; CHECK-NEXT:    .long 2147483648
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    vstr d1, [sp]
+; CHECK-NEXT:    ldrb.w r0, [sp, #7]
+; CHECK-NEXT:    vstr d0, [sp, #8]
+; CHECK-NEXT:    ldrb.w r1, [sp, #15]
+; CHECK-NEXT:    eor r0, r0, #128
+; CHECK-NEXT:    strb.w r0, [sp, #7]
+; CHECK-NEXT:    vldr d1, [sp]
+; CHECK-NEXT:    eor r0, r1, #128
+; CHECK-NEXT:    strb.w r0, [sp, #15]
+; CHECK-NEXT:    vldr d0, [sp, #8]
+; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    bx lr
 entry:
  %0 = fsub nnan ninf nsz <2 x double> <double 0.0e0, double 0.0e0>, %src
  ret <2 x double> %0
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-fneg.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-fneg.mir
@ -22,9 +22,9 @@ body:             |
    liveins:
    ; CHECK-LABEL: name: test_fneg_f32
    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -0.000000e+00
-    ; CHECK: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[C]], [[DEF]]
-    ; CHECK: $edi = COPY [[FSUB]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[C]]
+    ; CHECK: $edi = COPY [[XOR]](s32)
    %0(s32) = IMPLICIT_DEF
    %1(s32) = G_FNEG %0
    $edi = COPY %1
@ -39,9 +39,9 @@ body:             |
    liveins:
    ; CHECK-LABEL: name: test_fneg_f64
    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00
-    ; CHECK: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[C]], [[DEF]]
-    ; CHECK: $rdi = COPY [[FSUB]](s64)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
+    ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[C]]
+    ; CHECK: $rdi = COPY [[XOR]](s64)
    %0(s64) = G_IMPLICIT_DEF
    %1(s64) = G_FNEG %0
    $rdi = COPY %1