[AArch64][GlobalISel] Ensure atomic loads always get assigned GPR destinations
The default register bank selection code for G_LOAD assumes that we
ought to use an FPR when the load is cast to a float/double. For
atomics, this isn't true; we should always use GPRs.

Without this patch, we crash in the following example:
https://godbolt.org/z/MThjas441

Also make the code a little more stylistically consistent while we're
here, and test some other weird cast combinations.

Differential Revision: https://reviews.llvm.org/D109771
parent e159d3cbfc
commit c8b3d7d6d6
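For context, here is a minimal C++ reproducer matching the shape of the crash. The exact source behind the godbolt link above is not preserved here, so treat this as an assumed equivalent, not the literal example:

    // Assumed-equivalent reproducer (not the literal godbolt source). With
    // clang targeting AArch64 and GlobalISel enabled (e.g. -fglobal-isel),
    // the seq_cst load below is emitted as `load atomic i64` followed by
    // `bitcast i64 ... to double` in the IR -- the exact pattern the old
    // register bank mapping code mishandled.
    #include <atomic>

    double load_atomic_double(const std::atomic<double> &A) {
      return A.load(std::memory_order_seq_cst);
    }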
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -15,6 +15,7 @@
 #include "AArch64InstrInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
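The newly added header is what the fix depends on: GenericMachineInstrs.h defines typed wrappers such as GLoad over generic machine instructions, which is what enables the `cast<GLoad>(MI).isAtomic()` check in the next hunk. A minimal sketch of the idiom; the helper name is hypothetical, not part of the patch:

    #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"

    using namespace llvm;

    // Hypothetical helper, not from the patch: GLoad supports the usual
    // isa/dyn_cast machinery, so non-G_LOAD instructions are filtered out
    // cleanly before querying the MachineMemOperand-backed isAtomic().
    static bool isAtomicGLoad(const MachineInstr &MI) {
      if (const auto *Load = dyn_cast<GLoad>(&MI))
        return Load->isAtomic();
      return false;
    }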
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -751,24 +752,33 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // for the greedy mode the cost of the cross bank copy will
     // offset this number.
     // FIXME: Should be derived from the scheduling model.
-    if (OpRegBankIdx[0] != PMI_FirstGPR)
+    if (OpRegBankIdx[0] != PMI_FirstGPR) {
       Cost = 2;
-    else
-      // Check if that load feeds fp instructions.
-      // In that case, we want the default mapping to be on FPR
-      // instead of blind map every scalar to GPR.
-      for (const MachineInstr &UseMI :
-           MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
-        // If we have at least one direct use in a FP instruction,
-        // assume this was a floating point load in the IR.
-        // If it was not, we would have had a bitcast before
-        // reaching that instruction.
-        // Int->FP conversion operations are also captured in onlyDefinesFP().
-        if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
-          OpRegBankIdx[0] = PMI_FirstFPR;
-          break;
-        }
-      }
+      break;
+    }
+
+    if (cast<GLoad>(MI).isAtomic()) {
+      // Atomics always use GPR destinations. Don't refine any further.
+      OpRegBankIdx[0] = PMI_FirstGPR;
+      break;
+    }
+
+    // Check if that load feeds fp instructions.
+    // In that case, we want the default mapping to be on FPR
+    // instead of blind map every scalar to GPR.
+    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+               [&](const MachineInstr &UseMI) {
+                 // If we have at least one direct use in a FP instruction,
+                 // assume this was a floating point load in the IR. If it was
+                 // not, we would have had a bitcast before reaching that
+                 // instruction.
+                 //
+                 // Int->FP conversion operations are also captured in
+                 // onlyDefinesFP().
+                 return onlyUsesFP(UseMI, MRI, TRI) ||
+                        onlyDefinesFP(UseMI, MRI, TRI);
+               }))
+      OpRegBankIdx[0] = PMI_FirstFPR;
     break;
   case TargetOpcode::G_STORE:
     // Check if that store is fed by fp instructions.
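One stylistic piece of the rewrite above: the hand-rolled use-list loop becomes a single llvm::any_of call from STLExtras.h, which returns true as soon as the predicate matches and so preserves the early-exit behavior of the old `break`. A small self-contained sketch of the idiom, with illustrative names that are not from the patch:

    #include <vector>

    #include "llvm/ADT/STLExtras.h"

    // llvm::any_of is a range-based wrapper over std::any_of: it stops at
    // the first element for which the lambda returns true, mirroring the
    // early break in the loop that the patch replaces.
    static bool anyNegative(const std::vector<int> &Values) {
      return llvm::any_of(Values, [](int V) { return V < 0; });
    }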
llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -2825,4 +2825,115 @@ define { i16, i1 } @cmpxchg_i16(i16* %ptr, i16 %desired, i16 %new) {
   ret { i16, i1 } %res
 }
+
+define internal double @bitcast_to_double(i64* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_double:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldar x8, [x0]
+; CHECK-NOLSE-NEXT:    fmov d0, x8
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_double:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldar x8, [x0]
+; CHECK-LSE-O1-NEXT:    fmov d0, x8
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_double:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldar x8, [x0]
+; CHECK-LSE-O0-NEXT:    fmov d0, x8
+; CHECK-LSE-O0-NEXT:    ret
+  %load = load atomic i64, i64* %ptr seq_cst, align 8
+  %bitcast = bitcast i64 %load to double
+  ret double %bitcast
+}
+
+define internal float @bitcast_to_float(i32* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_float:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldar w8, [x0]
+; CHECK-NOLSE-NEXT:    fmov s0, w8
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_float:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldar w8, [x0]
+; CHECK-LSE-O1-NEXT:    fmov s0, w8
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_float:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldar w8, [x0]
+; CHECK-LSE-O0-NEXT:    fmov s0, w8
+; CHECK-LSE-O0-NEXT:    ret
+  %load = load atomic i32, i32* %ptr seq_cst, align 8
+  %bitcast = bitcast i32 %load to float
+  ret float %bitcast
+}
+
+define internal half @bitcast_to_half(i16* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_half:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldarh w8, [x0]
+; CHECK-NOLSE-NEXT:    fmov s0, w8
+; CHECK-NOLSE-NEXT:    ; kill: def $h0 killed $h0 killed $s0
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_half:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldarh w8, [x0]
+; CHECK-LSE-O1-NEXT:    fmov s0, w8
+; CHECK-LSE-O1-NEXT:    ; kill: def $h0 killed $h0 killed $s0
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_half:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldarh w8, [x0]
+; CHECK-LSE-O0-NEXT:    fmov s0, w8
+; CHECK-LSE-O0-NEXT:    ; kill: def $h0 killed $h0 killed $s0
+; CHECK-LSE-O0-NEXT:    ret
+  %load = load atomic i16, i16* %ptr seq_cst, align 8
+  %bitcast = bitcast i16 %load to half
+  ret half %bitcast
+}
+
+define internal i64* @inttoptr(i64* %ptr) {
+; CHECK-NOLSE-LABEL: inttoptr:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldar x0, [x0]
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: inttoptr:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldar x0, [x0]
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: inttoptr:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldar x0, [x0]
+; CHECK-LSE-O0-NEXT:    ret
+  %load = load atomic i64, i64* %ptr seq_cst, align 8
+  %bitcast = inttoptr i64 %load to i64*
+  ret i64* %bitcast
+}
+
+define internal i64* @load_ptr(i64** %ptr) {
+; CHECK-NOLSE-LABEL: load_ptr:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldar x0, [x0]
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: load_ptr:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldar x0, [x0]
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: load_ptr:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldar x0, [x0]
+; CHECK-LSE-O0-NEXT:    ret
+  %load = load atomic i64*, i64** %ptr seq_cst, align 8
+  ret i64* %load
+}
 
 attributes #0 = { nounwind }