From c8b3d7d6d6de37af68b2f379d0e37304f78e115f Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Tue, 14 Sep 2021 10:03:42 -0700 Subject: [PATCH] [AArch64][GlobalISel] Ensure atomic loads always get assigned GPR destinations The default register bank selection code for G_LOAD assumes that we ought to use a FPR when the load is casted to a float/double. For atomics, this isn't true; we should always use GPRs. Without this patch, we crash in the following example: https://godbolt.org/z/MThjas441 Also make the code a little more stylistically consistent while we're here. Also test some other weird cast combinations as well. Differential Revision: https://reviews.llvm.org/D109771 --- .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 44 ++++--- .../AArch64/GlobalISel/arm64-atomic.ll | 111 ++++++++++++++++++ 2 files changed, 138 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index ffe64c56d419..5af0b2d9e202 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -15,6 +15,7 @@ #include "AArch64InstrInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" @@ -751,24 +752,33 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // for the greedy mode the cost of the cross bank copy will // offset this number. // FIXME: Should be derived from the scheduling model. - if (OpRegBankIdx[0] != PMI_FirstGPR) + if (OpRegBankIdx[0] != PMI_FirstGPR) { Cost = 2; - else - // Check if that load feeds fp instructions. - // In that case, we want the default mapping to be on FPR - // instead of blind map every scalar to GPR. - for (const MachineInstr &UseMI : - MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) { - // If we have at least one direct use in a FP instruction, - // assume this was a floating point load in the IR. - // If it was not, we would have had a bitcast before - // reaching that instruction. - // Int->FP conversion operations are also captured in onlyDefinesFP(). - if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) { - OpRegBankIdx[0] = PMI_FirstFPR; - break; - } - } + break; + } + + if (cast(MI).isAtomic()) { + // Atomics always use GPR destinations. Don't refine any further. + OpRegBankIdx[0] = PMI_FirstGPR; + break; + } + + // Check if that load feeds fp instructions. + // In that case, we want the default mapping to be on FPR + // instead of blind map every scalar to GPR. + if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), + [&](const MachineInstr &UseMI) { + // If we have at least one direct use in a FP instruction, + // assume this was a floating point load in the IR. If it was + // not, we would have had a bitcast before reaching that + // instruction. + // + // Int->FP conversion operations are also captured in + // onlyDefinesFP(). + return onlyUsesFP(UseMI, MRI, TRI) || + onlyDefinesFP(UseMI, MRI, TRI); + })) + OpRegBankIdx[0] = PMI_FirstFPR; break; case TargetOpcode::G_STORE: // Check if that store is fed by fp instructions. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll index 0308874c74b2..633c6e8c2032 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -2825,4 +2825,115 @@ define { i16, i1 } @cmpxchg_i16(i16* %ptr, i16 %desired, i16 %new) { ret { i16, i1 } %res } +define internal double @bitcast_to_double(i64* %ptr) { +; CHECK-NOLSE-LABEL: bitcast_to_double: +; CHECK-NOLSE: ; %bb.0: +; CHECK-NOLSE-NEXT: ldar x8, [x0] +; CHECK-NOLSE-NEXT: fmov d0, x8 +; CHECK-NOLSE-NEXT: ret +; +; CHECK-LSE-O1-LABEL: bitcast_to_double: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: ldar x8, [x0] +; CHECK-LSE-O1-NEXT: fmov d0, x8 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: bitcast_to_double: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: ldar x8, [x0] +; CHECK-LSE-O0-NEXT: fmov d0, x8 +; CHECK-LSE-O0-NEXT: ret + %load = load atomic i64, i64* %ptr seq_cst, align 8 + %bitcast = bitcast i64 %load to double + ret double %bitcast +} + +define internal float @bitcast_to_float(i32* %ptr) { +; CHECK-NOLSE-LABEL: bitcast_to_float: +; CHECK-NOLSE: ; %bb.0: +; CHECK-NOLSE-NEXT: ldar w8, [x0] +; CHECK-NOLSE-NEXT: fmov s0, w8 +; CHECK-NOLSE-NEXT: ret +; +; CHECK-LSE-O1-LABEL: bitcast_to_float: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: ldar w8, [x0] +; CHECK-LSE-O1-NEXT: fmov s0, w8 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: bitcast_to_float: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: ldar w8, [x0] +; CHECK-LSE-O0-NEXT: fmov s0, w8 +; CHECK-LSE-O0-NEXT: ret + %load = load atomic i32, i32* %ptr seq_cst, align 8 + %bitcast = bitcast i32 %load to float + ret float %bitcast +} + +define internal half @bitcast_to_half(i16* %ptr) { +; CHECK-NOLSE-LABEL: bitcast_to_half: +; CHECK-NOLSE: ; %bb.0: +; CHECK-NOLSE-NEXT: ldarh w8, [x0] +; CHECK-NOLSE-NEXT: fmov s0, w8 +; CHECK-NOLSE-NEXT: ; kill: def $h0 killed $h0 killed $s0 +; CHECK-NOLSE-NEXT: ret +; +; CHECK-LSE-O1-LABEL: bitcast_to_half: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: ldarh w8, [x0] +; CHECK-LSE-O1-NEXT: fmov s0, w8 +; CHECK-LSE-O1-NEXT: ; kill: def $h0 killed $h0 killed $s0 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: bitcast_to_half: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: ldarh w8, [x0] +; CHECK-LSE-O0-NEXT: fmov s0, w8 +; CHECK-LSE-O0-NEXT: ; kill: def $h0 killed $h0 killed $s0 +; CHECK-LSE-O0-NEXT: ret + %load = load atomic i16, i16* %ptr seq_cst, align 8 + %bitcast = bitcast i16 %load to half + ret half %bitcast +} + +define internal i64* @inttoptr(i64* %ptr) { +; CHECK-NOLSE-LABEL: inttoptr: +; CHECK-NOLSE: ; %bb.0: +; CHECK-NOLSE-NEXT: ldar x0, [x0] +; CHECK-NOLSE-NEXT: ret +; +; CHECK-LSE-O1-LABEL: inttoptr: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: ldar x0, [x0] +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: inttoptr: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: ldar x0, [x0] +; CHECK-LSE-O0-NEXT: ret + %load = load atomic i64, i64* %ptr seq_cst, align 8 + %bitcast = inttoptr i64 %load to i64* + ret i64* %bitcast +} + +define internal i64* @load_ptr(i64** %ptr) { +; CHECK-NOLSE-LABEL: load_ptr: +; CHECK-NOLSE: ; %bb.0: +; CHECK-NOLSE-NEXT: ldar x0, [x0] +; CHECK-NOLSE-NEXT: ret +; +; CHECK-LSE-O1-LABEL: load_ptr: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: ldar x0, [x0] +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: load_ptr: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: ldar x0, [x0] +; CHECK-LSE-O0-NEXT: ret + %load = load atomic i64*, i64** %ptr seq_cst, align 8 + ret i64* %load +} + attributes #0 = { nounwind }