[AArch64][GlobalISel] Ensure atomic loads always get assigned GPR destinations
The default register bank selection code for G_LOAD assumes that we
ought to use an FPR when the load is cast to a float/double. For
atomics, this isn't true; we should always use GPRs.

Without this patch, we crash in the following example:
https://godbolt.org/z/MThjas441

Also make the code a little more stylistically consistent while we're
here, and test some other weird cast combinations.

Differential Revision: https://reviews.llvm.org/D109771
parent e159d3cbfc
commit c8b3d7d6d6
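For context, here is a minimal C++ reproducer matching the shape of the crash. The exact source behind the godbolt link above is not preserved here, so treat this as an assumed equivalent, not the literal example:

    // Assumed-equivalent reproducer (not the literal godbolt source). With
    // clang targeting AArch64 and GlobalISel enabled (e.g. -fglobal-isel),
    // the seq_cst load below is emitted as `load atomic i64` followed by
    // `bitcast i64 ... to double` in the IR -- the exact pattern the old
    // register bank mapping code mishandled.
    #include <atomic>

    double load_atomic_double(const std::atomic<double> &A) {
      return A.load(std::memory_order_seq_cst);
    }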
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -15,6 +15,7 @@
 #include "AArch64InstrInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
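The newly added header is what the fix depends on: GenericMachineInstrs.h defines typed wrappers such as GLoad over generic machine instructions, which is what enables the `cast<GLoad>(MI).isAtomic()` check in the next hunk. A minimal sketch of the idiom; the helper name is hypothetical, not part of the patch:

    #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"

    using namespace llvm;

    // Hypothetical helper, not from the patch: GLoad supports the usual
    // isa/dyn_cast machinery, so non-G_LOAD instructions are filtered out
    // cleanly before querying the MachineMemOperand-backed isAtomic().
    static bool isAtomicGLoad(const MachineInstr &MI) {
      if (const auto *Load = dyn_cast<GLoad>(&MI))
        return Load->isAtomic();
      return false;
    }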
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -751,24 +752,33 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // for the greedy mode the cost of the cross bank copy will
     // offset this number.
     // FIXME: Should be derived from the scheduling model.
-    if (OpRegBankIdx[0] != PMI_FirstGPR)
+    if (OpRegBankIdx[0] != PMI_FirstGPR) {
       Cost = 2;
-    else
-      // Check if that load feeds fp instructions.
-      // In that case, we want the default mapping to be on FPR
-      // instead of blind map every scalar to GPR.
-      for (const MachineInstr &UseMI :
-           MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
-        // If we have at least one direct use in a FP instruction,
-        // assume this was a floating point load in the IR.
-        // If it was not, we would have had a bitcast before
-        // reaching that instruction.
-        // Int->FP conversion operations are also captured in onlyDefinesFP().
-        if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
-          OpRegBankIdx[0] = PMI_FirstFPR;
-          break;
-        }
-      }
+      break;
+    }
+
+    if (cast<GLoad>(MI).isAtomic()) {
+      // Atomics always use GPR destinations. Don't refine any further.
+      OpRegBankIdx[0] = PMI_FirstGPR;
+      break;
+    }
+
+    // Check if that load feeds fp instructions.
+    // In that case, we want the default mapping to be on FPR
+    // instead of blind map every scalar to GPR.
+    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+               [&](const MachineInstr &UseMI) {
+                 // If we have at least one direct use in a FP instruction,
+                 // assume this was a floating point load in the IR. If it was
+                 // not, we would have had a bitcast before reaching that
+                 // instruction.
+                 //
+                 // Int->FP conversion operations are also captured in
+                 // onlyDefinesFP().
+                 return onlyUsesFP(UseMI, MRI, TRI) ||
+                        onlyDefinesFP(UseMI, MRI, TRI);
+               }))
+      OpRegBankIdx[0] = PMI_FirstFPR;
     break;
   case TargetOpcode::G_STORE:
     // Check if that store is fed by fp instructions.
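One stylistic piece of the rewrite above: the hand-rolled use-list loop becomes a single llvm::any_of call from STLExtras.h, which returns true as soon as the predicate matches and so preserves the early-exit behavior of the old `break`. A small self-contained sketch of the idiom, with illustrative names that are not from the patch:

    #include <vector>

    #include "llvm/ADT/STLExtras.h"

    // llvm::any_of is a range-based wrapper over std::any_of: it stops at
    // the first element for which the lambda returns true, mirroring the
    // early break in the loop that the patch replaces.
    static bool anyNegative(const std::vector<int> &Values) {
      return llvm::any_of(Values, [](int V) { return V < 0; });
    }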
llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -2825,4 +2825,115 @@ define { i16, i1 } @cmpxchg_i16(i16* %ptr, i16 %desired, i16 %new) {
   ret { i16, i1 } %res
 }
+
+define internal double @bitcast_to_double(i64* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_double:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldar x8, [x0]
+; CHECK-NOLSE-NEXT:    fmov d0, x8
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_double:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldar x8, [x0]
+; CHECK-LSE-O1-NEXT:    fmov d0, x8
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_double:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldar x8, [x0]
+; CHECK-LSE-O0-NEXT:    fmov d0, x8
+; CHECK-LSE-O0-NEXT:    ret
+  %load = load atomic i64, i64* %ptr seq_cst, align 8
+  %bitcast = bitcast i64 %load to double
+  ret double %bitcast
+}
+
+define internal float @bitcast_to_float(i32* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_float:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldar w8, [x0]
+; CHECK-NOLSE-NEXT:    fmov s0, w8
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_float:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldar w8, [x0]
+; CHECK-LSE-O1-NEXT:    fmov s0, w8
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_float:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldar w8, [x0]
+; CHECK-LSE-O0-NEXT:    fmov s0, w8
+; CHECK-LSE-O0-NEXT:    ret
+  %load = load atomic i32, i32* %ptr seq_cst, align 8
+  %bitcast = bitcast i32 %load to float
+  ret float %bitcast
+}
+
+define internal half @bitcast_to_half(i16* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_half:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldarh w8, [x0]
+; CHECK-NOLSE-NEXT:    fmov s0, w8
+; CHECK-NOLSE-NEXT:    ; kill: def $h0 killed $h0 killed $s0
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_half:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldarh w8, [x0]
+; CHECK-LSE-O1-NEXT:    fmov s0, w8
+; CHECK-LSE-O1-NEXT:    ; kill: def $h0 killed $h0 killed $s0
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_half:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldarh w8, [x0]
+; CHECK-LSE-O0-NEXT:    fmov s0, w8
+; CHECK-LSE-O0-NEXT:    ; kill: def $h0 killed $h0 killed $s0
+; CHECK-LSE-O0-NEXT:    ret
+  %load = load atomic i16, i16* %ptr seq_cst, align 8
+  %bitcast = bitcast i16 %load to half
+  ret half %bitcast
+}
+
+define internal i64* @inttoptr(i64* %ptr) {
+; CHECK-NOLSE-LABEL: inttoptr:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldar x0, [x0]
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: inttoptr:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldar x0, [x0]
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: inttoptr:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldar x0, [x0]
+; CHECK-LSE-O0-NEXT:    ret
+  %load = load atomic i64, i64* %ptr seq_cst, align 8
+  %bitcast = inttoptr i64 %load to i64*
+  ret i64* %bitcast
+}
+
+define internal i64* @load_ptr(i64** %ptr) {
+; CHECK-NOLSE-LABEL: load_ptr:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldar x0, [x0]
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: load_ptr:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldar x0, [x0]
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: load_ptr:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldar x0, [x0]
+; CHECK-LSE-O0-NEXT:    ret
+  %load = load atomic i64*, i64** %ptr seq_cst, align 8
+  ret i64* %load
+}
 
 attributes #0 = { nounwind }