forked from OSchip/llvm-project
[AArch64][GlobalISel] Avoid copies to target register bank for subregister copies
Previously, any copy from a register bigger than the destination was handled in two steps: 1) a copy to a same-sized register in the destination register bank, followed by 2) a subregister copy of that to the destination. This fails for copies from 128-bit FPRs to GPRs because the GPR register bank can't accommodate 128-bit values. Instead of special-casing such copies to perform the truncation beforehand in the source register bank, generalize this: a) Perform a subregister copy straight from the source register whenever possible. This results in shorter MIR and fixes the above problem. b) Perform a full copy to the target bank and then do a subregister copy only if the source bank can't support the target's size, e.g. a GPR to 8-bit FPR copy. Patch by Raul Tambre (tambre)! Differential Revision: https://reviews.llvm.org/D75421
This commit is contained in:
parent
3e851f4a68
commit
ef4282e0ee
|
@ -448,6 +448,18 @@ static bool getSubRegForClass(const TargetRegisterClass *RC,
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Returns the minimum size, in bits, that the given register bank \p RB can hold: 32 for the GPR bank and 8 for the FPR bank. Any other bank ID hits llvm_unreachable.
|
||||
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
|
||||
switch (RB.getID()) {
|
||||
case AArch64::GPRRegBankID:
|
||||
return 32;
|
||||
case AArch64::FPRRegBankID:
|
||||
return 8;
|
||||
default:
|
||||
llvm_unreachable("Tried to get minimum size for unknown register bank.");
|
||||
}
|
||||
}
|
||||
|
||||
/// Check whether \p I is a currently unsupported binary operation:
|
||||
/// - it has an unsized type
|
||||
/// - an operand is not a vreg
|
||||
|
@ -636,23 +648,20 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
|
|||
}
|
||||
#endif
|
||||
|
||||
/// Helper function for selectCopy. Inserts a subregister copy from
|
||||
/// \p *From to \p *To, linking it up to \p I.
|
||||
/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
|
||||
/// to \p *To.
|
||||
///
|
||||
/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
|
||||
///
|
||||
/// CopyReg (From class) = COPY SrcReg
|
||||
/// SubRegCopy (To class) = COPY CopyReg:SubReg
|
||||
/// Dst = COPY SubRegCopy
|
||||
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
|
||||
/// E.g "To = COPY SrcReg:SubReg"
|
||||
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
|
||||
const RegisterBankInfo &RBI, Register SrcReg,
|
||||
const TargetRegisterClass *From,
|
||||
const TargetRegisterClass *To,
|
||||
unsigned SubReg) {
|
||||
const TargetRegisterClass *To, unsigned SubReg) {
|
||||
assert(SrcReg.isValid() && "Expected a valid source register?");
|
||||
assert(To && "Destination register class cannot be null");
|
||||
assert(SubReg && "Expected a valid subregister");
|
||||
|
||||
MachineIRBuilder MIB(I);
|
||||
auto Copy = MIB.buildCopy({From}, {SrcReg});
|
||||
auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
|
||||
.addReg(Copy.getReg(0), 0, SubReg);
|
||||
auto SubRegCopy =
|
||||
MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
|
||||
MachineOperand &RegOp = I.getOperand(1);
|
||||
RegOp.setReg(SubRegCopy.getReg(0));
|
||||
|
||||
|
@ -747,25 +756,28 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
|
|||
unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
|
||||
unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
|
||||
|
||||
// If we're doing a cross-bank copy on different-sized registers, we need
|
||||
// to do a bit more work.
|
||||
// If the source register is bigger than the destination we need to perform
|
||||
// a subregister copy.
|
||||
if (SrcSize > DstSize) {
|
||||
// We're doing a cross-bank copy into a smaller register. We need a
|
||||
// subregister copy. First, get a register class that's on the same bank
|
||||
// as the destination, but the same size as the source.
|
||||
const TargetRegisterClass *SubregRC =
|
||||
getMinClassForRegBank(DstRegBank, SrcSize, true);
|
||||
assert(SubregRC && "Didn't get a register class for subreg?");
|
||||
|
||||
// Get the appropriate subregister for the destination.
|
||||
unsigned SubReg = 0;
|
||||
if (!getSubRegForClass(DstRC, TRI, SubReg)) {
|
||||
LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
|
||||
return false;
|
||||
|
||||
// If the source bank doesn't support a subregister copy small enough,
|
||||
// then we first need to copy to the destination bank.
|
||||
if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
|
||||
const TargetRegisterClass *SubregRC = getMinClassForRegBank(
|
||||
DstRegBank, SrcSize, /* GetAllRegSet = */ true);
|
||||
getSubRegForClass(DstRC, TRI, SubReg);
|
||||
|
||||
MachineIRBuilder MIB(I);
|
||||
auto Copy = MIB.buildCopy({SubregRC}, {SrcReg});
|
||||
copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
|
||||
} else {
|
||||
const TargetRegisterClass *SubregRC = getMinClassForRegBank(
|
||||
SrcRegBank, DstSize, /* GetAllRegSet = */ true);
|
||||
getSubRegForClass(SubregRC, TRI, SubReg);
|
||||
copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
|
||||
}
|
||||
|
||||
// Now, insert a subregister copy using the new register class.
|
||||
selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
|
||||
return CheckCopy();
|
||||
}
|
||||
|
||||
|
|
|
@ -412,10 +412,9 @@ body: |
|
|||
; CHECK: liveins: $x0
|
||||
; CHECK: %base:gpr64sp = COPY $x0
|
||||
; CHECK: %imp:gpr64 = IMPLICIT_DEF
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %imp
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
|
||||
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
|
||||
; CHECK: %load:gpr64 = LDRXroW %base, [[COPY2]], 0, 1 :: (load 8)
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %imp.sub_32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
|
||||
; CHECK: %load:gpr64 = LDRXroW %base, [[COPY1]], 0, 1 :: (load 8)
|
||||
; CHECK: $x1 = COPY %load
|
||||
; CHECK: RET_ReallyLR implicit $x1
|
||||
%base:gpr(p0) = COPY $x0
|
||||
|
|
|
@ -85,10 +85,9 @@ body: |
|
|||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: %copy:gpr64 = COPY $x0
|
||||
; CHECK: %fold_me:gpr64sp = ANDXri %copy, 4098
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
|
||||
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
|
||||
; CHECK: TBNZW [[COPY2]], 3, %bb.1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
|
||||
; CHECK: TBNZW [[COPY1]], 3, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
|
|
|
@ -113,10 +113,9 @@ body: |
|
|||
; CHECK: %copy:gpr32 = COPY $w0
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %copy, %subreg.sub_32
|
||||
; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %zext
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
|
||||
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
|
||||
; CHECK: TBNZW [[COPY2]], 3, %bb.1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %zext.sub_32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
|
||||
; CHECK: TBNZW [[COPY1]], 3, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: $x0 = COPY %zext
|
||||
|
|
|
@ -49,10 +49,9 @@ body: |
|
|||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: %copy:gpr64 = COPY $x0
|
||||
; CHECK: %fold_me:gpr64 = UBFMXri %copy, 59, 58
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
|
||||
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
|
||||
; CHECK: TBNZW [[COPY2]], 3, %bb.1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
|
||||
; CHECK: TBNZW [[COPY1]], 3, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
|
@ -87,10 +86,9 @@ body: |
|
|||
; CHECK: %copy:gpr64 = COPY $x0
|
||||
; CHECK: %fold_cst:gpr64 = MOVi64imm -5
|
||||
; CHECK: %fold_me:gpr64 = LSLVXr %copy, %fold_cst
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
|
||||
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
|
||||
; CHECK: TBNZW [[COPY2]], 3, %bb.1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
|
||||
; CHECK: TBNZW [[COPY1]], 3, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
|
@ -125,10 +123,9 @@ body: |
|
|||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: %copy:gpr64 = COPY $x0
|
||||
; CHECK: %shl:gpr64 = UBFMXri %copy, 62, 61
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %shl
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
|
||||
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
|
||||
; CHECK: TBNZW [[COPY2]], 3, %bb.1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %shl.sub_32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
|
||||
; CHECK: TBNZW [[COPY1]], 3, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: %second_use:gpr64sp = ORRXri %shl, 8000
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
|
||||
---
|
||||
name: test_128_fpr_truncation
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: test_128_fpr_truncation
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
|
||||
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRQui]].ssub
|
||||
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
|
||||
; CHECK: TBNZW [[COPY2]], 0, %bb.1
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
bb.0:
|
||||
liveins: $x0
|
||||
%1:gpr(p0) = COPY $x0
|
||||
%3:gpr(s64) = G_CONSTANT i64 1
|
||||
%5:gpr(s64) = G_CONSTANT i64 0
|
||||
%0:fpr(s128) = G_LOAD %1:gpr(p0) :: (load 16)
|
||||
%2:fpr(s64) = G_TRUNC %0:fpr(s128)
|
||||
%8:gpr(s64) = COPY %2:fpr(s64)
|
||||
%4:gpr(s64) = G_AND %8:gpr, %3:gpr
|
||||
%7:gpr(s32) = G_ICMP intpred(ne), %4:gpr(s64), %5:gpr
|
||||
%6:gpr(s1) = G_TRUNC %7:gpr(s32)
|
||||
G_BRCOND %6:gpr(s1), %bb.1
|
||||
|
||||
bb.1:
|
||||
RET_ReallyLR
|
||||
...
|
Loading…
Reference in New Issue