[AArch64] Allow pairing lr with other GPRs for WinCFI

This saves one instruction per prologue/epilogue for any function with
an odd number of callee-saved GPRs, but more importantly, allows such
functions to match the packed unwind format.

Differential Revision: https://reviews.llvm.org/D88699
This commit is contained in:
Martin Storsjö 2020-09-23 14:26:45 +03:00
parent 3780a4e568
commit 890af2f003
6 changed files with 236 additions and 13 deletions

View File

@ -1314,6 +1314,14 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
case AArch64::SEH_SaveRegP:
if (MI->getOperand(1).getImm() == 30 && MI->getOperand(0).getImm() >= 19 &&
MI->getOperand(0).getImm() <= 28) {
assert((MI->getOperand(0).getImm() - 19) % 2 == 0 &&
"Register paired with LR must be odd");
TS->EmitARM64WinCFISaveLRPair(MI->getOperand(0).getImm(),
MI->getOperand(2).getImm());
return;
}
assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
"Non-consecutive registers not allowed for save_regp");
TS->EmitARM64WinCFISaveRegP(MI->getOperand(0).getImm(),

View File

@ -1988,21 +1988,28 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
bool NeedsWinCFI) {
bool NeedsWinCFI, bool IsFirst) {
// If we are generating register pairs for a Windows function that requires
// EH support, then pair consecutive registers only. There are no unwind
// opcodes for saves/restores of non-consecutive register pairs.
// The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
// The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x,
// save_lrpair.
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
// TODO: LR can be paired with any register. We don't support this yet in
// the MCLayer. We need to add support for the save_lrpair unwind code.
if (Reg2 == AArch64::FP)
return true;
if (!NeedsWinCFI)
return false;
if (Reg2 == Reg1 + 1)
return false;
// If pairing a GPR with LR, the pair can be described by the save_lrpair
// opcode. If this is the first register pair, it would end up with a
// predecrement, but there's no save_lrpair_x opcode, so we can only do this
// if LR is paired with something other than the first register.
// The save_lrpair opcode requires the first register to be an odd one.
if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
(Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
return false;
return true;
}
@ -2011,9 +2018,10 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
/// LR and FP need to be allocated together when the frame needs to save
/// the frame-record. This means any other register pairing with LR is invalid.
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
bool UsesWinAAPCS, bool NeedsWinCFI, bool NeedsFrameRecord) {
bool UsesWinAAPCS, bool NeedsWinCFI,
bool NeedsFrameRecord, bool IsFirst) {
if (UsesWinAAPCS)
return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI);
return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst);
// If we need to store the frame record, don't pair any register
// with LR other than FP.
@ -2112,16 +2120,18 @@ static void computeCalleeSaveRegisterPairs(
// Add the next reg to the pair if it is in the same register class.
if (unsigned(i + RegInc) < Count) {
unsigned NextReg = CSI[i + RegInc].getReg();
bool IsFirst = i == FirstReg;
switch (RPI.Type) {
case RegPairInfo::GPR:
if (AArch64::GPR64RegClass.contains(NextReg) &&
!invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, NeedsWinCFI,
NeedsFrameRecord))
!invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
NeedsWinCFI, NeedsFrameRecord, IsFirst))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR64:
if (AArch64::FPR64RegClass.contains(NextReg) &&
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
IsFirst))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR128:

View File

@ -213,8 +213,7 @@ declare void @llvm.stackrestore(i8*)
; CHECK-LABEL: snprintf
; CHECK-DAG: sub sp, sp, #96
; CHECK-DAG: stp x19, x20, [sp, #16]
; CHECK-DAG: str x21, [sp, #32]
; CHECK-DAG: str x30, [sp, #40]
; CHECK-DAG: stp x21, x30, [sp, #32]
; CHECK-DAG: add x8, sp, #56
; CHECK-DAG: mov x19, x2
; CHECK-DAG: mov x20, x1
@ -232,8 +231,7 @@ declare void @llvm.stackrestore(i8*)
; CHECK-DAG: mov x3, x19
; CHECK-DAG: mov x4, xzr
; CHECK-DAG: bl __stdio_common_vsprintf
; CHECK-DAG: ldr x30, [sp, #40]
; CHECK-DAG: ldr x21, [sp, #32]
; CHECK-DAG: ldp x21, x30, [sp, #32]
; CHECK-DAG: ldp x19, x20, [sp, #16]
; CHECK-DAG: cmp w0, #0
; CHECK-DAG: csinv w0, w0, wzr, ge

View File

@ -0,0 +1,74 @@
# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \
# RUN: -stop-after=prologepilog | FileCheck %s
# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \
# RUN: | FileCheck --check-prefix=ASM %s
# Check that an odd callee-saved GPR is paired with lr
# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -4
# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -32
# CHECK-NEXT: frame-setup STPXi killed $x21, killed $lr, $sp, 2
# CHECK-NEXT: frame-setup SEH_SaveRegP 21, 30, 16
# CHECK-NEXT: frame-setup SEH_PrologEnd
# ASM: stp x19, x20, [sp, #-32]!
# ASM-NEXT: .seh_save_regp_x x19, 32
# ASM-NEXT: stp x21, x30, [sp, #16]
# ASM-NEXT: .seh_save_lrpair x21, 16
# ASM-NEXT: .seh_endprologue
--- |
define dso_local i32 @func(i32 %a) { ret i32 %a }
declare dso_local i32 @other()
...
---
name: func
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 4
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x21, $x19, $x20
BL @other, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
$x19 = ADDXrr $x0, $x0
$x20 = ADDXrr $x19, $x0
$x21 = ADDXrr $x20, killed $x19
$x0 = ADDXrr $x0, killed $x21
RET_ReallyLR
...

View File

@ -0,0 +1,65 @@
# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \
# RUN: -stop-after=prologepilog | FileCheck %s
# Check that lr isn't paired with a GPR if it's the first pair, as
# that can't be described as an SEH opcode if combined with predecrement.
# CHECK: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -16
# CHECK-NEXT: frame-setup SEH_SaveReg_X 19, -16
# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 1
# CHECK-NEXT: frame-setup SEH_SaveReg 30, 8
# CHECK-NEXT: frame-setup SEH_PrologEnd
--- |
define dso_local i32 @func(i32 %a) { ret i32 %a }
declare dso_local i32 @other()
...
---
name: func
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 4
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x19
BL @other, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
$x19 = ADDXrr $x0, $x0
$x0 = ADDXrr $x0, killed $x19
RET_ReallyLR
...

View File

@ -0,0 +1,68 @@
# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \
# RUN: -stop-after=prologepilog | FileCheck %s
# Check that an unpaired register that is even isn't paired with lr.
# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -4
# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -32
# CHECK-NEXT: frame-setup STRXui killed $x22, $sp, 2
# CHECK-NEXT: frame-setup SEH_SaveReg 22, 16
# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 3
# CHECK-NEXT: frame-setup SEH_SaveReg 30, 24
# CHECK-NEXT: frame-setup SEH_PrologEnd
--- |
define dso_local i32 @func(i32 %a) { ret i32 %a }
declare dso_local i32 @other()
...
---
name: func
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 4
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x22, $x19, $x20
BL @other, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
$x19 = ADDXrr $x0, $x0
$x20 = ADDXrr $x19, $x0
$x22 = ADDXrr $x20, killed $x19
$x0 = ADDXrr $x0, killed $x22
RET_ReallyLR
...