forked from OSchip/llvm-project
[AArch64] Allow pairing lr with other GPRs for WinCFI
This saves one instruction per prologue/epilogue for any function with an odd number of callee-saved GPRs, but more importantly, allows such functions to match the packed unwind format. Differential Revision: https://reviews.llvm.org/D88699
This commit is contained in:
parent
3780a4e568
commit
890af2f003
|
@ -1314,6 +1314,14 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case AArch64::SEH_SaveRegP:
|
case AArch64::SEH_SaveRegP:
|
||||||
|
if (MI->getOperand(1).getImm() == 30 && MI->getOperand(0).getImm() >= 19 &&
|
||||||
|
MI->getOperand(0).getImm() <= 28) {
|
||||||
|
assert((MI->getOperand(0).getImm() - 19) % 2 == 0 &&
|
||||||
|
"Register paired with LR must be odd");
|
||||||
|
TS->EmitARM64WinCFISaveLRPair(MI->getOperand(0).getImm(),
|
||||||
|
MI->getOperand(2).getImm());
|
||||||
|
return;
|
||||||
|
}
|
||||||
assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
|
assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
|
||||||
"Non-consecutive registers not allowed for save_regp");
|
"Non-consecutive registers not allowed for save_regp");
|
||||||
TS->EmitARM64WinCFISaveRegP(MI->getOperand(0).getImm(),
|
TS->EmitARM64WinCFISaveRegP(MI->getOperand(0).getImm(),
|
||||||
|
|
|
@ -1988,21 +1988,28 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
|
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
|
||||||
bool NeedsWinCFI) {
|
bool NeedsWinCFI, bool IsFirst) {
|
||||||
// If we are generating register pairs for a Windows function that requires
|
// If we are generating register pairs for a Windows function that requires
|
||||||
// EH support, then pair consecutive registers only. There are no unwind
|
// EH support, then pair consecutive registers only. There are no unwind
|
||||||
// opcodes for saves/restores of non-consecutive register pairs.
|
// opcodes for saves/restores of non-consecutive register pairs.
|
||||||
// The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
|
// The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x,
|
||||||
|
// save_lrpair.
|
||||||
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
|
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
|
||||||
|
|
||||||
// TODO: LR can be paired with any register. We don't support this yet in
|
|
||||||
// the MCLayer. We need to add support for the save_lrpair unwind code.
|
|
||||||
if (Reg2 == AArch64::FP)
|
if (Reg2 == AArch64::FP)
|
||||||
return true;
|
return true;
|
||||||
if (!NeedsWinCFI)
|
if (!NeedsWinCFI)
|
||||||
return false;
|
return false;
|
||||||
if (Reg2 == Reg1 + 1)
|
if (Reg2 == Reg1 + 1)
|
||||||
return false;
|
return false;
|
||||||
|
// If pairing a GPR with LR, the pair can be described by the save_lrpair
|
||||||
|
// opcode. If this is the first register pair, it would end up with a
|
||||||
|
// predecrement, but there's no save_lrpair_x opcode, so we can only do this
|
||||||
|
// if LR is paired with something other than the first register.
|
||||||
|
// The save_lrpair opcode requires the first register to be an odd one.
|
||||||
|
if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
|
||||||
|
(Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
|
||||||
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2011,9 +2018,10 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
|
||||||
/// LR and FP need to be allocated together when the frame needs to save
|
/// LR and FP need to be allocated together when the frame needs to save
|
||||||
/// the frame-record. This means any other register pairing with LR is invalid.
|
/// the frame-record. This means any other register pairing with LR is invalid.
|
||||||
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
|
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
|
||||||
bool UsesWinAAPCS, bool NeedsWinCFI, bool NeedsFrameRecord) {
|
bool UsesWinAAPCS, bool NeedsWinCFI,
|
||||||
|
bool NeedsFrameRecord, bool IsFirst) {
|
||||||
if (UsesWinAAPCS)
|
if (UsesWinAAPCS)
|
||||||
return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI);
|
return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst);
|
||||||
|
|
||||||
// If we need to store the frame record, don't pair any register
|
// If we need to store the frame record, don't pair any register
|
||||||
// with LR other than FP.
|
// with LR other than FP.
|
||||||
|
@ -2112,16 +2120,18 @@ static void computeCalleeSaveRegisterPairs(
|
||||||
// Add the next reg to the pair if it is in the same register class.
|
// Add the next reg to the pair if it is in the same register class.
|
||||||
if (unsigned(i + RegInc) < Count) {
|
if (unsigned(i + RegInc) < Count) {
|
||||||
unsigned NextReg = CSI[i + RegInc].getReg();
|
unsigned NextReg = CSI[i + RegInc].getReg();
|
||||||
|
bool IsFirst = i == FirstReg;
|
||||||
switch (RPI.Type) {
|
switch (RPI.Type) {
|
||||||
case RegPairInfo::GPR:
|
case RegPairInfo::GPR:
|
||||||
if (AArch64::GPR64RegClass.contains(NextReg) &&
|
if (AArch64::GPR64RegClass.contains(NextReg) &&
|
||||||
!invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, NeedsWinCFI,
|
!invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
|
||||||
NeedsFrameRecord))
|
NeedsWinCFI, NeedsFrameRecord, IsFirst))
|
||||||
RPI.Reg2 = NextReg;
|
RPI.Reg2 = NextReg;
|
||||||
break;
|
break;
|
||||||
case RegPairInfo::FPR64:
|
case RegPairInfo::FPR64:
|
||||||
if (AArch64::FPR64RegClass.contains(NextReg) &&
|
if (AArch64::FPR64RegClass.contains(NextReg) &&
|
||||||
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
|
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
|
||||||
|
IsFirst))
|
||||||
RPI.Reg2 = NextReg;
|
RPI.Reg2 = NextReg;
|
||||||
break;
|
break;
|
||||||
case RegPairInfo::FPR128:
|
case RegPairInfo::FPR128:
|
||||||
|
|
|
@ -213,8 +213,7 @@ declare void @llvm.stackrestore(i8*)
|
||||||
; CHECK-LABEL: snprintf
|
; CHECK-LABEL: snprintf
|
||||||
; CHECK-DAG: sub sp, sp, #96
|
; CHECK-DAG: sub sp, sp, #96
|
||||||
; CHECK-DAG: stp x19, x20, [sp, #16]
|
; CHECK-DAG: stp x19, x20, [sp, #16]
|
||||||
; CHECK-DAG: str x21, [sp, #32]
|
; CHECK-DAG: stp x21, x30, [sp, #32]
|
||||||
; CHECK-DAG: str x30, [sp, #40]
|
|
||||||
; CHECK-DAG: add x8, sp, #56
|
; CHECK-DAG: add x8, sp, #56
|
||||||
; CHECK-DAG: mov x19, x2
|
; CHECK-DAG: mov x19, x2
|
||||||
; CHECK-DAG: mov x20, x1
|
; CHECK-DAG: mov x20, x1
|
||||||
|
@ -232,8 +231,7 @@ declare void @llvm.stackrestore(i8*)
|
||||||
; CHECK-DAG: mov x3, x19
|
; CHECK-DAG: mov x3, x19
|
||||||
; CHECK-DAG: mov x4, xzr
|
; CHECK-DAG: mov x4, xzr
|
||||||
; CHECK-DAG: bl __stdio_common_vsprintf
|
; CHECK-DAG: bl __stdio_common_vsprintf
|
||||||
; CHECK-DAG: ldr x30, [sp, #40]
|
; CHECK-DAG: ldp x21, x30, [sp, #32]
|
||||||
; CHECK-DAG: ldr x21, [sp, #32]
|
|
||||||
; CHECK-DAG: ldp x19, x20, [sp, #16]
|
; CHECK-DAG: ldp x19, x20, [sp, #16]
|
||||||
; CHECK-DAG: cmp w0, #0
|
; CHECK-DAG: cmp w0, #0
|
||||||
; CHECK-DAG: csinv w0, w0, wzr, ge
|
; CHECK-DAG: csinv w0, w0, wzr, ge
|
||||||
|
|
|
@ -0,0 +1,74 @@
|
||||||
|
# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \
|
||||||
|
# RUN: -stop-after=prologepilog | FileCheck %s
|
||||||
|
# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \
|
||||||
|
# RUN: | FileCheck --check-prefix=ASM %s
|
||||||
|
|
||||||
|
# Check that an odd callee-saved GPR is paired with lr
|
||||||
|
|
||||||
|
# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -4
|
||||||
|
# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -32
|
||||||
|
# CHECK-NEXT: frame-setup STPXi killed $x21, killed $lr, $sp, 2
|
||||||
|
# CHECK-NEXT: frame-setup SEH_SaveRegP 21, 30, 16
|
||||||
|
# CHECK-NEXT: frame-setup SEH_PrologEnd
|
||||||
|
|
||||||
|
# ASM: stp x19, x20, [sp, #-32]!
|
||||||
|
# ASM-NEXT: .seh_save_regp_x x19, 32
|
||||||
|
# ASM-NEXT: stp x21, x30, [sp, #16]
|
||||||
|
# ASM-NEXT: .seh_save_lrpair x21, 16
|
||||||
|
# ASM-NEXT: .seh_endprologue
|
||||||
|
|
||||||
|
--- |
|
||||||
|
|
||||||
|
define dso_local i32 @func(i32 %a) { ret i32 %a }
|
||||||
|
declare dso_local i32 @other()
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: func
|
||||||
|
alignment: 4
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
failedISel: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
hasWinCFI: false
|
||||||
|
registers: []
|
||||||
|
liveins: []
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 4
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 0
|
||||||
|
cvBytesOfCalleeSavedRegisters: 0
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
localFrameSize: 4
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack: []
|
||||||
|
stack: []
|
||||||
|
callSites: []
|
||||||
|
constants: []
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $x0, $x21, $x19, $x20
|
||||||
|
|
||||||
|
BL @other, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
|
||||||
|
$x19 = ADDXrr $x0, $x0
|
||||||
|
$x20 = ADDXrr $x19, $x0
|
||||||
|
$x21 = ADDXrr $x20, killed $x19
|
||||||
|
$x0 = ADDXrr $x0, killed $x21
|
||||||
|
|
||||||
|
RET_ReallyLR
|
||||||
|
|
||||||
|
...
|
|
@ -0,0 +1,65 @@
|
||||||
|
# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \
|
||||||
|
# RUN: -stop-after=prologepilog | FileCheck %s
|
||||||
|
|
||||||
|
# Check that lr isn't paired with a GPR if it's the first pair, as
|
||||||
|
# that can't be described as a SEH opcode if combined with predecrement.
|
||||||
|
|
||||||
|
# CHECK: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -16
|
||||||
|
# CHECK-NEXT: frame-setup SEH_SaveReg_X 19, -16
|
||||||
|
# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 1
|
||||||
|
# CHECK-NEXT: frame-setup SEH_SaveReg 30, 8
|
||||||
|
# CHECK-NEXT: frame-setup SEH_PrologEnd
|
||||||
|
|
||||||
|
--- |
|
||||||
|
|
||||||
|
define dso_local i32 @func(i32 %a) { ret i32 %a }
|
||||||
|
declare dso_local i32 @other()
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: func
|
||||||
|
alignment: 4
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
failedISel: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
hasWinCFI: false
|
||||||
|
registers: []
|
||||||
|
liveins: []
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 4
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 0
|
||||||
|
cvBytesOfCalleeSavedRegisters: 0
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
localFrameSize: 4
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack: []
|
||||||
|
stack: []
|
||||||
|
callSites: []
|
||||||
|
constants: []
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $x0, $x19
|
||||||
|
|
||||||
|
BL @other, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
|
||||||
|
$x19 = ADDXrr $x0, $x0
|
||||||
|
$x0 = ADDXrr $x0, killed $x19
|
||||||
|
|
||||||
|
RET_ReallyLR
|
||||||
|
|
||||||
|
...
|
|
@ -0,0 +1,68 @@
|
||||||
|
# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \
|
||||||
|
# RUN: -stop-after=prologepilog | FileCheck %s
|
||||||
|
|
||||||
|
# Check that an unpaired register that is even isn't paired with lr.
|
||||||
|
|
||||||
|
# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -4
|
||||||
|
# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -32
|
||||||
|
# CHECK-NEXT: frame-setup STRXui killed $x22, $sp, 2
|
||||||
|
# CHECK-NEXT: frame-setup SEH_SaveReg 22, 16
|
||||||
|
# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 3
|
||||||
|
# CHECK-NEXT: frame-setup SEH_SaveReg 30, 24
|
||||||
|
# CHECK-NEXT: frame-setup SEH_PrologEnd
|
||||||
|
|
||||||
|
--- |
|
||||||
|
|
||||||
|
define dso_local i32 @func(i32 %a) { ret i32 %a }
|
||||||
|
declare dso_local i32 @other()
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: func
|
||||||
|
alignment: 4
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
failedISel: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
hasWinCFI: false
|
||||||
|
registers: []
|
||||||
|
liveins: []
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 4
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 0
|
||||||
|
cvBytesOfCalleeSavedRegisters: 0
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
localFrameSize: 4
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack: []
|
||||||
|
stack: []
|
||||||
|
callSites: []
|
||||||
|
constants: []
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $x0, $x22, $x19, $x20
|
||||||
|
|
||||||
|
BL @other, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
|
||||||
|
$x19 = ADDXrr $x0, $x0
|
||||||
|
$x20 = ADDXrr $x19, $x0
|
||||||
|
$x22 = ADDXrr $x20, killed $x19
|
||||||
|
$x0 = ADDXrr $x0, killed $x22
|
||||||
|
|
||||||
|
RET_ReallyLR
|
||||||
|
|
||||||
|
...
|
Loading…
Reference in New Issue