[MC] [Win64EH] Write packed ARM64 epilogues if possible

This gives a pretty substantial size reduction; for a 6.5 MB
DLL with 300 KB .xdata, the .xdata shrinks by 66 KB.

Differential Revision: https://reviews.llvm.org/D87369
This commit is contained in:
Martin Storsjö 2020-09-08 00:00:07 +03:00
parent 700fbe591a
commit 1308bb99e0
7 changed files with 266 additions and 63 deletions

View File

@ -26,6 +26,14 @@ struct Instruction {
// Construct an instruction record: Op is stored as Operation, L as the
// Label the instruction is attached to, Reg as Register and Off as Offset.
Instruction(unsigned Op, MCSymbol *L, unsigned Reg, unsigned Off)
: Label(L), Offset(Off), Register(Reg), Operation(Op) {}
bool operator==(const Instruction &I) const {
  // Two instructions are considered equal when they describe the same
  // operation on the same register with the same offset. The Label is
  // deliberately left out of the comparison, so the same operation applied
  // at a different spot in the code still compares equal.
  if (Operation != I.Operation)
    return false;
  if (Register != I.Register)
    return false;
  return Offset == I.Offset;
}
// Negation of operator==; like it, this ignores the Label.
bool operator!=(const Instruction &I) const { return !operator==(I); }
};
struct FrameInfo {

View File

@ -264,8 +264,7 @@ static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
return value;
}
static uint32_t
ARM64CountOfUnwindCodes(const std::vector<WinEH::Instruction> &Insns) {
static uint32_t ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) {
uint32_t Count = 0;
for (const auto &I : Insns) {
switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
@ -553,18 +552,23 @@ static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions,
// Convert 2-byte opcodes into equivalent 1-byte ones.
if (Inst.Operation == Win64EH::UOP_SaveRegP && Inst.Register == 29) {
Inst.Operation = Win64EH::UOP_SaveFPLR;
Inst.Register = -1;
} else if (Inst.Operation == Win64EH::UOP_SaveRegPX &&
Inst.Register == 29) {
Inst.Operation = Win64EH::UOP_SaveFPLRX;
Inst.Register = -1;
} else if (Inst.Operation == Win64EH::UOP_SaveRegPX &&
Inst.Register == 19 && Inst.Offset <= 248) {
Inst.Operation = Win64EH::UOP_SaveR19R20X;
Inst.Register = -1;
} else if (Inst.Operation == Win64EH::UOP_AddFP && Inst.Offset == 0) {
Inst.Operation = Win64EH::UOP_SetFP;
} else if (Inst.Operation == Win64EH::UOP_SaveRegP &&
Inst.Register == PrevRegister + 2 &&
Inst.Offset == PrevOffset + 16) {
Inst.Operation = Win64EH::UOP_SaveNext;
Inst.Register = -1;
Inst.Offset = 0;
// Intentionally not creating UOP_SaveNext for float register pairs,
// as current versions of Windows (up to at least 20.04) are buggy
// regarding SaveNext for float pairs.
@ -601,6 +605,47 @@ static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions,
}
}
// Check whether the epilog of the function described by info can be packed,
// i.e. described by reusing the tail of the prolog's unwind codes instead of
// emitting a separate epilog scope with its own copy of the codes.
//
// streamer is used for resolving the distance between the epilog start label
// and the end of the function. PrologCodeBytes is the size of the prolog's
// unwind codes as computed by ARM64CountOfUnwindCodes.
//
// Returns -1 if the epilog can't be packed. Otherwise returns the size (as
// computed by ARM64CountOfUnwindCodes) of the prolog opcodes that are not
// shared with the epilog; the caller stores this in the header in place of
// the epilog count and sets the E bit. On success, info->EpilogMap is
// cleared, so that no separate epilog scopes are processed afterwards.
static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info,
                             int PrologCodeBytes) {
  // Can only pack if there's one single epilog
  if (info->EpilogMap.size() != 1)
    return -1;

  const std::vector<WinEH::Instruction> &Epilog =
      info->EpilogMap.begin()->second;

  // Can pack if the epilog is a subset of the prolog but not vice versa
  if (Epilog.size() > info->Instructions.size())
    return -1;

  // Check that the epilog actually is a perfect match for the end (backwards)
  // of the prolog. The comparison ignores the labels of the instructions.
  for (int I = Epilog.size() - 1; I >= 0; I--) {
    if (info->Instructions[I] != Epilog[Epilog.size() - 1 - I])
      return -1;
  }

  // Check that the epilog actually is at the very end of the function,
  // otherwise it can't be packed.
  uint32_t DistanceFromEnd = (uint32_t)GetAbsDifference(
      streamer, info->FuncletOrFuncEnd, info->EpilogMap.begin()->first);
  if (DistanceFromEnd / 4 != Epilog.size())
    return -1;

  // Count the unwind code bytes of the prolog instructions that aren't shared
  // with the epilog. When the epilog matches the whole prolog, this range is
  // empty; form the pointer via data() rather than by indexing the vector at
  // size(), which is out of bounds for operator[].
  const size_t NumShared = Epilog.size();
  int Offset = ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction>(
      info->Instructions.data() + NumShared,
      info->Instructions.size() - NumShared));

  // Check that the offset and prolog size fits in the first word; it's
  // unclear whether the epilog count in the extension word can be taken
  // as packed epilog offset.
  if (Offset > 31 || PrologCodeBytes > 124)
    return -1;

  info->EpilogMap.clear();
  return Offset;
}
// Populate the .xdata section. The format of .xdata on ARM64 is documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
@ -679,6 +724,8 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions);
uint32_t TotalCodeBytes = PrologCodeBytes;
int PackedEpilogOffset = checkPackedEpilog(streamer, info, PrologCodeBytes);
// Process epilogs.
MapVector<MCSymbol *, uint32_t> EpilogInfo;
// Epilogs processed so far.
@ -711,15 +758,17 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
uint32_t CodeWordsMod = TotalCodeBytes % 4;
if (CodeWordsMod)
CodeWords++;
uint32_t EpilogCount = info->EpilogMap.size();
uint32_t EpilogCount =
PackedEpilogOffset >= 0 ? PackedEpilogOffset : info->EpilogMap.size();
bool ExtensionWord = EpilogCount > 31 || TotalCodeBytes > 124;
if (!ExtensionWord) {
row1 |= (EpilogCount & 0x1F) << 22;
row1 |= (CodeWords & 0x1F) << 27;
}
// E is always 0 right now, TODO: packed epilog setup
if (info->HandlesExceptions) // X
row1 |= 1 << 20;
if (PackedEpilogOffset >= 0) // E
row1 |= 1 << 21;
row1 |= FuncLength & 0x3FFFF;
streamer.emitInt32(row1);

View File

@ -8,9 +8,9 @@
# CHECK-NEXT: FunctionLength: 124
# CHECK-NEXT: Version: 0
# CHECK-NEXT: ExceptionData: No
# CHECK-NEXT: EpiloguePacked: No
# CHECK-NEXT: EpilogueScopes: 1
# CHECK-NEXT: ByteCodeLength: 32
# CHECK-NEXT: EpiloguePacked: Yes
# CHECK-NEXT: EpilogueOffset: 0
# CHECK-NEXT: ByteCodeLength: 16
# CHECK-NEXT: Prologue [
# CHECK-NEXT: 0xc80c ; stp x19, x20, [sp, #96]
# CHECK-NEXT: 0xc88a ; stp x21, x22, [sp, #80]
@ -21,22 +21,6 @@
# CHECK-NEXT: 0xda8d ; stp d10, d11, [sp, #-112]!
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
# CHECK-NEXT: EpilogueScopes [
# CHECK-NEXT: EpilogueScope {
# CHECK-NEXT: StartOffset: 23
# CHECK-NEXT: EpilogueStartIndex: 15
# CHECK-NEXT: Opcodes [
# CHECK-NEXT: 0xc80c ; ldp x19, x20, [sp, #96]
# CHECK-NEXT: 0xc88a ; ldp x21, x22, [sp, #80]
# CHECK-NEXT: 0xc908 ; ldp x23, x24, [sp, #64]
# CHECK-NEXT: 0xc986 ; ldp x25, x26, [sp, #48]
# CHECK-NEXT: 0xca04 ; ldp x27, x28, [sp, #32]
# CHECK-NEXT: 0xd802 ; ldp d8, d9, [sp, #16]
# CHECK-NEXT: 0xda8d ; ldp d10, d11, [sp], #112
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
# CHECK-NEXT: }
# CHECK-NEXT: ]
# CHECK-NEXT: }
...
---

View File

@ -6,25 +6,19 @@
# CHECK-NEXT: FunctionLength: 92
# CHECK-NEXT: Version: 0
# CHECK-NEXT: ExceptionData: No
# CHECK-NEXT: EpiloguePacked: No
# CHECK-NEXT: EpilogueScopes: 1
# CHECK-NEXT: ByteCodeLength: 8
# CHECK-NEXT: EpiloguePacked: Yes
# CHECK-NEXT: EpilogueOffset: 1
# CHECK-NEXT: ByteCodeLength: 4
# CHECK-NEXT: Prologue [
# CHECK-NEXT: 0x02 ; sub sp, #32
# CHECK-NEXT: 0xe1 ; mov fp, sp
# CHECK-NEXT: 0x81 ; stp x29, x30, [sp, #-16]!
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
# CHECK-NEXT: EpilogueScopes [
# CHECK-NEXT: EpilogueScope {
# CHECK-NEXT: StartOffset: 20
# CHECK-NEXT: EpilogueStartIndex: 4
# CHECK-NEXT: Opcodes [
# CHECK-NEXT: 0xe1 ; mov sp, fp
# CHECK-NEXT: 0x81 ; ldp x29, x30, [sp], #16
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
# CHECK-NEXT: }
# CHECK-NEXT: Epilogue [
# CHECK-NEXT: 0xe1 ; mov sp, fp
# CHECK-NEXT: 0x81 ; ldp x29, x30, [sp], #16
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
# CHECK-NEXT: }
...

View File

@ -6,9 +6,9 @@
# CHECK-NEXT: FunctionLength: 72
# CHECK-NEXT: Version: 0
# CHECK-NEXT: ExceptionData: No
# CHECK-NEXT: EpiloguePacked: No
# CHECK-NEXT: EpilogueScopes: 1
# CHECK-NEXT: ByteCodeLength: 16
# CHECK-NEXT: EpiloguePacked: Yes
# CHECK-NEXT: EpilogueOffset: 0
# CHECK-NEXT: ByteCodeLength: 8
# CHECK-NEXT: Prologue [
# CHECK-NEXT: 0xe204 ; add fp, sp, #32
# CHECK-NEXT: 0x44 ; stp x29, x30, [sp, #32]
@ -16,19 +16,6 @@
# CHECK-NEXT: 0xcc85 ; stp x21, x22, [sp, #-48]!
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
# CHECK-NEXT: EpilogueScopes [
# CHECK-NEXT: EpilogueScope {
# CHECK-NEXT: StartOffset: 13
# CHECK-NEXT: EpilogueStartIndex: 8
# CHECK-NEXT: Opcodes [
# CHECK-NEXT: 0xe204 ; sub sp, fp, #32
# CHECK-NEXT: 0x44 ; ldp x29, x30, [sp, #32]
# CHECK-NEXT: 0xc802 ; ldp x19, x20, [sp, #16]
# CHECK-NEXT: 0xcc85 ; ldp x21, x22, [sp], #48
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
# CHECK-NEXT: }
# CHECK-NEXT: ]
# CHECK-NEXT: }
# CHECK-NEXT: }

View File

@ -0,0 +1,187 @@
// This test checks that the epilogue is packed where possible.
// RUN: llvm-mc -triple aarch64-pc-win32 -filetype=obj %s -o %t.o
// RUN: llvm-readobj -u %t.o | FileCheck %s
// CHECK: UnwindInformation [
// CHECK-NEXT: RuntimeFunction {
// CHECK-NEXT: Function: func
// CHECK-NEXT: ExceptionRecord: .xdata
// CHECK-NEXT: ExceptionData {
// CHECK-NEXT: FunctionLength:
// CHECK-NEXT: Version:
// CHECK-NEXT: ExceptionData:
// CHECK-NEXT: EpiloguePacked: Yes
// CHECK-NEXT: EpilogueOffset: 2
// CHECK-NEXT: ByteCodeLength:
// CHECK-NEXT: Prologue [
// CHECK-NEXT: 0xdc04 ; str d8, [sp, #32]
// CHECK-NEXT: 0xe1 ; mov fp, sp
// CHECK-NEXT: 0x42 ; stp x29, x30, [sp, #16]
// CHECK-NEXT: 0x85 ; stp x29, x30, [sp, #-48]!
// CHECK-NEXT: 0xe6 ; save next
// CHECK-NEXT: 0x24 ; stp x19, x20, [sp, #-32]!
// CHECK-NEXT: 0xc842 ; stp x20, x21, [sp, #16]
// CHECK-NEXT: 0x03 ; sub sp, #48
// CHECK-NEXT: 0xe4 ; end
// CHECK-NEXT: ]
// CHECK-NEXT: Epilogue [
// CHECK-NEXT: 0xe1 ; mov sp, fp
// CHECK-NEXT: 0x42 ; ldp x29, x30, [sp, #16]
// CHECK-NEXT: 0x85 ; ldp x29, x30, [sp], #48
// CHECK-NEXT: 0xe6 ; restore next
// CHECK-NEXT: 0x24 ; ldp x19, x20, [sp], #32
// CHECK-NEXT: 0xc842 ; ldp x20, x21, [sp, #16]
// CHECK-NEXT: 0x03 ; add sp, #48
// CHECK-NEXT: 0xe4 ; end
// CHECK-NEXT: ]
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK: RuntimeFunction {
// CHECK-NEXT: Function: packed2
// CHECK-NEXT: ExceptionRecord:
// CHECK-NEXT: ExceptionData {
// CHECK: ExceptionData:
// CHECK-NEXT: EpiloguePacked: Yes
// CHECK: RuntimeFunction {
// CHECK-NEXT: Function: nonpacked1
// CHECK-NEXT: ExceptionRecord:
// CHECK-NEXT: ExceptionData {
// CHECK: ExceptionData:
// CHECK-NEXT: EpiloguePacked: No
// CHECK: RuntimeFunction {
// CHECK-NEXT: Function: nonpacked2
// CHECK-NEXT: ExceptionRecord:
// CHECK-NEXT: ExceptionData {
// CHECK: ExceptionData:
// CHECK-NEXT: EpiloguePacked: No
// CHECK: RuntimeFunction {
// CHECK-NEXT: Function: nonpacked3
// CHECK-NEXT: ExceptionRecord:
// CHECK-NEXT: ExceptionData {
// CHECK: ExceptionData:
// CHECK-NEXT: EpiloguePacked: No
.text
// func: the epilogue mirrors the prologue (minus the last prologue opcode),
// but each matching pair of directives is spelled differently on the two
// sides. The expectations at the top of this file require a packed epilogue
// with an offset of 2.
.globl func
.seh_proc func
func:
sub sp, sp, #48
.seh_stackalloc 48
// Check that canonical opcode forms (r19r20_x, fplr, fplr_x, save_next,
// set_fp) are treated as a match even if one (in prologue or epilogue)
// was simplified from the more generic opcodes.
stp x20, x21, [sp, #16]
.seh_save_regp x20, 16
// Canonical form here; the epilogue uses the generic .seh_save_regp_x x19.
stp x19, x20, [sp, #-32]!
.seh_save_r19r20_x 32
// Generic form here; the epilogue uses .seh_save_next.
stp x21, x22, [sp, #16]
.seh_save_regp x21, 16
// Generic form here; the epilogue uses .seh_save_fplr_x.
stp x29, x30, [sp, #-48]!
.seh_save_regp_x x29, 48
// Generic form here; the epilogue uses .seh_save_fplr.
stp x29, x30, [sp, #16]
.seh_save_regp x29, 16
// Generic form with a zero offset here; the epilogue uses .seh_set_fp.
add x29, sp, #0
.seh_add_fp 0
// This last prologue opcode has no counterpart in the epilogue; the shared
// opcodes are expected to start after it.
str d8, [sp, #32]
.seh_save_freg d8, 32
.seh_endprologue
// Function body placeholder between prologue and epilogue.
nop
.seh_startepilogue
mov sp, x29
.seh_set_fp
ldp x29, x30, [sp, #16]
.seh_save_fplr 16
// NOTE(review): this load is pre-indexed with a negative offset, while the
// directive below and the expected decoding describe a post-indexed
// "ldp x29, x30, [sp], #48". Presumably only the directives affect the
// emitted unwind info - confirm before changing.
ldp x29, x30, [sp, #-48]!
.seh_save_fplr_x 48
ldp x21, x22, [sp, #16]
.seh_save_next
ldp x19, x20, [sp], #32
.seh_save_regp_x x19, 32
ldp x20, x21, [sp, #16]
.seh_save_regp x20, 16
add sp, sp, #48
.seh_stackalloc 48
.seh_endepilogue
// The ret directly follows the epilogue, ending the function.
ret
.seh_endproc
// Test a perfectly matching epilog with no offset.
// Every prologue directive has a counterpart in the epilogue, in reverse
// order, so no prologue opcodes are left over ahead of the shared ones.
// The expectations at the top of this file require a packed epilogue here.
.seh_proc packed2
packed2:
sub sp, sp, #48
.seh_stackalloc 48
stp x29, lr, [sp, #-32]!
.seh_save_fplr_x 32
.seh_endprologue
// Function body placeholder between prologue and epilogue.
nop
.seh_startepilogue
ldp x29, lr, [sp], #32
.seh_save_fplr_x 32
add sp, sp, #48
.seh_stackalloc 48
.seh_endepilogue
// The ret directly follows the epilogue, ending the function.
ret
.seh_endproc
// nonpacked1: the epilogue directive matches the prologue directive, but
// the epilogue is not located at the very end of the function. The
// expectations at the top of this file require a non-packed epilogue here.
.seh_proc nonpacked1
nonpacked1:
sub sp, sp, #48
.seh_stackalloc 48
.seh_endprologue
// Function body placeholder between prologue and epilogue.
nop
.seh_startepilogue
add sp, sp, #48
.seh_stackalloc 48
.seh_endepilogue
// This epilogue isn't packed with the prologue, as it doesn't align with
// the end of the function (one extra nop before the ret).
nop
ret
.seh_endproc
// nonpacked2: prologue and epilogue have the same number of directives and
// the epilogue ends the function, but the stack allocation sizes differ
// (32 in the prologue versus 16 in the epilogue). The expectations at the
// top of this file require a non-packed epilogue here.
.seh_proc nonpacked2
nonpacked2:
sub sp, sp, #48
.seh_stackalloc 48
sub sp, sp, #32
.seh_stackalloc 32
.seh_endprologue
// Function body placeholder between prologue and epilogue.
nop
.seh_startepilogue
// Not packed; the epilogue mismatches at the second opcode.
add sp, sp, #16
.seh_stackalloc 16
add sp, sp, #48
.seh_stackalloc 48
.seh_endepilogue
ret
.seh_endproc
// nonpacked3: the last two epilogue directives mirror the prologue, but
// the epilogue starts with an extra .seh_set_fp that the prologue has no
// counterpart for. The expectations at the top of this file require a
// non-packed epilogue here.
.seh_proc nonpacked3
nonpacked3:
sub sp, sp, #48
.seh_stackalloc 48
sub sp, sp, #32
.seh_stackalloc 32
.seh_endprologue
// Function body placeholder between prologue and epilogue.
nop
.seh_startepilogue
// Not packed; the epilogue is longer than the prologue.
mov sp, x29
.seh_set_fp
add sp, sp, #32
.seh_stackalloc 32
add sp, sp, #48
.seh_stackalloc 48
.seh_endepilogue
ret
.seh_endproc

View File

@ -20,7 +20,7 @@
// CHECK-NEXT: }
// CHECK: Section {
// CHECK: Name: .xdata
// CHECK: RawDataSize: 56
// CHECK: RawDataSize: 52
// CHECK: RelocationCount: 1
// CHECK: Characteristics [
// CHECK-NEXT: ALIGN_4BYTES
@ -41,7 +41,7 @@
// CHECK-NEXT: Relocations [
// CHECK-NEXT: Section (4) .xdata {
// CHECK-NEXT: 0x2C IMAGE_REL_ARM64_ADDR32NB __C_specific_handler
// CHECK-NEXT: 0x28 IMAGE_REL_ARM64_ADDR32NB __C_specific_handler
// CHECK-NEXT: }
// CHECK-NEXT: Section (5) .pdata {
// CHECK-NEXT: 0x0 IMAGE_REL_ARM64_ADDR32NB func
@ -80,15 +80,9 @@
// CHECK-NEXT: 0x01 ; sub sp, #16
// CHECK-NEXT: 0xe4 ; end
// CHECK-NEXT: ]
// CHECK-NEXT: EpilogueScopes [
// CHECK-NEXT: EpilogueScope {
// CHECK-NEXT: StartOffset: 23
// CHECK-NEXT: EpilogueStartIndex: 33
// CHECK-NEXT: Opcodes [
// CHECK-NEXT: 0x01 ; add sp, #16
// CHECK-NEXT: 0xe4 ; end
// CHECK-NEXT: ]
// CHECK-NEXT: }
// CHECK-NEXT: Epilogue [
// CHECK-NEXT: 0x01 ; add sp, #16
// CHECK-NEXT: 0xe4 ; end
// CHECK-NEXT: ]
// CHECK-NEXT: ExceptionHandler [
// CHECK-NEXT: Routine: __C_specific_handler (0x0)