[MC] [Win64EH] Canonicalize ARM64 unwind opcodes

Convert 2-byte opcodes to equivalent 1-byte ones.

Adjust the existing exhaustive testcase to avoid being altered by
the simplification rules (to keep that test exercising all individual
opcodes).

Fix the assembler parser limits for register pairs: for .seh_save_regp
and .seh_save_regp_x, we can allow up to x29, for an x29+x30 pair
(which gets remapped to the UOP_SaveFPLR(X) opcodes); for .seh_save_fregp
and .seh_save_fregp_x, allow up to d14+d15.

Intentionally do not create .seh_save_next for float register pairs, as
the actual unwinder implementation in current versions of Windows is buggy
for that case.

This gives a minimal but measurable size reduction. (For a 6.5 MB
DLL with 300 KB .xdata, the .xdata shrinks by 48 bytes. The opcode
sequences are padded to a 4 byte boundary, so very small improvements
might not end up mattering directly.)

Differential Revision: https://reviews.llvm.org/D87367
This commit is contained in:
Martin Storsjö 2020-09-07 14:45:37 +03:00
parent 46416f0803
commit 700fbe591a
4 changed files with 180 additions and 13 deletions

View File

@ -544,6 +544,63 @@ FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs,
return nullptr;
}
// Rewrite the unwind opcodes in Instructions, in place, into shorter
// equivalent opcodes where one exists.
//
// Instructions - the opcode list to canonicalize; only the Operation field
//                of an entry is ever modified.
// Reverse      - walk the list back to front (used for epilogues) instead
//                of front to back (used for prologues), i.e. always reverse
//                compared to how the opcodes are stored.
static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions,
                            bool Reverse) {
  // Offset and first register of the register pair saved by the previously
  // visited opcode. Both hold all-ones while the previous opcode was not a
  // register pair save that a following UOP_SaveNext could continue.
  unsigned LastPairOffset = -1;
  unsigned LastPairRegister = -1;

  auto Canonicalize = [&](WinEH::Instruction &I) {
    // Step 1: replace 2-byte opcodes with equivalent 1-byte ones.
    if (I.Operation == Win64EH::UOP_SaveRegP) {
      if (I.Register == 29) {
        // An x29+x30 pair is exactly what UOP_SaveFPLR describes.
        I.Operation = Win64EH::UOP_SaveFPLR;
      } else if (I.Register == LastPairRegister + 2 &&
                 I.Offset == LastPairOffset + 16) {
        // This pair directly follows the previous one, both in register
        // number and in stack offset.
        // Float register pairs are deliberately never turned into
        // UOP_SaveNext, as current versions of Windows (up to at least
        // 20.04) are buggy regarding SaveNext for float pairs.
        I.Operation = Win64EH::UOP_SaveNext;
      }
    } else if (I.Operation == Win64EH::UOP_SaveRegPX) {
      if (I.Register == 29)
        I.Operation = Win64EH::UOP_SaveFPLRX;
      else if (I.Register == 19 && I.Offset <= 248)
        I.Operation = Win64EH::UOP_SaveR19R20X;
    } else if (I.Operation == Win64EH::UOP_AddFP && I.Offset == 0) {
      I.Operation = Win64EH::UOP_SetFP;
    }

    // Step 2: record what this (possibly rewritten) opcode saved, so the
    // next visited opcode can be checked for being a UOP_SaveNext candidate.
    switch (I.Operation) {
    case Win64EH::UOP_SaveR19R20X:
      LastPairOffset = 0;
      LastPairRegister = 19;
      break;
    case Win64EH::UOP_SaveRegPX:
      LastPairOffset = 0;
      LastPairRegister = I.Register;
      break;
    case Win64EH::UOP_SaveRegP:
      LastPairOffset = I.Offset;
      LastPairRegister = I.Register;
      break;
    case Win64EH::UOP_SaveNext:
      LastPairRegister += 2;
      LastPairOffset += 16;
      break;
    default:
      // Any other opcode breaks the chain of consecutive pair saves.
      LastPairRegister = -1;
      LastPairOffset = -1;
      break;
    }
  };

  if (!Reverse) {
    // Prologues: visit in stored order.
    for (WinEH::Instruction &I : Instructions)
      Canonicalize(I);
    return;
  }

  // Epilogues: visit in the opposite of stored order.
  for (auto RI = Instructions.rbegin(), RE = Instructions.rend(); RI != RE;
       ++RI)
    Canonicalize(*RI);
}
// Populate the .xdata section. The format of .xdata on ARM64 is documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
@ -572,6 +629,10 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
return;
}
simplifyOpcodes(info->Instructions, false);
for (auto &I : info->EpilogMap)
simplifyOpcodes(I.second, true);
MCContext &context = streamer.getContext();
MCSymbol *Label = context.createTempSymbol();

View File

@ -5725,7 +5725,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveRegX(SMLoc L) {
bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) {
unsigned Reg;
int64_t Offset;
if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) ||
if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) ||
parseComma() || parseImmExpr(Offset))
return true;
getTargetStreamer().EmitARM64WinCFISaveRegP(Reg, Offset);
@ -5737,7 +5737,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) {
bool AArch64AsmParser::parseDirectiveSEHSaveRegPX(SMLoc L) {
unsigned Reg;
int64_t Offset;
if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::X28) ||
if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) ||
parseComma() || parseImmExpr(Offset))
return true;
getTargetStreamer().EmitARM64WinCFISaveRegPX(Reg, Offset);
@ -5789,7 +5789,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveFRegX(SMLoc L) {
bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) {
unsigned Reg;
int64_t Offset;
if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) ||
if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) ||
parseComma() || parseImmExpr(Offset))
return true;
getTargetStreamer().EmitARM64WinCFISaveFRegP(Reg, Offset);
@ -5801,7 +5801,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) {
bool AArch64AsmParser::parseDirectiveSEHSaveFRegPX(SMLoc L) {
unsigned Reg;
int64_t Offset;
if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) ||
if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) ||
parseComma() || parseImmExpr(Offset))
return true;
getTargetStreamer().EmitARM64WinCFISaveFRegPX(Reg, Offset);

View File

@ -0,0 +1,106 @@
// This test checks that the unwinding opcodes are remapped to more
// efficient ones where possible.
// RUN: llvm-mc -triple aarch64-pc-win32 -filetype=obj %s -o %t.o
// RUN: llvm-readobj -u %t.o | FileCheck %s
// CHECK: UnwindInformation [
// CHECK-NEXT: RuntimeFunction {
// CHECK-NEXT: Function: func
// CHECK-NEXT: ExceptionRecord: .xdata
// CHECK-NEXT: ExceptionData {
// CHECK: Prologue [
// CHECK-NEXT: 0xd882 ; stp d10, d11, [sp, #16]
// CHECK-NEXT: 0xda07 ; stp d8, d9, [sp, #-64]!
// CHECK-NEXT: 0xe6 ; save next
// CHECK-NEXT: 0x28 ; stp x19, x20, [sp, #-64]!
// CHECK-NEXT: 0xca49 ; stp x28, x29, [sp, #72]
// CHECK-NEXT: 0xe6 ; save next
// CHECK-NEXT: 0xe6 ; save next
// CHECK-NEXT: 0xe6 ; save next
// CHECK-NEXT: 0xcc47 ; stp x20, x21, [sp, #-64]!
// CHECK-NEXT: 0x42 ; stp x29, x30, [sp, #16]
// CHECK-NEXT: 0xca02 ; stp x27, x28, [sp, #16]
// CHECK-NEXT: 0x83 ; stp x29, x30, [sp, #-32]!
// CHECK-NEXT: 0xce03 ; stp x27, x28, [sp, #-32]!
// CHECK-NEXT: 0xe1 ; mov fp, sp
// CHECK-NEXT: 0xe201 ; add fp, sp, #8
// CHECK-NEXT: 0xe4 ; end
// CHECK-NEXT: ]
// CHECK-NEXT: EpilogueScopes [
// CHECK-NEXT: EpilogueScope {
// CHECK: Opcodes [
// CHECK-NEXT: 0xc904 ; ldp x23, x24, [sp, #32]
// CHECK-NEXT: 0xe6 ; restore next
// CHECK-NEXT: 0xcc83 ; ldp x21, x22, [sp], #32
// CHECK-NEXT: 0x24 ; ldp x19, x20, [sp], #32
// CHECK-NEXT: 0xcc1f ; ldp x19, x20, [sp], #256
// CHECK-NEXT: 0xe4 ; end
// CHECK-NEXT: ]
// CHECK-NEXT: }
// CHECK-NEXT: ]
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: ]
// Test input: every .seh_* directive below is paired with the instruction
// it describes. The comments state what each directive is expected to be
// canonicalized to; the expected opcode listing above shows the prologue
// opcodes in the reverse of the order they are written here.
.text
.globl func
.seh_proc func
func:
add x29, sp, #8
// Nonzero offset: stays add_fp (0xe201).
.seh_add_fp 8
add x29, sp, #0
// Zero offset: becomes set_fp (0xe1).
.seh_add_fp 0
stp x27, x28, [sp, #-32]!
// Neither x29 nor x19: stays save_regp_x (0xce03).
.seh_save_regp_x x27, 32
stp x29, x30, [sp, #-32]!
// x29+x30 pair: becomes save_fplr_x (0x83).
.seh_save_regp_x x29, 32
stp x27, x28, [sp, #16]
// Not x29 and not consecutive with a preceding pair: stays save_regp (0xca02).
.seh_save_regp x27, 16
stp x29, x30, [sp, #16]
// x29+x30 pair: becomes save_fplr (0x42).
.seh_save_regp x29, 16
stp x20, x21, [sp, #-64]!
// Starts at x20 rather than x19: stays save_regp_x (0xcc47).
.seh_save_regp_x x20, 64
stp x22, x23, [sp, #16]
// Register +2 and offset +16 relative to the previous pair: becomes
// save_next (0xe6).
.seh_save_regp x22, 16
stp x24, x25, [sp, #32]
// An explicit save_next is kept and advances the tracked pair.
.seh_save_next
stp x26, x27, [sp, #48]
// Continues the chain after the explicit save_next: becomes save_next (0xe6).
.seh_save_regp x26, 48
stp x28, x29, [sp, #72]
// Offset 72 is not the previous offset + 16: stays save_regp (0xca49).
.seh_save_regp x28, 72
stp x19, x20, [sp, #-64]!
// Already the short form: stays save_r19r20_x (0x28).
.seh_save_r19r20_x 64
stp x21, x22, [sp, #16]
// Directly follows the x19+x20 pair: becomes save_next (0xe6).
.seh_save_regp x21, 16
stp d8, d9, [sp, #-64]!
// Float pair: stays save_fregp_x (0xda07).
.seh_save_fregp_x d8, 64
stp d10, d11, [sp, #16]
// This is intentionally not converted into a save_next, to avoid
// bugs in the windows unwinder.
.seh_save_fregp d10, 16
.seh_endprologue
nop
// Epilogue opcodes are simplified walking backwards from the last directive
// to the first, so "previous pair" below means the directive written after.
.seh_startepilogue
ldp x27, x28, [sp, #32]
// Offset 32 for x23 does not continue the tracked pair: stays save_regp
// (0xc904).
// NOTE(review): the ldp above names x27/x28 while the directive names x23;
// the expected output follows the directive - confirm this is intentional.
.seh_save_regp x23, 32
ldp x23, x24, [sp, #16]
// Follows the x21 pair below (register +2, offset +16): becomes save_next
// (0xe6).
.seh_save_regp x23, 16
ldp x21, x22, [sp], #32
// Neither x29 nor x19: stays save_regp_x (0xcc83).
.seh_save_regp_x x21, 32
ldp x19, x20, [sp], #32
// x19 with offset <= 248: becomes save_r19r20_x (0x24).
.seh_save_regp_x x19, 32
ldp x19, x20, [sp], #256
// Offset 256 exceeds the 248 limit for save_r19r20_x: stays save_regp_x
// (0xcc1f).
.seh_save_regp_x x19, 256
.seh_endepilogue
ret
.seh_endproc

View File

@ -64,8 +64,8 @@
// CHECK-NEXT: 0xe202 ; add fp, sp, #16
// CHECK-NEXT: 0xdd41 ; str d13, [sp, #8]
// CHECK-NEXT: 0xde83 ; str d12, [sp, #-32]!
// CHECK-NEXT: 0xd882 ; stp d10, d11, [sp, #16]
// CHECK-NEXT: 0xda03 ; stp d8, d9, [sp, #-32]!
// CHECK-NEXT: 0xd884 ; stp d10, d11, [sp, #32]
// CHECK-NEXT: 0xda05 ; stp d8, d9, [sp, #-48]!
// CHECK-NEXT: 0x83 ; stp x29, x30, [sp, #-32]!
// CHECK-NEXT: 0x46 ; stp x29, x30, [sp, #48]
// CHECK-NEXT: 0xd141 ; str x24, [sp, #8]
@ -74,7 +74,7 @@
// CHECK-NEXT: 0xc882 ; stp x21, x22, [sp, #16]
// CHECK-NEXT: 0xd6c2 ; stp x25, lr, [sp, #16]
// CHECK-NEXT: 0x24 ; stp x19, x20, [sp, #-32]!
// CHECK-NEXT: 0xcc03 ; stp x19, x20, [sp, #-32]!
// CHECK-NEXT: 0xcc83 ; stp x21, x22, [sp, #-32]!
// CHECK-NEXT: 0x83 ; stp x29, x30, [sp, #-32]!
// CHECK-NEXT: 0xe1 ; mov fp, sp
// CHECK-NEXT: 0x01 ; sub sp, #16
@ -113,8 +113,8 @@ func:
.seh_set_fp
stp x29, x30, [sp, #-32]!
.seh_save_fplr_x 32
stp x19, x20, [sp, #-32]!
.seh_save_regp_x x19, 32
stp x21, x22, [sp, #-32]!
.seh_save_regp_x x21, 32
stp x19, x20, [sp, #-32]!
.seh_save_r19r20_x 32
stp x25, x30, [sp, #16]
@ -131,10 +131,10 @@ func:
.seh_save_fplr 48
stp x29, x30, [sp, #-32]!
.seh_save_fplr_x 32
stp d8, d9, [sp, #-32]!
.seh_save_fregp_x d8, 32
stp d10, d11, [sp, #16]
.seh_save_fregp d10, 16
stp d8, d9, [sp, #-48]!
.seh_save_fregp_x d8, 48
stp d10, d11, [sp, #32]
.seh_save_fregp d10, 32
str d12, [sp, #-32]!
.seh_save_freg_x d12, 32
str d13, [sp, #8]