[X86] Generate .cfi_adjust_cfa_offset correctly when pushing arguments

When push instructions are being used to pass function arguments on
the stack, and either EH or debugging is enabled, we need to generate
.cfi_adjust_cfa_offset directives appropriately. For (synchronous) EH, it is
enough for the CFA offset to be correct at every call site, while
for debugging we want it to be correct after every push.

Darwin does not support this well, so don't use pushes on Darwin whenever
these directives would be required.

Differential Revision: http://reviews.llvm.org/D13767

llvm-svn: 251904
This commit is contained in:
Michael Kuperstein 2015-11-03 08:17:25 +00:00
parent 4ec5abffae
commit 73dc85293f
11 changed files with 359 additions and 119 deletions

View File

@ -245,6 +245,11 @@ public:
bool hasDebugInfo() const { return DbgInfoAvailable; } bool hasDebugInfo() const { return DbgInfoAvailable; }
void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; } void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }
// Returns true if we need to generate precise CFI, i.e. unwind information
// in which the CFA offset is correct after each argument push rather than
// only at call sites (the latter is all that synchronous EH requires).
// Currently this is equivalent to hasDebugInfo(), but if we ever implement
// async EH, it will require precise CFI as well.
bool usePreciseUnwindInfo() const { return hasDebugInfo(); }
bool callsEHReturn() const { return CallsEHReturn; } bool callsEHReturn() const { return CallsEHReturn; }
void setCallsEHReturn(bool b) { CallsEHReturn = b; } void setCallsEHReturn(bool b) { CallsEHReturn = b; }

View File

@ -216,6 +216,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpDefCfaOffset: case MCCFIInstruction::OpDefCfaOffset:
OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset()); OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset());
break; break;
case MCCFIInstruction::OpAdjustCfaOffset:
OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset());
break;
case MCCFIInstruction::OpDefCfa: case MCCFIInstruction::OpDefCfa:
OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
break; break;

View File

@ -103,7 +103,8 @@ private:
const char *getPassName() const override { return "X86 Optimize Call Frame"; } const char *getPassName() const override { return "X86 Optimize Call Frame"; }
const TargetInstrInfo *TII; const TargetInstrInfo *TII;
const TargetFrameLowering *TFL; const X86FrameLowering *TFL;
const X86Subtarget *STI;
const MachineRegisterInfo *MRI; const MachineRegisterInfo *MRI;
static char ID; static char ID;
}; };
@ -127,13 +128,15 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
// No point in running this in 64-bit mode, since some arguments are // No point in running this in 64-bit mode, since some arguments are
// passed in-register in all common calling conventions, so the pattern // passed in-register in all common calling conventions, so the pattern
// we're looking for will never match. // we're looking for will never match.
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); if (STI->is64Bit())
if (STI.is64Bit())
return false; return false;
// We can't encode multiple DW_CFA_GNU_args_size in the compact // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
// unwind encoding that Darwin uses. // in the compact unwind encoding that Darwin uses. So, bail if there
if (STI.isTargetDarwin() && !MF.getMMI().getLandingPads().empty()) // is a danger of that being generated.
if (STI->isTargetDarwin() &&
(!MF.getMMI().getLandingPads().empty() ||
(MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF))))
return false; return false;
// You would expect straight-line code between call-frame setup and // You would expect straight-line code between call-frame setup and
@ -216,8 +219,9 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
} }
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo(); STI = &MF.getSubtarget<X86Subtarget>();
TFL = MF.getSubtarget().getFrameLowering(); TII = STI->getInstrInfo();
TFL = STI->getFrameLowering();
MRI = &MF.getRegInfo(); MRI = &MF.getRegInfo();
if (!isLegal(MF)) if (!isLegal(MF))
@ -312,7 +316,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
// Check that this particular call sequence is amenable to the // Check that this particular call sequence is amenable to the
// transformation. // transformation.
const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>( const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
MF.getSubtarget().getRegisterInfo()); STI->getRegisterInfo());
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
// We expect to enter this at the beginning of a call sequence // We expect to enter this at the beginning of a call sequence
@ -455,6 +459,7 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) { for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
MachineBasicBlock::iterator MOV = *Context.MovVector[Idx]; MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
MachineBasicBlock::iterator Push = nullptr;
if (MOV->getOpcode() == X86::MOV32mi) { if (MOV->getOpcode() == X86::MOV32mi) {
unsigned PushOpcode = X86::PUSHi32; unsigned PushOpcode = X86::PUSHi32;
// If the operand is a small (8-bit) immediate, we can use a // If the operand is a small (8-bit) immediate, we can use a
@ -466,21 +471,20 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
if (isInt<8>(Val)) if (isInt<8>(Val))
PushOpcode = X86::PUSH32i8; PushOpcode = X86::PUSH32i8;
} }
BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).addOperand(PushOp); Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
.addOperand(PushOp);
} else { } else {
unsigned int Reg = PushOp.getReg(); unsigned int Reg = PushOp.getReg();
// If PUSHrmm is not slow on this target, try to fold the source of the // If PUSHrmm is not slow on this target, try to fold the source of the
// push into the instruction. // push into the instruction.
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();
// Check that this is legal to fold. Right now, we're extremely // Check that this is legal to fold. Right now, we're extremely
// conservative about that. // conservative about that.
MachineInstr *DefMov = nullptr; MachineInstr *DefMov = nullptr;
if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) { if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
MachineInstr *Push = Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
unsigned NumOps = DefMov->getDesc().getNumOperands(); unsigned NumOps = DefMov->getDesc().getNumOperands();
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
@ -488,12 +492,18 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
DefMov->eraseFromParent(); DefMov->eraseFromParent();
} else { } else {
BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r)) Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
.addReg(Reg) .addReg(Reg)
.getInstr(); .getInstr();
} }
} }
// For debugging, when using SP-based CFA, we need to adjust the CFA
// offset after each push.
if (!TFL->hasFP(MF) && MF.getMMI().usePreciseUnwindInfo())
TFL->BuildCFI(MBB, std::next(Push), DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));
MBB.erase(MOV); MBB.erase(MOV);
} }

View File

@ -2105,18 +2105,23 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
unsigned StackAlign = getStackAlignment(); unsigned StackAlign = getStackAlignment();
Amount = RoundUpToAlignment(Amount, StackAlign); Amount = RoundUpToAlignment(Amount, StackAlign);
MachineModuleInfo &MMI = MF.getMMI();
const Function *Fn = MF.getFunction();
bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool DwarfCFI = !WindowsCFI &&
(MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
// If we have any exception handlers in this function, and we adjust // If we have any exception handlers in this function, and we adjust
// the SP before calls, we may need to indicate this to the unwinder, // the SP before calls, we may need to indicate this to the unwinder
// using GNU_ARGS_SIZE. Note that this may be necessary // using GNU_ARGS_SIZE. Note that this may be necessary even when
// even when Amount == 0, because the preceding function may have // Amount == 0, because the preceding function may have set a non-0
// set a non-0 GNU_ARGS_SIZE. // GNU_ARGS_SIZE.
// TODO: We don't need to reset this between subsequent functions, // TODO: We don't need to reset this between subsequent functions,
// if it didn't change. // if it didn't change.
bool HasDwarfEHHandlers = bool HasDwarfEHHandlers = !WindowsCFI &&
!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && !MF.getMMI().getLandingPads().empty();
!MF.getMMI().getLandingPads().empty();
if (HasDwarfEHHandlers && !isDestroy && if (HasDwarfEHHandlers && !isDestroy &&
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences()) MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
BuildCFI(MBB, I, DL, BuildCFI(MBB, I, DL,
MCCFIInstruction::createGnuArgsSize(nullptr, Amount)); MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
@ -2128,15 +2133,37 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// (Pushes of argument for frame setup, callee pops for frame destroy) // (Pushes of argument for frame setup, callee pops for frame destroy)
Amount -= InternalAmt; Amount -= InternalAmt;
// If this is a callee-pop calling convention, and we're emitting precise
// SP-based CFI, emit a CFA adjust for the amount the callee popped.
if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF) &&
MMI.usePreciseUnwindInfo())
BuildCFI(MBB, I, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
if (Amount) { if (Amount) {
// Add Amount to SP to destroy a frame, and subtract to setup. // Add Amount to SP to destroy a frame, and subtract to setup.
int Offset = isDestroy ? Amount : -Amount; int Offset = isDestroy ? Amount : -Amount;
if (!(MF.getFunction()->optForMinSize() && if (!(Fn->optForMinSize() &&
adjustStackWithPops(MBB, I, DL, Offset))) adjustStackWithPops(MBB, I, DL, Offset)))
BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false); BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
} }
if (DwarfCFI && !hasFP(MF)) {
// If we don't have FP, but need to generate unwind information,
// we need to set the correct CFA offset after the stack adjustment.
// How much we adjust the CFA offset depends on whether we're emitting
// CFI only for EH purposes or for debugging. EH only requires the CFA
// offset to be correct at each call site, while for debugging we want
// it to be more precise.
int CFAOffset = Amount;
if (!MMI.usePreciseUnwindInfo())
CFAOffset += InternalAmt;
CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
BuildCFI(MBB, I, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
}
return; return;
} }

View File

@ -125,13 +125,13 @@ public:
/// \p MBB will be correctly handled by the target. /// \p MBB will be correctly handled by the target.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
private:
uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
/// Wraps up getting a CFI index and building a MachineInstr for it. /// Wraps up getting a CFI index and building a MachineInstr for it.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc DL, MCCFIInstruction CFIInst) const; DebugLoc DL, MCCFIInstruction CFIInst) const;
private:
uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
/// Aligns the stack pointer by ANDing it with -MaxAlign. /// Aligns the stack pointer by ANDing it with -MaxAlign.
void BuildStackAlignAND(MachineBasicBlock &MBB, void BuildStackAlignAND(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL, MachineBasicBlock::iterator MBBI, DebugLoc DL,

View File

@ -30,7 +30,7 @@ declare i8* @__cxa_begin_catch(i8*)
declare void @__cxa_end_catch() declare void @__cxa_end_catch()
attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { optsize } attributes #1 = { optsize }
attributes #2 = { nounwind } attributes #2 = { nounwind }

View File

@ -3,7 +3,7 @@
declare void @foo(i32 %r) declare void @foo(i32 %r)
define void @test(i32 %a, i32 %b) optsize { define void @test(i32 %a, i32 %b) optsize nounwind {
; CHECK-LABEL: test: ; CHECK-LABEL: test:
; CHECK: movl [[EAX:%e..]], (%esp) ; CHECK: movl [[EAX:%e..]], (%esp)
; CHECK-NEXT: pushl [[EAX]] ; CHECK-NEXT: pushl [[EAX]]
@ -22,7 +22,7 @@ define void @test(i32 %a, i32 %b) optsize {
ret void ret void
} }
define void @test_min(i32 %a, i32 %b) minsize { define void @test_min(i32 %a, i32 %b) minsize nounwind {
; CHECK-LABEL: test_min: ; CHECK-LABEL: test_min:
; CHECK: movl [[EAX:%e..]], (%esp) ; CHECK: movl [[EAX:%e..]], (%esp)
; CHECK-NEXT: pushl [[EAX]] ; CHECK-NEXT: pushl [[EAX]]

View File

@ -9,7 +9,7 @@ declare void @param3(i32 %a, i32 %b, i32 %c)
declare void @param8(i64, i64, i64, i64, i64, i64, i64, i64) declare void @param8(i64, i64, i64, i64, i64, i64, i64, i64)
define void @test() minsize { define void @test() minsize nounwind {
; CHECK-LABEL: test: ; CHECK-LABEL: test:
; CHECK: calll _param1 ; CHECK: calll _param1
; CHECK-NEXT: popl %eax ; CHECK-NEXT: popl %eax
@ -48,7 +48,7 @@ define void @negative(i32 %k) {
ret void ret void
} }
define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize { define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize nounwind {
; CHECK-LABEL: spill: ; CHECK-LABEL: spill:
; CHECK-DAG: movl %ecx, ; CHECK-DAG: movl %ecx,
; CHECK-DAG: movl %edx, ; CHECK-DAG: movl %edx,
@ -63,7 +63,7 @@ define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize {
ret void ret void
} }
define void @test_linux64(i32 %size) minsize { define void @test_linux64(i32 %size) minsize nounwind {
; LINUX64-LABEL: test_linux64: ; LINUX64-LABEL: test_linux64:
; LINUX64: pushq %rbp ; LINUX64: pushq %rbp
; LINUX64: callq param8 ; LINUX64: callq param8

View File

@ -0,0 +1,53 @@
; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
; Function Attrs: optsize
declare void @foo(i32, i32) #0
declare x86_stdcallcc void @stdfoo(i32, i32) #0
; CHECK-LABEL: test1:
; CHECK: subl $8, %esp
; CHECK: .cfi_adjust_cfa_offset 8
; CHECK: pushl $2
; CHECK: .cfi_adjust_cfa_offset 4
; CHECK: pushl $1
; CHECK: .cfi_adjust_cfa_offset 4
; CHECK: calll foo
; CHECK: addl $16, %esp
; CHECK: .cfi_adjust_cfa_offset -16
; CHECK: subl $8, %esp
; CHECK: .cfi_adjust_cfa_offset 8
; CHECK: pushl $4
; CHECK: .cfi_adjust_cfa_offset 4
; CHECK: pushl $3
; CHECK: .cfi_adjust_cfa_offset 4
; CHECK: calll stdfoo
; CHECK: .cfi_adjust_cfa_offset -8
; CHECK: addl $8, %esp
; CHECK: .cfi_adjust_cfa_offset -8
; Makes two calls that each pass two i32 arguments and carry debug locations:
; one to @foo (default calling convention) and one to @stdfoo (x86_stdcallcc).
; The expectations listed before this function look for a
; .cfi_adjust_cfa_offset after every stack adjustment and every push around
; both calls, including a separate -8 adjustment directly after the call to
; stdfoo.
; NOTE(review): that -8 presumably reflects the stdcall callee popping its own
; arguments -- confirm against the frame lowering code.
define void @test1() #0 {
entry:
tail call void @foo(i32 1, i32 2) #1, !dbg !10
tail call x86_stdcallcc void @stdfoo(i32 3, i32 4) #1, !dbg !11
ret void, !dbg !12
}
attributes #0 = { nounwind optsize }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250289)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
!1 = !DIFile(filename: "foo.c", directory: "foo")
!2 = !{}
!3 = !{!4}
!4 = distinct !DISubprogram(name: "test1", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, function: void ()* @test1, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{!"clang version 3.8.0 (trunk 250289)"}
!10 = !DILocation(line: 4, column: 3, scope: !4)
!11 = !DILocation(line: 5, column: 3, scope: !4)
!12 = !DILocation(line: 6, column: 1, scope: !4)

View File

@ -1,36 +1,36 @@
; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s ; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=LINUX
; RUN: llc < %s -mtriple=i686-darwin-macosx10.7 -filetype=obj | llvm-readobj -sections | FileCheck -check-prefix=DARWIN %s ; RUN: llc < %s -mtriple=i686-darwin-macosx10.7 -filetype=obj | llvm-readobj -sections | FileCheck -check-prefix=DARWIN %s
; On darwin, check that we manage to generate the compact unwind section ; On darwin, check that we manage to generate the compact unwind section
; DARWIN: Name: __compact_unwind ; DARWIN: Name: __compact_unwind
; DARWIN: Segment: __LD ; DARWIN: Segment: __LD
; CHECK: Index: 8 ; LINUX: Index: 8
; CHECK-NEXT: Name: .eh_frame (41) ; LINUX-NEXT: Name: .eh_frame (41)
; CHECK-NEXT: Type: SHT_PROGBITS (0x1) ; LINUX-NEXT: Type: SHT_PROGBITS (0x1)
; CHECK-NEXT: Flags [ (0x2) ; LINUX-NEXT: Flags [ (0x2)
; CHECK-NEXT: SHF_ALLOC (0x2) ; LINUX-NEXT: SHF_ALLOC (0x2)
; CHECK-NEXT: ] ; LINUX-NEXT: ]
; CHECK-NEXT: Address: 0x0 ; LINUX-NEXT: Address: 0x0
; CHECK-NEXT: Offset: 0x64 ; LINUX-NEXT: Offset: 0x68
; CHECK-NEXT: Size: 60 ; LINUX-NEXT: Size: 64
; CHECK-NEXT: Link: 0 ; LINUX-NEXT: Link: 0
; CHECK-NEXT: Info: 0 ; LINUX-NEXT: Info: 0
; CHECK-NEXT: AddressAlignment: 4 ; LINUX-NEXT: AddressAlignment: 4
; CHECK-NEXT: EntrySize: 0 ; LINUX-NEXT: EntrySize: 0
; CHECK-NEXT: Relocations [ ; LINUX-NEXT: Relocations [
; CHECK-NEXT: ] ; LINUX-NEXT: ]
; CHECK-NEXT: SectionData ( ; LINUX-NEXT: SectionData (
; CHECK-NEXT: 0000: 1C000000 00000000 017A504C 5200017C |.........zPLR..|| ; LINUX-NEXT: 0000: 1C000000 00000000 017A504C 5200017C |.........zPLR..||
; CHECK-NEXT: 0010: 08070000 00000000 1B0C0404 88010000 |................| ; LINUX-NEXT: 0010: 08070000 00000000 1B0C0404 88010000 |................|
; CHECK-NEXT: 0020: 18000000 24000000 00000000 19000000 |....$...........| ; LINUX-NEXT: 0020: 1C000000 24000000 00000000 1D000000 |....$...........|
; CHECK-NEXT: 0030: 04000000 00430E10 2E100000 |.....C......| ; LINUX-NEXT: 0030: 04000000 00410E08 8502420D 05432E10 |.....A....B..C..|
; CHECK-NEXT: ) ; LINUX-NEXT: )
declare i32 @__gxx_personality_v0(...) declare i32 @__gxx_personality_v0(...)
declare void @good(i32 %a, i32 %b, i32 %c, i32 %d) declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
define void @test() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { define void @test() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry: entry:
invoke void @good(i32 1, i32 2, i32 3, i32 4) invoke void @good(i32 1, i32 2, i32 3, i32 4)
to label %continue unwind label %cleanup to label %continue unwind label %cleanup
@ -41,3 +41,5 @@ cleanup:
cleanup cleanup
ret void ret void
} }
attributes #0 = { optsize "no-frame-pointer-elim"="true" }

View File

@ -1,21 +1,51 @@
; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s ; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX -check-prefix=CHECK
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=DARWIN -check-prefix=CHECK
declare i32 @__gxx_personality_v0(...) declare i32 @__gxx_personality_v0(...)
declare void @good(i32 %a, i32 %b, i32 %c, i32 %d) declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
declare void @large(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) declare void @large(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f)
declare void @empty() declare void @empty()
; We use an invoke, and expect a .cfi_escape GNU_ARGS_SIZE with size 16 ; When we use an invoke, and have FP, we expect a .cfi_escape GNU_ARGS_SIZE
; before the invocation ; with size 16 before the invocation. Without FP, we expect .cfi_adjust_cfa_offset
; CHECK-LABEL: test1: ; before and after.
; CHECK: .cfi_escape 0x2e, 0x10 ; Darwin should not generate pushes in either circumstance.
; CHECK-NEXT: pushl $4 ; CHECK-LABEL: test1_nofp:
; CHECK-NEXT: pushl $3 ; LINUX: .cfi_escape 0x2e, 0x10
; CHECK-NEXT: pushl $2 ; LINUX: .cfi_adjust_cfa_offset 16
; CHECK-NEXT: pushl $1 ; LINUX-NEXT: pushl $4
; CHECK-NEXT: call ; LINUX-NEXT: pushl $3
; CHECK-NEXT: addl $16, %esp ; LINUX-NEXT: pushl $2
define void @test1() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; LINUX-NEXT: pushl $1
; LINUX-NEXT: call
; LINUX-NEXT: addl $16, %esp
; LINUX: .cfi_adjust_cfa_offset -16
; DARWIN-NOT: .cfi_escape
; DARWIN-NOT: pushl
define void @test1_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
invoke void @good(i32 1, i32 2, i32 3, i32 4)
to label %continue unwind label %cleanup
continue:
ret void
cleanup:
landingpad { i8*, i32 }
cleanup
ret void
}
; CHECK-LABEL: test1_fp:
; LINUX: .cfi_escape 0x2e, 0x10
; LINUX-NEXT: pushl $4
; LINUX-NEXT: pushl $3
; LINUX-NEXT: pushl $2
; LINUX-NEXT: pushl $1
; LINUX-NEXT: call
; LINUX-NEXT: addl $16, %esp
; DARWIN: pushl %ebp
; DARWIN-NOT: .cfi_escape
; DARWIN-NOT: pushl
define void @test1_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry: entry:
invoke void @good(i32 1, i32 2, i32 3, i32 4) invoke void @good(i32 1, i32 2, i32 3, i32 4)
to label %continue unwind label %cleanup to label %continue unwind label %cleanup
@ -28,27 +58,69 @@ cleanup:
} }
; If the function has no handlers, we don't need to generate GNU_ARGS_SIZE, ; If the function has no handlers, we don't need to generate GNU_ARGS_SIZE,
; even if it has an unwind table. ; even if it has an unwind table. Without FP, we still need cfi_adjust_cfa_offset,
; CHECK-LABEL: test2: ; so darwin should not generate pushes.
; CHECK-NOT: .cfi_escape ; CHECK-LABEL: test2_nofp:
; CHECK: pushl $4 ; LINUX-NOT: .cfi_escape
; CHECK-NEXT: pushl $3 ; LINUX: .cfi_adjust_cfa_offset 16
; CHECK-NEXT: pushl $2 ; LINUX-NEXT: pushl $4
; CHECK-NEXT: pushl $1 ; LINUX-NEXT: pushl $3
; CHECK-NEXT: call ; LINUX-NEXT: pushl $2
; CHECK-NEXT: addl $16, %esp ; LINUX-NEXT: pushl $1
define void @test2() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; LINUX-NEXT: call
; LINUX-NEXT: addl $16, %esp
; LINUX: .cfi_adjust_cfa_offset -16
; DARWIN-NOT: .cfi_escape
; DARWIN-NOT: pushl
define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry: entry:
call void @good(i32 1, i32 2, i32 3, i32 4) call void @good(i32 1, i32 2, i32 3, i32 4)
ret void ret void
} }
; If we did not end up using any pushes, no need for GNU_ARGS_SIZE anywhere ; CHECK-LABEL: test2_fp:
; CHECK-LABEL: test3:
; CHECK-NOT: .cfi_escape ; CHECK-NOT: .cfi_escape
; CHECK-NOT: pushl ; CHECK-NOT: .cfi_adjust_cfa_offset
; CHECK: retl ; CHECK: pushl $4
define void @test3() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-NEXT: pushl $3
; CHECK-NEXT: pushl $2
; CHECK-NEXT: pushl $1
; CHECK-NEXT: call
; CHECK-NEXT: addl $24, %esp
define void @test2_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}
; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or
; cfi_adjust_cfa_offset.
; CHECK-LABEL: test3_nofp:
; LINUX-NOT: .cfi_escape
; LINUX-NOT: .cfi_adjust_cfa_offset
; LINUX-NOT: pushl
; LINUX: retl
define void @test3_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
invoke void @empty()
to label %continue unwind label %cleanup
continue:
ret void
cleanup:
landingpad { i8*, i32 }
cleanup
ret void
}
; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or
; cfi_adjust_cfa_offset.
; CHECK-LABEL: test3_fp:
; LINUX: pushl %ebp
; LINUX-NOT: .cfi_escape
; LINUX-NOT: .cfi_adjust_cfa_offset
; LINUX-NOT: pushl
; LINUX: retl
define void @test3_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry: entry:
invoke void @empty() invoke void @empty()
to label %continue unwind label %cleanup to label %continue unwind label %cleanup
@ -62,24 +134,24 @@ cleanup:
; Different sized stacks need different GNU_ARGS_SIZEs ; Different sized stacks need different GNU_ARGS_SIZEs
; CHECK-LABEL: test4: ; CHECK-LABEL: test4:
; CHECK: .cfi_escape 0x2e, 0x10 ; LINUX: .cfi_escape 0x2e, 0x10
; CHECK-NEXT: pushl $4 ; LINUX-NEXT: pushl $4
; CHECK-NEXT: pushl $3 ; LINUX-NEXT: pushl $3
; CHECK-NEXT: pushl $2 ; LINUX-NEXT: pushl $2
; CHECK-NEXT: pushl $1 ; LINUX-NEXT: pushl $1
; CHECK-NEXT: call ; LINUX-NEXT: call
; CHECK-NEXT: addl $16, %esp ; LINUX-NEXT: addl $16, %esp
; CHECK: .cfi_escape 0x2e, 0x20 ; LINUX: .cfi_escape 0x2e, 0x20
; CHECK-NEXT: subl $8, %esp ; LINUX: subl $8, %esp
; CHECK-NEXT: pushl $11 ; LINUX-NEXT: pushl $11
; CHECK-NEXT: pushl $10 ; LINUX-NEXT: pushl $10
; CHECK-NEXT: pushl $9 ; LINUX-NEXT: pushl $9
; CHECK-NEXT: pushl $8 ; LINUX-NEXT: pushl $8
; CHECK-NEXT: pushl $7 ; LINUX-NEXT: pushl $7
; CHECK-NEXT: pushl $6 ; LINUX-NEXT: pushl $6
; CHECK-NEXT: calll large ; LINUX-NEXT: calll large
; CHECK-NEXT: addl $32, %esp ; LINUX-NEXT: addl $32, %esp
define void @test4() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { define void @test4() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry: entry:
invoke void @good(i32 1, i32 2, i32 3, i32 4) invoke void @good(i32 1, i32 2, i32 3, i32 4)
to label %continue1 unwind label %cleanup to label %continue1 unwind label %cleanup
@ -95,18 +167,22 @@ cleanup:
} }
; If we did use pushes, we need to reset GNU_ARGS_SIZE before a call ; If we did use pushes, we need to reset GNU_ARGS_SIZE before a call
; without parameters ; without parameters, but don't need to adjust the cfa offset
; CHECK-LABEL: test5: ; CHECK-LABEL: test5_nofp:
; CHECK: .cfi_escape 0x2e, 0x10 ; LINUX: .cfi_escape 0x2e, 0x10
; CHECK-NEXT: pushl $4 ; LINUX: .cfi_adjust_cfa_offset 16
; CHECK-NEXT: pushl $3 ; LINUX-NEXT: pushl $4
; CHECK-NEXT: pushl $2 ; LINUX-NEXT: pushl $3
; CHECK-NEXT: pushl $1 ; LINUX-NEXT: pushl $2
; CHECK-NEXT: call ; LINUX-NEXT: pushl $1
; CHECK-NEXT: addl $16, %esp ; LINUX-NEXT: call
; CHECK: .cfi_escape 0x2e, 0x00 ; LINUX-NEXT: addl $16, %esp
; CHECK-NEXT: call ; LINUX: .cfi_adjust_cfa_offset -16
define void @test5() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; LINUX-NOT: .cfi_adjust_cfa_offset
; LINUX: .cfi_escape 0x2e, 0x00
; LINUX-NOT: .cfi_adjust_cfa_offset
; LINUX: call
define void @test5_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry: entry:
invoke void @good(i32 1, i32 2, i32 3, i32 4) invoke void @good(i32 1, i32 2, i32 3, i32 4)
to label %continue1 unwind label %cleanup to label %continue1 unwind label %cleanup
@ -121,13 +197,39 @@ cleanup:
ret void ret void
} }
; This is actually inefficient - we don't need to repeat the .cfi_escape twice. ; CHECK-LABEL: test5_fp:
; LINUX: .cfi_escape 0x2e, 0x10
; LINUX-NEXT: pushl $4
; LINUX-NEXT: pushl $3
; LINUX-NEXT: pushl $2
; LINUX-NEXT: pushl $1
; LINUX-NEXT: call
; LINUX-NEXT: addl $16, %esp
; LINUX: .cfi_escape 0x2e, 0x00
; LINUX-NOT: .cfi_adjust_cfa_offset
; LINUX: call
define void @test5_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
invoke void @good(i32 1, i32 2, i32 3, i32 4)
to label %continue1 unwind label %cleanup
continue1:
invoke void @empty()
to label %continue2 unwind label %cleanup
continue2:
ret void
cleanup:
landingpad { i8*, i32 }
cleanup
ret void
}
; FIXME: This is actually inefficient - we don't need to repeat the .cfi_escape twice.
; CHECK-LABEL: test6: ; CHECK-LABEL: test6:
; CHECK: .cfi_escape 0x2e, 0x10 ; LINUX: .cfi_escape 0x2e, 0x10
; CHECK: call ; LINUX: call
; CHECK: .cfi_escape 0x2e, 0x10 ; LINUX: .cfi_escape 0x2e, 0x10
; CHECK: call ; LINUX: call
define void @test6() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { define void @test6() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry: entry:
invoke void @good(i32 1, i32 2, i32 3, i32 4) invoke void @good(i32 1, i32 2, i32 3, i32 4)
to label %continue1 unwind label %cleanup to label %continue1 unwind label %cleanup
@ -141,3 +243,41 @@ cleanup:
cleanup cleanup
ret void ret void
} }
; Darwin should generate pushes in the presence of FP and an unwind table,
; but not FP and invoke.
; CHECK-LABEL: test7:
; DARWIN: pushl %ebp
; DARWIN: movl %esp, %ebp
; DARWIN: .cfi_def_cfa_register %ebp
; DARWIN-NOT: .cfi_adjust_cfa_offset
; DARWIN: pushl $4
; DARWIN-NEXT: pushl $3
; DARWIN-NEXT: pushl $2
; DARWIN-NEXT: pushl $1
; DARWIN-NEXT: call
define void @test7() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}
; CHECK-LABEL: test8:
; DARWIN: pushl %ebp
; DARWIN: movl %esp, %ebp
; DARWIN-NOT: .cfi_adjust_cfa_offset
; DARWIN-NOT: pushl
define void @test8() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
invoke void @good(i32 1, i32 2, i32 3, i32 4)
to label %continue unwind label %cleanup
continue:
ret void
cleanup:
landingpad { i8*, i32 }
cleanup
ret void
}
attributes #0 = { optsize }
attributes #1 = { optsize "no-frame-pointer-elim"="true" }