[X86] Generate .cfi_adjust_cfa_offset correctly when pushing arguments

When push instructions are used to pass function arguments on the stack, and either EH or debugging is enabled, we need to generate .cfi_adjust_cfa_offset directives appropriately. For (synchronous) EH, it is enough for the CFA offset to be correct at every call site, while for debugging we want it to be correct after every push. Darwin does not support this well, so don't use pushes there whenever such directives would be required. Differential Revision: http://reviews.llvm.org/D13767 llvm-svn: 251904
2015-11-03 08:17:25 +00:00 · 2015-11-03 08:17:25 +00:00 · 73dc85293f
parent 4ec5abffae
commit 73dc85293f
11 changed files with 359 additions and 119 deletions
--- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@ -245,6 +245,11 @@ public:
  bool hasDebugInfo() const { return DbgInfoAvailable; }
  void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }
  // Returns true if we need to generate precise CFI. Currently
  // this is equivalent to hasDebugInfo(), but if we ever implement
  // async EH, it will require precise CFI as well.
  bool usePreciseUnwindInfo() const { return hasDebugInfo(); }
  bool callsEHReturn() const { return CallsEHReturn; }
  void setCallsEHReturn(bool b) { CallsEHReturn = b; }
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@ -216,6 +216,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
  case MCCFIInstruction::OpDefCfaOffset:
    OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset());
    break;
  case MCCFIInstruction::OpAdjustCfaOffset:
    OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset());
    break;
  case MCCFIInstruction::OpDefCfa:
    OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
    break;
--- a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@ -103,7 +103,8 @@ private:
  const char *getPassName() const override { return "X86 Optimize Call Frame"; }
  const TargetInstrInfo *TII;
-  const TargetFrameLowering *TFL;
+  const X86FrameLowering *TFL;
  const X86Subtarget *STI;
  const MachineRegisterInfo *MRI;
  static char ID;
 };
@ -127,13 +128,15 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
  // No point in running this in 64-bit mode, since some arguments are
  // passed in-register in all common calling conventions, so the pattern
  // we're looking for will never match.
-  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  if (STI->is64Bit())
  if (STI.is64Bit())
    return false;
-  // We can't encode multiple DW_CFA_GNU_args_size in the compact
+  // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
-  // unwind encoding that Darwin uses.
+  // in the compact unwind encoding that Darwin uses. So, bail if there
-  if (STI.isTargetDarwin() && !MF.getMMI().getLandingPads().empty())
+  // is a danger of that being generated.
  if (STI->isTargetDarwin() && 
     (!MF.getMMI().getLandingPads().empty() || 
       (MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF))))
    return false;
  // You would expect straight-line code between call-frame setup and
@ -216,8 +219,9 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
 }
 bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getSubtarget().getInstrInfo();
+  STI = &MF.getSubtarget<X86Subtarget>();
-  TFL = MF.getSubtarget().getFrameLowering();
+  TII = STI->getInstrInfo();
  TFL = STI->getFrameLowering();
  MRI = &MF.getRegInfo();
  if (!isLegal(MF))
@ -312,7 +316,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
-                                       MF.getSubtarget().getRegisterInfo());
+                                       STI->getRegisterInfo());
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
  // We expect to enter this at the beginning of a call sequence
@ -455,6 +459,7 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
  for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
    MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    if (MOV->getOpcode() == X86::MOV32mi) {
      unsigned PushOpcode = X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
@ -466,21 +471,20 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
        if (isInt<8>(Val))
          PushOpcode = X86::PUSH32i8;
      }
-      BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
+      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
          .addOperand(PushOp);
    } else {
      unsigned int Reg = PushOp.getReg();
      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
-      const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
      bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();
      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
-        MachineInstr *Push =
+        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
            BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
@ -488,12 +492,18 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
        DefMov->eraseFromParent();
      } else {
-        BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
+        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
            .addReg(Reg)
            .getInstr();
      }
    }
    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    if (!TFL->hasFP(MF) && MF.getMMI().usePreciseUnwindInfo())
      TFL->BuildCFI(MBB, std::next(Push), DL, 
                    MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));
    MBB.erase(MOV);
  }
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@ -2105,18 +2105,23 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
    unsigned StackAlign = getStackAlignment();
    Amount = RoundUpToAlignment(Amount, StackAlign);
    MachineModuleInfo &MMI = MF.getMMI();
    const Function *Fn = MF.getFunction();
    bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
    bool DwarfCFI = !WindowsCFI && 
                    (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
    // If we have any exception handlers in this function, and we adjust
-    // the SP before calls, we may need to indicate this to the unwinder,
+    // the SP before calls, we may need to indicate this to the unwinder
-    // using GNU_ARGS_SIZE. Note that this may be necessary
+    // using GNU_ARGS_SIZE. Note that this may be necessary even when
-    // even when Amount == 0, because the preceding function may have
+    // Amount == 0, because the preceding function may have set a non-0
-    // set a non-0 GNU_ARGS_SIZE.
+    // GNU_ARGS_SIZE.
    // TODO: We don't need to reset this between subsequent functions,
    // if it didn't change.
-    bool HasDwarfEHHandlers =
+    bool HasDwarfEHHandlers = !WindowsCFI &&
-      !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+                              !MF.getMMI().getLandingPads().empty();
      !MF.getMMI().getLandingPads().empty();
-    if (HasDwarfEHHandlers && !isDestroy && 
+    if (HasDwarfEHHandlers && !isDestroy &&
        MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
      BuildCFI(MBB, I, DL,
               MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
@ -2128,15 +2133,37 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
    // (Pushes of argument for frame setup, callee pops for frame destroy)
    Amount -= InternalAmt;
    // If this is a callee-pop calling convention, and we're emitting precise
    // SP-based CFI, emit a CFA adjust for the amount the callee popped.
    if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF) && 
        MMI.usePreciseUnwindInfo())
      BuildCFI(MBB, I, DL, 
               MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
    if (Amount) {
      // Add Amount to SP to destroy a frame, and subtract to setup.
      int Offset = isDestroy ? Amount : -Amount;
-      if (!(MF.getFunction()->optForMinSize() && 
+      if (!(Fn->optForMinSize() && 
            adjustStackWithPops(MBB, I, DL, Offset)))
        BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
    }
    if (DwarfCFI && !hasFP(MF)) {
      // If we don't have FP, but need to generate unwind information,
      // we need to set the correct CFA offset after the stack adjustment.
      // How much we adjust the CFA offset depends on whether we're emitting
      // CFI only for EH purposes or for debugging. EH only requires the CFA
      // offset to be correct at each call site, while for debugging we want
      // it to be more precise.
      int CFAOffset = Amount;
      if (!MMI.usePreciseUnwindInfo())
        CFAOffset += InternalAmt;
      CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
      BuildCFI(MBB, I, DL, 
               MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
    }
    return;
  }
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@ -125,13 +125,13 @@ public:
  /// \p MBB will be correctly handled by the target.
  bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
 private:
  uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
  /// Wraps up getting a CFI index and building a MachineInstr for it.
  void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                DebugLoc DL, MCCFIInstruction CFIInst) const;
 private:
  uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
  /// Aligns the stack pointer by ANDing it with -MaxAlign.
  void BuildStackAlignAND(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, DebugLoc DL,
--- a/llvm/test/CodeGen/X86/debugloc-argsize.ll
+++ b/llvm/test/CodeGen/X86/debugloc-argsize.ll
@ -30,7 +30,7 @@ declare i8* @__cxa_begin_catch(i8*)
 declare void @__cxa_end_catch()
-attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { optsize }
 attributes #2 = { nounwind }
--- a/llvm/test/CodeGen/X86/fold-push.ll
+++ b/llvm/test/CodeGen/X86/fold-push.ll
@ -3,7 +3,7 @@
 declare void @foo(i32 %r)
-define void @test(i32 %a, i32 %b) optsize {
+define void @test(i32 %a, i32 %b) optsize nounwind {
 ; CHECK-LABEL: test:
 ; CHECK: movl [[EAX:%e..]], (%esp)
 ; CHECK-NEXT: pushl [[EAX]]
@ -22,7 +22,7 @@ define void @test(i32 %a, i32 %b) optsize {
  ret void
 }
-define void @test_min(i32 %a, i32 %b) minsize {
+define void @test_min(i32 %a, i32 %b) minsize nounwind {
 ; CHECK-LABEL: test_min:
 ; CHECK: movl [[EAX:%e..]], (%esp)
 ; CHECK-NEXT: pushl [[EAX]]
--- a/llvm/test/CodeGen/X86/pop-stack-cleanup.ll
+++ b/llvm/test/CodeGen/X86/pop-stack-cleanup.ll
@ -9,7 +9,7 @@ declare void @param3(i32 %a, i32 %b, i32 %c)
 declare void @param8(i64, i64, i64, i64, i64, i64, i64, i64)
-define void @test() minsize {
+define void @test() minsize nounwind {
 ; CHECK-LABEL: test:
 ; CHECK: calll _param1
 ; CHECK-NEXT: popl %eax
@ -48,7 +48,7 @@ define void @negative(i32 %k) {
  ret void
 }
-define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize {
+define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize nounwind {
 ; CHECK-LABEL: spill:
 ; CHECK-DAG: movl %ecx,
 ; CHECK-DAG: movl %edx,
@ -63,7 +63,7 @@ define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize {
  ret void
 }
-define void @test_linux64(i32 %size) minsize {
+define void @test_linux64(i32 %size) minsize nounwind {
 ; LINUX64-LABEL: test_linux64:
 ; LINUX64: pushq %rbp
 ; LINUX64: callq param8
--- a/llvm/test/CodeGen/X86/push-cfi-debug.ll
+++ b/llvm/test/CodeGen/X86/push-cfi-debug.ll
@ -0,0 +1,53 @@
 ; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
 ; Function Attrs: optsize
 declare void @foo(i32, i32) #0
 declare x86_stdcallcc void @stdfoo(i32, i32) #0
 ; CHECK-LABEL: test1:
 ; CHECK: subl $8, %esp
 ; CHECK: .cfi_adjust_cfa_offset 8
 ; CHECK: pushl $2
 ; CHECK: .cfi_adjust_cfa_offset 4
 ; CHECK: pushl $1
 ; CHECK: .cfi_adjust_cfa_offset 4
 ; CHECK: calll foo
 ; CHECK: addl $16, %esp
 ; CHECK: .cfi_adjust_cfa_offset -16
 ; CHECK: subl $8, %esp
 ; CHECK: .cfi_adjust_cfa_offset 8
 ; CHECK: pushl $4
 ; CHECK: .cfi_adjust_cfa_offset 4
 ; CHECK: pushl $3
 ; CHECK: .cfi_adjust_cfa_offset 4
 ; CHECK: calll stdfoo
 ; CHECK: .cfi_adjust_cfa_offset -8
 ; CHECK: addl $8, %esp
 ; CHECK: .cfi_adjust_cfa_offset -8
 define void @test1() #0 {
 entry:
  tail call void @foo(i32 1, i32 2) #1, !dbg !10
  tail call x86_stdcallcc void @stdfoo(i32 3, i32 4) #1, !dbg !11
  ret void, !dbg !12
 }
 attributes #0 = { nounwind optsize }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250289)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
 !1 = !DIFile(filename: "foo.c", directory: "foo")
 !2 = !{}
 !3 = !{!4}
 !4 = distinct !DISubprogram(name: "test1", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, function: void ()* @test1, variables: !2)
 !5 = !DISubroutineType(types: !6)
 !6 = !{null}
 !7 = !{i32 2, !"Dwarf Version", i32 4}
 !8 = !{i32 2, !"Debug Info Version", i32 3}
 !9 = !{!"clang version 3.8.0 (trunk 250289)"}
 !10 = !DILocation(line: 4, column: 3, scope: !4)
 !11 = !DILocation(line: 5, column: 3, scope: !4)
 !12 = !DILocation(line: 6, column: 1, scope: !4)
--- a/llvm/test/CodeGen/X86/push-cfi-obj.ll
+++ b/llvm/test/CodeGen/X86/push-cfi-obj.ll
@ -1,36 +1,36 @@
-; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s
+; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=LINUX
 ; RUN: llc < %s -mtriple=i686-darwin-macosx10.7 -filetype=obj | llvm-readobj -sections | FileCheck -check-prefix=DARWIN %s
 ; On darwin, check that we manage to generate the compact unwind section
 ; DARWIN: Name: __compact_unwind
 ; DARWIN: Segment: __LD
-; CHECK:         Index: 8
+; LINUX:         Index: 8
-; CHECK-NEXT:    Name: .eh_frame (41)
+; LINUX-NEXT:    Name: .eh_frame (41)
-; CHECK-NEXT:    Type: SHT_PROGBITS (0x1)
+; LINUX-NEXT:    Type: SHT_PROGBITS (0x1)
-; CHECK-NEXT:    Flags [ (0x2)
+; LINUX-NEXT:    Flags [ (0x2)
-; CHECK-NEXT:      SHF_ALLOC (0x2)
+; LINUX-NEXT:      SHF_ALLOC (0x2)
-; CHECK-NEXT:    ]
+; LINUX-NEXT:    ]
-; CHECK-NEXT:    Address: 0x0
+; LINUX-NEXT:    Address: 0x0
-; CHECK-NEXT:    Offset: 0x64
+; LINUX-NEXT:    Offset: 0x68
-; CHECK-NEXT:    Size: 60
+; LINUX-NEXT:    Size: 64
-; CHECK-NEXT:    Link: 0
+; LINUX-NEXT:    Link: 0
-; CHECK-NEXT:    Info: 0
+; LINUX-NEXT:    Info: 0
-; CHECK-NEXT:    AddressAlignment: 4
+; LINUX-NEXT:    AddressAlignment: 4
-; CHECK-NEXT:    EntrySize: 0
+; LINUX-NEXT:    EntrySize: 0
-; CHECK-NEXT:    Relocations [
+; LINUX-NEXT:    Relocations [
-; CHECK-NEXT:    ]
+; LINUX-NEXT:    ]
-; CHECK-NEXT:    SectionData (
+; LINUX-NEXT:    SectionData (
-; CHECK-NEXT:      0000: 1C000000 00000000 017A504C 5200017C  |.........zPLR..||
+; LINUX-NEXT:      0000: 1C000000 00000000 017A504C 5200017C  |.........zPLR..||
-; CHECK-NEXT:      0010: 08070000 00000000 1B0C0404 88010000  |................|
+; LINUX-NEXT:      0010: 08070000 00000000 1B0C0404 88010000  |................|
-; CHECK-NEXT:      0020: 18000000 24000000 00000000 19000000  |....$...........|
+; LINUX-NEXT:      0020: 1C000000 24000000 00000000 1D000000  |....$...........|
-; CHECK-NEXT:      0030: 04000000 00430E10 2E100000           |.....C......|
+; LINUX-NEXT:      0030: 04000000 00410E08 8502420D 05432E10  |.....A....B..C..|
-; CHECK-NEXT:    )
+; LINUX-NEXT:    )
 declare i32 @__gxx_personality_v0(...)
 declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
-define void @test() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define void @test() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  invoke void @good(i32 1, i32 2, i32 3, i32 4)
          to label %continue unwind label %cleanup
@ -41,3 +41,5 @@ cleanup:
     cleanup
  ret void
 }
 attributes #0 = { optsize "no-frame-pointer-elim"="true" }
--- a/llvm/test/CodeGen/X86/push-cfi.ll
+++ b/llvm/test/CodeGen/X86/push-cfi.ll
@ -1,21 +1,51 @@
-; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX -check-prefix=CHECK
 ; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=DARWIN -check-prefix=CHECK
 declare i32 @__gxx_personality_v0(...)
 declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
 declare void @large(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f)
 declare void @empty()
-; We use an invoke, and expect a .cfi_escape GNU_ARGS_SIZE with size 16
+; When we use an invoke, and have FP, we expect a .cfi_escape GNU_ARGS_SIZE
-; before the invocation
+; with size 16 before the invocation. Without FP, we expect .cfi_adjust_cfa_offset
-; CHECK-LABEL: test1:
+; before and after.
-; CHECK: .cfi_escape 0x2e, 0x10
+; Darwin should not generate pushes in either circumstance.
-; CHECK-NEXT: pushl   $4
+; CHECK-LABEL: test1_nofp:
-; CHECK-NEXT: pushl   $3
+; LINUX: .cfi_escape 0x2e, 0x10
-; CHECK-NEXT: pushl   $2
+; LINUX: .cfi_adjust_cfa_offset 16
-; CHECK-NEXT: pushl   $1
+; LINUX-NEXT: pushl   $4
-; CHECK-NEXT: call
+; LINUX-NEXT: pushl   $3
-; CHECK-NEXT: addl $16, %esp
+; LINUX-NEXT: pushl   $2
-define void @test1() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; LINUX-NEXT: pushl   $1
 ; LINUX-NEXT: call
 ; LINUX-NEXT: addl $16, %esp
 ; LINUX: .cfi_adjust_cfa_offset -16
 ; DARWIN-NOT: .cfi_escape
 ; DARWIN-NOT: pushl
 define void @test1_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  invoke void @good(i32 1, i32 2, i32 3, i32 4)
          to label %continue unwind label %cleanup
 continue:
  ret void
 cleanup:  
  landingpad { i8*, i32 }
     cleanup
  ret void
 }
 ; CHECK-LABEL: test1_fp:
 ; LINUX: .cfi_escape 0x2e, 0x10
 ; LINUX-NEXT: pushl   $4
 ; LINUX-NEXT: pushl   $3
 ; LINUX-NEXT: pushl   $2
 ; LINUX-NEXT: pushl   $1
 ; LINUX-NEXT: call
 ; LINUX-NEXT: addl $16, %esp
 ; DARWIN: pushl %ebp
 ; DARWIN-NOT: .cfi_escape
 ; DARWIN-NOT: pushl
 define void @test1_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  invoke void @good(i32 1, i32 2, i32 3, i32 4)
          to label %continue unwind label %cleanup
@ -28,27 +58,69 @@ cleanup:
 }
 ; If the function has no handlers, we don't need to generate GNU_ARGS_SIZE,
-; even if it has an unwind table.
+; even if it has an unwind table. Without FP, we still need cfi_adjust_cfa_offset,
-; CHECK-LABEL: test2:
+; so darwin should not generate pushes.
-; CHECK-NOT: .cfi_escape
+; CHECK-LABEL: test2_nofp:
-; CHECK: pushl   $4
+; LINUX-NOT: .cfi_escape
-; CHECK-NEXT: pushl   $3
+; LINUX: .cfi_adjust_cfa_offset 16
-; CHECK-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $4
-; CHECK-NEXT: pushl   $1
+; LINUX-NEXT: pushl   $3
-; CHECK-NEXT: call
+; LINUX-NEXT: pushl   $2
-; CHECK-NEXT: addl $16, %esp
+; LINUX-NEXT: pushl   $1
-define void @test2() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; LINUX-NEXT: call
 ; LINUX-NEXT: addl $16, %esp
 ; LINUX: .cfi_adjust_cfa_offset -16
 ; DARWIN-NOT: .cfi_escape
 ; DARWIN-NOT: pushl
 define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  call void @good(i32 1, i32 2, i32 3, i32 4)
  ret void
 }
-; If we did not end up using any pushes, no need for GNU_ARGS_SIZE anywhere
+; CHECK-LABEL: test2_fp:
 ; CHECK-LABEL: test3:
 ; CHECK-NOT: .cfi_escape
-; CHECK-NOT: pushl
+; CHECK-NOT: .cfi_adjust_cfa_offset
-; CHECK: retl
+; CHECK: pushl   $4
-define void @test3() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-NEXT: pushl   $3
 ; CHECK-NEXT: pushl   $2
 ; CHECK-NEXT: pushl   $1
 ; CHECK-NEXT: call
 ; CHECK-NEXT: addl $24, %esp
 define void @test2_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  call void @good(i32 1, i32 2, i32 3, i32 4)
  ret void
 }
 ; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or
 ; cfi_adjust_cfa_offset.
 ; CHECK-LABEL: test3_nofp:
 ; LINUX-NOT: .cfi_escape
 ; LINUX-NOT: .cfi_adjust_cfa_offset
 ; LINUX-NOT: pushl
 ; LINUX: retl
 define void @test3_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  invoke void @empty()
          to label %continue unwind label %cleanup
 continue:
  ret void
 cleanup:  
  landingpad { i8*, i32 }
     cleanup
  ret void
 }
 ; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or
 ; cfi_adjust_cfa_offset.
 ; CHECK-LABEL: test3_fp:
 ; LINUX: pushl %ebp
 ; LINUX-NOT: .cfi_escape
 ; LINUX-NOT: .cfi_adjust_cfa_offset
 ; LINUX-NOT: pushl
 ; LINUX: retl
 define void @test3_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  invoke void @empty()
          to label %continue unwind label %cleanup
@ -62,24 +134,24 @@ cleanup:
 ; Different sized stacks need different GNU_ARGS_SIZEs
 ; CHECK-LABEL: test4:
-; CHECK: .cfi_escape 0x2e, 0x10
+; LINUX: .cfi_escape 0x2e, 0x10
-; CHECK-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $4
-; CHECK-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $3
-; CHECK-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $2
-; CHECK-NEXT: pushl   $1
+; LINUX-NEXT: pushl   $1
-; CHECK-NEXT: call
+; LINUX-NEXT: call
-; CHECK-NEXT: addl $16, %esp
+; LINUX-NEXT: addl $16, %esp
-; CHECK: .cfi_escape 0x2e, 0x20
+; LINUX: .cfi_escape 0x2e, 0x20
-; CHECK-NEXT: subl    $8, %esp
+; LINUX: subl    $8, %esp
-; CHECK-NEXT: pushl   $11
+; LINUX-NEXT: pushl   $11
-; CHECK-NEXT: pushl   $10
+; LINUX-NEXT: pushl   $10
-; CHECK-NEXT: pushl   $9
+; LINUX-NEXT: pushl   $9
-; CHECK-NEXT: pushl   $8
+; LINUX-NEXT: pushl   $8
-; CHECK-NEXT: pushl   $7
+; LINUX-NEXT: pushl   $7
-; CHECK-NEXT: pushl   $6
+; LINUX-NEXT: pushl   $6
-; CHECK-NEXT: calll   large
+; LINUX-NEXT: calll   large
-; CHECK-NEXT: addl $32, %esp
+; LINUX-NEXT: addl $32, %esp
-define void @test4() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define void @test4() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  invoke void @good(i32 1, i32 2, i32 3, i32 4)
          to label %continue1 unwind label %cleanup
@ -95,18 +167,22 @@ cleanup:
 }
 ; If we did use pushes, we need to reset GNU_ARGS_SIZE before a call
-; without parameters
+; without parameters, but don't need to adjust the cfa offset
-; CHECK-LABEL: test5:
+; CHECK-LABEL: test5_nofp:
-; CHECK: .cfi_escape 0x2e, 0x10
+; LINUX: .cfi_escape 0x2e, 0x10
-; CHECK-NEXT: pushl   $4
+; LINUX: .cfi_adjust_cfa_offset 16
-; CHECK-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $4
-; CHECK-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $3
-; CHECK-NEXT: pushl   $1
+; LINUX-NEXT: pushl   $2
-; CHECK-NEXT: call
+; LINUX-NEXT: pushl   $1
-; CHECK-NEXT: addl $16, %esp
+; LINUX-NEXT: call
-; CHECK: .cfi_escape 0x2e, 0x00
+; LINUX-NEXT: addl $16, %esp
-; CHECK-NEXT: call
+; LINUX: .cfi_adjust_cfa_offset -16
-define void @test5() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; LINUX-NOT: .cfi_adjust_cfa_offset
 ; LINUX: .cfi_escape 0x2e, 0x00
 ; LINUX-NOT: .cfi_adjust_cfa_offset
 ; LINUX: call
 define void @test5_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  invoke void @good(i32 1, i32 2, i32 3, i32 4)
          to label %continue1 unwind label %cleanup
@ -121,13 +197,39 @@ cleanup:
  ret void
 }
-; This is actually inefficient - we don't need to repeat the .cfi_escape twice.
+; CHECK-LABEL: test5_fp:
 ; LINUX: .cfi_escape 0x2e, 0x10
 ; LINUX-NEXT: pushl   $4
 ; LINUX-NEXT: pushl   $3
 ; LINUX-NEXT: pushl   $2
 ; LINUX-NEXT: pushl   $1
 ; LINUX-NEXT: call
 ; LINUX-NEXT: addl $16, %esp
 ; LINUX: .cfi_escape 0x2e, 0x00
 ; LINUX-NOT: .cfi_adjust_cfa_offset
 ; LINUX: call
 define void @test5_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  invoke void @good(i32 1, i32 2, i32 3, i32 4)
          to label %continue1 unwind label %cleanup
 continue1:
  invoke void @empty()
          to label %continue2 unwind label %cleanup
 continue2:
  ret void          
 cleanup:  
  landingpad { i8*, i32 }
     cleanup
  ret void
 }
 ; FIXME: This is actually inefficient - we don't need to repeat the .cfi_escape twice.
 ; CHECK-LABEL: test6:
-; CHECK: .cfi_escape 0x2e, 0x10
+; LINUX: .cfi_escape 0x2e, 0x10
-; CHECK: call
+; LINUX: call
-; CHECK: .cfi_escape 0x2e, 0x10
+; LINUX: .cfi_escape 0x2e, 0x10
-; CHECK: call
+; LINUX: call
-define void @test6() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define void @test6() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  invoke void @good(i32 1, i32 2, i32 3, i32 4)
          to label %continue1 unwind label %cleanup
@ -141,3 +243,41 @@ cleanup:
     cleanup
  ret void
 }
 ; Darwin should generate pushes in the presence of FP and an unwind table,
 ; but not in the presence of FP and an invoke.
 ; CHECK-LABEL: test7:
 ; DARWIN: pushl %ebp
 ; DARWIN: movl %esp, %ebp
 ; DARWIN: .cfi_def_cfa_register %ebp
 ; DARWIN-NOT: .cfi_adjust_cfa_offset
 ; DARWIN: pushl   $4
 ; DARWIN-NEXT: pushl   $3
 ; DARWIN-NEXT: pushl   $2
 ; DARWIN-NEXT: pushl   $1
 ; DARWIN-NEXT: call
 define void @test7() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  call void @good(i32 1, i32 2, i32 3, i32 4)
  ret void
 }
 ; CHECK-LABEL: test8:
 ; DARWIN: pushl %ebp
 ; DARWIN: movl %esp, %ebp
 ; DARWIN-NOT: .cfi_adjust_cfa_offset
 ; DARWIN-NOT: pushl
 define void @test8() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
  invoke void @good(i32 1, i32 2, i32 3, i32 4)
          to label %continue unwind label %cleanup
 continue:
  ret void
 cleanup:  
  landingpad { i8*, i32 }
     cleanup
  ret void
 }
 attributes #0 = { optsize }
 attributes #1 = { optsize "no-frame-pointer-elim"="true" }