[x32] Use ebp/esp as frame and stack pointer

Summary:
Since pointers are 32-bit on x32 we can use ebp and esp as frame and stack
pointer. Some operations like PUSH/POP and CFI_INSTRUCTION still
require 64-bit register, so using 64-bit MachineFramePtr where required.

X86_64 NaCl uses 64-bit frame/stack pointers, however it's been found that
both isTarget64BitLP64 and isTarget64BitILP32 are true for NaCl. Addressing
this issue here as well by making isTarget64BitLP64 false.

Also mark hasReservedSpillSlot unreachable on X86. See inlined comments.

Test Plan: Add one new simple test and upgrade 2 existing with x32 target case.

Reviewers: nadav, dschuff

Subscribers: llvm-commits, zinovy.nis

Differential Revision: http://reviews.llvm.org/D4617

llvm-svn: 215091
This commit is contained in:
Pavel Chupin 2014-08-07 09:41:19 +00:00
parent 27046758de
commit f55eb450e5
7 changed files with 111 additions and 42 deletions

View File

@ -148,32 +148,32 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, int64_t NumBytes,
bool Is64Bit, bool IsLP64, bool UseLEA,
bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA,
const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
unsigned Opc;
if (UseLEA)
Opc = getLEArOpcode(IsLP64);
Opc = getLEArOpcode(Is64BitStackPtr);
else
Opc = isSub
? getSUBriOpcode(IsLP64, Offset)
: getADDriOpcode(IsLP64, Offset);
? getSUBriOpcode(Is64BitStackPtr, Offset)
: getADDriOpcode(Is64BitStackPtr, Offset);
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
while (Offset) {
uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
if (ThisVal == (Is64Bit ? 8 : 4)) {
if (ThisVal == (Is64BitTarget ? 8 : 4)) {
// Use push / pop instead.
unsigned Reg = isSub
? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
: findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
: findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
if (Reg) {
Opc = isSub
? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
: (Is64Bit ? X86::POP64r : X86::POP32r);
? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
: (Is64BitTarget ? X86::POP64r : X86::POP32r);
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
.addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
if (isSub)
@ -449,7 +449,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
bool HasFP = hasFP(MF);
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
bool IsLP64 = STI.isTarget64BitLP64();
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
bool IsWin64 = STI.isTargetWin64();
bool IsWinEH =
MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
@ -461,6 +462,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
const unsigned MachineFramePtr = STI.isTarget64BitILP32() ?
getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
unsigned StackPtr = RegInfo->getStackRegister();
unsigned BasePtr = RegInfo->getBaseRegister();
DebugLoc DL;
@ -507,7 +510,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (TailCallReturnAddrDelta < 0) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)),
TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)),
StackPtr)
.addReg(StackPtr)
.addImm(-TailCallReturnAddrDelta)
@ -551,7 +554,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
.addReg(FramePtr, RegState::Kill)
.addReg(MachineFramePtr, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsDwarfCFI) {
@ -564,7 +567,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
// Change the rule for the FramePtr to be an "offset" rule.
unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr,
DwarfFramePtr, 2 * stackGrowth));
@ -580,14 +583,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsDwarfCFI) {
// Mark effective beginning of when frame pointer becomes valid.
// Define the current CFA to use the EBP/RBP register.
unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
@ -596,7 +599,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Mark the FramePtr as live-in in every block.
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
I->addLiveIn(FramePtr);
I->addLiveIn(MachineFramePtr);
} else {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
@ -635,7 +638,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
TII.get(Uses64BitFramePtr ? X86::AND64ri32 : X86::AND32ri), StackPtr)
.addReg(StackPtr)
.addImm(-MaxAlign)
.setMIFlag(MachineInstr::FrameSetup);
@ -730,7 +733,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MBB.insert(MBBI, MI);
}
} else if (NumBytes) {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr,
UseLEA, TII, *RegInfo);
}
@ -804,7 +807,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// to reference locals.
if (RegInfo->hasBasePointer(MF)) {
// Update the base pointer with the current stack pointer.
unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
@ -842,11 +845,15 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc DL = MBBI->getDebugLoc();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
bool IsLP64 = STI.isTarget64BitLP64();
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
const bool Is64BitILP32 = STI.isTarget64BitILP32();
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned MachineFramePtr {Is64BitILP32 ?
getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr};
unsigned StackPtr = RegInfo->getStackRegister();
bool IsWinEH =
@ -903,7 +910,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Pop EBP.
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
} else {
NumBytes = StackSize - CSSize;
}
@ -935,19 +942,19 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (RegInfo->needsStackRealignment(MF))
MBBI = FirstCSPop;
if (CSSize != 0) {
unsigned Opc = getLEArOpcode(IsLP64);
unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
FramePtr, false, -CSSize);
--MBBI;
} else {
unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(FramePtr);
--MBBI;
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA,
emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA,
TII, *RegInfo);
--MBBI;
}
@ -967,7 +974,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineOperand &DestAddr = MBBI->getOperand(0);
assert(DestAddr.isReg() && "Offset should be in register!");
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
StackPtr).addReg(DestAddr.getReg());
} else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
RetOpcode == X86::TCRETURNmi ||
@ -993,7 +1000,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64,
emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
UseLEA, TII, *RegInfo);
}
@ -1038,7 +1045,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII,
emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr, UseLEA, TII,
*RegInfo);
}
}
@ -1124,7 +1131,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// about avoiding it later.
unsigned FPReg = RegInfo->getFrameRegister(MF);
for (unsigned i = 0; i < CSI.size(); ++i) {
if (CSI[i].getReg() == FPReg) {
if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
CSI.erase(CSI.begin() + i);
break;
}

View File

@ -659,8 +659,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.

View File

@ -68,8 +68,10 @@ X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI)
if (Is64Bit) {
SlotSize = 8;
StackPtr = X86::RSP;
FramePtr = X86::RBP;
StackPtr = (Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64()) ?
X86::RSP : X86::ESP;
FramePtr = (Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64()) ?
X86::RBP : X86::EBP;
} else {
SlotSize = 4;
StackPtr = X86::ESP;
@ -459,13 +461,9 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (Reg == FramePtr && TFI->hasFP(MF)) {
FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
return true;
}
return false;
// Since X86 defines assignCalleeSavedSpillSlots which always return true
// this function neither used nor tested.
llvm_unreachable("Unused function on X86. Otherwise need a test case.");
}
void

View File

@ -312,7 +312,8 @@ public:
/// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
bool isTarget64BitLP64() const {
return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32);
return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32 &&
TargetTriple.getOS() != Triple::NaCl);
}
PICStyles::Style getPICStyle() const { return PICStyle; }

View File

@ -1,4 +1,5 @@
; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnux32 -enable-misched=false | FileCheck %s -check-prefix=X32ABI
declare void @bar(<2 x i64>* %n)
@ -6,15 +7,29 @@ define void @foo(i64 %h) {
%p = alloca <2 x i64>, i64 %h
call void @bar(<2 x i64>* %p)
ret void
; CHECK: foo
; CHECK-LABEL: foo
; CHECK-NOT: andq $-32, %rax
; X32ABI-LABEL: foo
; X32ABI-NOT: andl $-32, %eax
}
define void @foo2(i64 %h) {
%p = alloca <2 x i64>, i64 %h, align 32
call void @bar(<2 x i64>* %p)
ret void
; CHECK: foo2
; CHECK-LABEL: foo2
; CHECK: andq $-32, %rsp
; CHECK: andq $-32, %rax
; X32ABI-LABEL: foo2
; X32ABI: andl $-32, %esp
; X32ABI: andl $-32, %eax
}
define void @foo3(i64 %h) {
%p = alloca <2 x i64>, i64 %h
ret void
; CHECK-LABEL: foo3
; CHECK: movq %rbp, %rsp
; X32ABI-LABEL: foo3
; X32ABI: movl %ebp, %esp
}

View File

@ -2,6 +2,8 @@
; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-32
; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-64
; RUN: llc < %s -mtriple=x86_64-gnux32 | FileCheck %s --check-prefix=CHECK-X32ABI
; RUN: llc < %s -mtriple=x86_64-gnux32 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-X32ABI
define i8* @test1() nounwind {
entry:
@ -17,6 +19,12 @@ entry:
; CHECK-64-NEXT: movq %rbp, %rax
; CHECK-64-NEXT: pop
; CHECK-64-NEXT: ret
; CHECK-X32ABI-LABEL: test1
; CHECK-X32ABI: pushq %rbp
; CHECK-X32ABI-NEXT: movl %esp, %ebp
; CHECK-X32ABI-NEXT: movl %ebp, %eax
; CHECK-X32ABI-NEXT: popq %rbp
; CHECK-X32ABI-NEXT: ret
%0 = tail call i8* @llvm.frameaddress(i32 0)
ret i8* %0
}
@ -37,6 +45,13 @@ entry:
; CHECK-64-NEXT: movq (%rax), %rax
; CHECK-64-NEXT: pop
; CHECK-64-NEXT: ret
; CHECK-X32ABI-LABEL: test2
; CHECK-X32ABI: pushq %rbp
; CHECK-X32ABI-NEXT: movl %esp, %ebp
; CHECK-X32ABI-NEXT: movl (%ebp), %eax
; CHECK-X32ABI-NEXT: movl (%eax), %eax
; CHECK-X32ABI-NEXT: popq %rbp
; CHECK-X32ABI-NEXT: ret
%0 = tail call i8* @llvm.frameaddress(i32 2)
ret i8* %0
}

View File

@ -0,0 +1,34 @@
; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-pc-linux-gnux32 < %s | FileCheck -check-prefix=X32ABI %s
; RUN: llc -mtriple=x86_64-pc-nacl < %s | FileCheck -check-prefix=NACL %s
; x32 uses %esp, %ebp as stack and frame pointers
; CHECK-LABEL: foo
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: movq %rdi, -8(%rbp)
; CHECK: popq %rbp
; X32ABI-LABEL: foo
; X32ABI: pushq %rbp
; X32ABI: movl %esp, %ebp
; X32ABI: movl %edi, -4(%ebp)
; X32ABI: popq %rbp
; NACL-LABEL: foo
; NACL: pushq %rbp
; NACL: movq %rsp, %rbp
; NACL: movl %edi, -4(%rbp)
; NACL: popq %rbp
define void @foo(i32* %a) #0 {
entry:
%a.addr = alloca i32*, align 4
%b = alloca i32*, align 4
store i32* %a, i32** %a.addr, align 4
ret void
}
attributes #0 = { nounwind uwtable "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"}