2012-02-18 20:03:15 +08:00
|
|
|
//===-- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*-===//
|
2010-11-15 08:06:54 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
//
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-08-14 00:26:38 +08:00
|
|
|
#ifndef LLVM_LIB_TARGET_SPARC_SPARCFRAMELOWERING_H
|
|
|
|
#define LLVM_LIB_TARGET_SPARC_SPARCFRAMELOWERING_H
|
2010-11-15 08:06:54 +08:00
|
|
|
|
|
|
|
#include "Sparc.h"
|
2017-11-04 06:32:11 +08:00
|
|
|
#include "llvm/CodeGen/TargetFrameLowering.h"
|
2010-11-15 08:06:54 +08:00
|
|
|
|
|
|
|
namespace llvm {
|
|
|
|
|
2014-06-27 06:33:50 +08:00
|
|
|
class SparcSubtarget;
|
2011-01-10 20:39:04 +08:00
|
|
|
class SparcFrameLowering : public TargetFrameLowering {
|
2010-11-15 08:06:54 +08:00
|
|
|
public:
|
2014-06-27 06:33:50 +08:00
|
|
|
explicit SparcFrameLowering(const SparcSubtarget &ST);
|
2010-11-15 08:06:54 +08:00
|
|
|
|
|
|
|
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
|
|
|
|
/// the function.
|
[ShrinkWrap] Add (a simplified version) of shrink-wrapping.
This patch introduces a new pass that computes the safe point to insert the
prologue and epilogue of the function.
The interest is to find safe points that are cheaper than the entry and exits
blocks.
As an example and to avoid regressions to be introduce, this patch also
implements the required bits to enable the shrink-wrapping pass for AArch64.
** Context **
Currently we insert the prologue and epilogue of the method/function in the
entry and exits blocks. Although this is correct, we can do a better job when
those are not immediately required and insert them at less frequently executed
places.
The job of the shrink-wrapping pass is to identify such places.
** Motivating example **
Let us consider the following function that perform a call only in one branch of
a if:
define i32 @f(i32 %a, i32 %b) {
%tmp = alloca i32, align 4
%tmp2 = icmp slt i32 %a, %b
br i1 %tmp2, label %true, label %false
true:
store i32 %a, i32* %tmp, align 4
%tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
br label %false
false:
%tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
ret i32 %tmp.0
}
On AArch64 this code generates (removing the cfi directives to ease
readabilities):
_f: ; @f
; BB#0:
stp x29, x30, [sp, #-16]!
mov x29, sp
sub sp, sp, #16 ; =16
cmp w0, w1
b.ge LBB0_2
; BB#1: ; %true
stur w0, [x29, #-4]
sub x1, x29, #4 ; =4
mov w0, wzr
bl _doSomething
LBB0_2: ; %false
mov sp, x29
ldp x29, x30, [sp], #16
ret
With shrink-wrapping we could generate:
_f: ; @f
; BB#0:
cmp w0, w1
b.ge LBB0_2
; BB#1: ; %true
stp x29, x30, [sp, #-16]!
mov x29, sp
sub sp, sp, #16 ; =16
stur w0, [x29, #-4]
sub x1, x29, #4 ; =4
mov w0, wzr
bl _doSomething
add sp, x29, #16 ; =16
ldp x29, x30, [sp], #16
LBB0_2: ; %false
ret
Therefore, we would pay the overhead of setting up/destroying the frame only if
we actually do the call.
** Proposed Solution **
This patch introduces a new machine pass that perform the shrink-wrapping
analysis (See the comments at the beginning of ShrinkWrap.cpp for more details).
It then stores the safe save and restore point into the MachineFrameInfo
attached to the MachineFunction.
This information is then used by the PrologEpilogInserter (PEI) to place the
related code at the right place. This pass runs right before the PEI.
Unlike the original paper of Chow from PLDI’88, this implementation of
shrink-wrapping does not use expensive data-flow analysis and does not need hack
to properly avoid frequently executed point. Instead, it relies on dominance and
loop properties.
The pass is off by default and each target can opt-in by setting the
EnableShrinkWrap boolean to true in their derived class of TargetPassConfig.
This setting can also be overwritten on the command line by using
-enable-shrink-wrap.
Before you try out the pass for your target, make sure you properly fix your
emitProlog/emitEpilog/adjustForXXX method to cope with basic blocks that are not
necessarily the entry block.
** Design Decisions **
1. ShrinkWrap is its own pass right now. It could frankly be merged into PEI but
for debugging and clarity I thought it was best to have its own file.
2. Right now, we only support one save point and one restore point. At some
point we can expand this to several save point and restore point, the impacted
component would then be:
- The pass itself: New algorithm needed.
- MachineFrameInfo: Hold a list or set of Save/Restore point instead of one
pointer.
- PEI: Should loop over the save point and restore point.
Anyhow, at least for this first iteration, I do not believe this is interesting
to support the complex cases. We should revisit that when we motivating
examples.
Differential Revision: http://reviews.llvm.org/D9210
<rdar://problem/3201744>
llvm-svn: 236507
2015-05-06 01:38:16 +08:00
|
|
|
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
|
2014-04-29 15:57:13 +08:00
|
|
|
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
|
2010-11-19 05:19:35 +08:00
|
|
|
|
2016-04-01 02:33:38 +08:00
|
|
|
MachineBasicBlock::iterator
|
2014-04-29 15:57:13 +08:00
|
|
|
eliminateCallFramePseudoInstr(MachineFunction &MF,
|
|
|
|
MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator I) const override;
|
2013-02-22 04:05:00 +08:00
|
|
|
|
2014-04-29 15:57:13 +08:00
|
|
|
bool hasReservedCallFrame(const MachineFunction &MF) const override;
|
|
|
|
bool hasFP(const MachineFunction &MF) const override;
|
2015-07-15 01:17:13 +08:00
|
|
|
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
|
|
|
|
RegScavenger *RS = nullptr) const override;
|
2013-05-29 12:46:31 +08:00
|
|
|
|
[Sparc] Support user-specified stack object overalignment.
Note: I do not implement a base pointer, so it's still impossible to
have dynamic realignment AND dynamic alloca in the same function.
This also moves the code for determining the frame index reference
into getFrameIndexReference, where it belongs, instead of inline in
eliminateFrameIndex.
[Begin long-winded screed]
Now, stack realignment for Sparc is actually a silly thing to support,
because the Sparc ABI has no need for it -- unlike the situation on
x86, the stack is ALWAYS aligned to the required alignment for the CPU
instructions: 8 bytes on sparcv8, and 16 bytes on sparcv9.
However, LLVM unfortunately implements user-specified overalignment
using stack realignment support, so for now, I'm going to go along
with that tradition. GCC instead treats objects which have alignment
specification greater than the maximum CPU-required alignment for the
target as a separate block of stack memory, with their own virtual
base pointer (which gets aligned). Doing it that way avoids needing to
implement per-target support for stack realignment, except for the
targets which *actually* have an ABI-specified stack alignment which
is too small for the CPU's requirements.
Further unfortunately in LLVM, the default canRealignStack for all
targets effectively returns true, despite that implementing that is
something a target needs to do specifically. So, the previous behavior
on Sparc was to silently ignore the user's specified stack
alignment. Ugh.
Yet MORE unfortunate, if a target actually does return false from
canRealignStack, that also causes the user-specified alignment to be
*silently ignored*, rather than emitting an error.
(I started looking into fixing that last, but it broke a bunch of
tests, because LLVM actually *depends* on having it silently ignored:
some architectures (e.g. non-linux i386) have smaller stack alignment
than spilled-register alignment. But, the fact that a register needs
spilling is not known until within the register allocator. And by that
point, the decision to not reserve the frame pointer has been frozen
in place. And without a frame pointer, stack realignment is not
possible. So, canRealignStack() returns false, and
needsStackRealignment() then returns false, assuming everyone can just
go on their merry way assuming the alignment requirements were
probably just suggestions after-all. Sigh...)
Differential Revision: http://reviews.llvm.org/D12208
llvm-svn: 245668
2015-08-21 12:17:56 +08:00
|
|
|
int getFrameIndexReference(const MachineFunction &MF, int FI,
|
|
|
|
unsigned &FrameReg) const override;
|
2015-08-27 01:57:51 +08:00
|
|
|
|
|
|
|
/// targetHandlesStackFrameRounding - Returns true if the target is
|
|
|
|
/// responsible for rounding up the stack frame (probably at emitPrologue
|
|
|
|
/// time).
|
|
|
|
bool targetHandlesStackFrameRounding() const override { return true; }
|
|
|
|
|
2013-05-29 12:46:31 +08:00
|
|
|
private:
|
2013-06-05 02:33:25 +08:00
|
|
|
// Remap input registers to output registers for leaf procedure.
|
2013-05-29 12:46:31 +08:00
|
|
|
void remapRegsForLeafProc(MachineFunction &MF) const;
|
|
|
|
|
2013-06-05 02:33:25 +08:00
|
|
|
// Returns true if MF is a leaf procedure.
|
2013-05-29 12:46:31 +08:00
|
|
|
bool isLeafProc(MachineFunction &MF) const;
|
2013-11-25 04:23:25 +08:00
|
|
|
|
|
|
|
|
|
|
|
// Emits code for adjusting SP in function prologue/epilogue.
|
|
|
|
void emitSPAdjustment(MachineFunction &MF,
|
|
|
|
MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator MBBI,
|
|
|
|
int NumBytes, unsigned ADDrr, unsigned ADDri) const;
|
|
|
|
|
2010-11-15 08:06:54 +08:00
|
|
|
};
|
|
|
|
|
2015-06-23 17:49:53 +08:00
|
|
|
} // End llvm namespace
|
2010-11-15 08:06:54 +08:00
|
|
|
|
|
|
|
#endif
|