forked from OSchip/llvm-project
[WebAssembly] Stackify code emitted by eliminateFrameIndex and SP writeback
Summary: WebAssemblyRegisterInfo::eliminateFrameIndex can emit several instructions to do address calculations; these can usually be stackified. Because instructions with FI operands can have subsequent operands which may be expression trees, find the top of the leftmost tree and insert the code before it, to keep the LIFO property. Also use stackified registers when writing back the SP value to memory in the epilog: updating the SP32 physical register there is unnecessary because SP will not be used after the epilog, and skipping it results in better code. Differential Revision: http://reviews.llvm.org/D18234 llvm-svn: 263725
This commit is contained in:
parent
93bbc7cd66
commit
d4207ba0f6
|
@ -78,19 +78,20 @@ bool WebAssemblyFrameLowering::needsSPWriteback(
|
|||
|
||||
static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &InsertPt,
|
||||
MachineBasicBlock::iterator &InsertAddr,
|
||||
MachineBasicBlock::iterator &InsertStore,
|
||||
DebugLoc DL) {
|
||||
auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
|
||||
unsigned SPAddr =
|
||||
MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
|
||||
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
|
||||
|
||||
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPAddr)
|
||||
BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), SPAddr)
|
||||
.addExternalSymbol(SPSymbol);
|
||||
auto *MMO = new MachineMemOperand(MachinePointerInfo(),
|
||||
MachineMemOperand::MOStore, 4, 4);
|
||||
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32),
|
||||
WebAssembly::SP32)
|
||||
BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32),
|
||||
SrcReg)
|
||||
.addImm(0)
|
||||
.addReg(SPAddr)
|
||||
.addImm(2) // p2align
|
||||
|
@ -108,7 +109,7 @@ void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
|
|||
if (I->getOpcode() == TII->getCallFrameDestroyOpcode() &&
|
||||
needsSPWriteback(MF, *MF.getFrameInfo())) {
|
||||
DebugLoc DL = I->getDebugLoc();
|
||||
writeSPToMemory(WebAssembly::SP32, MF, MBB, I, DL);
|
||||
writeSPToMemory(WebAssembly::SP32, MF, MBB, I, I, DL);
|
||||
}
|
||||
MBB.erase(I);
|
||||
}
|
||||
|
@ -171,7 +172,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
.addReg(WebAssembly::SP32);
|
||||
}
|
||||
if (StackSize && needsSPWriteback(MF, *MFI)) {
|
||||
writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, DL);
|
||||
writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -192,18 +193,24 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
|
||||
// Restore the stack pointer. If we had fixed-size locals, add the offset
|
||||
// subtracted in the prolog.
|
||||
unsigned SPReg = 0;
|
||||
MachineBasicBlock::iterator InsertAddr = InsertPt;
|
||||
if (StackSize) {
|
||||
unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
|
||||
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
|
||||
.addImm(StackSize);
|
||||
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32),
|
||||
WebAssembly::SP32)
|
||||
InsertAddr =
|
||||
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
|
||||
.addImm(StackSize);
|
||||
// In the epilog we don't need to write the result back to the SP32 physreg
|
||||
// because it won't be used again. We can use a stackified register instead.
|
||||
SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
|
||||
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg)
|
||||
.addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32)
|
||||
.addReg(OffsetReg);
|
||||
WFI->stackifyVReg(OffsetReg);
|
||||
WFI->stackifyVReg(SPReg);
|
||||
} else {
|
||||
SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32;
|
||||
}
|
||||
|
||||
writeSPToMemory(
|
||||
(!StackSize && hasFP(MF)) ? WebAssembly::FP32 : WebAssembly::SP32, MF,
|
||||
MBB, InsertPt, DL);
|
||||
writeSPToMemory(SPReg, MF, MBB, InsertAddr, InsertPt, DL);
|
||||
}
|
||||
|
|
|
@ -51,6 +51,51 @@ WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction & /*MF*/) const {
|
|||
return Reserved;
|
||||
}
|
||||
|
||||
static bool isStackifiedVReg(const WebAssemblyFunctionInfo *WFI,
|
||||
const MachineOperand& Op) {
|
||||
if (Op.isReg()) {
|
||||
unsigned Reg = Op.getReg();
|
||||
return TargetRegisterInfo::isVirtualRegister(Reg) &&
|
||||
WFI->isVRegStackified(Reg);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool canStackifyOperand(const MachineInstr& Inst) {
|
||||
unsigned Op = Inst.getOpcode();
|
||||
return Op != TargetOpcode::PHI &&
|
||||
Op != TargetOpcode::INLINEASM &&
|
||||
Op != TargetOpcode::DBG_VALUE;
|
||||
}
|
||||
|
||||
// Determine if the FI sequence can be stackified, and if so, where the code can
|
||||
// be inserted. If stackification is possible, returns true and ajusts II to
|
||||
// point to the insertion point.
|
||||
bool findInsertPt(const WebAssemblyFunctionInfo *WFI, MachineBasicBlock &MBB,
|
||||
unsigned OperandNum, MachineBasicBlock::iterator &II) {
|
||||
if (!canStackifyOperand(*II)) return false;
|
||||
|
||||
MachineBasicBlock::iterator InsertPt(II);
|
||||
int StackCount = 0;
|
||||
// Operands are popped in reverse order, so any operands after FIOperand
|
||||
// impose a constraint
|
||||
for (unsigned i = OperandNum; i < II->getNumOperands(); i++) {
|
||||
if (isStackifiedVReg(WFI, II->getOperand(i))) ++StackCount;
|
||||
}
|
||||
// Walk backwards, tracking stack depth. When it reaches 0 we have reached the
|
||||
// top of the subtree.
|
||||
while (StackCount) {
|
||||
if (InsertPt == MBB.begin()) return false;
|
||||
--InsertPt;
|
||||
for (const auto &def : InsertPt->defs())
|
||||
if (isStackifiedVReg(WFI, def)) --StackCount;
|
||||
for (const auto &use : InsertPt->explicit_uses())
|
||||
if (isStackifiedVReg(WFI, use)) ++StackCount;
|
||||
}
|
||||
II = InsertPt;
|
||||
return true;
|
||||
}
|
||||
|
||||
void WebAssemblyRegisterInfo::eliminateFrameIndex(
|
||||
MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
|
||||
RegScavenger * /*RS*/) const {
|
||||
|
@ -78,20 +123,34 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
|
|||
MI.getOperand(FIOperandNum)
|
||||
.ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
|
||||
} else {
|
||||
// Otherwise create an i32.add SP, offset and make it the operand.
|
||||
// Otherwise calculate the address
|
||||
auto &MRI = MF.getRegInfo();
|
||||
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
|
||||
|
||||
unsigned FIRegOperand = WebAssembly::SP32;
|
||||
if (FrameOffset) {
|
||||
FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
|
||||
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::CONST_I32),
|
||||
FIRegOperand)
|
||||
// Create i32.add SP, offset and make it the operand. We want to stackify
|
||||
// this sequence, but we need to preserve the LIFO expr stack ordering
|
||||
// (i.e. we can't insert our code in between MI and any operands it
|
||||
// pops before FIOperand).
|
||||
auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>();
|
||||
bool CanStackifyFI = findInsertPt(WFI, MBB, FIOperandNum, II);
|
||||
|
||||
unsigned OffsetOp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
|
||||
BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32),
|
||||
OffsetOp)
|
||||
.addImm(FrameOffset);
|
||||
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::ADD_I32),
|
||||
if (CanStackifyFI) {
|
||||
WFI->stackifyVReg(OffsetOp);
|
||||
FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
|
||||
WFI->stackifyVReg(FIRegOperand);
|
||||
} else {
|
||||
FIRegOperand = OffsetOp;
|
||||
}
|
||||
BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32),
|
||||
FIRegOperand)
|
||||
.addReg(WebAssembly::SP32)
|
||||
.addReg(FIRegOperand);
|
||||
.addReg(OffsetOp);
|
||||
}
|
||||
MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false);
|
||||
}
|
||||
|
|
|
@ -35,15 +35,15 @@ define void @byval_arg(%SmallStruct* %ptr) {
|
|||
; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($0)
|
||||
; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L4]]
|
||||
; Pass a pointer to the stack slot to the function
|
||||
; CHECK-NEXT: i32.const [[L5:.+]]=, 12
|
||||
; CHECK-NEXT: i32.add [[ARG:.+]]=, [[SP]], [[L5]]
|
||||
; CHECK-NEXT: call ext_byval_func@FUNCTION, [[L5]]
|
||||
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12
|
||||
; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]]
|
||||
; CHECK-NEXT: call ext_byval_func@FUNCTION, $pop[[ARG]]
|
||||
call void @ext_byval_func(%SmallStruct* byval %ptr)
|
||||
; Restore the stack
|
||||
; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16
|
||||
; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L6]]
|
||||
; CHECK-NEXT: i32.const $push[[L7:.+]]=, __stack_pointer
|
||||
; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), [[SP]]
|
||||
; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16
|
||||
; CHECK-NEXT: i32.add $push[[L8:.+]]=, [[SP]], $pop[[L6]]
|
||||
; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), $pop[[L8]]
|
||||
; CHECK-NEXT: return
|
||||
ret void
|
||||
}
|
||||
|
@ -58,9 +58,9 @@ define void @byval_arg_align8(%SmallStruct* %ptr) {
|
|||
; CHECK: i32.load $push[[L4:.+]]=, 0($0):p2align=3
|
||||
; CHECK-NEXT: i32.store {{.*}}=, 8([[SP]]):p2align=3, $pop[[L4]]
|
||||
; Pass a pointer to the stack slot to the function
|
||||
; CHECK-NEXT: i32.const [[L5:.+]]=, 8
|
||||
; CHECK-NEXT: i32.add [[ARG:.+]]=, [[SP]], [[L5]]
|
||||
; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, [[L5]]
|
||||
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8
|
||||
; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]]
|
||||
; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, $pop[[ARG]]
|
||||
call void @ext_byval_func_align8(%SmallStruct* byval align 8 %ptr)
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -61,8 +61,8 @@ define void @set_no(i8* %dst, i8 %src, i32 %len) {
|
|||
|
||||
|
||||
; CHECK-LABEL: frame_index:
|
||||
; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop1, $pop0{{$}}
|
||||
; CHECK: i32.call $discard=, memset@FUNCTION, $1, $pop3, $pop2{{$}}
|
||||
; CHECK: i32.call $discard=, memset@FUNCTION, $pop12, $pop1, $pop0{{$}}
|
||||
; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop3, $pop2{{$}}
|
||||
; CHECK: return{{$}}
|
||||
define void @frame_index() {
|
||||
entry:
|
||||
|
|
|
@ -22,10 +22,10 @@ define void @alloca32() noredzone {
|
|||
; CHECK: i32.const $push[[L0:.+]]=, 0
|
||||
; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L0]]
|
||||
store i32 0, i32* %retval
|
||||
; CHECK: i32.const $push[[L5:.+]]=, 16
|
||||
; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L5]]
|
||||
; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
|
||||
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), [[SP]]
|
||||
; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
|
||||
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16
|
||||
; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
|
||||
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -49,7 +49,7 @@ define void @alloca3264() {
|
|||
}
|
||||
|
||||
; CHECK-LABEL: allocarray:
|
||||
; CHECK: .local i32, i32{{$}}
|
||||
; CHECK: .local i32{{$}}
|
||||
define void @allocarray() {
|
||||
; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
|
||||
; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
|
||||
|
@ -59,10 +59,10 @@ define void @allocarray() {
|
|||
; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]]
|
||||
%r = alloca [33 x i32]
|
||||
|
||||
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12
|
||||
; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
|
||||
; CHECK-NEXT: i32.const $push[[L4:.+]]=, 12
|
||||
; CHECK-NEXT: i32.const [[L5:.+]]=, 12
|
||||
; CHECK-NEXT: i32.add [[L5]]=, [[SP]], [[L5]]
|
||||
; CHECK-NEXT: i32.add $push[[L6:.+]]=, [[L5]], $pop[[L4]]
|
||||
; CHECK-NEXT: i32.add $push[[L6:.+]]=, $pop[[L7]], $pop[[L4]]
|
||||
; CHECK-NEXT: i32.const $push[[L9:.+]]=, 1{{$}}
|
||||
; CHECK-NEXT: i32.store $push[[L10:.+]]=, 12([[SP]]), $pop[[L9]]{{$}}
|
||||
; CHECK-NEXT: i32.store $discard=, 0($pop3), $pop[[L10]]{{$}}
|
||||
|
@ -71,10 +71,10 @@ define void @allocarray() {
|
|||
%p2 = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 3
|
||||
store i32 1, i32* %p2
|
||||
|
||||
; CHECK: i32.const $push[[L11:.+]]=, 144
|
||||
; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L11]]
|
||||
; CHECK-NEXT: i32.const $push[[L12:.+]]=, __stack_pointer
|
||||
; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), [[SP]]
|
||||
; CHECK: i32.const $push[[L12:.+]]=, __stack_pointer
|
||||
; CHECK-NEXT: i32.const $push[[L11:.+]]=, 144
|
||||
; CHECK-NEXT: i32.add $push[[L13:.+]]=, [[SP]], $pop[[L11]]
|
||||
; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), $pop[[L13]]
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -86,18 +86,18 @@ define void @non_mem_use(i8** %addr) {
|
|||
%r = alloca i64
|
||||
%r2 = alloca i64
|
||||
; %r is at SP+8
|
||||
; CHECK: i32.const [[OFF:.+]]=, 8
|
||||
; CHECK-NEXT: i32.add [[ARG1:.+]]=, [[SP]], [[OFF]]
|
||||
; CHECK-NEXT: call ext_func@FUNCTION, [[ARG1]]
|
||||
; CHECK: i32.const $push[[OFF:.+]]=, 8
|
||||
; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, [[SP]], $pop[[OFF]]
|
||||
; CHECK-NEXT: call ext_func@FUNCTION, $pop[[ARG1]]
|
||||
call void @ext_func(i64* %r)
|
||||
; %r2 is at SP+0, no add needed
|
||||
; CHECK-NEXT: call ext_func@FUNCTION, [[SP]]
|
||||
call void @ext_func(i64* %r2)
|
||||
; Use as a value, but in a store
|
||||
; %buf is at SP+16
|
||||
; CHECK: i32.const [[OFF:.+]]=, 16
|
||||
; CHECK-NEXT: i32.add [[VAL:.+]]=, [[SP]], [[OFF]]
|
||||
; CHECK-NEXT: i32.store {{.*}}=, 0($0), [[VAL]]
|
||||
; CHECK: i32.const $push[[OFF:.+]]=, 16
|
||||
; CHECK-NEXT: i32.add $push[[VAL:.+]]=, [[SP]], $pop[[OFF]]
|
||||
; CHECK-NEXT: i32.store {{.*}}=, 0($0), $pop[[VAL]]
|
||||
%gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0
|
||||
store i8* %gep, i8** %addr
|
||||
ret void
|
||||
|
@ -120,10 +120,10 @@ define void @allocarray_inbounds() {
|
|||
%p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3
|
||||
store i32 1, i32* %p2
|
||||
call void @ext_func(i64* null);
|
||||
; CHECK: i32.const $push[[L5:.+]]=, 32
|
||||
; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L5]]
|
||||
; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
|
||||
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), [[SP]]
|
||||
; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
|
||||
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32
|
||||
; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
|
||||
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -143,7 +143,7 @@ define void @dynamic_alloca(i32 %alloc) {
|
|||
; CHECK: call ext_func_i32@FUNCTION
|
||||
call void @ext_func_i32(i32* %r)
|
||||
; CHECK: i32.const $push[[L3:.+]]=, __stack_pointer
|
||||
; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L3]]), [[FP]]
|
||||
; CHECK-NEXT: i32.store $discard=, 0($pop[[L3]]), [[FP]]
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -183,10 +183,10 @@ define void @dynamic_static_alloca(i32 %alloc) noredzone {
|
|||
%r1 = alloca i32
|
||||
%r = alloca i32, i32 %alloc
|
||||
store i32 0, i32* %r
|
||||
; CHECK: i32.const $push[[L5:.+]]=, 16
|
||||
; CHECK-NEXT: i32.add [[SP]]=, [[FP]], $pop[[L5]]
|
||||
; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
|
||||
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), [[SP]]
|
||||
; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
|
||||
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16
|
||||
; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[FP]], $pop[[L5]]
|
||||
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -198,9 +198,9 @@ entry:
|
|||
; CHECK: i32.const $push[[L1:.+]]=, 16
|
||||
; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
|
||||
%addr = alloca i32
|
||||
; CHECK: i32.const [[OFF:.+]]=, 12
|
||||
; CHECK-NEXT: i32.add [[ADDR:.+]]=, [[SP]], [[OFF]]
|
||||
; CHECK-NEXT: copy_local [[COPY:.+]]=, [[ADDR]]
|
||||
; CHECK: i32.const $push[[OFF:.+]]=, 12
|
||||
; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, [[SP]], $pop[[OFF]]
|
||||
; CHECK-NEXT: copy_local [[COPY:.+]]=, $pop[[ADDR]]
|
||||
br label %body
|
||||
body:
|
||||
%a = phi i32* [%addr, %entry], [%b, %body]
|
||||
|
|
Loading…
Reference in New Issue