[WebAssembly] Stackify code emitted by eliminateFrameIndex and SP writeback

Summary:
WebAssemblyRegisterInfo::eliminateFrameIndex can emit several instructions to do address
calculations; these can usually be stackified. Because instructions with
FI operands can have subsequent operands which may be expression trees,
find the top of the leftmost tree and insert the code before it, to keep
the LIFO property.

Also use a stackified register when writing back the SP value to memory
in the epilog; updating the SP32 physical register there is unnecessary
because SP will not be used after the epilog, and skipping that update
results in better code.

Differential Revision: http://reviews.llvm.org/D18234

llvm-svn: 263725
This commit is contained in:
Derek Schuff 2016-03-17 17:00:29 +00:00
parent 93bbc7cd66
commit d4207ba0f6
5 changed files with 126 additions and 60 deletions

View File

@ -78,19 +78,20 @@ bool WebAssemblyFrameLowering::needsSPWriteback(
static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &InsertPt,
MachineBasicBlock::iterator &InsertAddr,
MachineBasicBlock::iterator &InsertStore,
DebugLoc DL) {
auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
unsigned SPAddr =
MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPAddr)
BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), SPAddr)
.addExternalSymbol(SPSymbol);
auto *MMO = new MachineMemOperand(MachinePointerInfo(),
MachineMemOperand::MOStore, 4, 4);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32),
WebAssembly::SP32)
BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32),
SrcReg)
.addImm(0)
.addReg(SPAddr)
.addImm(2) // p2align
@ -108,7 +109,7 @@ void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
if (I->getOpcode() == TII->getCallFrameDestroyOpcode() &&
needsSPWriteback(MF, *MF.getFrameInfo())) {
DebugLoc DL = I->getDebugLoc();
writeSPToMemory(WebAssembly::SP32, MF, MBB, I, DL);
writeSPToMemory(WebAssembly::SP32, MF, MBB, I, I, DL);
}
MBB.erase(I);
}
@ -171,7 +172,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(WebAssembly::SP32);
}
if (StackSize && needsSPWriteback(MF, *MFI)) {
writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, DL);
writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL);
}
}
@ -192,18 +193,24 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
// Restore the stack pointer. If we had fixed-size locals, add the offset
// subtracted in the prolog.
unsigned SPReg = 0;
MachineBasicBlock::iterator InsertAddr = InsertPt;
if (StackSize) {
unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
InsertAddr =
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
.addImm(StackSize);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32),
WebAssembly::SP32)
// In the epilog we don't need to write the result back to the SP32 physreg
// because it won't be used again. We can use a stackified register instead.
SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg)
.addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32)
.addReg(OffsetReg);
WFI->stackifyVReg(OffsetReg);
WFI->stackifyVReg(SPReg);
} else {
SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32;
}
writeSPToMemory(
(!StackSize && hasFP(MF)) ? WebAssembly::FP32 : WebAssembly::SP32, MF,
MBB, InsertPt, DL);
writeSPToMemory(SPReg, MF, MBB, InsertAddr, InsertPt, DL);
}

View File

@ -51,6 +51,51 @@ WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction & /*MF*/) const {
return Reserved;
}
// Returns true when Op is a register operand that names a virtual register
// which WFI reports as stackified. Any other operand yields false.
static bool isStackifiedVReg(const WebAssemblyFunctionInfo *WFI,
                             const MachineOperand &Op) {
  // Non-register operands (immediates, frame indices, symbols, ...) can never
  // be stackified registers.
  if (!Op.isReg())
    return false;

  const unsigned Reg = Op.getReg();
  // Only virtual registers are looked up in the stackified set.
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return false;

  return WFI->isVRegStackified(Reg);
}
// Returns false when Inst is a PHI, INLINEASM or DBG_VALUE instruction and
// true for every other opcode.
static bool canStackifyOperand(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  case TargetOpcode::PHI:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::DBG_VALUE:
    return false;
  default:
    return true;
  }
}
// Determine if the FI sequence can be stackified, and if so, where the code can
// be inserted. If stackification is possible, returns true and ajusts II to
// point to the insertion point.
bool findInsertPt(const WebAssemblyFunctionInfo *WFI, MachineBasicBlock &MBB,
unsigned OperandNum, MachineBasicBlock::iterator &II) {
if (!canStackifyOperand(*II)) return false;
MachineBasicBlock::iterator InsertPt(II);
int StackCount = 0;
// Operands are popped in reverse order, so any operands after FIOperand
// impose a constraint
for (unsigned i = OperandNum; i < II->getNumOperands(); i++) {
if (isStackifiedVReg(WFI, II->getOperand(i))) ++StackCount;
}
// Walk backwards, tracking stack depth. When it reaches 0 we have reached the
// top of the subtree.
while (StackCount) {
if (InsertPt == MBB.begin()) return false;
--InsertPt;
for (const auto &def : InsertPt->defs())
if (isStackifiedVReg(WFI, def)) --StackCount;
for (const auto &use : InsertPt->explicit_uses())
if (isStackifiedVReg(WFI, use)) ++StackCount;
}
II = InsertPt;
return true;
}
void WebAssemblyRegisterInfo::eliminateFrameIndex(
MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
RegScavenger * /*RS*/) const {
@ -78,20 +123,34 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
MI.getOperand(FIOperandNum)
.ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
} else {
// Otherwise create an i32.add SP, offset and make it the operand.
// Otherwise calculate the address
auto &MRI = MF.getRegInfo();
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
unsigned FIRegOperand = WebAssembly::SP32;
if (FrameOffset) {
FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::CONST_I32),
FIRegOperand)
// Create i32.add SP, offset and make it the operand. We want to stackify
// this sequence, but we need to preserve the LIFO expr stack ordering
// (i.e. we can't insert our code in between MI and any operands it
// pops before FIOperand).
auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>();
bool CanStackifyFI = findInsertPt(WFI, MBB, FIOperandNum, II);
unsigned OffsetOp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32),
OffsetOp)
.addImm(FrameOffset);
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::ADD_I32),
if (CanStackifyFI) {
WFI->stackifyVReg(OffsetOp);
FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
WFI->stackifyVReg(FIRegOperand);
} else {
FIRegOperand = OffsetOp;
}
BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32),
FIRegOperand)
.addReg(WebAssembly::SP32)
.addReg(FIRegOperand);
.addReg(OffsetOp);
}
MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false);
}

View File

@ -35,15 +35,15 @@ define void @byval_arg(%SmallStruct* %ptr) {
; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($0)
; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L4]]
; Pass a pointer to the stack slot to the function
; CHECK-NEXT: i32.const [[L5:.+]]=, 12
; CHECK-NEXT: i32.add [[ARG:.+]]=, [[SP]], [[L5]]
; CHECK-NEXT: call ext_byval_func@FUNCTION, [[L5]]
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12
; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]]
; CHECK-NEXT: call ext_byval_func@FUNCTION, $pop[[ARG]]
call void @ext_byval_func(%SmallStruct* byval %ptr)
; Restore the stack
; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16
; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L6]]
; CHECK-NEXT: i32.const $push[[L7:.+]]=, __stack_pointer
; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), [[SP]]
; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16
; CHECK-NEXT: i32.add $push[[L8:.+]]=, [[SP]], $pop[[L6]]
; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), $pop[[L8]]
; CHECK-NEXT: return
ret void
}
@ -58,9 +58,9 @@ define void @byval_arg_align8(%SmallStruct* %ptr) {
; CHECK: i32.load $push[[L4:.+]]=, 0($0):p2align=3
; CHECK-NEXT: i32.store {{.*}}=, 8([[SP]]):p2align=3, $pop[[L4]]
; Pass a pointer to the stack slot to the function
; CHECK-NEXT: i32.const [[L5:.+]]=, 8
; CHECK-NEXT: i32.add [[ARG:.+]]=, [[SP]], [[L5]]
; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, [[L5]]
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8
; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]]
; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, $pop[[ARG]]
call void @ext_byval_func_align8(%SmallStruct* byval align 8 %ptr)
ret void
}

View File

@ -61,8 +61,8 @@ define void @set_no(i8* %dst, i8 %src, i32 %len) {
; CHECK-LABEL: frame_index:
; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop1, $pop0{{$}}
; CHECK: i32.call $discard=, memset@FUNCTION, $1, $pop3, $pop2{{$}}
; CHECK: i32.call $discard=, memset@FUNCTION, $pop12, $pop1, $pop0{{$}}
; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop3, $pop2{{$}}
; CHECK: return{{$}}
define void @frame_index() {
entry:

View File

@ -22,10 +22,10 @@ define void @alloca32() noredzone {
; CHECK: i32.const $push[[L0:.+]]=, 0
; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L0]]
store i32 0, i32* %retval
; CHECK: i32.const $push[[L5:.+]]=, 16
; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L5]]
; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), [[SP]]
; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16
; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
ret void
}
@ -49,7 +49,7 @@ define void @alloca3264() {
}
; CHECK-LABEL: allocarray:
; CHECK: .local i32, i32{{$}}
; CHECK: .local i32{{$}}
define void @allocarray() {
; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
@ -59,10 +59,10 @@ define void @allocarray() {
; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]]
%r = alloca [33 x i32]
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12
; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
; CHECK-NEXT: i32.const $push[[L4:.+]]=, 12
; CHECK-NEXT: i32.const [[L5:.+]]=, 12
; CHECK-NEXT: i32.add [[L5]]=, [[SP]], [[L5]]
; CHECK-NEXT: i32.add $push[[L6:.+]]=, [[L5]], $pop[[L4]]
; CHECK-NEXT: i32.add $push[[L6:.+]]=, $pop[[L7]], $pop[[L4]]
; CHECK-NEXT: i32.const $push[[L9:.+]]=, 1{{$}}
; CHECK-NEXT: i32.store $push[[L10:.+]]=, 12([[SP]]), $pop[[L9]]{{$}}
; CHECK-NEXT: i32.store $discard=, 0($pop3), $pop[[L10]]{{$}}
@ -71,10 +71,10 @@ define void @allocarray() {
%p2 = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 3
store i32 1, i32* %p2
; CHECK: i32.const $push[[L11:.+]]=, 144
; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L11]]
; CHECK-NEXT: i32.const $push[[L12:.+]]=, __stack_pointer
; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), [[SP]]
; CHECK: i32.const $push[[L12:.+]]=, __stack_pointer
; CHECK-NEXT: i32.const $push[[L11:.+]]=, 144
; CHECK-NEXT: i32.add $push[[L13:.+]]=, [[SP]], $pop[[L11]]
; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), $pop[[L13]]
ret void
}
@ -86,18 +86,18 @@ define void @non_mem_use(i8** %addr) {
%r = alloca i64
%r2 = alloca i64
; %r is at SP+8
; CHECK: i32.const [[OFF:.+]]=, 8
; CHECK-NEXT: i32.add [[ARG1:.+]]=, [[SP]], [[OFF]]
; CHECK-NEXT: call ext_func@FUNCTION, [[ARG1]]
; CHECK: i32.const $push[[OFF:.+]]=, 8
; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, [[SP]], $pop[[OFF]]
; CHECK-NEXT: call ext_func@FUNCTION, $pop[[ARG1]]
call void @ext_func(i64* %r)
; %r2 is at SP+0, no add needed
; CHECK-NEXT: call ext_func@FUNCTION, [[SP]]
call void @ext_func(i64* %r2)
; Use as a value, but in a store
; %buf is at SP+16
; CHECK: i32.const [[OFF:.+]]=, 16
; CHECK-NEXT: i32.add [[VAL:.+]]=, [[SP]], [[OFF]]
; CHECK-NEXT: i32.store {{.*}}=, 0($0), [[VAL]]
; CHECK: i32.const $push[[OFF:.+]]=, 16
; CHECK-NEXT: i32.add $push[[VAL:.+]]=, [[SP]], $pop[[OFF]]
; CHECK-NEXT: i32.store {{.*}}=, 0($0), $pop[[VAL]]
%gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0
store i8* %gep, i8** %addr
ret void
@ -120,10 +120,10 @@ define void @allocarray_inbounds() {
%p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3
store i32 1, i32* %p2
call void @ext_func(i64* null);
; CHECK: i32.const $push[[L5:.+]]=, 32
; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L5]]
; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), [[SP]]
; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32
; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]]
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
ret void
}
@ -143,7 +143,7 @@ define void @dynamic_alloca(i32 %alloc) {
; CHECK: call ext_func_i32@FUNCTION
call void @ext_func_i32(i32* %r)
; CHECK: i32.const $push[[L3:.+]]=, __stack_pointer
; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L3]]), [[FP]]
; CHECK-NEXT: i32.store $discard=, 0($pop[[L3]]), [[FP]]
ret void
}
@ -183,10 +183,10 @@ define void @dynamic_static_alloca(i32 %alloc) noredzone {
%r1 = alloca i32
%r = alloca i32, i32 %alloc
store i32 0, i32* %r
; CHECK: i32.const $push[[L5:.+]]=, 16
; CHECK-NEXT: i32.add [[SP]]=, [[FP]], $pop[[L5]]
; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), [[SP]]
; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16
; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[FP]], $pop[[L5]]
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
ret void
}
@ -198,9 +198,9 @@ entry:
; CHECK: i32.const $push[[L1:.+]]=, 16
; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
%addr = alloca i32
; CHECK: i32.const [[OFF:.+]]=, 12
; CHECK-NEXT: i32.add [[ADDR:.+]]=, [[SP]], [[OFF]]
; CHECK-NEXT: copy_local [[COPY:.+]]=, [[ADDR]]
; CHECK: i32.const $push[[OFF:.+]]=, 12
; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, [[SP]], $pop[[OFF]]
; CHECK-NEXT: copy_local [[COPY:.+]]=, $pop[[ADDR]]
br label %body
body:
%a = phi i32* [%addr, %entry], [%b, %body]