[mips] Optimize stack pointer adjustments.

Instead of always using addu to adjust the stack pointer when the
size is out of the range of an addiu instruction, use subu so that
a smaller constant can be generated.

This can give savings of ~3 instructions whenever a function has a
stack frame whose size is out of range of an addiu instruction.

This change may break some naive stack unwinders.

Partially resolves PR/26291.

Thanks to David Chisnall for reporting the issue.

Reviewers: dsanders, vkalintiris

Differential Review: http://reviews.llvm.org/D21321

llvm-svn: 272666
This commit is contained in:
Simon Dardis 2016-06-14 13:39:43 +00:00
parent 65b6be1d3a
commit 878c0b1b76
6 changed files with 49 additions and 24 deletions

View File

@ -120,6 +120,10 @@ unsigned MipsABIInfo::GetPtrAddiuOp() const {
return ArePtrs64bit() ? Mips::DADDiu : Mips::ADDiu;
}
/// Selects the subtract opcode that matches the pointer width:
/// Mips::DSUBu when ArePtrs64bit() holds, Mips::SUBu otherwise.
unsigned MipsABIInfo::GetPtrSubuOp() const {
  if (ArePtrs64bit())
    return Mips::DSUBu;
  return Mips::SUBu;
}
/// Selects the AND opcode that matches the pointer width:
/// Mips::AND64 when ArePtrs64bit() holds, Mips::AND otherwise.
unsigned MipsABIInfo::GetPtrAndOp() const {
  if (ArePtrs64bit())
    return Mips::AND64;
  return Mips::AND;
}

View File

@ -69,6 +69,7 @@ public:
unsigned GetZeroReg() const;
unsigned GetPtrAdduOp() const;
unsigned GetPtrAddiuOp() const;
unsigned GetPtrSubuOp() const;
unsigned GetPtrAndOp() const;
unsigned GetGPRMoveOp() const;
inline bool ArePtrs64bit() const { return IsN64(); }

View File

@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@ -440,17 +441,24 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock::iterator I) const {
MipsABIInfo ABI = Subtarget.getABI();
DebugLoc DL;
unsigned ADDu = ABI.GetPtrAdduOp();
unsigned ADDiu = ABI.GetPtrAddiuOp();
if (Amount == 0)
return;
if (isInt<16>(Amount))// addi sp, sp, amount
if (isInt<16>(Amount)) {
// addi sp, sp, amount
BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
else { // Expand immediate that doesn't fit in 16-bit.
} else {
// For numbers which are not 16bit integers we synthesize Amount inline
// then add or subtract it from sp.
unsigned Opc = ABI.GetPtrAdduOp();
if (Amount < 0) {
Opc = ABI.GetPtrSubuOp();
Amount = -Amount;
}
unsigned Reg = loadImmediate(Amount, MBB, I, DL, nullptr);
BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(Reg, RegState::Kill);
BuildMI(MBB, I, DL, get(Opc), SP).addReg(SP).addReg(Reg, RegState::Kill);
}
}

View File

@ -13,6 +13,8 @@ entry:
%0 = call i8* @llvm.eh.dwarf.cfa(i32 0)
ret i8* %0
; CHECK-LABEL: f1:
; CHECK: addiu $sp, $sp, -32
; CHECK: addiu $2, $sp, 32
}
@ -24,10 +26,12 @@ entry:
%0 = call i8* @llvm.eh.dwarf.cfa(i32 0)
ret i8* %0
; CHECK-LABEL: f2:
; check stack size (65536 + 8)
; CHECK: lui $[[R0:[a-z0-9]+]], 65535
; CHECK: addiu $[[R0]], $[[R0]], -8
; CHECK: addu $sp, $sp, $[[R0]]
; CHECK: lui $[[R0:[a-z0-9]+]], 1
; CHECK: addiu $[[R0]], $[[R0]], 8
; CHECK: subu $sp, $sp, $[[R0]]
; check return value ($sp + stack size)
; CHECK: lui $[[R1:[a-z0-9]+]], 1
@ -46,6 +50,8 @@ entry:
%add = add i32 %1, %3
ret i32 %add
; CHECK-LABEL: f3:
; CHECK: addiu $sp, $sp, -40
; check return value ($fp + stack size + $fp)
@ -60,6 +66,8 @@ entry:
%0 = call i8* @llvm.eh.dwarf.cfa(i32 0)
ret i8* %0
; CHECK-LABEL: f4:
; CHECK-MIPS64: daddiu $sp, $sp, -32
; CHECK-MIPS64: daddiu $2, $sp, 32
}

View File

@ -1,13 +1,19 @@
; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s
; CHECK: lui ${{[0-9]+}}, 49152
; CHECK: lui ${{[0-9]+}}, 16384
define void @f() nounwind {
entry:
%a1 = alloca [1073741824 x i8], align 1
%arrayidx = getelementptr inbounds [1073741824 x i8], [1073741824 x i8]* %a1, i32 0, i32 1048676
call void @f2(i8* %arrayidx) nounwind
ret void
; CHECK-LABEL: f:
; CHECK: lui $[[R0:[a-z0-9]+]], 16384
; CHECK: addiu $[[R1:[a-z0-9]+]], $[[R0]], 24
; CHECK: subu $sp, $sp, $[[R1]]
; CHECK: lui $[[R2:[a-z0-9]+]], 16384
; CHECK: addu ${{[0-9]+}}, $sp, $[[R2]]
}
declare void @f2(i8*)

View File

@ -10,21 +10,19 @@
define void @f() nounwind {
entry:
; 32: lui $[[R0:[0-9]+]], 65535
; 32: addiu $[[R0]], $[[R0]], -24
; 32: addu $sp, $sp, $[[R0]]
; 32: lui $[[R1:[0-9]+]], 1
; 32: addu $[[R1]], $sp, $[[R1]]
; 32: sw $ra, 20($[[R1]])
; 64: daddiu $[[R0:[0-9]+]], $zero, 1
; 64: dsll $[[R0]], $[[R0]], 48
; 64: daddiu $[[R0]], $[[R0]], -1
; 64: dsll $[[R0]], $[[R0]], 16
; 64: daddiu $[[R0]], $[[R0]], -32
; 64: daddu $sp, $sp, $[[R0]]
; 64: lui $[[R1:[0-9]+]], 1
; 64: daddu $[[R1]], $sp, $[[R1]]
; 64: sd $ra, 24($[[R1]])
; 32: lui $[[R0:[0-9]+]], 1
; 32: addiu $[[R0]], $[[R0]], 24
; 32: subu $sp, $sp, $[[R0]]
; 32: lui $[[R1:[0-9]+]], 1
; 32: addu $[[R1]], $sp, $[[R1]]
; 32: sw $ra, 20($[[R1]])
; 64: lui $[[R0:[0-9]+]], 1
; 64: daddiu $[[R0]], $[[R0]], 32
; 64: dsubu $sp, $sp, $[[R0]]
; 64: lui $[[R1:[0-9]+]], 1
; 64: daddu $[[R1]], $sp, $[[R1]]
; 64: sd $ra, 24($[[R1]])
%agg.tmp = alloca %struct.S1, align 1
%tmp = getelementptr inbounds %struct.S1, %struct.S1* %agg.tmp, i32 0, i32 0, i32 0