[mips] Use register scavenging with MSA.

MSA stores and loads to the stack are more likely to require an
emergency GPR spill slot due to the smaller offsets available
with those instructions.

Handle this by overestimating the size of the stack: determine the
largest offset by presuming that all callee-saved registers are
spilled, and account for incoming arguments when determining
whether an emergency spill slot is required.

Reviewers: atanasyan

Differential Revision: https://reviews.llvm.org/D39056

llvm-svn: 317204
This commit is contained in:
Simon Dardis 2017-11-02 12:47:22 +00:00
parent 0e142499a9
commit 725acb2d91
4 changed files with 272 additions and 41 deletions

View File

@ -107,38 +107,31 @@ bool MipsFrameLowering::hasBP(const MachineFunction &MF) const {
return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF); return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
} }
// Estimate the size of the stack, including the incoming arguments. We need to
// account for register spills, local objects, reserved call frame and incoming
// arguments. This is required to determine the largest possible positive offset
// from $sp so that it can be determined if an emergency spill slot for stack
// addresses is required.
uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
int64_t Offset = 0; int64_t Size = 0;
// Iterate over fixed sized objects. // Iterate over fixed sized objects which are incoming arguments.
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
Offset = std::max(Offset, -MFI.getObjectOffset(I)); if (MFI.getObjectOffset(I) > 0)
Size += MFI.getObjectSize(I);
// Conservatively assume all callee-saved registers will be saved. // Conservatively assume all callee-saved registers will be saved.
for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) {
unsigned Size = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R));
Offset = alignTo(Offset + Size, Size); Size = alignTo(Size + RegSize, RegSize);
} }
unsigned MaxAlign = MFI.getMaxAlignment(); // Get the size of the rest of the frame objects and any possible reserved
// call frame, accounting for alignment.
// Check that MaxAlign is not zero if there is a stack object that is not a return Size + MFI.estimateStackSize(MF);
// callee-saved spill.
assert(!MFI.getObjectIndexEnd() || MaxAlign);
// Iterate over other objects.
for (unsigned I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I)
Offset = alignTo(Offset + MFI.getObjectSize(I), MaxAlign);
// Call frame.
if (MFI.adjustsStack() && hasReservedCallFrame(MF))
Offset = alignTo(Offset + MFI.getMaxCallFrameSize(),
std::max(MaxAlign, getStackAlignment()));
return alignTo(Offset, getStackAlignment());
} }
// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions

View File

@ -893,10 +893,12 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
} }
// Set scavenging frame index if necessary. // Set scavenging frame index if necessary.
uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() + uint64_t MaxSPOffset = estimateStackSize(MF);
estimateStackSize(MF);
if (isInt<16>(MaxSPOffset)) // MSA has a minimum offset of 10 bits signed. If there is a variable
// sized object on the stack, the estimation cannot account for it.
if (isIntN(STI.hasMSA() ? 10 : 16, MaxSPOffset) &&
!MF.getFrameInfo().hasVarSizedObjects())
return; return;
const TargetRegisterClass &RC = const TargetRegisterClass &RC =

View File

@ -0,0 +1,221 @@
# RUN: llc %s -start-after=shrink-wrap -march=mips64 -mcpu=mips64r6 -mattr=+fp64,+msa -o /dev/null
# Test that the estimated size of the stack leads to the creation of an
# emergency spill slot when MSA is in use. Previously, this test case would
# fail during register scavenging due to the lack of a spill slot.
--- |
define inreg { i64, i64 } @test(i64 inreg %a.coerce0, i64 inreg %a.coerce1, i64 inreg %b.coerce0, i64 inreg %b.coerce1, i32 signext %c) #0 {
entry:
%retval = alloca <16 x i8>, align 16
%a = alloca <16 x i8>, align 16
%b = alloca <16 x i8>, align 16
%a.addr = alloca <16 x i8>, align 16
%b.addr = alloca <16 x i8>, align 16
%c.addr = alloca i32, align 4
%g = alloca <16 x i8>*, align 8
%d = alloca i8*, align 8
%0 = bitcast <16 x i8>* %a to { i64, i64 }*
%1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 0
store i64 %a.coerce0, i64* %1, align 16
%2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 1
store i64 %a.coerce1, i64* %2, align 8
%a1 = load <16 x i8>, <16 x i8>* %a, align 16
%3 = bitcast <16 x i8>* %b to { i64, i64 }*
%4 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 0
store i64 %b.coerce0, i64* %4, align 16
%5 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 1
store i64 %b.coerce1, i64* %5, align 8
%b2 = load <16 x i8>, <16 x i8>* %b, align 16
store <16 x i8> %a1, <16 x i8>* %a.addr, align 16
store <16 x i8> %b2, <16 x i8>* %b.addr, align 16
store i32 %c, i32* %c.addr, align 4
%6 = alloca i8, i64 6400, align 16
%7 = bitcast i8* %6 to <16 x i8>*
store <16 x i8>* %7, <16 x i8>** %g, align 8
%8 = load <16 x i8>*, <16 x i8>** %g, align 8
call void @h(<16 x i8>* %b.addr, <16 x i8>* %8)
%9 = load <16 x i8>*, <16 x i8>** %g, align 8
%10 = bitcast <16 x i8>* %9 to i8*
store i8* %10, i8** %d, align 8
%11 = load <16 x i8>, <16 x i8>* %a.addr, align 16
%12 = load i8*, i8** %d, align 8
%arrayidx = getelementptr inbounds i8, i8* %12, i64 0
%13 = load i8, i8* %arrayidx, align 1
%conv = sext i8 %13 to i32
%14 = call <16 x i8> @llvm.mips.fill.b(i32 %conv)
%add = add <16 x i8> %11, %14
%15 = load i8*, i8** %d, align 8
%arrayidx3 = getelementptr inbounds i8, i8* %15, i64 1
%16 = load i8, i8* %arrayidx3, align 1
%conv4 = sext i8 %16 to i32
%17 = call <16 x i8> @llvm.mips.fill.b(i32 %conv4)
%add5 = add <16 x i8> %add, %17
%18 = load <16 x i8>, <16 x i8>* %b.addr, align 16
%add6 = add <16 x i8> %18, %add5
store <16 x i8> %add6, <16 x i8>* %b.addr, align 16
%19 = load <16 x i8>, <16 x i8>* %b.addr, align 16
store <16 x i8> %19, <16 x i8>* %retval, align 16
%20 = bitcast <16 x i8>* %retval to { i64, i64 }*
%21 = load { i64, i64 }, { i64, i64 }* %20, align 16
ret { i64, i64 } %21
}
declare void @h(<16 x i8>*, <16 x i8>*)
declare <16 x i8> @llvm.mips.fill.b(i32)
declare void @llvm.stackprotector(i8*, i8**)
...
---
name: test
alignment: 3
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
liveins:
- { reg: '%a0_64', virtual-reg: '' }
- { reg: '%a1_64', virtual-reg: '' }
- { reg: '%a2_64', virtual-reg: '' }
- { reg: '%a3_64', virtual-reg: '' }
- { reg: '%t0_64', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 16
adjustsStack: false
hasCalls: true
stackProtector: ''
maxCallFrameSize: 4294967295
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
savePoint: ''
restorePoint: ''
fixedStack:
stack:
- { id: 0, name: retval, type: default, offset: 0, size: 16, alignment: 16,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
di-variable: '', di-expression: '', di-location: '' }
- { id: 1, name: a, type: default, offset: 0, size: 16, alignment: 16,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
di-variable: '', di-expression: '', di-location: '' }
- { id: 2, name: b, type: default, offset: 0, size: 16, alignment: 16,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
di-variable: '', di-expression: '', di-location: '' }
- { id: 3, name: a.addr, type: default, offset: 0, size: 16, alignment: 16,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
di-variable: '', di-expression: '', di-location: '' }
- { id: 4, name: b.addr, type: default, offset: 0, size: 16, alignment: 16,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
di-variable: '', di-expression: '', di-location: '' }
- { id: 5, name: c.addr, type: default, offset: 0, size: 4, alignment: 4,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
di-variable: '', di-expression: '', di-location: '' }
- { id: 6, name: g, type: default, offset: 0, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
di-variable: '', di-expression: '', di-location: '' }
- { id: 7, name: d, type: default, offset: 0, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
di-variable: '', di-expression: '', di-location: '' }
- { id: 8, name: '', type: default, offset: 0, size: 6400,
alignment: 16, stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
di-variable: '', di-expression: '', di-location: '' }
constants:
body: |
bb.0.entry:
liveins: %a0_64, %a1_64, %a2_64, %a3_64, %t0_64
SD killed %a0_64, %stack.1.a, 0 :: (store 8 into %ir.1, align 16)
SD killed %a1_64, %stack.1.a, 8 :: (store 8 into %ir.2)
%w0 = LD_B %stack.1.a, 0 :: (dereferenceable load 16 from %ir.a)
SD killed %a2_64, %stack.2.b, 0 :: (store 8 into %ir.4, align 16)
SD killed %a3_64, %stack.2.b, 8 :: (store 8 into %ir.5)
%w1 = LD_B %stack.2.b, 0 :: (dereferenceable load 16 from %ir.b)
ST_B killed %w0, %stack.3.a.addr, 0 :: (store 16 into %ir.a.addr)
ST_B killed %w1, %stack.4.b.addr, 0 :: (store 16 into %ir.b.addr)
SW %t0, %stack.5.c.addr, 0, implicit killed %t0_64 :: (store 4 into %ir.c.addr)
%at_64 = LEA_ADDiu64 %stack.8, 0
SD killed %at_64, %stack.6.g, 0 :: (store 8 into %ir.g)
%a1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
ADJCALLSTACKDOWN 0, 0, implicit-def dead %sp, implicit %sp
%a0_64 = LEA_ADDiu64 %stack.4.b.addr, 0
JAL @h, csr_n64, implicit-def dead %ra, implicit %a0_64, implicit %a1_64, implicit-def %sp
ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp
%at_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%v0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%v1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%a0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%a1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%a2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%a3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%t0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%t1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%t2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%t3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%t4_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%t5_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%t6_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%t7_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%s0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%s1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%s2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%s3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%s4_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%s5_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%s6_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%s7_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%t8_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%t9_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%ra_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
%w0 = LD_B %stack.3.a.addr, 0 :: (dereferenceable load 16 from %ir.a.addr)
SD %at_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %v0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %v1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %a0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %a1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %a2_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %a3_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %t0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %t1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %t2_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %t3_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %t4_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %t5_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %t6_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %t7_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %s0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %s1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %s2_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %s3_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %s4_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %s5_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %s6_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %s7_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %t8_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %t9_64, %stack.7.d, 0 :: (store 8 into %ir.d)
SD %ra_64, %stack.7.d, 0 :: (store 8 into %ir.d)
%at_64 = LD %stack.7.d, 0 :: (dereferenceable load 8 from %ir.d)
%v0 = LB %at_64, 0 :: (load 1 from %ir.arrayidx)
%w1 = FILL_B killed %v0
%w0 = ADDV_B killed %w0, killed %w1
%at = LB killed %at_64, 1 :: (load 1 from %ir.arrayidx3)
%w1 = FILL_B killed %at
%w0 = ADDV_B killed %w0, killed %w1
%w1 = LD_B %stack.4.b.addr, 0 :: (dereferenceable load 16 from %ir.b.addr)
%w0 = ADDV_B killed %w1, killed %w0
ST_B killed %w0, %stack.4.b.addr, 0 :: (store 16 into %ir.b.addr)
%w0 = LD_B %stack.4.b.addr, 0 :: (dereferenceable load 16 from %ir.b.addr)
ST_B killed %w0, %stack.0.retval, 0 :: (store 16 into %ir.retval)
%v0_64 = LD %stack.0.retval, 0 :: (dereferenceable load 8 from %ir.20, align 16)
%v1_64 = LD %stack.0.retval, 8 :: (dereferenceable load 8 from %ir.20 + 8, align 16)
RetRA implicit %v0_64, implicit %v1_64
...

View File

@ -18,7 +18,8 @@ define void @loadstore_v16i8_just_under_simm10() nounwind {
; MIPS32-AE: loadstore_v16i8_just_under_simm10: ; MIPS32-AE: loadstore_v16i8_just_under_simm10:
%1 = alloca <16 x i8> %1 = alloca <16 x i8>
%2 = alloca [496 x i8] ; Push the frame right up to 512 bytes %2 = alloca [492 x i8] ; Push the frame--accounting for the emergency spill
; slot--right up to 512 bytes
%3 = load volatile <16 x i8>, <16 x i8>* %1 %3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 496($sp) ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 496($sp)
@ -33,7 +34,8 @@ define void @loadstore_v16i8_just_over_simm10() nounwind {
; MIPS32-AE: loadstore_v16i8_just_over_simm10: ; MIPS32-AE: loadstore_v16i8_just_over_simm10:
%1 = alloca <16 x i8> %1 = alloca <16 x i8>
%2 = alloca [497 x i8] ; Push the frame just over 512 bytes %2 = alloca [497 x i8] ; Push the frame--accounting for the emergency spill
; slot--just over 512 bytes
%3 = load volatile <16 x i8>, <16 x i8>* %1 %3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512 ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512
@ -50,7 +52,8 @@ define void @loadstore_v16i8_just_under_simm16() nounwind {
; MIPS32-AE: loadstore_v16i8_just_under_simm16: ; MIPS32-AE: loadstore_v16i8_just_under_simm16:
%1 = alloca <16 x i8> %1 = alloca <16 x i8>
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
; slot--right up to 32768 bytes
%3 = load volatile <16 x i8>, <16 x i8>* %1 %3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@ -69,7 +72,8 @@ define void @loadstore_v16i8_just_over_simm16() nounwind {
; MIPS32-AE: loadstore_v16i8_just_over_simm16: ; MIPS32-AE: loadstore_v16i8_just_over_simm16:
%1 = alloca <16 x i8> %1 = alloca <16 x i8>
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
; slot--just over 32768 bytes
%3 = load volatile <16 x i8>, <16 x i8>* %1 %3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@ -121,7 +125,8 @@ define void @loadstore_v8i16_just_under_simm10() nounwind {
; MIPS32-AE: loadstore_v8i16_just_under_simm10: ; MIPS32-AE: loadstore_v8i16_just_under_simm10:
%1 = alloca <8 x i16> %1 = alloca <8 x i16>
%2 = alloca [1008 x i8] ; Push the frame right up to 1024 bytes %2 = alloca [1004 x i8] ; Push the frame--accounting for the emergency spill
; slot--right up to 1024 bytes
%3 = load volatile <8 x i16>, <8 x i16>* %1 %3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 1008($sp) ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 1008($sp)
@ -136,7 +141,8 @@ define void @loadstore_v8i16_just_over_simm10() nounwind {
; MIPS32-AE: loadstore_v8i16_just_over_simm10: ; MIPS32-AE: loadstore_v8i16_just_over_simm10:
%1 = alloca <8 x i16> %1 = alloca <8 x i16>
%2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes %2 = alloca [1009 x i8] ; Push the frame--accounting for the emergency spill
; slot--just over 1024 bytes
%3 = load volatile <8 x i16>, <8 x i16>* %1 %3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024 ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024
@ -153,7 +159,8 @@ define void @loadstore_v8i16_just_under_simm16() nounwind {
; MIPS32-AE: loadstore_v8i16_just_under_simm16: ; MIPS32-AE: loadstore_v8i16_just_under_simm16:
%1 = alloca <8 x i16> %1 = alloca <8 x i16>
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
; slot--right up to 32768 bytes
%3 = load volatile <8 x i16>, <8 x i16>* %1 %3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@ -172,7 +179,8 @@ define void @loadstore_v8i16_just_over_simm16() nounwind {
; MIPS32-AE: loadstore_v8i16_just_over_simm16: ; MIPS32-AE: loadstore_v8i16_just_over_simm16:
%1 = alloca <8 x i16> %1 = alloca <8 x i16>
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
; slot--just over 32768 bytes
%3 = load volatile <8 x i16>, <8 x i16>* %1 %3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@ -224,7 +232,8 @@ define void @loadstore_v4i32_just_under_simm10() nounwind {
; MIPS32-AE: loadstore_v4i32_just_under_simm10: ; MIPS32-AE: loadstore_v4i32_just_under_simm10:
%1 = alloca <4 x i32> %1 = alloca <4 x i32>
%2 = alloca [2032 x i8] ; Push the frame right up to 2048 bytes %2 = alloca [2028 x i8] ; Push the frame--accounting for the emergency spill
; slot--right up to 2048 bytes
%3 = load volatile <4 x i32>, <4 x i32>* %1 %3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 2032($sp) ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 2032($sp)
@ -239,7 +248,8 @@ define void @loadstore_v4i32_just_over_simm10() nounwind {
; MIPS32-AE: loadstore_v4i32_just_over_simm10: ; MIPS32-AE: loadstore_v4i32_just_over_simm10:
%1 = alloca <4 x i32> %1 = alloca <4 x i32>
%2 = alloca [2033 x i8] ; Push the frame just over 2048 bytes %2 = alloca [2033 x i8] ; Push the frame--accounting for the emergency spill
; slot--just over 2048 bytes
%3 = load volatile <4 x i32>, <4 x i32>* %1 %3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048 ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048
@ -256,7 +266,8 @@ define void @loadstore_v4i32_just_under_simm16() nounwind {
; MIPS32-AE: loadstore_v4i32_just_under_simm16: ; MIPS32-AE: loadstore_v4i32_just_under_simm16:
%1 = alloca <4 x i32> %1 = alloca <4 x i32>
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
; slot--right up to 32768 bytes
%3 = load volatile <4 x i32>, <4 x i32>* %1 %3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@ -275,7 +286,8 @@ define void @loadstore_v4i32_just_over_simm16() nounwind {
; MIPS32-AE: loadstore_v4i32_just_over_simm16: ; MIPS32-AE: loadstore_v4i32_just_over_simm16:
%1 = alloca <4 x i32> %1 = alloca <4 x i32>
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
; slot--just over 32768 bytes
%3 = load volatile <4 x i32>, <4 x i32>* %1 %3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@ -327,8 +339,8 @@ define void @loadstore_v2i64_just_under_simm10() nounwind {
; MIPS32-AE: loadstore_v2i64_just_under_simm10: ; MIPS32-AE: loadstore_v2i64_just_under_simm10:
%1 = alloca <2 x i64> %1 = alloca <2 x i64>
%2 = alloca [4080 x i8] ; Push the frame right up to 4096 bytes %2 = alloca [4076 x i8] ; Push the frame--accounting for the emergency spill
; slot--right up to 4096 bytes
%3 = load volatile <2 x i64>, <2 x i64>* %1 %3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 4080($sp) ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 4080($sp)
store volatile <2 x i64> %3, <2 x i64>* %1 store volatile <2 x i64> %3, <2 x i64>* %1
@ -342,7 +354,8 @@ define void @loadstore_v2i64_just_over_simm10() nounwind {
; MIPS32-AE: loadstore_v2i64_just_over_simm10: ; MIPS32-AE: loadstore_v2i64_just_over_simm10:
%1 = alloca <2 x i64> %1 = alloca <2 x i64>
%2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes %2 = alloca [4081 x i8] ; Push the frame--accounting for the emergency spill
; slot--just over 4096 bytes
%3 = load volatile <2 x i64>, <2 x i64>* %1 %3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096 ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096
@ -359,7 +372,8 @@ define void @loadstore_v2i64_just_under_simm16() nounwind {
; MIPS32-AE: loadstore_v2i64_just_under_simm16: ; MIPS32-AE: loadstore_v2i64_just_under_simm16:
%1 = alloca <2 x i64> %1 = alloca <2 x i64>
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
; slot--right up to 32768 bytes
%3 = load volatile <2 x i64>, <2 x i64>* %1 %3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@ -378,7 +392,8 @@ define void @loadstore_v2i64_just_over_simm16() nounwind {
; MIPS32-AE: loadstore_v2i64_just_over_simm16: ; MIPS32-AE: loadstore_v2i64_just_over_simm16:
%1 = alloca <2 x i64> %1 = alloca <2 x i64>
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
; slot--just over 32768 bytes
%3 = load volatile <2 x i64>, <2 x i64>* %1 %3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768 ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768