forked from OSchip/llvm-project
[Thumb1] Re-write emitThumbRegPlusImmediate
This was motivated by a bug which caused code like this to be miscompiled: declare void @take_ptr(i8*) define void @test() { %addr1.32 = alloca i8 %addr2.32 = alloca i32, i32 1028 call void @take_ptr(i8* %addr1) ret void } This was emitting the following assembly to get the value of %addr1: add r0, sp, #1020 add r0, r0, #8 However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this could not be assembled. The generated object file contained this, resulting in r0 holding SP+8 rather than SP+1028: add r0, sp, #1020 add r0, sp, #8 This function looked like it could have caused miscompilations for other combinations of registers and offsets (though I don't think it is currently called with these), and the heuristic it used did not match the emitted code in all cases. llvm-svn: 222125
This commit is contained in:
parent
236b0ca790
commit
970b0d576c
|
@ -595,10 +595,10 @@ inline uint64_t PowerOf2Floor(uint64_t A) {
|
|||
/// RoundUpToAlignment(5, 8) = 8
|
||||
/// RoundUpToAlignment(17, 8) = 24
|
||||
/// RoundUpToAlignment(~0LL, 8) = 0
|
||||
/// RoundUpToAlignment(321, 255) = 510
|
||||
/// \endcode
|
||||
inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align) {
|
||||
assert(isPowerOf2_64(Align) && "Alignment must be power of 2!");
|
||||
return (Value + Align - 1) & ~uint64_t(Align - 1);
|
||||
return (Value + Align - 1) / Align * Align;
|
||||
}
|
||||
|
||||
/// Returns the offset to the next integer (mod 2**64) that is greater than
|
||||
|
|
|
@ -66,6 +66,10 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
|
|||
int Val,
|
||||
ARMCC::CondCodes Pred, unsigned PredReg,
|
||||
unsigned MIFlags) const {
|
||||
assert((isARMLowRegister(DestReg) ||
|
||||
isVirtualRegister(DestReg)) &&
|
||||
"Thumb1 does not have ldr to high register");
|
||||
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
|
||||
MachineConstantPool *ConstantPool = MF.getConstantPool();
|
||||
|
@ -106,15 +110,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
|
|||
NumBytes = -NumBytes;
|
||||
}
|
||||
unsigned LdReg = DestReg;
|
||||
if (DestReg == ARM::SP) {
|
||||
if (DestReg == ARM::SP)
|
||||
assert(BaseReg == ARM::SP && "Unexpected!");
|
||||
if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg))
|
||||
LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
|
||||
}
|
||||
|
||||
if (NumBytes <= 255 && NumBytes >= 0)
|
||||
if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
|
||||
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
|
||||
.addImm(NumBytes).setMIFlags(MIFlags);
|
||||
else if (NumBytes < 0 && NumBytes >= -255) {
|
||||
} else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
|
||||
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
|
||||
.addImm(NumBytes).setMIFlags(MIFlags);
|
||||
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
|
||||
|
@ -124,7 +128,8 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
|
|||
ARMCC::AL, 0, MIFlags);
|
||||
|
||||
// Emit add / sub.
|
||||
int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
|
||||
int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr
|
||||
: ARM::tADDrr);
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
|
||||
if (Opc != ARM::tADDhirr)
|
||||
|
@ -136,32 +141,10 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
|
|||
AddDefaultPred(MIB);
|
||||
}
|
||||
|
||||
/// calcNumMI - Returns the number of instructions required to materialize
|
||||
/// the specific add / sub r, c instruction.
|
||||
static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
|
||||
unsigned NumBits, unsigned Scale) {
|
||||
unsigned NumMIs = 0;
|
||||
unsigned Chunk = ((1 << NumBits) - 1) * Scale;
|
||||
|
||||
if (Opc == ARM::tADDrSPi) {
|
||||
unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
|
||||
Bytes -= ThisVal;
|
||||
NumMIs++;
|
||||
NumBits = 8;
|
||||
Scale = 1; // Followed by a number of tADDi8.
|
||||
Chunk = ((1 << NumBits) - 1) * Scale;
|
||||
}
|
||||
|
||||
NumMIs += Bytes / Chunk;
|
||||
if ((Bytes % Chunk) != 0)
|
||||
NumMIs++;
|
||||
if (ExtraOpc)
|
||||
NumMIs++;
|
||||
return NumMIs;
|
||||
}
|
||||
|
||||
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
|
||||
/// a destreg = basereg + immediate in Thumb code.
|
||||
/// a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or
|
||||
/// SUBs first, and uses a constant pool value if the instruction sequence would
|
||||
/// be too long. This is allowed to modify the condition flags.
|
||||
void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MBBI,
|
||||
DebugLoc dl,
|
||||
|
@ -172,131 +155,146 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
|
|||
bool isSub = NumBytes < 0;
|
||||
unsigned Bytes = (unsigned)NumBytes;
|
||||
if (isSub) Bytes = -NumBytes;
|
||||
bool isMul4 = (Bytes & 3) == 0;
|
||||
bool isTwoAddr = false;
|
||||
bool DstNotEqBase = false;
|
||||
unsigned NumBits = 1;
|
||||
unsigned Scale = 1;
|
||||
int Opc = 0;
|
||||
int ExtraOpc = 0;
|
||||
bool NeedCC = false;
|
||||
|
||||
if (DestReg == BaseReg && BaseReg == ARM::SP) {
|
||||
assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
|
||||
NumBits = 7;
|
||||
Scale = 4;
|
||||
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
|
||||
isTwoAddr = true;
|
||||
} else if (!isSub && BaseReg == ARM::SP) {
|
||||
// r1 = add sp, 403
|
||||
// =>
|
||||
// r1 = add sp, 100 * 4
|
||||
// r1 = add r1, 3
|
||||
if (!isMul4) {
|
||||
Bytes &= ~3;
|
||||
ExtraOpc = ARM::tADDi3;
|
||||
}
|
||||
DstNotEqBase = true;
|
||||
NumBits = 8;
|
||||
Scale = 4;
|
||||
Opc = ARM::tADDrSPi;
|
||||
} else {
|
||||
// sp = sub sp, c
|
||||
// r1 = sub sp, c
|
||||
// r8 = sub sp, c
|
||||
if (DestReg != BaseReg)
|
||||
DstNotEqBase = true;
|
||||
if (DestReg == ARM::SP) {
|
||||
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
|
||||
assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
|
||||
NumBits = 7;
|
||||
Scale = 4;
|
||||
int CopyOpc = 0;
|
||||
unsigned CopyBits = 0;
|
||||
unsigned CopyScale = 1;
|
||||
bool CopyNeedsCC = false;
|
||||
int ExtraOpc = 0;
|
||||
unsigned ExtraBits = 0;
|
||||
unsigned ExtraScale = 1;
|
||||
bool ExtraNeedsCC = false;
|
||||
|
||||
// Strategy:
|
||||
// We need to select two types of instruction, maximizing the available
|
||||
// immediate range of each. The instructions we use will depend on whether
|
||||
// DestReg and BaseReg are low, high or the stack pointer.
|
||||
// * CopyOpc - DestReg = BaseReg + imm
|
||||
// This will be emitted once if DestReg != BaseReg, and never if
|
||||
// DestReg == BaseReg.
|
||||
// * ExtraOpc - DestReg = DestReg + imm
|
||||
// This will be emitted as many times as necessary to add the
|
||||
// full immediate.
|
||||
// If the immediate ranges of these instructions are not large enough to cover
|
||||
// NumBytes with a reasonable number of instructions, we fall back to using a
|
||||
// value loaded from a constant pool.
|
||||
if (DestReg == ARM::SP) {
|
||||
if (BaseReg == ARM::SP) {
|
||||
// sp -> sp
|
||||
// Already in right reg, no copy needed
|
||||
} else {
|
||||
Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
|
||||
NumBits = 8;
|
||||
NeedCC = true;
|
||||
// low -> sp or high -> sp
|
||||
CopyOpc = ARM::tMOVr;
|
||||
CopyBits = 0;
|
||||
}
|
||||
isTwoAddr = true;
|
||||
ExtraOpc = isSub ? ARM::tSUBspi : ARM::tADDspi;
|
||||
ExtraBits = 7;
|
||||
ExtraScale = 4;
|
||||
} else if (isARMLowRegister(DestReg)) {
|
||||
if (BaseReg == ARM::SP) {
|
||||
// sp -> low
|
||||
assert(!isSub && "Thumb1 does not have tSUBrSPi");
|
||||
CopyOpc = ARM::tADDrSPi;
|
||||
CopyBits = 8;
|
||||
CopyScale = 4;
|
||||
} else if (DestReg == BaseReg) {
|
||||
// low -> same low
|
||||
// Already in right reg, no copy needed
|
||||
} else if (isARMLowRegister(BaseReg)) {
|
||||
// low -> different low
|
||||
CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3;
|
||||
CopyBits = 3;
|
||||
CopyNeedsCC = true;
|
||||
} else {
|
||||
// high -> low
|
||||
CopyOpc = ARM::tMOVr;
|
||||
CopyBits = 0;
|
||||
}
|
||||
ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
|
||||
ExtraBits = 8;
|
||||
ExtraNeedsCC = true;
|
||||
} else /* DestReg is high */ {
|
||||
if (DestReg == BaseReg) {
|
||||
// high -> same high
|
||||
// Already in right reg, no copy needed
|
||||
} else {
|
||||
// {low,high,sp} -> high
|
||||
CopyOpc = ARM::tMOVr;
|
||||
CopyBits = 0;
|
||||
}
|
||||
ExtraOpc = 0;
|
||||
}
|
||||
|
||||
unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
|
||||
// We could handle an unaligned immediate with an unaligned copy instruction
|
||||
// and an aligned extra instruction, but this case is not currently needed.
|
||||
assert(((Bytes & 3) == 0 || ExtraScale == 1) &&
|
||||
"Unaligned offset, but all instructions require alignment");
|
||||
|
||||
unsigned CopyRange = ((1 << CopyBits) - 1) * CopyScale;
|
||||
// If we would emit the copy with an immediate of 0, just use tMOVr.
|
||||
if (CopyOpc && Bytes < CopyScale) {
|
||||
CopyOpc = ARM::tMOVr;
|
||||
CopyBits = 0;
|
||||
CopyScale = 1;
|
||||
CopyNeedsCC = false;
|
||||
CopyRange = 0;
|
||||
}
|
||||
unsigned ExtraRange = ((1 << ExtraBits) - 1) * ExtraScale; // per instruction
|
||||
unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0;
|
||||
unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange);
|
||||
|
||||
// We could handle this case when the copy instruction does not require an
|
||||
// aligned immediate, but we do not currently do this.
|
||||
assert(RangeAfterCopy % ExtraScale == 0 &&
|
||||
"Extra instruction requires immediate to be aligned");
|
||||
|
||||
unsigned RequiredExtraInstrs;
|
||||
if (ExtraRange)
|
||||
RequiredExtraInstrs = RoundUpToAlignment(RangeAfterCopy, ExtraRange) / ExtraRange;
|
||||
else if (RangeAfterCopy > 0)
|
||||
// We need an extra instruction but none is available
|
||||
RequiredExtraInstrs = 1000000;
|
||||
else
|
||||
RequiredExtraInstrs = 0;
|
||||
unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs;
|
||||
unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
|
||||
if (NumMIs > Threshold) {
|
||||
// This will expand into too many instructions. Load the immediate from a
|
||||
// constpool entry.
|
||||
|
||||
// Use a constant pool, if the sequence of ADDs/SUBs is too expensive.
|
||||
if (RequiredInstrs > Threshold) {
|
||||
emitThumbRegPlusImmInReg(MBB, MBBI, dl,
|
||||
DestReg, BaseReg, NumBytes, true,
|
||||
TII, MRI, MIFlags);
|
||||
return;
|
||||
}
|
||||
|
||||
if (DstNotEqBase) {
|
||||
if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
|
||||
// If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
|
||||
unsigned Chunk = (1 << 3) - 1;
|
||||
unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
|
||||
Bytes -= ThisVal;
|
||||
const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
|
||||
const MachineInstrBuilder MIB =
|
||||
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)
|
||||
.setMIFlags(MIFlags));
|
||||
AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
|
||||
} else if (isARMLowRegister(DestReg) && BaseReg == ARM::SP && Bytes > 0) {
|
||||
unsigned ThisVal = std::min(1020U, Bytes / 4 * 4);
|
||||
Bytes -= ThisVal;
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), DestReg)
|
||||
.addReg(BaseReg, RegState::Kill).addImm(ThisVal / 4))
|
||||
.setMIFlags(MIFlags);
|
||||
} else {
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
|
||||
.addReg(BaseReg, RegState::Kill))
|
||||
.setMIFlags(MIFlags);
|
||||
// Emit zero or one copy instructions
|
||||
if (CopyOpc) {
|
||||
unsigned CopyImm = std::min(Bytes, CopyRange) / CopyScale;
|
||||
Bytes -= CopyImm * CopyScale;
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg);
|
||||
if (CopyNeedsCC)
|
||||
MIB = AddDefaultT1CC(MIB);
|
||||
MIB.addReg(BaseReg, RegState::Kill);
|
||||
if (CopyOpc != ARM::tMOVr) {
|
||||
MIB.addImm(CopyImm);
|
||||
}
|
||||
AddDefaultPred(MIB.setMIFlags(MIFlags));
|
||||
|
||||
BaseReg = DestReg;
|
||||
}
|
||||
|
||||
unsigned Chunk = ((1 << NumBits) - 1) * Scale;
|
||||
// Emit zero or more in-place add/sub instructions
|
||||
while (Bytes) {
|
||||
unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
|
||||
Bytes -= ThisVal;
|
||||
ThisVal /= Scale;
|
||||
// Build the new tADD / tSUB.
|
||||
if (isTwoAddr) {
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
|
||||
if (NeedCC)
|
||||
MIB = AddDefaultT1CC(MIB);
|
||||
MIB.addReg(DestReg).addImm(ThisVal);
|
||||
MIB = AddDefaultPred(MIB);
|
||||
MIB.setMIFlags(MIFlags);
|
||||
} else {
|
||||
bool isKill = BaseReg != ARM::SP;
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
|
||||
if (NeedCC)
|
||||
MIB = AddDefaultT1CC(MIB);
|
||||
MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
|
||||
MIB = AddDefaultPred(MIB);
|
||||
MIB.setMIFlags(MIFlags);
|
||||
unsigned ExtraImm = std::min(Bytes, ExtraRange) / ExtraScale;
|
||||
Bytes -= ExtraImm * ExtraScale;
|
||||
|
||||
BaseReg = DestReg;
|
||||
if (Opc == ARM::tADDrSPi) {
|
||||
// r4 = add sp, imm
|
||||
// r4 = add r4, imm
|
||||
// ...
|
||||
NumBits = 8;
|
||||
Scale = 1;
|
||||
Chunk = ((1 << NumBits) - 1) * Scale;
|
||||
Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
|
||||
NeedCC = isTwoAddr = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ExtraOpc) {
|
||||
const MCInstrDesc &MCID = TII.get(ExtraOpc);
|
||||
AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
|
||||
.addReg(DestReg, RegState::Kill)
|
||||
.addImm(((unsigned)NumBytes) & 3)
|
||||
.setMIFlags(MIFlags));
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg);
|
||||
if (ExtraNeedsCC)
|
||||
MIB = AddDefaultT1CC(MIB);
|
||||
MIB.addReg(BaseReg).addImm(ExtraImm);
|
||||
MIB = AddDefaultPred(MIB);
|
||||
MIB.setMIFlags(MIFlags);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
|
||||
; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-apple-darwin
|
||||
; RUN: llvm-objdump -triple=thumbv6-apple-darwin -d %t | FileCheck %s
|
||||
|
||||
@__bar = external hidden global i8*
|
||||
@__baz = external hidden global i8*
|
||||
|
@ -49,13 +51,13 @@ define void @test_local_var_addr() {
|
|||
%addr2 = alloca i8
|
||||
|
||||
; CHECK: mov r0, sp
|
||||
; CHECK: adds r0, r0, #{{[0-9]+}}
|
||||
; CHECK: blx _take_ptr
|
||||
; CHECK: adds r0, #{{[0-9]+}}
|
||||
; CHECK: blx
|
||||
call void @take_ptr(i8* %addr1)
|
||||
|
||||
; CHECK: mov r0, sp
|
||||
; CHECK: adds r0, r0, #{{[0-9]+}}
|
||||
; CHECK: blx _take_ptr
|
||||
; CHECK: adds r0, #{{[0-9]+}}
|
||||
; CHECK: blx
|
||||
call void @take_ptr(i8* %addr2)
|
||||
|
||||
ret void
|
||||
|
@ -70,7 +72,7 @@ define void @test_simple_var() {
|
|||
|
||||
; CHECK: mov r0, sp
|
||||
; CHECK-NOT: adds r0
|
||||
; CHECK: blx _take_ptr
|
||||
; CHECK: blx
|
||||
call void @take_ptr(i8* %addr8)
|
||||
ret void
|
||||
}
|
||||
|
@ -85,12 +87,12 @@ define void @test_local_var_addr_aligned() {
|
|||
%addr2 = bitcast i32* %addr2.32 to i8*
|
||||
|
||||
; CHECK: add r0, sp, #{{[0-9]+}}
|
||||
; CHECK: blx _take_ptr
|
||||
; CHECK: blx
|
||||
call void @take_ptr(i8* %addr1)
|
||||
|
||||
; CHECK: mov r0, sp
|
||||
; CHECK-NOT: add r0
|
||||
; CHECK: blx _take_ptr
|
||||
; CHECK: blx
|
||||
call void @take_ptr(i8* %addr2)
|
||||
|
||||
ret void
|
||||
|
@ -104,8 +106,35 @@ define void @test_local_var_big_offset() {
|
|||
%addr2.32 = alloca i32, i32 257
|
||||
|
||||
; CHECK: add [[RTMP:r[0-9]+]], sp, #1020
|
||||
; CHECL: add r0, [[RTMP]], #8
|
||||
; CHECK: blx _take_ptr
|
||||
; CHECK: adds [[RTMP]], #8
|
||||
; CHECK: blx
|
||||
call void @take_ptr(i8* %addr1)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; Max range addressable with tADDrSPi
|
||||
define void @test_local_var_offset_1020() {
|
||||
; CHECK-LABEL: test_local_var_offset_1020
|
||||
%addr1 = alloca i8, i32 4
|
||||
%addr2 = alloca i8, i32 1020
|
||||
|
||||
; CHECK: add r0, sp, #1020
|
||||
; CHECK-NEXT: blx
|
||||
call void @take_ptr(i8* %addr1)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; Max range addressable with tADDrSPi + tADDi8
|
||||
define void @test_local_var_offset_1275() {
|
||||
; CHECK-LABEL: test_local_var_offset_1275
|
||||
%addr1 = alloca i8, i32 1
|
||||
%addr2 = alloca i8, i32 1275
|
||||
|
||||
; CHECK: add r0, sp, #1020
|
||||
; CHECK: adds r0, #255
|
||||
; CHECK-NEXT: blx
|
||||
call void @take_ptr(i8* %addr1)
|
||||
|
||||
ret void
|
||||
|
|
|
@ -1,31 +1,57 @@
|
|||
; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=IOS
|
||||
; RUN: llc < %s -mtriple=thumb-none-eabi | FileCheck %s --check-prefix=CHECK --check-prefix=EABI
|
||||
; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-apple-ios
|
||||
; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=IOS
|
||||
; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-none-eabi
|
||||
; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=EABI
|
||||
|
||||
; Largest stack for which a single tADDspi/tSUBspi is enough
|
||||
define void @test1() {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: sub sp, #256
|
||||
; CHECK: add sp, #256
|
||||
%tmp = alloca [ 64 x i32 ] , align 4
|
||||
; CHECK: sub sp, #508
|
||||
; CHECK: add sp, #508
|
||||
%tmp = alloca [ 508 x i8 ] , align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Largest stack for which three tADDspi/tSUBspis are enough
|
||||
define void @test100() {
|
||||
; CHECK-LABEL: test100:
|
||||
; CHECK: sub sp, #508
|
||||
; CHECK: sub sp, #508
|
||||
; CHECK: sub sp, #508
|
||||
; EABI: add sp, #508
|
||||
; EABI: add sp, #508
|
||||
; EABI: add sp, #508
|
||||
; IOS: subs r4, r7, #4
|
||||
; IOS: mov sp, r4
|
||||
%tmp = alloca [ 1524 x i8 ] , align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Smallest stack for which we use a constant pool
|
||||
define void @test2() {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: ldr r0, LCPI
|
||||
; CHECK: ldr r0,
|
||||
; CHECK: add sp, r0
|
||||
; CHECK: subs r4, r7, #4
|
||||
; CHECK: mov sp, r4
|
||||
%tmp = alloca [ 4168 x i8 ] , align 4
|
||||
; EABI: ldr r0,
|
||||
; EABI: add sp, r0
|
||||
; IOS: subs r4, r7, #4
|
||||
; IOS: mov sp, r4
|
||||
%tmp = alloca [ 1528 x i8 ] , align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @test3() {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: ldr r1, LCPI
|
||||
; CHECK: ldr r1,
|
||||
; CHECK: add sp, r1
|
||||
; CHECK: ldr r1, LCPI
|
||||
; CHECK: ldr r1,
|
||||
; CHECK: add r1, sp
|
||||
; CHECK: subs r4, r7, #4
|
||||
; CHECK: mov sp, r4
|
||||
; EABI: ldr r1,
|
||||
; EABI: add sp, r1
|
||||
; IOS: subs r4, r7, #4
|
||||
; IOS: mov sp, r4
|
||||
%retval = alloca i32, align 4
|
||||
%tmp = alloca i32, align 4
|
||||
%a = alloca [805306369 x i8], align 16
|
||||
|
|
Loading…
Reference in New Issue