llvm-project/llvm/test/CodeGen/Thumb/large-stack.ll

; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=IOS
; RUN: llc < %s -mtriple=thumb-none-eabi | FileCheck %s --check-prefix=CHECK --check-prefix=EABI
; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-apple-ios
; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=IOS
; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-none-eabi
; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=EABI

; Largest stack for which a single tADDspi/tSUBspi is enough
define void @test1() {
; CHECK-LABEL: test1:
; CHECK: sub sp, #508
; CHECK: add sp, #508
    %tmp = alloca [ 508 x i8 ] , align 4
    ret void
}

; Largest stack for which three tADDspi/tSUBspis are enough
define void @test100() {
; CHECK-LABEL: test100:
; CHECK: sub sp, #508
; CHECK: sub sp, #508
; CHECK: sub sp, #508
; EABI: add sp, #508
; EABI: add sp, #508
; EABI: add sp, #508
; IOS: subs r4, r7, #4
; IOS: mov sp, r4
    %tmp = alloca [ 1524 x i8 ] , align 4
    ret void
}

; Smallest stack for which we use a constant pool
define void @test2() {
; CHECK-LABEL: test2:
; CHECK: ldr r0,
; CHECK: add sp, r0
; EABI: ldr r0,
; EABI: add sp, r0
; IOS: subs r4, r7, #4
; IOS: mov sp, r4
    %tmp = alloca [ 1528 x i8 ] , align 4
    ret void
}

define i32 @test3() {
; CHECK-LABEL: test3:
; CHECK: ldr r1,
; CHECK: add sp, r1
; CHECK: ldr r1,
; CHECK: add r1, sp
; EABI: ldr r1,
; EABI: add sp, r1
; IOS: subs r4, r7, #4
; IOS: mov sp, r4
    %retval = alloca i32, align 4
    %tmp = alloca i32, align 4
    %a = alloca [805306369 x i8], align 16
    store i32 0, i32* %tmp
    %tmp1 = load i32, i32* %tmp
    ret i32 %tmp1
}

; Here, the adds get optimized out because they are dead, but the calculation
; of the address of stack_a is dead but not optimized out. When the address
; calculation gets expanded to two instructions, we need to avoid reading a
; dead register.
; No CHECK lines (just test for crashes), as we hope this will be optimised
; better in future.
define i32 @test4() {
entry:
  %stack_a = alloca i8, align 1
  %stack_b = alloca [256 x i32*], align 4
  %int = ptrtoint i8* %stack_a to i32
  %add = add i32 %int, 1
  br label %block2

block2:
  %add2 = add i32 %add, 1
  ret i32 0
}
[Thumb1] Re-write emitThumbRegPlusImmediate This was motivated by a bug which caused code like this to be miscompiled: declare void @take_ptr(i8) define void @test() { %addr1.32 = alloca i8 %addr2.32 = alloca i32, i32 1028 call void @take_ptr(i8 %addr1) ret void } This was emitting the following assembly to get the value of %addr1: add r0, sp, #1020 add r0, r0, #8 However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this could not be assembled. The generated object file contained this, resulting in r0 holding SP+8 rather tha SP+1028: add r0, sp, #1020 add r0, sp, #8 This function looked like it could have caused miscompilations for other combinations of registers and offsets (though I don't think it is currently called with these), and the heuristic it used did not match the emitted code in all cases. llvm-svn: 222125 2014-11-17 19:18:10 +08:00			`; RUN: llc < %s -mtriple=thumb-apple-ios \| FileCheck %s --check-prefix=CHECK --check-prefix=IOS`
			`; RUN: llc < %s -mtriple=thumb-none-eabi \| FileCheck %s --check-prefix=CHECK --check-prefix=EABI`
			`; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-apple-ios`
			`; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t \| FileCheck %s --check-prefix=CHECK --check-prefix=IOS`
			`; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-none-eabi`
			`; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t \| FileCheck %s --check-prefix=CHECK --check-prefix=EABI`
Move thumb and thumb2 tests into separate directories. llvm-svn: 74068 2009-06-24 14:36:07 +08:00
[Thumb1] Re-write emitThumbRegPlusImmediate This was motivated by a bug which caused code like this to be miscompiled: declare void @take_ptr(i8) define void @test() { %addr1.32 = alloca i8 %addr2.32 = alloca i32, i32 1028 call void @take_ptr(i8 %addr1) ret void } This was emitting the following assembly to get the value of %addr1: add r0, sp, #1020 add r0, r0, #8 However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this could not be assembled. The generated object file contained this, resulting in r0 holding SP+8 rather tha SP+1028: add r0, sp, #1020 add r0, sp, #8 This function looked like it could have caused miscompilations for other combinations of registers and offsets (though I don't think it is currently called with these), and the heuristic it used did not match the emitted code in all cases. llvm-svn: 222125 2014-11-17 19:18:10 +08:00			`; Largest stack for which a single tADDspi/tSUBspi is enough`
Move thumb and thumb2 tests into separate directories. llvm-svn: 74068 2009-06-24 14:36:07 +08:00			`define void @test1() {`
Convert CodeGen//.ll tests to use the new CHECK-LABEL for easier debugging. No functionality change and all tests pass after conversion. This was done with the following sed invocation to catch label lines demarking function boundaries: sed -i '' "s/^;\( \)\([A-Z0-9_]\):\( \)test\([A-Za-z0-9_-]\):\( \)$/;\1\2-LABEL:\3test\4:\5/g" test/CodeGen//*.ll which was written conservatively to avoid false positives rather than false negatives. I scanned through all the changes and everything looks correct. llvm-svn: 186258 2013-07-14 04:38:47 +08:00			`; CHECK-LABEL: test1:`
[Thumb1] Re-write emitThumbRegPlusImmediate This was motivated by a bug which caused code like this to be miscompiled: declare void @take_ptr(i8) define void @test() { %addr1.32 = alloca i8 %addr2.32 = alloca i32, i32 1028 call void @take_ptr(i8 %addr1) ret void } This was emitting the following assembly to get the value of %addr1: add r0, sp, #1020 add r0, r0, #8 However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this could not be assembled. The generated object file contained this, resulting in r0 holding SP+8 rather tha SP+1028: add r0, sp, #1020 add r0, sp, #8 This function looked like it could have caused miscompilations for other combinations of registers and offsets (though I don't think it is currently called with these), and the heuristic it used did not match the emitted code in all cases. llvm-svn: 222125 2014-11-17 19:18:10 +08:00			`; CHECK: sub sp, #508`
			`; CHECK: add sp, #508`
			`%tmp = alloca [ 508 x i8 ] , align 4`
Move thumb and thumb2 tests into separate directories. llvm-svn: 74068 2009-06-24 14:36:07 +08:00			`ret void`
			`}`

[Thumb1] Re-write emitThumbRegPlusImmediate This was motivated by a bug which caused code like this to be miscompiled: declare void @take_ptr(i8) define void @test() { %addr1.32 = alloca i8 %addr2.32 = alloca i32, i32 1028 call void @take_ptr(i8 %addr1) ret void } This was emitting the following assembly to get the value of %addr1: add r0, sp, #1020 add r0, r0, #8 However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this could not be assembled. The generated object file contained this, resulting in r0 holding SP+8 rather tha SP+1028: add r0, sp, #1020 add r0, sp, #8 This function looked like it could have caused miscompilations for other combinations of registers and offsets (though I don't think it is currently called with these), and the heuristic it used did not match the emitted code in all cases. llvm-svn: 222125 2014-11-17 19:18:10 +08:00			`; Largest stack for which three tADDspi/tSUBspis are enough`
			`define void @test100() {`
			`; CHECK-LABEL: test100:`
			`; CHECK: sub sp, #508`
			`; CHECK: sub sp, #508`
			`; CHECK: sub sp, #508`
			`; EABI: add sp, #508`
			`; EABI: add sp, #508`
			`; EABI: add sp, #508`
			`; IOS: subs r4, r7, #4`
			`; IOS: mov sp, r4`
			`%tmp = alloca [ 1524 x i8 ] , align 4`
			`ret void`
			`}`

			`; Smallest stack for which we use a constant pool`
Move thumb and thumb2 tests into separate directories. llvm-svn: 74068 2009-06-24 14:36:07 +08:00			`define void @test2() {`
Convert CodeGen//.ll tests to use the new CHECK-LABEL for easier debugging. No functionality change and all tests pass after conversion. This was done with the following sed invocation to catch label lines demarking function boundaries: sed -i '' "s/^;\( \)\([A-Z0-9_]\):\( \)test\([A-Za-z0-9_-]\):\( \)$/;\1\2-LABEL:\3test\4:\5/g" test/CodeGen//*.ll which was written conservatively to avoid false positives rather than false negatives. I scanned through all the changes and everything looks correct. llvm-svn: 186258 2013-07-14 04:38:47 +08:00			`; CHECK-LABEL: test2:`
[Thumb1] Re-write emitThumbRegPlusImmediate This was motivated by a bug which caused code like this to be miscompiled: declare void @take_ptr(i8) define void @test() { %addr1.32 = alloca i8 %addr2.32 = alloca i32, i32 1028 call void @take_ptr(i8 %addr1) ret void } This was emitting the following assembly to get the value of %addr1: add r0, sp, #1020 add r0, r0, #8 However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this could not be assembled. The generated object file contained this, resulting in r0 holding SP+8 rather tha SP+1028: add r0, sp, #1020 add r0, sp, #8 This function looked like it could have caused miscompilations for other combinations of registers and offsets (though I don't think it is currently called with these), and the heuristic it used did not match the emitted code in all cases. llvm-svn: 222125 2014-11-17 19:18:10 +08:00			`; CHECK: ldr r0,`
Re-apply r110655 with fixes. Epilogue must restore sp from fp if the function stack frame has a var-sized object. Also added a test case to check for the added benefit of this patch: it's optimizing away the unnecessary restore of sp from fp for some non-leaf functions. llvm-svn: 110707 2010-08-11 03:30:19 +08:00			`; CHECK: add sp, r0`
[Thumb1] Re-write emitThumbRegPlusImmediate This was motivated by a bug which caused code like this to be miscompiled: declare void @take_ptr(i8) define void @test() { %addr1.32 = alloca i8 %addr2.32 = alloca i32, i32 1028 call void @take_ptr(i8 %addr1) ret void } This was emitting the following assembly to get the value of %addr1: add r0, sp, #1020 add r0, r0, #8 However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this could not be assembled. The generated object file contained this, resulting in r0 holding SP+8 rather tha SP+1028: add r0, sp, #1020 add r0, sp, #8 This function looked like it could have caused miscompilations for other combinations of registers and offsets (though I don't think it is currently called with these), and the heuristic it used did not match the emitted code in all cases. llvm-svn: 222125 2014-11-17 19:18:10 +08:00			`; EABI: ldr r0,`
			`; EABI: add sp, r0`
			`; IOS: subs r4, r7, #4`
			`; IOS: mov sp, r4`
			`%tmp = alloca [ 1528 x i8 ] , align 4`
Move thumb and thumb2 tests into separate directories. llvm-svn: 74068 2009-06-24 14:36:07 +08:00			`ret void`
			`}`

			`define i32 @test3() {`
Convert CodeGen//.ll tests to use the new CHECK-LABEL for easier debugging. No functionality change and all tests pass after conversion. This was done with the following sed invocation to catch label lines demarking function boundaries: sed -i '' "s/^;\( \)\([A-Z0-9_]\):\( \)test\([A-Za-z0-9_-]\):\( \)$/;\1\2-LABEL:\3test\4:\5/g" test/CodeGen//*.ll which was written conservatively to avoid false positives rather than false negatives. I scanned through all the changes and everything looks correct. llvm-svn: 186258 2013-07-14 04:38:47 +08:00			`; CHECK-LABEL: test3:`
[Thumb1] Re-write emitThumbRegPlusImmediate This was motivated by a bug which caused code like this to be miscompiled: declare void @take_ptr(i8) define void @test() { %addr1.32 = alloca i8 %addr2.32 = alloca i32, i32 1028 call void @take_ptr(i8 %addr1) ret void } This was emitting the following assembly to get the value of %addr1: add r0, sp, #1020 add r0, r0, #8 However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this could not be assembled. The generated object file contained this, resulting in r0 holding SP+8 rather tha SP+1028: add r0, sp, #1020 add r0, sp, #8 This function looked like it could have caused miscompilations for other combinations of registers and offsets (though I don't think it is currently called with these), and the heuristic it used did not match the emitted code in all cases. llvm-svn: 222125 2014-11-17 19:18:10 +08:00			`; CHECK: ldr r1,`
LocalStackSlotAllocation improvements First, taking advantage of the fact that the virtual base registers are allocated in order of the local frame offsets, remove the quadratic register-searching behavior. Because of the ordering, we only need to check the last virtual base register created. Second, store the frame index in the FrameRef structure, and get the frame index and the local offset from this structure at the top of the loop iteration. This allows us to de-nest the loops in insertFrameReferenceRegisters (and I think makes the code cleaner). I also moved the needsFrameBaseReg check into the first loop over instructions so that we don't bother pushing FrameRefs for instructions that don't want a virtual base register anyway. Lastly, and this is the only functionality change, avoid the creation of single-use virtual base registers. These are currently not useful because, in general, they end up replacing what would be one r+r instruction with an add and a r+i instruction. Committing this removes the XFAIL in CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll Jim has okayed this off-list. llvm-svn: 180799 2013-05-01 04:04:37 +08:00			`; CHECK: add sp, r1`
[Thumb1] Re-write emitThumbRegPlusImmediate This was motivated by a bug which caused code like this to be miscompiled: declare void @take_ptr(i8) define void @test() { %addr1.32 = alloca i8 %addr2.32 = alloca i32, i32 1028 call void @take_ptr(i8 %addr1) ret void } This was emitting the following assembly to get the value of %addr1: add r0, sp, #1020 add r0, r0, #8 However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this could not be assembled. The generated object file contained this, resulting in r0 holding SP+8 rather tha SP+1028: add r0, sp, #1020 add r0, sp, #8 This function looked like it could have caused miscompilations for other combinations of registers and offsets (though I don't think it is currently called with these), and the heuristic it used did not match the emitted code in all cases. llvm-svn: 222125 2014-11-17 19:18:10 +08:00			`; CHECK: ldr r1,`
Re-apply r110655 with fixes. Epilogue must restore sp from fp if the function stack frame has a var-sized object. Also added a test case to check for the added benefit of this patch: it's optimizing away the unnecessary restore of sp from fp for some non-leaf functions. llvm-svn: 110707 2010-08-11 03:30:19 +08:00			`; CHECK: add r1, sp`
[Thumb1] Re-write emitThumbRegPlusImmediate This was motivated by a bug which caused code like this to be miscompiled: declare void @take_ptr(i8) define void @test() { %addr1.32 = alloca i8 %addr2.32 = alloca i32, i32 1028 call void @take_ptr(i8 %addr1) ret void } This was emitting the following assembly to get the value of %addr1: add r0, sp, #1020 add r0, r0, #8 However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this could not be assembled. The generated object file contained this, resulting in r0 holding SP+8 rather tha SP+1028: add r0, sp, #1020 add r0, sp, #8 This function looked like it could have caused miscompilations for other combinations of registers and offsets (though I don't think it is currently called with these), and the heuristic it used did not match the emitted code in all cases. llvm-svn: 222125 2014-11-17 19:18:10 +08:00			`; EABI: ldr r1,`
			`; EABI: add sp, r1`
			`; IOS: subs r4, r7, #4`
			`; IOS: mov sp, r4`
Re-apply r110655 with fixes. Epilogue must restore sp from fp if the function stack frame has a var-sized object. Also added a test case to check for the added benefit of this patch: it's optimizing away the unnecessary restore of sp from fp for some non-leaf functions. llvm-svn: 110707 2010-08-11 03:30:19 +08:00			`%retval = alloca i32, align 4`
			`%tmp = alloca i32, align 4`
			`%a = alloca [805306369 x i8], align 16`
			`store i32 0, i32* %tmp`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%tmp1 = load i32, i32* %tmp`
Re-apply r110655 with fixes. Epilogue must restore sp from fp if the function stack frame has a var-sized object. Also added a test case to check for the added benefit of this patch: it's optimizing away the unnecessary restore of sp from fp for some non-leaf functions. llvm-svn: 110707 2010-08-11 03:30:19 +08:00			`ret i32 %tmp1`
Move thumb and thumb2 tests into separate directories. llvm-svn: 74068 2009-06-24 14:36:07 +08:00			`}`
[Thumb] Fix crash in Thumb1RegisterInfo::rewriteFrameIndex This function can, for some offsets from the SP, split one instruction into two. Since it re-uses the original instruction as the first instruction of the result, we need ensure its result register is not marked as dead before we use it in the second instruction. llvm-svn: 220194 2014-10-20 19:00:18 +08:00
			`; Here, the adds get optimized out because they are dead, but the calculation`
			`; of the address of stack_a is dead but not optimized out. When the address`
			`; calculation gets expanded to two instructions, we need to avoid reading a`
			`; dead register.`
			`; No CHECK lines (just test for crashes), as we hope this will be optimised`
			`; better in future.`
			`define i32 @test4() {`
			`entry:`
			`%stack_a = alloca i8, align 1`
			`%stack_b = alloca [256 x i32*], align 4`
			`%int = ptrtoint i8* %stack_a to i32`
			`%add = add i32 %int, 1`
			`br label %block2`

			`block2:`
			`%add2 = add i32 %add, 1`
			`ret i32 0`
			`}`