; Test for generation of jump table for ropi/rwpi
; RUN: llc -relocation-model=static -mtriple=armv7a--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=ARM_ABS
; RUN: llc -relocation-model=ropi -mtriple=armv7a--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=ARM_PC
; RUN: llc -relocation-model=ropi-rwpi -mtriple=armv7a--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=ARM_PC
; RUN: llc -relocation-model=static -mtriple=thumbv7m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2
; RUN: llc -relocation-model=ropi -mtriple=thumbv7m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2
; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv7m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2
; RUN: llc -relocation-model=static -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
; RUN: llc -relocation-model=ropi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
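
; With -relocation-model=static the jump table holds absolute block addresses.
; Under ropi and ropi-rwpi the code (and its read-only jump table) must be
; position independent, so the table instead holds offsets relative to the
; table itself and the dispatch code adds the table base back at run time.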
declare void @exit0()
declare void @exit1()
declare void @exit2()
declare void @exit3()
declare void @exit4()
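
; A dense four-case switch is enough to trigger jump-table lowering on every
; target tested here; the checks below pin down the exact table format.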
define void @jump_table(i32 %val) {
entry:
  switch i32 %val, label %default [ i32 1, label %lab1
                                    i32 2, label %lab2
                                    i32 3, label %lab3
                                    i32 4, label %lab4 ]

default:
  tail call void @exit0()
  ret void

lab1:
  tail call void @exit1()
  ret void

lab2:
  tail call void @exit2()
  ret void

lab3:
  tail call void @exit3()
  ret void

lab4:
  tail call void @exit4()
  ret void

; CHECK-LABEL: jump_table:
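; On ARM the table base is materialized with ADR. With absolute entries
; (static), the loaded word is the target address and is written directly to
; PC; with table-relative entries (ropi/ropi-rwpi), the loaded offset is added
; to the table base to form the target address.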
; ARM: adr r[[R_TAB_BASE:[0-9]+]], [[LJTI:\.LJTI[0-9]+_[0-9]+]]
; ARM: lsl r[[R_TAB_IDX:[0-9]+]], r{{[0-9]+}}, #2
; ARM_ABS: ldr pc, [r[[R_TAB_IDX]], r[[R_TAB_BASE]]]
; ARM_PC: ldr r[[R_OFFSET:[0-9]+]], [r[[R_TAB_IDX]], r[[R_TAB_BASE]]]
; ARM_PC: add pc, r[[R_OFFSET]], r[[R_TAB_BASE]]
; ARM: [[LJTI]]
; ARM_ABS: .long [[LBB1:\.LBB[0-9]+_[0-9]+]]
; ARM_ABS: .long [[LBB2:\.LBB[0-9]+_[0-9]+]]
; ARM_ABS: .long [[LBB3:\.LBB[0-9]+_[0-9]+]]
; ARM_ABS: .long [[LBB4:\.LBB[0-9]+_[0-9]+]]
; ARM_PC: .long [[LBB1:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
; ARM_PC: .long [[LBB2:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
; ARM_PC: .long [[LBB3:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
; ARM_PC: .long [[LBB4:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
; ARM: [[LBB1]]
; ARM-NEXT: b exit1
; ARM: [[LBB2]]
; ARM-NEXT: b exit2
; ARM: [[LBB3]]
; ARM-NEXT: b exit3
; ARM: [[LBB4]]
; ARM-NEXT: b exit4
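; Thumb-2 compresses the table with TBB. Each .byte entry is half the distance
; from the end of the 4-byte tbb instruction (the LCPI label plus 4) to the
; target block; because the entries are PC-relative by construction, the same
; form works for static, ropi and ropi-rwpi.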
; THUMB2: [[LCPI:\.LCPI[0-9]+_[0-9]+]]:
; THUMB2: tbb [pc, r{{[0-9]+}}]
; THUMB2: .byte ([[LBB1:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
; THUMB2: .byte ([[LBB2:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
; THUMB2: .byte ([[LBB3:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
; THUMB2: .byte ([[LBB4:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
; THUMB2: [[LBB1]]
; THUMB2-NEXT: b exit1
; THUMB2: [[LBB2]]
; THUMB2-NEXT: b exit2
; THUMB2: [[LBB3]]
; THUMB2-NEXT: b exit3
; THUMB2: [[LBB4]]
; THUMB2-NEXT: b exit4
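; Thumb-1 has no TBB, so an equivalent sequence is synthesized: the index is
; added to PC, the byte-sized table entry is loaded with LDRB, doubled with
; LSLS (entries count halfwords), and added to PC to perform the branch. The
; table must be 4-byte aligned, which the .p2align directives verify.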
; THUMB1: .p2align 2
; THUMB1: add r[[x:[0-9]+]], pc
; THUMB1: ldrb r[[x]], [r[[x]], #4]
; THUMB1: lsls r[[x]], r[[x]], #1
; THUMB1: [[LCPI:\.LCPI[0-9]+_[0-9]+]]:
; THUMB1: add pc, r[[x]]
; THUMB1: .p2align 2
; THUMB1: .byte ([[LBB1:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
; THUMB1: .byte ([[LBB2:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
; THUMB1: .byte ([[LBB3:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
; THUMB1: .byte ([[LBB4:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
; THUMB1: [[LBB1]]
; THUMB1-NEXT: bl exit1
; THUMB1: [[LBB2]]
; THUMB1-NEXT: bl exit2
; THUMB1: [[LBB3]]
; THUMB1-NEXT: bl exit3
; THUMB1: [[LBB4]]
; THUMB1-NEXT: bl exit4
}