[LLD][ELF][ARM] Implement ARM pc-relative relocations for ADR and LDR

The R_ARM_ALU_PC_G0 and R_ARM_LDR_PC_G0 relocations are used by the
ADR and LDR pseudo instructions, and are the basis of the group
relocations that can load an arbitrary constant via a series of add, sub
and ldr instructions.

The relocations need to be obtained via the .reloc directive.

R_ARM_ALU_PC_G0 is much more complicated than R_ARM_LDR_PC_G0, as the add/sub
instruction uses a modified immediate encoding: an 8-bit immediate rotated
right by an even amount (twice the value of a 4-bit field). This means that
the range of representable immediates is sparse. We extract the encoding and
decoding functions for the modified
immediate from llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h as
this header file is not accessible from LLD. Duplication of code isn't
ideal, but as these are well-defined mathematical functions they are
unlikely to change.

Differential Revision: https://reviews.llvm.org/D75349
This commit is contained in:
Peter Smith 2020-04-04 17:15:19 +01:00
parent 66c18c729d
commit 28b172e341
6 changed files with 491 additions and 0 deletions

View File

@ -132,6 +132,8 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
case R_ARM_THM_MOVW_PREL_NC:
case R_ARM_THM_MOVT_PREL:
return R_PC;
case R_ARM_ALU_PC_G0:
case R_ARM_LDR_PC_G0:
case R_ARM_THM_ALU_PREL_11_0:
case R_ARM_THM_PC8:
case R_ARM_THM_PC12:
@ -410,6 +412,58 @@ static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) {
}
}
// Utility functions taken from ARMAddressingModes.h, only changes are LLD
// coding style.
// Rotate the 32-bit unsigned value val right by amt bit positions. amt must be
// strictly less than 32.
static uint32_t rotr32(uint32_t val, uint32_t amt) {
  assert(amt < 32 && "Invalid rotate amount");
  // Bits shifted out at the bottom re-enter at the top. The complementary
  // shift count is masked with 31 so that amt == 0 never shifts by 32.
  const uint32_t wrapped = val << ((32 - amt) & 31);
  const uint32_t shifted = val >> amt;
  return shifted | wrapped;
}
// Rotate the 32-bit unsigned value val left by amt bit positions. amt must be
// strictly less than 32.
static uint32_t rotl32(uint32_t val, uint32_t amt) {
  assert(amt < 32 && "Invalid rotate amount");
  // Bits shifted out at the top re-enter at the bottom. The complementary
  // shift count is masked with 31 so that amt == 0 never shifts by 32.
  const uint32_t wrapped = val >> ((32 - amt) & 31);
  const uint32_t shifted = val << amt;
  return shifted | wrapped;
}
// Compute the rotate amount for encoding the 32-bit immediate imm as a shifter
// operand, i.e. an 8-bit value rotated right by an even number of bits. The
// result is expressed as a right rotation, since that is what the hardware
// applies. When imm cannot be covered by a single shifter operand, the
// returned rotation still selects a useful maximal chunk of bits.
static uint32_t getSOImmValRotate(uint32_t imm) {
  // Anything that already fits in 8 bits needs no rotation at all.
  if ((imm & ~255U) == 0)
    return 0;

  // True when imm, rotated right by rot, leaves nothing outside the low byte.
  auto fitsAfterRotate = [imm](unsigned rot) {
    return (rotr32(imm, rot) & ~255U) == 0;
  };
  // HW rotates right, not left: express a rotation by rot as the equivalent
  // right-rotate amount modulo 32.
  auto asRightRotate = [](unsigned rot) -> uint32_t {
    return (32 - rot) & 31;
  };

  // Start from the trailing zero count, rounded down to an even number.
  // Something like 0x200 must be rotated 8 bits, not 9.
  const unsigned evenTz = llvm::countTrailingZeros(imm) & ~1;
  if (fitsAfterRotate(evenTz))
    return asRightRotate(evenTz);

  // Values such as 0xF000000F wrap around the word: ignore the low 6 bits
  // and repeat the search on what remains.
  if (imm & 63U) {
    const unsigned evenTzHigh = llvm::countTrailingZeros(imm & ~63U) & ~1;
    if (fitsAfterRotate(evenTzHigh))
      return asRightRotate(evenTzHigh);
  }

  // No single shifter operand covers this span of bits; fall back to the
  // first candidate so the caller still gets a useful chunk.
  return asRightRotate(evenTz);
}
void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
switch (rel.type) {
case R_ARM_ABS32:
@ -574,6 +628,45 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
((val << 4) & 0x7000) | // imm3
(val & 0x00ff)); // imm8
break;
case R_ARM_ALU_PC_G0: {
// ADR (literal) add = bit23, sub = bit22
// literal is a 12-bit modified immediate, made up of a 4-bit even rotate
// right and an 8-bit immediate. The code-sequence here is derived from
// ARMAddressingModes.h in llvm/Target/ARM/MCTargetDesc. In our case we
// want to give an error if we cannot encode the constant.
// Start with the add form (bit 23 set).
uint32_t opcode = 0x00800000;
// A set top bit in the 64-bit value means the displacement is negative:
// switch to the sub form (bit 22 set) and continue with the magnitude.
if (val >> 63) {
opcode = 0x00400000;
val = ~val + 1;
}
// Values that fit in the low 8 bits are used as-is with a zero rotate field.
// NOTE(review): ~255U is a 32-bit mask, so bits 32-63 of val are not examined
// by this test; presumably val always fits in 32 bits here - confirm.
if ((val & ~255U) != 0) {
uint32_t rotAmt = getSOImmValRotate(val);
// Error if we cannot encode this with a single shift
if (rotr32(~255U, rotAmt) & val)
error(getErrorLocation(loc) + "unencodeable immediate " +
Twine(val).str() + " for relocation " + toString(rel.type));
// Rotate the significant bits down into the low byte and store half of the
// rotate amount in bits 8-11.
val = rotl32(val, rotAmt) | ((rotAmt >> 1) << 8);
}
// Preserve the instruction bits outside bits 20-23 and the low 12 bits, then
// merge in the add/sub selection and the encoded immediate.
write32le(loc, (read32le(loc) & 0xff0ff000) | opcode | val);
break;
}
case R_ARM_LDR_PC_G0: {
// R_ARM_LDR_PC_G0 is S + A - P, we have ((S + A) | T) - P, if S is a
// function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
// bottom bit to recover S + A - P.
if (rel.sym->isFunc())
val &= ~0x1;
// LDR (literal) u = bit23
// Treat the displacement as signed: a negative value is encoded as its
// magnitude with the u bit clear, a non-negative one with the u bit set.
int64_t imm = val;
uint32_t u = 0x00800000;
if (imm < 0) {
imm = -imm;
u = 0;
}
// The magnitude has to fit in the 12-bit immediate field.
checkUInt(loc, imm, 12, rel);
// Clear bit 23 and the low 12 bits of the existing instruction, then insert
// the u bit and the immediate.
write32le(loc, (read32le(loc) & 0xff7ff000) | u | imm);
break;
}
case R_ARM_THM_ALU_PREL_11_0: {
// ADR encoding T2 (sub), T3 (add) i:imm3:imm8
int64_t imm = val;
@ -708,6 +801,22 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
((lo & 0x7000) >> 4) | // imm3
(lo & 0x00ff)); // imm8
}
case R_ARM_ALU_PC_G0: {
// 12-bit immediate is a modified immediate made up of a 4-bit even
// right rotation and 8-bit constant. After the rotation the value
// is zero-extended. When bit 23 is set the instruction is an add, when
// bit 22 is set it is a sub.
uint32_t instr = read32le(buf);
// Bits 8-11 hold half of the rotate amount; bits 0-7 hold the constant.
uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2);
// NOTE(review): val is uint32_t, so -val wraps modulo 2^32 before being
// widened to the int64_t return type; presumably callers only rely on the
// low 32 bits of the addend - confirm.
return (instr & 0x00400000) ? -val : val;
}
case R_ARM_LDR_PC_G0: {
// LDR (literal) u = bit23 unsigned imm12
// The low 12 bits are an unsigned offset; it is returned as-is when u is
// set and negated when u is clear.
bool u = read32le(buf) & 0x00800000;
uint32_t imm12 = read32le(buf) & 0xfff;
// NOTE(review): imm12 is uint32_t, so -imm12 wraps modulo 2^32 before being
// widened to the int64_t return type; presumably callers only rely on the
// low 32 bits of the addend - confirm.
return u ? imm12 : -imm12;
}
case R_ARM_THM_ALU_PREL_11_0: {
// Thumb2 ADR, which is an alias for a sub or add instruction with an
// unsigned immediate.

View File

@ -0,0 +1,30 @@
// REQUIRES: arm
// RUN: llvm-mc --triple=armv7a-none-eabi --arm-add-build-attributes -filetype=obj -o %t.o %s
// RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s
.section .os0, "ax", %progbits
.balign 1024
.thumb_func
low:
bx lr
/// Check that we error when the immediate for the add or sub is not encodeable
.section .os1, "ax", %progbits
.arm
.balign 1024
.global _start
.type _start, %function
_start:
// CHECK: {{.*}}.s.tmp.o:(.os1+0x0): unencodeable immediate 1031 for relocation R_ARM_ALU_PC_G0
/// adr r0, low
.inst 0xe24f0008
.reloc 0, R_ARM_ALU_PC_G0, low
// CHECK: {{.*}}.s.tmp.o:(.os1+0x4): unencodeable immediate 1013 for relocation R_ARM_ALU_PC_G0
/// adr r1, unaligned
.inst 0xe24f1008
.reloc 4, R_ARM_ALU_PC_G0, unaligned
.section .os2, "ax", %progbits
.balign 1024
.thumb_func
unaligned:
bx lr

View File

@ -0,0 +1,42 @@
// REQUIRES: arm
// RUN: llvm-mc --triple=armv7a-none-eabi --arm-add-build-attributes -filetype=obj -o %t.o %s
// RUN: echo "SECTIONS { \
// RUN: .text.0 0x10000000 : { *(.text.0) } \
// RUN: .text.1 0x80000000 : { *(.text.1) } \
// RUN: .text.2 0xf0000010 : { *(.text.2) } \
// RUN: } " > %t.script
// RUN: ld.lld --script %t.script %t.o -o %t
// RUN: llvm-objdump -d --no-show-raw-insn --triple=armv7a-none-eabi %t | FileCheck %s
/// Test the long range encoding of R_ARM_ALU_PC_G0. We can encode an 8-bit
/// immediate rotated right by an even 4-bit field.
.section .text.0, "ax", %progbits
dat1:
.word 0
.section .text.1, "ax", %progbits
.global _start
.type _start, %function
_start:
/// adr.w r0, dat1
.inst 0xe24f0008
.reloc 0, R_ARM_ALU_PC_G0, dat1
/// adr.w r1, dat2
.inst 0xe24f1008
.reloc 4, R_ARM_ALU_PC_G0, dat2
.section .text.2, "ax", %progbits
dat2:
.word 0
// CHECK: 10000000 <dat1>:
// CHECK-NEXT: 10000000: andeq r0, r0, r0
// CHECK: 80000000 <_start>:
/// 0x80000000 + 0x8 - 0x70000008 = 0x10000000
// CHECK-NEXT: 80000000: sub r0, pc, #1879048200
/// 0x80000004 + 0x8 + 0x70000004 = 0xf0000010
// CHECK-NEXT: 80000004: add r1, pc, #1879048196
// CHECK: f0000010 <dat2>:
// CHECK-NEXT: f0000010: andeq r0, r0, r0

111
lld/test/ELF/arm-adr.s Normal file
View File

@ -0,0 +1,111 @@
// REQUIRES: arm
// RUN: llvm-mc --triple=armv7a-none-eabi --arm-add-build-attributes -filetype=obj -o %t.o %s
// RUN: ld.lld %t.o -o %t
// RUN: llvm-objdump -d --no-show-raw-insn --triple=armv7a-none-eabi %t | FileCheck %s
/// Test the short range cases of R_ARM_ALU_PC_G0. The range of the instruction
/// depends on the number of trailing zeros of the displacement. In practice
/// the maximum effective range will be 1024 bytes, which is a 4-byte aligned
/// instruction to a 4-byte aligned word.
.arm
.section .os1, "ax", %progbits
.balign 1024
.word 0
.word 0
.word 0
.word 0
dat1:
.word 0
dat2:
.word 0
.section .os2, "ax", %progbits
.balign 1024
.global _start
.type _start, %function
_start:
/// adr r0, dat1
.inst 0xe24f0008
.reloc 0, R_ARM_ALU_PC_G0, dat1
/// adr r0, dat2
.inst 0xe24f0008
.reloc 4, R_ARM_ALU_PC_G0, dat2
/// adr r0, dat3
.inst 0xe24f0008
.reloc 8, R_ARM_ALU_PC_G0, dat3
/// adr r0, dat4
.inst 0xe24f0008
.reloc 0xc, R_ARM_ALU_PC_G0, dat4
.section .os3, "ax", %progbits
.balign 1024
.word 0
.word 0
.word 0
.word 0
dat3:
.word 0
dat4:
.word 0
.section .os4, "ax", %progbits
.thumb
.type tfunc, %function
tfunc:
bx lr
.section .os5, "ax", %progbits
.arm
.type arm_func, %function
arm_func:
.balign 4
/// adr r0, tfunc
.inst 0xe24f0008
.reloc 0, R_ARM_ALU_PC_G0, tfunc
/// adr r0, afunc
.inst 0xe24f0008
.reloc 4, R_ARM_ALU_PC_G0, afunc
bx lr
.section .os6, "ax", %progbits
.type afunc, %function
.balign 4
afunc:
bx lr
// CHECK: 00011410 <dat1>:
// CHECK-NEXT: 11410: andeq r0, r0, r0
// CHECK: 00011414 <dat2>:
// CHECK-NEXT: 11414: andeq r0, r0, r0
// CHECK: 00011800 <_start>:
/// 0x11800 + 0x8 - 0x3f8 = 0x11410 = dat1
// CHECK-NEXT: 11800: sub r0, pc, #1016
/// 0x11804 + 0x8 - 0x3f8 = 0x11414 = dat2
// CHECK-NEXT: 11804: sub r0, pc, #1016
/// 0x11808 + 0x8 + 0x400 = 0x11c10 = dat3
// CHECK-NEXT: 11808: add r0, pc, #1024
/// 0x1180c + 0x8 + 0x400 = 0x11c14 = dat4
// CHECK-NEXT: 1180c: add r0, pc, #1024
// CHECK: 00011c10 <dat3>:
// CHECK-NEXT: 11c10: andeq r0, r0, r0
// CHECK: 00011c14 <dat4>:
// CHECK-NEXT: 11c14: andeq r0, r0, r0
// CHECK: 00011c18 <tfunc>:
// CHECK-NEXT: 11c18: bx lr
// CHECK: 00011c1c <arm_func>:
/// 0x11c1c + 0x8 - 0xb = 11c19 = tfunc
// CHECK-NEXT: 11c1c: sub r0, pc, #11
/// 0x11c20 + 0x8 = 0x11c28 = afunc
// CHECK-NEXT: 11c20: add r0, pc, #0
// CHECK-NEXT: 11c24: bx lr
// CHECK: 00011c28 <afunc>:
// CHECK-NEXT: 11c28: bx lr

View File

@ -0,0 +1,29 @@
// REQUIRES: arm
// RUN: llvm-mc --triple=armv7a-none-eabi --arm-add-build-attributes -filetype=obj -o %t.o %s
// RUN: not ld.lld -n %t.o -o %t 2>&1 | FileCheck %s
.section .text.0, "ax", %progbits
.thumb_func
.balign 4
low:
bx lr
nop
nop
.section .text.1, "ax", %progbits
.global _start
.arm
_start:
// CHECK: {{.*}}.s.tmp.o:(.text.1+0x0): relocation R_ARM_LDR_PC_G0 out of range: 4096 is not in [0, 4095]
/// ldr r0, low - 4076
.inst 0xe51f0ff4
.reloc 0, R_ARM_LDR_PC_G0, low
// CHECK: {{.*}}.s.tmp.o:(.text.1+0x4): relocation R_ARM_LDR_PC_G0 out of range: 4096 is not in [0, 4095]
/// ldr r0, high + 4100
.inst 0xe59f0ffc
.reloc 4, R_ARM_LDR_PC_G0, high
.section .text.2
.thumb_func
.balign 4
high:
bx lr

170
lld/test/ELF/arm-ldrlit.s Normal file
View File

@ -0,0 +1,170 @@
// REQUIRES: arm
// RUN: llvm-mc --triple=armv7a-none-eabi --arm-add-build-attributes -filetype=obj -o %t.o %s
// RUN: llvm-objdump -d -r --triple=armv7a-none-eabi %t.o
// RUN: echo "SECTIONS { \
// RUN: .rodata.low 0x8012 : { *(.rodata.low) } \
// RUN: .text.low 0x8f00 : { *(.text.low) } \
// RUN: .text.neg 0x9000 : { *(.text.neg) } \
// RUN: .text.pos 0x10000 : { *(.text.pos) } \
// RUN: .text.high 0x10100 : { *(.text.high) } \
// RUN: .data_high 0x1100f : { *(.data.high) } \
// RUN: } " > %t.script
// RUN: ld.lld -n --script %t.script %t.o -o %t
// RUN: llvm-readobj --symbols %t | FileCheck %s --check-prefix=SYMS
// RUN: llvm-objdump -d --no-show-raw-insn --triple=armv7a-none-eabi %t | FileCheck %s
/// Test the various legal cases for the R_ARM_LDR_PC_G0 relocation
/// Range is +- 4095 bytes
/// The Thumb bit for function symbols is ignored
.section .rodata.low, "a", %progbits
dat1:
.byte 0
dat2:
.byte 1
dat3:
.byte 2
dat4:
.byte 3
.section .text.low, "ax", %progbits
.balign 4
.global target1
.type target1, %function
target1:
bx lr
.type target2, %function
target2:
bx lr
.section .text.neg, "ax", %progbits
.balign 4
.global _start
.type _start, %function
_start:
/// ldr r0, dat1
.inst 0xe51f0008
.reloc 0, R_ARM_LDR_PC_G0, dat1
/// ldr r1, dat2
.inst 0xe51f1008
.reloc 4, R_ARM_LDR_PC_G0, dat2
/// ldr r2, dat3
.inst 0xe51f2008
.reloc 8, R_ARM_LDR_PC_G0, dat3
/// ldr r3, dat4
.inst 0xe51f3008
.reloc 0xc, R_ARM_LDR_PC_G0, dat4
/// ldr r0, target1
.inst 0xe51f0008
.reloc 0x10, R_ARM_LDR_PC_G0, target1
/// ldr r1, target2
.inst 0xe51f1008
.reloc 0x14, R_ARM_LDR_PC_G0, target2
.section .text.pos, "ax", %progbits
.balign 4
.global pos
.type pos, %function
pos:
/// ldr r2, target3
.inst 0xe51f2008
.reloc 0, R_ARM_LDR_PC_G0, target3
/// ldr r3, target4
.inst 0xe51f3008
.reloc 4, R_ARM_LDR_PC_G0, target4
/// ldr r0, dat5
.inst 0xe51f0008
.reloc 8, R_ARM_LDR_PC_G0, dat5
/// ldr r1, dat6
.inst 0xe51f1008
.reloc 0xc, R_ARM_LDR_PC_G0, dat6
/// ldr r2, dat7
.inst 0xe51f2008
.reloc 0x10, R_ARM_LDR_PC_G0, dat7
/// ldr r3, dat8
.inst 0xe51f3008
.reloc 0x14, R_ARM_LDR_PC_G0, dat8
/// positive addend in instruction, all others are -8 (PC bias)
///ldr r4, dat5 + 8
.inst 0xe59f4000
.reloc 0x18, R_ARM_LDR_PC_G0, dat5
.section .text.high, "ax", %progbits
.balign 4
.type target3, %function
.global target3
target3:
bx lr
.thumb_func
target4:
bx lr
.section .data.high, "aw", %progbits
dat5:
.byte 0
dat6:
.byte 1
dat7:
.byte 2
dat8:
.byte 3
// SYMS: Name: dat1
// SYMS-NEXT: Value: 0x8012
// SYMS: Name: dat2
// SYMS-NEXT: Value: 0x8013
// SYMS: Name: dat3
// SYMS-NEXT: Value: 0x8014
// SYMS: Name: dat4
// SYMS-NEXT: Value: 0x8015
// CHECK: 00008f00 <target1>:
// CHECK-NEXT: 8f00: bx lr
// CHECK: 00008f04 <target2>:
// CHECK-NEXT: 8f04: bx lr
// CHECK: 00009000 <_start>:
/// 0x9000 + 0x8 - 0xff6 = 0x8012
// CHECK-NEXT: 9000: ldr r0, [pc, #-4086]
/// 0x9004 + 0x8 - 0xff9 = 0x8013
// CHECK-NEXT: 9004: ldr r1, [pc, #-4089]
/// 0x9008 + 0x8 - 0xffc = 0x8014
// CHECK-NEXT: 9008: ldr r2, [pc, #-4092]
/// 0x900c + 0x8 - 0xfff = 0x8015
// CHECK-NEXT: 900c: ldr r3, [pc, #-4095]
/// 0x9010 + 0x8 - 0x118 = 0x8f00
// CHECK-NEXT: 9010: ldr r0, [pc, #-280]
/// 0x9014 + 0x8 - 0x118 = 0x8f04
// CHECK-NEXT: 9014: ldr r1, [pc, #-280]
///
// CHECK: 00010000 <pos>:
/// 0x10000 + 0x8 + 0xf8 = 0x10100
// CHECK-NEXT: 10000: ldr r2, [pc, #248]
/// 0x10004 + 0x8 + 0xf8 = 0x10104
// CHECK-NEXT: 10004: ldr r3, [pc, #248]
/// 0x10008 + 0x8 + 0xfff = 0x1100f
// CHECK-NEXT: 10008: ldr r0, [pc, #4095]
/// 0x1000c + 0x8 + 0xffc = 0x11010
// CHECK-NEXT: 1000c: ldr r1, [pc, #4092]
/// 0x10010 + 0x8 + 0xff9 = 0x11011
// CHECK-NEXT: 10010: ldr r2, [pc, #4089]
/// 0x10014 + 0x8 + 0xff6 = 0x11012
// CHECK-NEXT: 10014: ldr r3, [pc, #4086]
/// 0x10018 + 0x8 + 0xff7 = 0x11017 = dat5 + 8
// CHECK-NEXT: 10018: ldr r4, [pc, #4087]
// CHECK: 00010100 <target3>:
// CHECK-NEXT: 10100: bx lr
// CHECK: 00010104 <target4>:
// CHECK-NEXT: 10104: bx lr
// SYMS: Name: dat5
// SYMS-NEXT: Value: 0x1100F
// SYMS: Name: dat6
// SYMS-NEXT: Value: 0x11010
// SYMS: Name: dat7
// SYMS-NEXT: Value: 0x11011
// SYMS: Name: dat8
// SYMS-NEXT: Value: 0x11012