[LLD][ELF][ARM] Implement ARM pc-relative relocations for ADR and LDR

The R_ARM_ALU_PC_G0 and R_ARM_LDR_PC_G0 relocations are used by the ADR and LDR pseudo-instructions, and are the basis of the group relocations that can load an arbitrary constant via a series of add, sub and ldr instructions. In assembly the relocations have to be requested explicitly with the .reloc directive. R_ARM_ALU_PC_G0 is much more complicated than R_ARM_LDR_PC_G0 because the add/sub instruction uses a modified immediate encoding: an 8-bit immediate rotated right by twice the value of a 4-bit field, i.e. by an even number of bits. This means that the range of representable immediates is sparse. We copy the encoding and decoding functions for the modified immediate from llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h as this header file is not accessible from LLD. Duplication of code isn't ideal, but as these are well-defined mathematical functions they are unlikely to change. Differential Revision: https://reviews.llvm.org/D75349
2020-04-04 17:15:19 +01:00 · 2020-04-04 17:15:19 +01:00 · 28b172e341
parent 66c18c729d
commit 28b172e341
6 changed files with 491 additions and 0 deletions
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@ -132,6 +132,8 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVT_PREL:
    return R_PC;
+  case R_ARM_ALU_PC_G0:
+  case R_ARM_LDR_PC_G0:
  case R_ARM_THM_ALU_PREL_11_0:
  case R_ARM_THM_PC8:
  case R_ARM_THM_PC12:
@ -410,6 +412,58 @@ static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) {
  }
 }

+// Utility functions taken from ARMAddressingModes.h, only changes are LLD
+// coding style.
+
+// Returns val with its bits rotated to the right by amt positions. amt must be
+// less than 32; masking the complementary shift with 31 keeps it in range when
+// amt is 0.
+static uint32_t rotr32(uint32_t val, uint32_t amt) {
+  assert(amt < 32 && "Invalid rotate amount");
+  const uint32_t wrapAround = (32 - amt) & 31;
+  const uint32_t highPart = val << wrapAround; // bits that fall off the bottom
+  const uint32_t lowPart = val >> amt;
+  return lowPart | highPart;
+}
+
+// Returns val with its bits rotated to the left by amt positions. amt must be
+// less than 32; masking the complementary shift with 31 keeps it in range when
+// amt is 0.
+static uint32_t rotl32(uint32_t val, uint32_t amt) {
+  assert(amt < 32 && "Invalid rotate amount");
+  const uint32_t wrapAround = (32 - amt) & 31;
+  const uint32_t highPart = val << amt;
+  const uint32_t lowPart = val >> wrapAround; // bits that fall off the top
+  return highPart | lowPart;
+}
+
+// Choose the rotate amount for encoding the 32-bit immediate imm as a shifter
+// operand, i.e. an 8-bit value rotated right by an even number of bits. The
+// returned amount is the right rotation the hardware applies. If imm cannot be
+// expressed with a single shifter operand, the amount returned still selects a
+// maximal chunk of its bits, so callers that need an exact encoding must check
+// the result themselves.
+static uint32_t getSOImmValRotate(uint32_t imm) {
+  // Anything that already fits in the low 8 bits needs no rotation at all.
+  if (imm <= 255U)
+    return 0;
+
+  // Count the trailing zeros to find where the set bits start, rounding down
+  // to an even amount because only even rotations exist (0x200 needs 8, not 9).
+  const unsigned primaryRot = llvm::countTrailingZeros(imm) & ~1;
+  // The hardware rotates right, not left, so convert the amount once here.
+  const uint32_t primaryEnc = (32 - primaryRot) & 31;
+  if (rotr32(imm, primaryRot) <= 255U)
+    return primaryEnc;
+
+  // For values like 0xF000000F the set bits wrap around the end of the word:
+  // ignore the low 6 bits and search for the start of the run again.
+  if ((imm & 63U) != 0) {
+    const unsigned wrappedRot = llvm::countTrailingZeros(imm & ~63U) & ~1;
+    if (rotr32(imm, wrappedRot) <= 255U)
+      return (32 - wrappedRot) & 31; // HW rotates right, not left.
+  }
+
+  // No single shifter operand covers this span of bits; fall back to the first
+  // candidate, which still extracts a useful chunk of the immediate.
+  return primaryEnc;
+}
+
 void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  switch (rel.type) {
  case R_ARM_ABS32:
@ -574,6 +628,45 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
                  ((val << 4) & 0x7000) |    // imm3
                  (val & 0x00ff));           // imm8
    break;
+  case R_ARM_ALU_PC_G0: {
+    // ADR (literal) add = bit23, sub = bit22
+    // literal is a 12-bit modified immediate, made up of a 4-bit even rotate
+    // right and an 8-bit immediate. The code-sequence here is derived from
+    // ARMAddressingModes.h in llvm/Target/ARM/MCTargetDesc. In our case we
+    // want to give an error if we cannot encode the constant.
+    uint32_t opcode = 0x00800000;
+    if (val >> 63) {
+      opcode = 0x00400000;
+      val = ~val + 1;
+    }
+    if ((val & ~255U) != 0) {
+      uint32_t rotAmt = getSOImmValRotate(val);
+      // Error if we cannot encode this with a single shift
+      if (rotr32(~255U, rotAmt) & val)
+        error(getErrorLocation(loc) + "unencodeable immediate " +
+              Twine(val).str() + " for relocation " + toString(rel.type));
+      val = rotl32(val, rotAmt) | ((rotAmt >> 1) << 8);
+    }
+    write32le(loc, (read32le(loc) & 0xff0ff000) | opcode | val);
+    break;
+  }
+  case R_ARM_LDR_PC_G0: {
+    // R_ARM_LDR_PC_G0 is S + A - P, we have ((S + A) | T) - P, if S is a
+    // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
+    // bottom bit to recover S + A - P.
+    if (rel.sym->isFunc())
+      val &= ~0x1;
+    // LDR (literal) u = bit23
+    int64_t imm = val;
+    uint32_t u = 0x00800000;
+    if (imm < 0) {
+      imm = -imm;
+      u = 0;
+    }
+    checkUInt(loc, imm, 12, rel);
+    write32le(loc, (read32le(loc) & 0xff7ff000) | u | imm);
+    break;
+  }
  case R_ARM_THM_ALU_PREL_11_0: {
    // ADR encoding T2 (sub), T3 (add) i:imm3:imm8
    int64_t imm = val;
@ -708,6 +801,22 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
                            ((lo & 0x7000) >> 4) |  // imm3
                            (lo & 0x00ff));         // imm8
  }
+  case R_ARM_ALU_PC_G0: {
+    // 12-bit immediate is a modified immediate made up of a 4-bit even
+    // right rotation and 8-bit constant. After the rotation the value
+    // is zero-extended. When bit 23 is set the instruction is an add, when
+    // bit 22 is set it is a sub.
+    uint32_t instr = read32le(buf);
+    uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2);
+    return (instr & 0x00400000) ? -val : val;
+  }
+  case R_ARM_LDR_PC_G0: {
+    // LDR (literal): bit 23 (U) selects whether the unsigned 12-bit immediate
+    // is added to (set) or subtracted from (clear) the PC.
+    bool u = read32le(buf) & 0x00800000;
+    uint32_t imm12 = read32le(buf) & 0xfff;
+    return u ? imm12 : -imm12;
+  }
  case R_ARM_THM_ALU_PREL_11_0: {
    // Thumb2 ADR, which is an alias for a sub or add instruction with an
    // unsigned immediate.
--- a/lld/test/ELF/arm-adr-err.s
+++ b/lld/test/ELF/arm-adr-err.s
@ -0,0 +1,30 @@
+// REQUIRES: arm
+// RUN: llvm-mc --triple=armv7a-none-eabi --arm-add-build-attributes -filetype=obj -o %t.o %s
+// RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s
+ .section .os0, "ax", %progbits
+ .balign 1024
+ .thumb_func
+low:
+ bx lr
+
+/// Check that we error when the immediate for the add or sub is not encodeable
+ .section .os1, "ax", %progbits
+ .arm
+ .balign 1024
+ .global _start
+ .type _start, %function
+_start:
+// CHECK: {{.*}}.s.tmp.o:(.os1+0x0): unencodeable immediate 1031 for relocation R_ARM_ALU_PC_G0
+/// adr r0, low
+ .inst 0xe24f0008
+ .reloc 0, R_ARM_ALU_PC_G0, low
+ // CHECK: {{.*}}.s.tmp.o:(.os1+0x4): unencodeable immediate 1013 for relocation R_ARM_ALU_PC_G0
+/// adr r1, unaligned
+ .inst 0xe24f1008
+ .reloc 4, R_ARM_ALU_PC_G0, unaligned
+
+ .section .os2, "ax", %progbits
+ .balign 1024
+ .thumb_func
+unaligned:
+  bx lr
--- a/lld/test/ELF/arm-adr-long.s
+++ b/lld/test/ELF/arm-adr-long.s
@ -0,0 +1,42 @@
+// REQUIRES: arm
+// RUN: llvm-mc --triple=armv7a-none-eabi --arm-add-build-attributes -filetype=obj -o %t.o %s
+// RUN: echo "SECTIONS { \
+// RUN:                 .text.0 0x10000000 : { *(.text.0) } \
+// RUN:                 .text.1 0x80000000 : { *(.text.1) } \
+// RUN:                 .text.2 0xf0000010 : { *(.text.2) } \
+// RUN:               } " > %t.script
+// RUN: ld.lld --script %t.script %t.o -o %t
+// RUN: llvm-objdump -d --no-show-raw-insn --triple=armv7a-none-eabi %t | FileCheck %s
+
+/// Test the long range encoding of R_ARM_ALU_PC_G0. We can encode an 8-bit
+/// immediate rotated right by an even 4-bit field.
+ .section .text.0, "ax", %progbits
+dat1:
+ .word 0
+
+ .section .text.1, "ax", %progbits
+ .global _start
+ .type _start, %function
+_start:
+/// adr r0, dat1
+ .inst 0xe24f0008
+ .reloc 0, R_ARM_ALU_PC_G0, dat1
+/// adr r1, dat2
+ .inst 0xe24f1008
+ .reloc 4, R_ARM_ALU_PC_G0, dat2
+
+ .section .text.2, "ax", %progbits
+dat2:
+ .word 0
+
+// CHECK:      10000000 <dat1>:
+// CHECK-NEXT: 10000000: andeq   r0, r0, r0
+
+// CHECK:      80000000 <_start>:
+/// 0x80000000 + 0x8 - 0x70000008 = 0x10000000
+// CHECK-NEXT: 80000000: sub     r0, pc, #1879048200
+/// 0x80000004 + 0x8 + 0x70000004 = 0xf0000010
+// CHECK-NEXT: 80000004: add     r1, pc, #1879048196
+
+// CHECK:      f0000010 <dat2>:
+// CHECK-NEXT: f0000010: andeq   r0, r0, r0
--- a/lld/test/ELF/arm-adr.s
+++ b/lld/test/ELF/arm-adr.s
@ -0,0 +1,111 @@
+// REQUIRES: arm
+// RUN: llvm-mc --triple=armv7a-none-eabi --arm-add-build-attributes -filetype=obj -o %t.o %s
+// RUN: ld.lld %t.o -o %t
+// RUN: llvm-objdump -d --no-show-raw-insn --triple=armv7a-none-eabi %t | FileCheck %s
+
+/// Test the short range cases of R_ARM_ALU_PC_G0. The range of the instruction
+/// depends on the number of trailing zeros of the displacement. In practice
+/// the maximum effective range will be 1024 bytes, which is a 4-byte aligned
+/// instruction to a 4-byte aligned word.
+
+ .arm
+ .section .os1, "ax", %progbits
+ .balign 1024
+ .word 0
+ .word 0
+ .word 0
+ .word 0
+dat1:
+ .word 0
+dat2:
+ .word 0
+
+ .section .os2, "ax", %progbits
+ .balign 1024
+ .global _start
+ .type _start, %function
+_start:
+/// adr r0, dat1
+ .inst 0xe24f0008
+ .reloc 0, R_ARM_ALU_PC_G0, dat1
+/// adr r0, dat2
+ .inst 0xe24f0008
+ .reloc 4, R_ARM_ALU_PC_G0, dat2
+/// adr r0, dat3
+ .inst 0xe24f0008
+ .reloc 8, R_ARM_ALU_PC_G0, dat3
+/// adr r0, dat4
+ .inst 0xe24f0008
+ .reloc 0xc, R_ARM_ALU_PC_G0, dat4
+
+ .section .os3, "ax", %progbits
+ .balign 1024
+ .word 0
+ .word 0
+ .word 0
+ .word 0
+dat3:
+ .word 0
+dat4:
+ .word 0
+
+ .section .os4, "ax", %progbits
+ .thumb
+ .type tfunc, %function
+tfunc:
+  bx lr
+
+ .section .os5, "ax", %progbits
+ .arm
+ .type arm_func, %function
+
+arm_func:
+ .balign 4
+/// adr r0, tfunc
+ .inst 0xe24f0008
+ .reloc 0, R_ARM_ALU_PC_G0, tfunc
+/// adr r0, afunc
+ .inst 0xe24f0008
+ .reloc 4, R_ARM_ALU_PC_G0, afunc
+ bx lr
+
+ .section .os6, "ax", %progbits
+ .type afunc, %function
+ .balign 4
+afunc:
+ bx lr
+
+// CHECK:      00011410 <dat1>:
+// CHECK-NEXT: 11410: andeq   r0, r0, r0
+
+// CHECK:      00011414 <dat2>:
+// CHECK-NEXT: 11414: andeq   r0, r0, r0
+
+// CHECK:     00011800 <_start>:
+/// 0x11800 + 0x8 - 0x3f8 = 0x11410 = dat1
+// CHECK-NEXT: 11800: sub     r0, pc, #1016
+/// 0x11804 + 0x8 - 0x3f8 = 0x11414 = dat2
+// CHECK-NEXT: 11804: sub     r0, pc, #1016
+/// 0x11808 + 0x8 + 0x400 = 0x11c10 = dat3
+// CHECK-NEXT: 11808: add     r0, pc, #1024
+/// 0x1180c + 0x8 + 0x400 = 0x11c14 = dat4
+// CHECK-NEXT: 1180c: add     r0, pc, #1024
+
+// CHECK:      00011c10 <dat3>:
+// CHECK-NEXT: 11c10: andeq   r0, r0, r0
+
+// CHECK:      00011c14 <dat4>:
+// CHECK-NEXT: 11c14: andeq   r0, r0, r0
+
+// CHECK:      00011c18 <tfunc>:
+// CHECK-NEXT: 11c18: bx      lr
+
+// CHECK:      00011c1c <arm_func>:
+/// 0x11c1c + 0x8 - 0xb = 11c19 = tfunc
+// CHECK-NEXT: 11c1c: sub     r0, pc, #11
+/// 0x11c20 + 0x8 = 0x11c28 = afunc
+// CHECK-NEXT: 11c20: add     r0, pc, #0
+// CHECK-NEXT: 11c24: bx      lr
+
+// CHECK:      00011c28 <afunc>:
+// CHECK-NEXT: 11c28: bx      lr
--- a/lld/test/ELF/arm-ldrlit-err.s
+++ b/lld/test/ELF/arm-ldrlit-err.s
@ -0,0 +1,29 @@
+// REQUIRES: arm
+// RUN: llvm-mc --triple=armv7a-none-eabi --arm-add-build-attributes -filetype=obj -o %t.o %s
+// RUN: not ld.lld -n %t.o -o %t 2>&1 | FileCheck %s
+ .section .text.0, "ax", %progbits
+ .thumb_func
+ .balign 4
+low:
+  bx lr
+  nop
+  nop
+
+ .section .text.1, "ax", %progbits
+ .global _start
+ .arm
+_start:
+// CHECK: {{.*}}.s.tmp.o:(.text.1+0x0): relocation R_ARM_LDR_PC_G0 out of range: 4096 is not in [0, 4095]
+/// ldr r0, low - 4076
+ .inst 0xe51f0ff4
+ .reloc 0, R_ARM_LDR_PC_G0, low
+// CHECK: {{.*}}.s.tmp.o:(.text.1+0x4): relocation R_ARM_LDR_PC_G0 out of range: 4096 is not in [0, 4095]
+/// ldr r0, high + 4100
+ .inst 0xe59f0ffc
+ .reloc 4, R_ARM_LDR_PC_G0, high
+
+ .section .text.2
+ .thumb_func
+ .balign 4
+high:
+ bx lr
--- a/lld/test/ELF/arm-ldrlit.s
+++ b/lld/test/ELF/arm-ldrlit.s
@ -0,0 +1,170 @@
+// REQUIRES: arm
+// RUN: llvm-mc --triple=armv7a-none-eabi --arm-add-build-attributes -filetype=obj -o %t.o %s
+// RUN: llvm-objdump -d -r --triple=armv7a-none-eabi %t.o
+// RUN: echo "SECTIONS { \
+// RUN:                 .rodata.low 0x8012  : { *(.rodata.low) } \
+// RUN:                 .text.low   0x8f00  : { *(.text.low) } \
+// RUN:                 .text.neg   0x9000  : { *(.text.neg) } \
+// RUN:                 .text.pos   0x10000 : { *(.text.pos) } \
+// RUN:                 .text.high  0x10100 : { *(.text.high) } \
+// RUN:                 .data_high  0x1100f : { *(.data.high) } \
+// RUN:               } " > %t.script
+// RUN: ld.lld -n --script %t.script %t.o -o %t
+// RUN: llvm-readobj --symbols %t | FileCheck %s --check-prefix=SYMS
+// RUN: llvm-objdump -d --no-show-raw-insn --triple=armv7a-none-eabi %t | FileCheck %s
+
+/// Test the various legal cases for the R_ARM_LDR_PC_G0 relocation
+/// Range is +- 4095 bytes
+/// The Thumb bit for function symbols is ignored
+ .section .rodata.low, "a", %progbits
+dat1:
+ .byte 0
+dat2:
+ .byte 1
+dat3:
+ .byte 2
+dat4:
+ .byte 3
+
+ .section .text.low, "ax", %progbits
+ .balign 4
+ .global target1
+ .type target1, %function
+target1:
+ bx lr
+ .type target2, %function
+target2:
+ bx lr
+
+ .section .text.neg, "ax", %progbits
+ .balign 4
+ .global _start
+ .type _start, %function
+_start:
+/// ldr r0, dat1
+ .inst 0xe51f0008
+ .reloc 0, R_ARM_LDR_PC_G0, dat1
+/// ldr r1, dat2
+ .inst 0xe51f1008
+ .reloc 4, R_ARM_LDR_PC_G0, dat2
+/// ldr r2, dat3
+ .inst 0xe51f2008
+ .reloc 8, R_ARM_LDR_PC_G0, dat3
+/// ldr r3, dat4
+ .inst 0xe51f3008
+ .reloc 0xc, R_ARM_LDR_PC_G0, dat4
+/// ldr r0, target1
+ .inst 0xe51f0008
+ .reloc 0x10, R_ARM_LDR_PC_G0, target1
+/// ldr r1, target2
+ .inst 0xe51f1008
+ .reloc 0x14, R_ARM_LDR_PC_G0, target2
+
+ .section .text.pos, "ax", %progbits
+ .balign 4
+ .global pos
+ .type pos, %function
+pos:
+/// ldr r2, target3
+ .inst 0xe51f2008
+ .reloc 0, R_ARM_LDR_PC_G0, target3
+/// ldr r3, target4
+ .inst 0xe51f3008
+ .reloc 4, R_ARM_LDR_PC_G0, target4
+/// ldr r0, dat5
+ .inst 0xe51f0008
+ .reloc 8, R_ARM_LDR_PC_G0, dat5
+/// ldr r1, dat6
+ .inst 0xe51f1008
+ .reloc 0xc, R_ARM_LDR_PC_G0, dat6
+/// ldr r2, dat7
+ .inst 0xe51f2008
+ .reloc 0x10, R_ARM_LDR_PC_G0, dat7
+/// ldr r3, dat8
+ .inst 0xe51f3008
+ .reloc 0x14, R_ARM_LDR_PC_G0, dat8
+
+/// positive addend in instruction, all others are -4 (PC bias)
+///ldr r4, dat5 + 8
+ .inst 0xe59f4000
+ .reloc 0x18, R_ARM_LDR_PC_G0, dat5
+
+ .section .text.high, "ax", %progbits
+ .balign 4
+ .type target3, %function
+ .global target3
+target3:
+ bx lr
+ .thumb_func
+target4:
+ bx lr
+
+ .section .data.high, "aw", %progbits
+dat5:
+ .byte 0
+dat6:
+ .byte 1
+dat7:
+ .byte 2
+dat8:
+ .byte 3
+
+// SYMS:     Name: dat1
+// SYMS-NEXT:     Value: 0x8012
+// SYMS:     Name: dat2
+// SYMS-NEXT:     Value: 0x8013
+// SYMS:     Name: dat3
+// SYMS-NEXT:     Value: 0x8014
+// SYMS:     Name: dat4
+// SYMS-NEXT:     Value: 0x8015
+
+// CHECK: 00008f00 <target1>:
+// CHECK-NEXT:     8f00:        bx      lr
+
+// CHECK: 00008f04 <target2>:
+// CHECK-NEXT:     8f04:        bx      lr
+
+// CHECK: 00009000 <_start>:
+/// 0x9000 + 0x8 - 0xff6 = 0x8012
+// CHECK-NEXT: 9000:  ldr     r0, [pc, #-4086]
+/// 0x9004 + 0x8 - 0xff9 = 0x8013
+// CHECK-NEXT: 9004:  ldr     r1, [pc, #-4089]
+/// 0x9008 + 0x8 - 0xffc = 0x8014
+// CHECK-NEXT: 9008:  ldr     r2, [pc, #-4092]
+/// 0x900c + 0x8 - 0xfff = 0x8015
+// CHECK-NEXT: 900c:  ldr     r3, [pc, #-4095]
+/// 0x9010 + 0x8 - 0x118 = 0x8f00
+// CHECK-NEXT: 9010:  ldr     r0, [pc, #-280]
+/// 0x9014 + 0x8 - 0x118 = 0x8f04
+// CHECK-NEXT: 9014:  ldr     r1, [pc, #-280]
+///
+// CHECK: 00010000 <pos>:
+/// 0x10000 + 0x8 + 0xf8 = 0x10100
+// CHECK-NEXT: 10000:  ldr     r2, [pc, #248]
+/// 0x10004 + 0x8 + 0xf8 = 0x10104
+// CHECK-NEXT: 10004: ldr     r3, [pc, #248]
+/// 0x10008 + 0x8 + 0xfff = 0x1100f
+// CHECK-NEXT: 10008: ldr     r0, [pc, #4095]
+/// 0x1000c + 0x8 + 0xffc = 0x11010
+// CHECK-NEXT: 1000c: ldr     r1, [pc, #4092]
+/// 0x10010 + 0x8 + 0xff9 = 0x11011
+// CHECK-NEXT: 10010: ldr     r2, [pc, #4089]
+/// 0x10014 + 0x8 + 0xff6 = 0x11012
+// CHECK-NEXT: 10014: ldr     r3, [pc, #4086]
+/// 0x10018 + 0x8 + 0xff7 = 0x11017 = dat5 + 8
+// CHECK-NEXT: 10018: ldr     r4, [pc, #4087]
+
+// CHECK: 00010100 <target3>:
+// CHECK-NEXT: 10100: bx      lr
+
+// CHECK: 00010104 <target4>:
+// CHECK-NEXT: 10104: bx      lr
+
+// SYMS:     Name: dat5
+// SYMS-NEXT:     Value: 0x1100F
+// SYMS:     Name: dat6
+// SYMS-NEXT:     Value: 0x11010
+// SYMS:     Name: dat7
+// SYMS-NEXT:     Value: 0x11011
+// SYMS:     Name: dat8
+// SYMS-NEXT:     Value: 0x11012