ARM: module: implement support for PC-relative group relocations
Add support for the R_ARM_ALU_PC_Gn_NC and R_ARM_LDR_PC_G2 group relocations [0] so we can use them in modules. These will be used to load the current task pointer from a global variable without having to rely on a literal pool entry to carry the address of this variable, which may have a significant negative impact on cache utilization for variables that are used often and in many different places, as each occurrence will result in a literal pool entry and therefore a line in the D-cache. [0] 'ELF for the ARM architecture' https://github.com/ARM-software/abi-aa/releases Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Nicolas Pitre <nico@fluxnic.net> Signed-off-by: Ard Biesheuvel <ardb@kernel.org> Tested-by: Marc Zyngier <maz@kernel.org> Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
This commit is contained in:
parent
831a469bc1
commit
1fa8c4b195
|
@ -61,6 +61,9 @@ typedef struct user_fp elf_fpregset_t;
|
|||
#define R_ARM_MOVT_ABS 44
|
||||
#define R_ARM_MOVW_PREL_NC 45
|
||||
#define R_ARM_MOVT_PREL 46
|
||||
#define R_ARM_ALU_PC_G0_NC 57
|
||||
#define R_ARM_ALU_PC_G1_NC 59
|
||||
#define R_ARM_LDR_PC_G2 63
|
||||
|
||||
#define R_ARM_THM_CALL 10
|
||||
#define R_ARM_THM_JUMP24 30
|
||||
|
|
|
@ -68,6 +68,42 @@ bool module_exit_section(const char *name)
|
|||
strstarts(name, ".ARM.exidx.exit");
|
||||
}
|
||||
|
||||
/*
|
||||
* This implements the partitioning algorithm for group relocations as
|
||||
* documented in the ARM AArch32 ELF psABI (IHI 0044).
|
||||
*
|
||||
* A single PC-relative symbol reference is divided in up to 3 add or subtract
|
||||
* operations, where the final one could be incorporated into a load/store
|
||||
* instruction with immediate offset. E.g.,
|
||||
*
|
||||
* ADD Rd, PC, #... or ADD Rd, PC, #...
|
||||
* ADD Rd, Rd, #... ADD Rd, Rd, #...
|
||||
* LDR Rd, [Rd, #...] ADD Rd, Rd, #...
|
||||
*
|
||||
* The latter has a guaranteed range of only 16 MiB (3x8 == 24 bits), so it is
|
||||
* of limited use in the kernel. However, the ADD/ADD/LDR combo has a range of
|
||||
* -/+ 256 MiB, (2x8 + 12 == 28 bits), which means it has sufficient range for
|
||||
* any in-kernel symbol reference (unless module PLTs are being used).
|
||||
*
|
||||
* The main advantage of this approach over the typical pattern using a literal
|
||||
* load is that literal loads may miss in the D-cache, and generally lead to
|
||||
* lower cache efficiency for variables that are referenced often from many
|
||||
* different places in the code.
|
||||
*/
|
||||
static u32 get_group_rem(u32 group, u32 *offset)
|
||||
{
|
||||
u32 val = *offset;
|
||||
u32 shift;
|
||||
do {
|
||||
shift = val ? (31 - __fls(val)) & ~1 : 32;
|
||||
*offset = val;
|
||||
if (!val)
|
||||
break;
|
||||
val &= 0xffffff >> shift;
|
||||
} while (group--);
|
||||
return shift;
|
||||
}
|
||||
|
||||
int
|
||||
apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
|
||||
unsigned int relindex, struct module *module)
|
||||
|
@ -82,6 +118,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
|
|||
unsigned long loc;
|
||||
Elf32_Sym *sym;
|
||||
const char *symname;
|
||||
u32 shift, group = 1;
|
||||
s32 offset;
|
||||
u32 tmp;
|
||||
#ifdef CONFIG_THUMB2_KERNEL
|
||||
|
@ -212,6 +249,54 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
|
|||
*(u32 *)loc = __opcode_to_mem_arm(tmp);
|
||||
break;
|
||||
|
||||
case R_ARM_ALU_PC_G0_NC:
|
||||
group = 0;
|
||||
fallthrough;
|
||||
case R_ARM_ALU_PC_G1_NC:
|
||||
tmp = __mem_to_opcode_arm(*(u32 *)loc);
|
||||
offset = ror32(tmp & 0xff, (tmp & 0xf00) >> 7);
|
||||
if (tmp & BIT(22))
|
||||
offset = -offset;
|
||||
offset += sym->st_value - loc;
|
||||
if (offset < 0) {
|
||||
offset = -offset;
|
||||
tmp = (tmp & ~BIT(23)) | BIT(22); // SUB opcode
|
||||
} else {
|
||||
tmp = (tmp & ~BIT(22)) | BIT(23); // ADD opcode
|
||||
}
|
||||
|
||||
shift = get_group_rem(group, &offset);
|
||||
if (shift < 24) {
|
||||
offset >>= 24 - shift;
|
||||
offset |= (shift + 8) << 7;
|
||||
}
|
||||
*(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset);
|
||||
break;
|
||||
|
||||
case R_ARM_LDR_PC_G2:
|
||||
tmp = __mem_to_opcode_arm(*(u32 *)loc);
|
||||
offset = tmp & 0xfff;
|
||||
if (~tmp & BIT(23)) // U bit cleared?
|
||||
offset = -offset;
|
||||
offset += sym->st_value - loc;
|
||||
if (offset < 0) {
|
||||
offset = -offset;
|
||||
tmp &= ~BIT(23); // clear U bit
|
||||
} else {
|
||||
tmp |= BIT(23); // set U bit
|
||||
}
|
||||
get_group_rem(2, &offset);
|
||||
|
||||
if (offset > 0xfff) {
|
||||
pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
|
||||
module->name, relindex, i, symname,
|
||||
ELF32_R_TYPE(rel->r_info), loc,
|
||||
sym->st_value);
|
||||
return -ENOEXEC;
|
||||
}
|
||||
*(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset);
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_THUMB2_KERNEL
|
||||
case R_ARM_THM_CALL:
|
||||
case R_ARM_THM_JUMP24:
|
||||
|
|
Loading…
Reference in New Issue