From 3fc24ef32d3b9368f4c103dcd21d6a3f959b4870 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 Jul 2022 15:53:22 +0200 Subject: [PATCH 001/126] arm64: compat: Implement misalignment fixups for multiword loads The 32-bit ARM kernel implements fixups on behalf of user space when using LDM/STM or LDRD/STRD instructions on addresses that are not 32-bit aligned. This is not something that is supported by the architecture, but was done anyway to increase compatibility with user space software, which mostly targeted x86 at the time and did not care about aligned accesses. This feature is one of the remaining impediments to being able to switch to 64-bit kernels on 64-bit capable hardware running 32-bit user space, so let's implement it for the arm64 compat layer as well. Note that the intent is to implement the exact same handling of misaligned multi-word loads and stores as the 32-bit kernel does, including what appears to be missing support for user space programs that rely on SETEND to switch to a different byte order and back. Also, like the 32-bit ARM version, we rely on the faulting address reported by the CPU to infer the memory address, instead of decoding the instruction fully to obtain this information. This implementation is taken from the 32-bit ARM tree, with all pieces removed that deal with instructions other than LDRD/STRD and LDM/STM, or that deal with alignment exceptions taken in kernel mode. Cc: debian-arm@lists.debian.org Cc: Vagrant Cascadian Cc: Riku Voipio Cc: Steve McIntyre Signed-off-by: Ard Biesheuvel Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20220701135322.3025321-1-ardb@kernel.org [catalin.marinas@arm.com: change the option to 'default n'] Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 3 + arch/arm64/include/asm/exception.h | 1 + arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/compat_alignment.c | 387 +++++++++++++++++++++++++++ arch/arm64/mm/fault.c | 3 + 5 files changed, 395 insertions(+) create mode 100644 arch/arm64/kernel/compat_alignment.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9fb9fff08c94..b17d6861a3ad 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1562,6 +1562,9 @@ config THUMB2_COMPAT_VDSO Compile the compat vDSO with '-mthumb -fomit-frame-pointer' if y, otherwise with '-marm'. +config COMPAT_ALIGNMENT_FIXUPS + bool "Fix up misaligned multi-word loads and stores in user space" + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on SYSCTL diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d94aecff9690..e92ca08f754c 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -70,6 +70,7 @@ void do_sysinstr(unsigned long esr, struct pt_regs *regs); void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs); void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr); void do_cp15instr(unsigned long esr, struct pt_regs *regs); +int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1add7b01efa7..38a0b0291edb 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -45,6 +45,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o obj-$(CONFIG_COMPAT) += sigreturn32.o +obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS) += compat_alignment.o obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c new file mode 100644 index 000000000000..5edec2f49ec9 --- /dev/null +++ b/arch/arm64/kernel/compat_alignment.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0-only +// based on arch/arm/mm/alignment.c + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998 + * + * Speed optimisations and better fault handling by Russell King. + */ +#define CODING_BITS(i) (i & 0x0e000000) + +#define LDST_P_BIT(i) (i & (1 << 24)) /* Preindex */ +#define LDST_U_BIT(i) (i & (1 << 23)) /* Add offset */ +#define LDST_W_BIT(i) (i & (1 << 21)) /* Writeback */ +#define LDST_L_BIT(i) (i & (1 << 20)) /* Load */ + +#define LDST_P_EQ_U(i) ((((i) ^ ((i) >> 1)) & (1 << 23)) == 0) + +#define LDSTHD_I_BIT(i) (i & (1 << 22)) /* double/half-word immed */ + +#define RN_BITS(i) ((i >> 16) & 15) /* Rn */ +#define RD_BITS(i) ((i >> 12) & 15) /* Rd */ +#define RM_BITS(i) (i & 15) /* Rm */ + +#define REGMASK_BITS(i) (i & 0xffff) + +#define BAD_INSTR 0xdeadc0de + +/* Thumb-2 32 bit format per ARMv7 DDI0406A A6.3, either f800h,e800h,f800h */ +#define IS_T32(hi16) \ + (((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800)) + +union offset_union { + unsigned long un; + signed long sn; +}; + +#define TYPE_ERROR 0 +#define TYPE_FAULT 1 +#define TYPE_LDST 2 +#define TYPE_DONE 3 + +static void +do_alignment_finish_ldst(unsigned long addr, u32 instr, struct pt_regs *regs, + union offset_union offset) +{ + if (!LDST_U_BIT(instr)) + offset.un = -offset.un; + + if (!LDST_P_BIT(instr)) + addr += offset.un; + + if (!LDST_P_BIT(instr) || LDST_W_BIT(instr)) + regs->regs[RN_BITS(instr)] = addr; +} + +static int +do_alignment_ldrdstrd(unsigned long addr, u32 instr, struct pt_regs *regs) +{ + unsigned int rd = RD_BITS(instr); + unsigned int rd2; + int load; + + if ((instr & 0xfe000000) == 0xe8000000) { + /* ARMv7 Thumb-2 32-bit LDRD/STRD */ + rd2 = (instr >> 8) & 0xf; + load = !!(LDST_L_BIT(instr)); + } else if (((rd & 1) == 1) || (rd == 14)) { + return TYPE_ERROR; + } else { + load = ((instr & 0xf0) == 0xd0); + rd2 = rd + 1; + } + + if (load) { + unsigned int val, val2; + + if (get_user(val, (u32 __user *)addr) || + get_user(val2, (u32 __user *)(addr + 4))) + return TYPE_FAULT; + regs->regs[rd] = val; + regs->regs[rd2] = val2; + } else { + if (put_user(regs->regs[rd], (u32 __user *)addr) || + put_user(regs->regs[rd2], (u32 __user *)(addr + 4))) + return TYPE_FAULT; + } + return TYPE_LDST; +} + +/* + * LDM/STM alignment handler. + * + * There are 4 variants of this instruction: + * + * B = rn pointer before instruction, A = rn pointer after instruction + * ------ increasing address -----> + * | | r0 | r1 | ... | rx | | + * PU = 01 B A + * PU = 11 B A + * PU = 00 A B + * PU = 10 A B + */ +static int +do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs) +{ + unsigned int rd, rn, nr_regs, regbits; + unsigned long eaddr, newaddr; + unsigned int val; + + /* count the number of registers in the mask to be transferred */ + nr_regs = hweight16(REGMASK_BITS(instr)) * 4; + + rn = RN_BITS(instr); + newaddr = eaddr = regs->regs[rn]; + + if (!LDST_U_BIT(instr)) + nr_regs = -nr_regs; + newaddr += nr_regs; + if (!LDST_U_BIT(instr)) + eaddr = newaddr; + + if (LDST_P_EQ_U(instr)) /* U = P */ + eaddr += 4; + + for (regbits = REGMASK_BITS(instr), rd = 0; regbits; + regbits >>= 1, rd += 1) + if (regbits & 1) { + if (LDST_L_BIT(instr)) { + if (get_user(val, (u32 __user *)eaddr)) + return TYPE_FAULT; + if (rd < 15) + regs->regs[rd] = val; + else + regs->pc = val; + } else { + /* + * The PC register has a bias of +8 in ARM mode + * and +4 in Thumb mode. This means that a read + * of the value of PC should account for this. + * Since Thumb does not permit STM instructions + * to refer to PC, just add 8 here. + */ + val = (rd < 15) ? regs->regs[rd] : regs->pc + 8; + if (put_user(val, (u32 __user *)eaddr)) + return TYPE_FAULT; + } + eaddr += 4; + } + + if (LDST_W_BIT(instr)) + regs->regs[rn] = newaddr; + + return TYPE_DONE; +} + +/* + * Convert Thumb multi-word load/store instruction forms to equivalent ARM + * instructions so we can reuse ARM userland alignment fault fixups for Thumb. + * + * This implementation was initially based on the algorithm found in + * gdb/sim/arm/thumbemu.c. It is basically just a code reduction of same + * to convert only Thumb ld/st instruction forms to equivalent ARM forms. + * + * NOTES: + * 1. Comments below refer to ARM ARM DDI0100E Thumb Instruction sections. + * 2. If for some reason we're passed an non-ld/st Thumb instruction to + * decode, we return 0xdeadc0de. This should never happen under normal + * circumstances but if it does, we've got other problems to deal with + * elsewhere and we obviously can't fix those problems here. + */ + +static unsigned long thumb2arm(u16 tinstr) +{ + u32 L = (tinstr & (1<<11)) >> 11; + + switch ((tinstr & 0xf800) >> 11) { + /* 6.6.1 Format 1: */ + case 0xc000 >> 11: /* 7.1.51 STMIA */ + case 0xc800 >> 11: /* 7.1.25 LDMIA */ + { + u32 Rn = (tinstr & (7<<8)) >> 8; + u32 W = ((L<> 11: /* 7.1.48 PUSH */ + case 0xb800 >> 11: /* 7.1.47 POP */ + if ((tinstr & (3 << 9)) == 0x0400) { + static const u32 subset[4] = { + 0xe92d0000, /* STMDB sp!,{registers} */ + 0xe92d4000, /* STMDB sp!,{registers,lr} */ + 0xe8bd0000, /* LDMIA sp!,{registers} */ + 0xe8bd8000 /* LDMIA sp!,{registers,pc} */ + }; + return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] | + (tinstr & 255); /* register_list */ + } + fallthrough; /* for illegal instruction case */ + + default: + return BAD_INSTR; + } +} + +/* + * Convert Thumb-2 32 bit LDM, STM, LDRD, STRD to equivalent instruction + * handlable by ARM alignment handler, also find the corresponding handler, + * so that we can reuse ARM userland alignment fault fixups for Thumb. + * + * @pinstr: original Thumb-2 instruction; returns new handlable instruction + * @regs: register context. + * @poffset: return offset from faulted addr for later writeback + * + * NOTES: + * 1. Comments below refer to ARMv7 DDI0406A Thumb Instruction sections. + * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt) + */ +static void * +do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs, + union offset_union *poffset) +{ + u32 instr = *pinstr; + u16 tinst1 = (instr >> 16) & 0xffff; + u16 tinst2 = instr & 0xffff; + + switch (tinst1 & 0xffe0) { + /* A6.3.5 Load/Store multiple */ + case 0xe880: /* STM/STMIA/STMEA,LDM/LDMIA, PUSH/POP T2 */ + case 0xe8a0: /* ...above writeback version */ + case 0xe900: /* STMDB/STMFD, LDMDB/LDMEA */ + case 0xe920: /* ...above writeback version */ + /* no need offset decision since handler calculates it */ + return do_alignment_ldmstm; + + case 0xf840: /* POP/PUSH T3 (single register) */ + if (RN_BITS(instr) == 13 && (tinst2 & 0x09ff) == 0x0904) { + u32 L = !!(LDST_L_BIT(instr)); + const u32 subset[2] = { + 0xe92d0000, /* STMDB sp!,{registers} */ + 0xe8bd0000, /* LDMIA sp!,{registers} */ + }; + *pinstr = subset[L] | (1<un = (tinst2 & 0xff) << 2; + fallthrough; + + case 0xe940: + case 0xe9c0: + return do_alignment_ldrdstrd; + + /* + * No need to handle load/store instructions up to word size + * since ARMv6 and later CPUs can perform unaligned accesses. + */ + default: + break; + } + return NULL; +} + +static int alignment_get_arm(struct pt_regs *regs, __le32 __user *ip, u32 *inst) +{ + __le32 instr = 0; + int fault; + + fault = get_user(instr, ip); + if (fault) + return fault; + + *inst = __le32_to_cpu(instr); + return 0; +} + +static int alignment_get_thumb(struct pt_regs *regs, __le16 __user *ip, u16 *inst) +{ + __le16 instr = 0; + int fault; + + fault = get_user(instr, ip); + if (fault) + return fault; + + *inst = __le16_to_cpu(instr); + return 0; +} + +int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs) +{ + union offset_union offset; + unsigned long instrptr; + int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs); + unsigned int type; + u32 instr = 0; + u16 tinstr = 0; + int isize = 4; + int thumb2_32b = 0; + int fault; + + instrptr = instruction_pointer(regs); + + if (compat_thumb_mode(regs)) { + __le16 __user *ptr = (__le16 __user *)(instrptr & ~1); + + fault = alignment_get_thumb(regs, ptr, &tinstr); + if (!fault) { + if (IS_T32(tinstr)) { + /* Thumb-2 32-bit */ + u16 tinst2; + fault = alignment_get_thumb(regs, ptr + 1, &tinst2); + instr = ((u32)tinstr << 16) | tinst2; + thumb2_32b = 1; + } else { + isize = 2; + instr = thumb2arm(tinstr); + } + } + } else { + fault = alignment_get_arm(regs, (__le32 __user *)instrptr, &instr); + } + + if (fault) + return 1; + + switch (CODING_BITS(instr)) { + case 0x00000000: /* 3.13.4 load/store instruction extensions */ + if (LDSTHD_I_BIT(instr)) + offset.un = (instr & 0xf00) >> 4 | (instr & 15); + else + offset.un = regs->regs[RM_BITS(instr)]; + + if ((instr & 0x001000f0) == 0x000000d0 || /* LDRD */ + (instr & 0x001000f0) == 0x000000f0) /* STRD */ + handler = do_alignment_ldrdstrd; + else + return 1; + break; + + case 0x08000000: /* ldm or stm, or thumb-2 32bit instruction */ + if (thumb2_32b) { + offset.un = 0; + handler = do_alignment_t32_to_handler(&instr, regs, &offset); + } else { + offset.un = 0; + handler = do_alignment_ldmstm; + } + break; + + default: + return 1; + } + + type = handler(addr, instr, regs); + + if (type == TYPE_ERROR || type == TYPE_FAULT) + return 1; + + if (type == TYPE_LDST) + do_alignment_finish_ldst(addr, instr, regs, offset); + + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->pc); + arm64_skip_faulting_instruction(regs, isize); + + return 0; +} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c33f1fad2745..5b391490e045 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -691,6 +691,9 @@ static int __kprobes do_translation_fault(unsigned long far, static int do_alignment_fault(unsigned long far, unsigned long esr, struct pt_regs *regs) { + if (IS_ENABLED(CONFIG_COMPAT_ALIGNMENT_FIXUPS) && + compat_user_mode(regs)) + return do_compat_alignment_fixup(far, regs); do_bad_area(far, esr, regs); return 0; } From 7a9bcaaad5f17cc5c4cf882a25cd3efb2b3b8b6c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:46:02 +0100 Subject: [PATCH 002/126] kselftest/arm64: Add simple hwcap validation Add some trivial hwcap validation which checks that /proc/cpuinfo and AT_HWCAP agree with each other and can verify that for extensions that can generate a SIGILL due to adding new instructions one appears or doesn't appear as expected. I've added SVE and SME, other capabilities can be added later if this gets merged. This isn't super exciting but on the other hand took very little time to write and should be handy when verifying that you wired up AT_HWCAP properly. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829154602.827275-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/.gitignore | 1 + tools/testing/selftests/arm64/abi/Makefile | 2 +- tools/testing/selftests/arm64/abi/hwcap.c | 188 +++++++++++++++++++ 3 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/abi/hwcap.c diff --git a/tools/testing/selftests/arm64/abi/.gitignore b/tools/testing/selftests/arm64/abi/.gitignore index b9e54417250d..c31d34d48a30 100644 --- a/tools/testing/selftests/arm64/abi/.gitignore +++ b/tools/testing/selftests/arm64/abi/.gitignore @@ -1,2 +1,3 @@ +hwcap syscall-abi tpidr2 diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile index c8d7f2495eb2..e60752eb8334 100644 --- a/tools/testing/selftests/arm64/abi/Makefile +++ b/tools/testing/selftests/arm64/abi/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2021 ARM Limited -TEST_GEN_PROGS := syscall-abi tpidr2 +TEST_GEN_PROGS := hwcap syscall-abi tpidr2 include ../../lib.mk diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c new file mode 100644 index 000000000000..8b5c646cea63 --- /dev/null +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 ARM Limited. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +#define TESTS_PER_HWCAP 2 + +/* + * Function expected to generate SIGILL when the feature is not + * supported and return when it is supported. If SIGILL is generated + * then the handler must be able to skip over the instruction safely. + * + * Note that it is expected that for many architecture extensions + * there are no specific traps due to no architecture state being + * added so we may not fault if running on a kernel which doesn't know + * to add the hwcap. + */ +typedef void (*sigill_fn)(void); + +static void sme_sigill(void) +{ + /* RDSVL x0, #0 */ + asm volatile(".inst 0x04bf5800" : : : "x0"); +} + +static void sve_sigill(void) +{ + /* RDVL x0, #0 */ + asm volatile(".inst 0x04bf5000" : : : "x0"); +} + +static const struct hwcap_data { + const char *name; + unsigned long at_hwcap; + unsigned long hwcap_bit; + const char *cpuinfo; + sigill_fn sigill_fn; + bool sigill_reliable; +} hwcaps[] = { + { + .name = "SME", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME, + .cpuinfo = "sme", + .sigill_fn = sme_sigill, + .sigill_reliable = true, + }, + { + .name = "SVE", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE, + .cpuinfo = "sve", + .sigill_fn = sve_sigill, + .sigill_reliable = true, + }, +}; + +static bool seen_sigill; + +static void handle_sigill(int sig, siginfo_t *info, void *context) +{ + ucontext_t *uc = context; + + seen_sigill = true; + + /* Skip over the offending instruction */ + uc->uc_mcontext.pc += 4; +} + +bool cpuinfo_present(const char *name) +{ + FILE *f; + char buf[2048], name_space[30], name_newline[30]; + char *s; + + /* + * The feature should appear with a leading space and either a + * trailing space or a newline. + */ + snprintf(name_space, sizeof(name_space), " %s ", name); + snprintf(name_newline, sizeof(name_newline), " %s\n", name); + + f = fopen("/proc/cpuinfo", "r"); + if (!f) { + ksft_print_msg("Failed to open /proc/cpuinfo\n"); + return false; + } + + while (fgets(buf, sizeof(buf), f)) { + /* Features: line? */ + if (strncmp(buf, "Features\t:", strlen("Features\t:")) != 0) + continue; + + /* All CPUs should be symmetric, don't read any more */ + fclose(f); + + s = strstr(buf, name_space); + if (s) + return true; + s = strstr(buf, name_newline); + if (s) + return true; + + return false; + } + + ksft_print_msg("Failed to find Features in /proc/cpuinfo\n"); + fclose(f); + return false; +} + +int main(void) +{ + const struct hwcap_data *hwcap; + int i, ret; + bool have_cpuinfo, have_hwcap; + struct sigaction sa; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP); + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handle_sigill; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + ret = sigaction(SIGILL, &sa, NULL); + if (ret < 0) + ksft_exit_fail_msg("Failed to install SIGILL handler: %s (%d)\n", + strerror(errno), errno); + + for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { + hwcap = &hwcaps[i]; + + have_hwcap = getauxval(hwcaps->at_hwcap) & hwcap->hwcap_bit; + have_cpuinfo = cpuinfo_present(hwcap->cpuinfo); + + if (have_hwcap) + ksft_print_msg("%s present", hwcap->name); + + ksft_test_result(have_hwcap == have_cpuinfo, + "cpuinfo_match_%s\n", hwcap->name); + + if (hwcap->sigill_fn) { + seen_sigill = false; + hwcap->sigill_fn(); + + if (have_hwcap) { + /* Should be able to use the extension */ + ksft_test_result(!seen_sigill, "sigill_%s\n", + hwcap->name); + } else if (hwcap->sigill_reliable) { + /* Guaranteed a SIGILL */ + ksft_test_result(seen_sigill, "sigill_%s\n", + hwcap->name); + } else { + /* Missing SIGILL might be fine */ + ksft_print_msg("SIGILL %sreported for %s\n", + seen_sigill ? "" : "not ", + hwcap->name); + ksft_test_result_skip("sigill_%s\n", + hwcap->name); + } + } else { + ksft_test_result_skip("sigill_%s\n", + hwcap->name); + } + } + + ksft_print_cnts(); + + return 0; +} From 9e40e6272353ea0fde654b548c96e8bbab2fed9f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:44:49 +0100 Subject: [PATCH 003/126] kselftest/arm64: Always encourage preemption for za-test Since we now have an explicit test for the syscall ABI there is no need for za-test to cover getpid() so just unconditionally do sched_yield() like we do in fpsimd-test. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829154452.824870-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/za-test.S | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S index 9ab6f9cd9623..2dd358f9edf2 100644 --- a/tools/testing/selftests/arm64/fp/za-test.S +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -287,12 +287,7 @@ _start: subs x21, x21, #1 b.ne 0b - and x8, x22, #127 // Every 128 interations... - cbz x8, 0f - mov x8, #__NR_getpid // (otherwise minimal syscall) - b 1f -0: - mov x8, #__NR_sched_yield // ...encourage preemption + mov x8, #__NR_sched_yield // encourage preemption 1: svc #0 From 05a5980f7ff50e6c18a9f38ca49521180a333fdf Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:44:50 +0100 Subject: [PATCH 004/126] kselftest/arm64: Count SIGUSR2 deliveries in FP stress tests Currently the floating point stress tests mostly support testing that the data they are checking can be disrupted from a signal handler triggered by SIGUSR1. This is not properly implemented for all the tests and in testing is frequently modified to just handle the signal without corrupting data in order to ensure that signal handling does not corrupt data. Directly support this usage by installing a SIGUSR2 handler which simply counts the signal delivery. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829154452.824870-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/asm-offsets.h | 1 + tools/testing/selftests/arm64/fp/fpsimd-test.S | 15 +++++++++++++++ tools/testing/selftests/arm64/fp/sve-test.S | 15 +++++++++++++++ tools/testing/selftests/arm64/fp/za-test.S | 15 +++++++++++++++ 4 files changed, 46 insertions(+) diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h index a180851496ec..757b2fd75dd7 100644 --- a/tools/testing/selftests/arm64/fp/asm-offsets.h +++ b/tools/testing/selftests/arm64/fp/asm-offsets.h @@ -3,6 +3,7 @@ #define sa_handler 0 #define sa_mask_sz 8 #define SIGUSR1 10 +#define SIGUSR2 12 #define SIGTERM 15 #define SIGINT 2 #define SIGABRT 6 diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index e21e8ea52c7e..f0f92192351a 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -151,6 +151,15 @@ function irritator_handler ret endfunction +function tickle_handler + // Increment the signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + ret +endfunction + function terminate_handler mov w21, w0 mov x20, x2 @@ -255,6 +264,12 @@ _start: orr w2, w2, #SA_NODEFER bl setsignal + mov w0, #SIGUSR2 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + mov x22, #0 // generation number, increments per iteration .Ltest_loop: diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 589264231a2d..79c56e6c5b23 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -314,6 +314,15 @@ function irritator_handler ret endfunction +function tickle_handler + // Increment the signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + ret +endfunction + function terminate_handler mov w21, w0 mov x20, x2 @@ -423,6 +432,12 @@ _start: orr w2, w2, #SA_NODEFER bl setsignal + mov w0, #SIGUSR2 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + #ifdef SSVE smstart_sm // syscalls will have exited streaming mode #endif diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S index 2dd358f9edf2..901839107205 100644 --- a/tools/testing/selftests/arm64/fp/za-test.S +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -167,6 +167,15 @@ function irritator_handler ret endfunction +function tickle_handler + // Increment the signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + ret +endfunction + function terminate_handler mov w21, w0 mov x20, x2 @@ -273,6 +282,12 @@ _start: orr w2, w2, #SA_NODEFER bl setsignal + mov w0, #SIGUSR2 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + mov x22, #0 // generation number, increments per iteration .Ltest_loop: rdsvl 0, 8 From d09ee410a3c30345af87f46dafd2ae308b17c0af Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:25:01 +0100 Subject: [PATCH 005/126] arm64/sve: Document our actual ABI for clearing registers on syscall Currently our ABI documentation says that the state of the bits in the Z registers not shared with the V registers becomes undefined on syscall but our actual implementation unconditionally clears these bits. Taking advantage of the flexibility of our documented ABI would be a change in the observable ABI so there is concern around doing so, instead document the actual behaviour so that it is more discoverable for userspace programmers who might be able to take advantage of it and to record our decision about not changing the kernel ABI. This makes qemu's user mode implementation buggy since it does not clear these bits. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220829162502.886816-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arm64/sve.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arm64/sve.rst b/Documentation/arm64/sve.rst index 93c2c2990584..e39acf95d157 100644 --- a/Documentation/arm64/sve.rst +++ b/Documentation/arm64/sve.rst @@ -111,7 +111,7 @@ the SVE instruction set architecture. * On syscall, V0..V31 are preserved (as without SVE). Thus, bits [127:0] of Z0..Z31 are preserved. All other bits of Z0..Z31, and all of P0..P15 and FFR - become unspecified on return from a syscall. + become zero on return from a syscall. * The SVE registers are not used to pass arguments to or receive results from any syscall. From 81ff692ad924da2233381bedff90c5c1f5c31368 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:48:15 +0100 Subject: [PATCH 006/126] arm64/sysreg: Add hwcap for SVE EBF16 SVE has a separate identification register indicating support for BFloat16 operations. Add a hwcap identifying support for EBF16 in this register, mirroring what we did for the non-SVE case. While there is currently an architectural requirement for BF16 support to be the same in SVE and non-SVE contexts there are separate identification registers this separate hwcap helps avoid issues if that requirement were to be relaxed in the future, we have already chosen to have a separate capability for base BF16 support. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829154815.832347-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arm64/elf_hwcaps.rst | 3 +++ arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/cpuinfo.c | 1 + 5 files changed, 7 insertions(+) diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 311021f2e560..bb34287c8e01 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -272,6 +272,9 @@ HWCAP2_WFXT HWCAP2_EBF16 Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010. +HWCAP2_SVE_EBF16 + Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0010. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index cef4ae7a3d8b..298b386d3ebe 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -119,6 +119,7 @@ #define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64) #define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT) #define KERNEL_HWCAP_EBF16 __khwcap2_feature(EBF16) +#define KERNEL_HWCAP_SVE_EBF16 __khwcap2_feature(SVE_EBF16) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 1ad2568a2569..9b245da6f507 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -92,5 +92,6 @@ #define HWCAP2_SME_FA64 (1 << 30) #define HWCAP2_WFXT (1UL << 31) #define HWCAP2_EBF16 (1UL << 32) +#define HWCAP2_SVE_EBF16 (1UL << 33) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index af4de817d712..2a59d6a875bc 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2733,6 +2733,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BitPerm_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BF16_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BF16_EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SHA3_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SM4_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_I8MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index d7702f39b4d3..28d4f442b0bc 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -115,6 +115,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_FA64] = "smefa64", [KERNEL_HWCAP_WFXT] = "wfxt", [KERNEL_HWCAP_EBF16] = "ebf16", + [KERNEL_HWCAP_SVE_EBF16] = "sveebf16", }; #ifdef CONFIG_COMPAT From d47d8a5e21fc7f83bccc94156dfe6192e3f57724 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 6 Sep 2022 23:00:56 +0100 Subject: [PATCH 007/126] kselftest/arm64: Install signal handlers before output in FP stress tests To interface more robustly with other processes install the signal handers in the floating point stress tests before we produce any output, this means that a parent process can know that if it has seen any output from the test then the test is ready to handle incoming signals. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220906220056.820295-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- .../testing/selftests/arm64/fp/fpsimd-test.S | 48 +++++++++---------- tools/testing/selftests/arm64/fp/sve-test.S | 48 +++++++++---------- tools/testing/selftests/arm64/fp/za-test.S | 48 +++++++++---------- 3 files changed, 72 insertions(+), 72 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index f0f92192351a..918d04885a33 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -216,6 +216,30 @@ endfunction .globl _start function _start _start: + mov x23, #0 // signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov w0, #SIGUSR2 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + // Sanity-check and report the vector length mov x19, #128 @@ -246,30 +270,6 @@ _start: mov x0, x20 bl putdecn - mov x23, #0 // Irritation signal count - - mov w0, #SIGINT - adr x1, terminate_handler - mov w2, #SA_SIGINFO - bl setsignal - - mov w0, #SIGTERM - adr x1, terminate_handler - mov w2, #SA_SIGINFO - bl setsignal - - mov w0, #SIGUSR1 - adr x1, irritator_handler - mov w2, #SA_SIGINFO - orr w2, w2, #SA_NODEFER - bl setsignal - - mov w0, #SIGUSR2 - adr x1, tickle_handler - mov w2, #SA_SIGINFO - orr w2, w2, #SA_NODEFER - bl setsignal - mov x22, #0 // generation number, increments per iteration .Ltest_loop: diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 79c56e6c5b23..2a18cb4c528c 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -379,6 +379,30 @@ endfunction .globl _start function _start _start: + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov w0, #SIGUSR2 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + #ifdef SSVE puts "Streaming mode " smstart_sm @@ -414,30 +438,6 @@ _start: mov x0, x20 bl putdecn - mov x23, #0 // Irritation signal count - - mov w0, #SIGINT - adr x1, terminate_handler - mov w2, #SA_SIGINFO - bl setsignal - - mov w0, #SIGTERM - adr x1, terminate_handler - mov w2, #SA_SIGINFO - bl setsignal - - mov w0, #SIGUSR1 - adr x1, irritator_handler - mov w2, #SA_SIGINFO - orr w2, w2, #SA_NODEFER - bl setsignal - - mov w0, #SIGUSR2 - adr x1, tickle_handler - mov w2, #SA_SIGINFO - orr w2, w2, #SA_NODEFER - bl setsignal - #ifdef SSVE smstart_sm // syscalls will have exited streaming mode #endif diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S index 901839107205..53c54af65704 100644 --- a/tools/testing/selftests/arm64/fp/za-test.S +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -232,6 +232,30 @@ endfunction .globl _start function _start _start: + mov x23, #0 // signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov w0, #SIGUSR2 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + puts "Streaming mode " smstart_za @@ -264,30 +288,6 @@ _start: mov x0, x20 bl putdecn - mov x23, #0 // Irritation signal count - - mov w0, #SIGINT - adr x1, terminate_handler - mov w2, #SA_SIGINFO - bl setsignal - - mov w0, #SIGTERM - adr x1, terminate_handler - mov w2, #SA_SIGINFO - bl setsignal - - mov w0, #SIGUSR1 - adr x1, irritator_handler - mov w2, #SA_SIGINFO - orr w2, w2, #SA_NODEFER - bl setsignal - - mov w0, #SIGUSR2 - adr x1, tickle_handler - mov w2, #SA_SIGINFO - orr w2, w2, #SA_NODEFER - bl setsignal - mov x22, #0 // generation number, increments per iteration .Ltest_loop: rdsvl 0, 8 From fd5c2c6f08b10a5f5c85261f580075bc8aa08d4d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:44:52 +0100 Subject: [PATCH 008/126] kselftest/arm64: kselftest harness for FP stress tests Currently the stress test programs for floating point context switching are run by hand, there are extremely simplistic harnesses which run some copies of each test individually but they are not integrated into kselftest and with SVE and SME they only run with whatever vector length the process has by default. This is hassle when running the tests and means that they're not being run at all by CI systems picking up kselftest. In order to improve our coverage and provide a more convenient interface provide a harness program which starts enough stress test programs up to cause context switching and runs them for a set period. If only FPSIMD is available in the system we start two copies of the FPSIMD stress test per CPU, otherwise we start one copy of the FPSIMD and then start the SVE, streaming SVE and ZA tests once per CPU for each available VL they have to run on. We then run for a set period monitoring for any errors reported by the test programs before cleanly terminating them. In order to provide additional coverage of signal handling and some extra noise in the scheduling we send a SIGUSR2 to the stress tests once a second, the tests will count the number of signals they get. Since kselftest is generally expected to run quickly we by default only run for ten seconds. This is enough to show if there is anything cripplingly wrong but not exactly a thorough soak test, for interactive and more focused use a command line option -t N is provided which overrides the length of time to run for (specified in seconds) and if 0 is specified then there is no timeout and the test must be manually terminated. The timeout is counted in seconds with no output, this is done to account for the potentially slow startup time for the test programs on virtual platforms which tend to struggle during startup as they are both slow and tend to support a wide range of vector lengths. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829154452.824870-5-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/.gitignore | 1 + tools/testing/selftests/arm64/fp/Makefile | 5 +- tools/testing/selftests/arm64/fp/fp-stress.c | 535 +++++++++++++++++++ 3 files changed, 540 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/fp/fp-stress.c diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore index ea947af63882..df79d29664a1 100644 --- a/tools/testing/selftests/arm64/fp/.gitignore +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -1,4 +1,5 @@ fp-pidbench +fp-stress fpsimd-test rdvl-sme rdvl-sve diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index a7c2286bf65b..36db61358ed5 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -5,7 +5,10 @@ top_srcdir = $(realpath ../../../../../) CFLAGS += -I$(top_srcdir)/usr/include/ -TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg za-fork za-ptrace +TEST_GEN_PROGS := fp-stress \ + sve-ptrace sve-probe-vls \ + vec-syscfg \ + za-fork za-ptrace TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \ rdvl-sme rdvl-sve \ sve-test \ diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c new file mode 100644 index 000000000000..01cef1962ab5 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -0,0 +1,535 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 ARM Limited. + */ + +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 199309L + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +#define MAX_VLS 16 + +struct child_data { + char *name, *output; + pid_t pid; + int stdout; + bool output_seen; + bool exited; + int exit_status; +}; + +static int epoll_fd; +static struct child_data *children; +static int num_children; +static bool terminate; + +static void drain_output(bool flush); + +static int num_processors(void) +{ + long nproc = sysconf(_SC_NPROCESSORS_CONF); + if (nproc < 0) { + perror("Unable to read number of processors\n"); + exit(EXIT_FAILURE); + } + + return nproc; +} + +static void child_start(struct child_data *child, const char *program) +{ + int ret, pipefd[2], i; + struct epoll_event ev; + + ret = pipe(pipefd); + if (ret != 0) + ksft_exit_fail_msg("Failed to create stdout pipe: %s (%d)\n", + strerror(errno), errno); + + child->pid = fork(); + if (child->pid == -1) + ksft_exit_fail_msg("fork() failed: %s (%d)\n", + strerror(errno), errno); + + if (!child->pid) { + /* + * In child, replace stdout with the pipe, errors to + * stderr from here as kselftest prints to stdout. + */ + ret = dup2(pipefd[1], 1); + if (ret == -1) { + fprintf(stderr, "dup2() %d\n", errno); + exit(EXIT_FAILURE); + } + + /* + * Very dumb mechanism to clean open FDs other than + * stdio. We don't want O_CLOEXEC for the pipes... + */ + for (i = 3; i < 8192; i++) + close(i); + + ret = execl(program, program, NULL); + fprintf(stderr, "execl(%s) failed: %d (%s)\n", + program, errno, strerror(errno)); + + exit(EXIT_FAILURE); + } else { + /* + * In parent, remember the child and close our copy of the + * write side of stdout. + */ + close(pipefd[1]); + child->stdout = pipefd[0]; + child->output = NULL; + child->exited = false; + child->output_seen = false; + + ev.events = EPOLLIN | EPOLLHUP; + ev.data.ptr = child; + + ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, child->stdout, &ev); + if (ret < 0) { + ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n", + child->name, strerror(errno), errno); + } + + /* + * Keep output flowing during child startup so logs + * are more timely, can help debugging. + */ + drain_output(false); + } +} + +static void child_output(struct child_data *child, uint32_t events, + bool flush) +{ + char read_data[1024]; + char work[1024]; + int ret, len, cur_work, cur_read; + + if (events & EPOLLIN) { + ret = read(child->stdout, read_data, sizeof(read_data)); + if (ret < 0) { + ksft_print_msg("%s: read() failed: %s (%d)\n", + child->name, strerror(errno), errno); + return; + } + len = ret; + + child->output_seen = true; + + /* Pick up any partial read */ + if (child->output) { + strncpy(work, child->output, sizeof(work) - 1); + cur_work = strnlen(work, sizeof(work)); + free(child->output); + child->output = NULL; + } else { + cur_work = 0; + } + + cur_read = 0; + while (cur_read < len) { + work[cur_work] = read_data[cur_read++]; + + if (work[cur_work] == '\n') { + work[cur_work] = '\0'; + ksft_print_msg("%s: %s\n", child->name, work); + cur_work = 0; + } else { + cur_work++; + } + } + + if (cur_work) { + work[cur_work] = '\0'; + ret = asprintf(&child->output, "%s", work); + if (ret == -1) + ksft_exit_fail_msg("Out of memory\n"); + } + } + + if (events & EPOLLHUP) { + close(child->stdout); + child->stdout = -1; + flush = true; + } + + if (flush && child->output) { + ksft_print_msg("%s: %s\n", child->name, child->output); + free(child->output); + child->output = NULL; + } +} + +static void child_tickle(struct child_data *child) +{ + if (child->output_seen && !child->exited) + kill(child->pid, SIGUSR2); +} + +static void child_stop(struct child_data *child) +{ + if (!child->exited) + kill(child->pid, SIGTERM); +} + +static void child_cleanup(struct child_data *child) +{ + pid_t ret; + int status; + bool fail = false; + + if (!child->exited) { + do { + ret = waitpid(child->pid, &status, 0); + if (ret == -1 && errno == EINTR) + continue; + + if (ret == -1) { + ksft_print_msg("waitpid(%d) failed: %s (%d)\n", + child->pid, strerror(errno), + errno); + fail = true; + break; + } + } while (!WIFEXITED(status)); + child->exit_status = WEXITSTATUS(status); + } + + if (!child->output_seen) { + ksft_print_msg("%s no output seen\n", child->name); + fail = true; + } + + if (child->exit_status != 0) { + ksft_print_msg("%s exited with error code %d\n", + child->name, child->exit_status); + fail = true; + } + + ksft_test_result(!fail, "%s\n", child->name); +} + +static void handle_child_signal(int sig, siginfo_t *info, void *context) +{ + int i; + bool found = false; + + for (i = 0; i < num_children; i++) { + if (children[i].pid == info->si_pid) { + children[i].exited = true; + children[i].exit_status = info->si_status; + found = true; + break; + } + } + + if (!found) + ksft_print_msg("SIGCHILD for unknown PID %d with status %d\n", + info->si_pid, info->si_status); +} + +static void handle_exit_signal(int sig, siginfo_t *info, void *context) +{ + int i; + + ksft_print_msg("Got signal, exiting...\n"); + + terminate = true; + + /* + * This should be redundant, the main loop should clean up + * after us, but for safety stop everything we can here. + */ + for (i = 0; i < num_children; i++) + child_stop(&children[i]); +} + +static void start_fpsimd(struct child_data *child, int cpu, int copy) +{ + int ret; + + child_start(child, "./fpsimd-test"); + + ret = asprintf(&child->name, "FPSIMD-%d-%d", cpu, copy); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + ksft_print_msg("Started %s\n", child->name); +} + +static void start_sve(struct child_data *child, int vl, int cpu) +{ + int ret; + + ret = prctl(PR_SVE_SET_VL, vl | PR_SVE_VL_INHERIT); + if (ret < 0) + ksft_exit_fail_msg("Failed to set SVE VL %d\n", vl); + + child_start(child, "./sve-test"); + + ret = asprintf(&child->name, "SVE-VL-%d-%d", vl, cpu); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + ksft_print_msg("Started %s\n", child->name); +} + +static void start_ssve(struct child_data *child, int vl, int cpu) +{ + int ret; + + ret = prctl(PR_SME_SET_VL, vl | PR_SME_VL_INHERIT); + if (ret < 0) + ksft_exit_fail_msg("Failed to set SME VL %d\n", ret); + + child_start(child, "./ssve-test"); + + ret = asprintf(&child->name, "SSVE-VL-%d-%d", vl, cpu); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + ksft_print_msg("Started %s\n", child->name); +} + +static void start_za(struct child_data *child, int vl, int cpu) +{ + int ret; + + ret = prctl(PR_SME_SET_VL, vl | PR_SVE_VL_INHERIT); + if (ret < 0) + ksft_exit_fail_msg("Failed to set SME VL %d\n", ret); + + child_start(child, "./za-test"); + + ret = asprintf(&child->name, "ZA-VL-%d-%d", vl, cpu); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + ksft_print_msg("Started %s\n", child->name); +} + +static void probe_vls(int vls[], int *vl_count, int set_vl) +{ + unsigned int vq; + int vl; + + *vl_count = 0; + + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(set_vl, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SVE_VL_LEN_MASK; + + vq = sve_vq_from_vl(vl); + + vls[*vl_count] = vl; + *vl_count += 1; + } +} + +/* Handle any pending output without blocking */ +static void drain_output(bool flush) +{ + struct epoll_event ev; + int ret = 1; + + while (ret > 0) { + ret = epoll_wait(epoll_fd, &ev, 1, 0); + if (ret < 0) { + if (errno == EINTR) + continue; + ksft_print_msg("epoll_wait() failed: %s (%d)\n", + strerror(errno), errno); + } + + if (ret == 1) + child_output(ev.data.ptr, ev.events, flush); + } +} + +static const struct option options[] = { + { "timeout", required_argument, NULL, 't' }, + { } +}; + +int main(int argc, char **argv) +{ + int ret; + int timeout = 10; + int cpus, tests, i, j, c; + int sve_vl_count, sme_vl_count, fpsimd_per_cpu; + int sve_vls[MAX_VLS], sme_vls[MAX_VLS]; + struct epoll_event ev; + struct sigaction sa; + + while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) { + switch (c) { + case 't': + ret = sscanf(optarg, "%d", &timeout); + if (ret != 1) + ksft_exit_fail_msg("Failed to parse timeout %s\n", + optarg); + break; + default: + ksft_exit_fail_msg("Unknown argument\n"); + } + } + + cpus = num_processors(); + tests = 0; + + if (getauxval(AT_HWCAP) & HWCAP_SVE) { + probe_vls(sve_vls, &sve_vl_count, PR_SVE_SET_VL); + tests += sve_vl_count * cpus; + } else { + sve_vl_count = 0; + } + + if (getauxval(AT_HWCAP2) & HWCAP2_SME) { + probe_vls(sme_vls, &sme_vl_count, PR_SME_SET_VL); + tests += sme_vl_count * cpus * 2; + } else { + sme_vl_count = 0; + } + + /* Force context switching if we only have FPSIMD */ + if (!sve_vl_count && !sme_vl_count) + fpsimd_per_cpu = 2; + else + fpsimd_per_cpu = 1; + tests += cpus * fpsimd_per_cpu; + + ksft_print_header(); + ksft_set_plan(tests); + + ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs\n", + cpus, sve_vl_count, sme_vl_count); + + if (timeout > 0) + ksft_print_msg("Will run for %ds\n", timeout); + else + ksft_print_msg("Will run until terminated\n"); + + children = calloc(sizeof(*children), tests); + if (!children) + ksft_exit_fail_msg("Unable to allocate child data\n"); + + ret = epoll_create1(EPOLL_CLOEXEC); + if (ret < 0) + ksft_exit_fail_msg("epoll_create1() failed: %s (%d)\n", + strerror(errno), ret); + epoll_fd = ret; + + /* Get signal handers ready before we start any children */ + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handle_exit_signal; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + ret = sigaction(SIGINT, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGINT handler: %s (%d)\n", + strerror(errno), errno); + ret = sigaction(SIGTERM, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGTEM handler: %s (%d)\n", + strerror(errno), errno); + sa.sa_sigaction = handle_child_signal; + ret = sigaction(SIGCHLD, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n", + strerror(errno), errno); + + for (i = 0; i < cpus; i++) { + for (j = 0; j < fpsimd_per_cpu; j++) + start_fpsimd(&children[num_children++], i, j); + + for (j = 0; j < sve_vl_count; j++) + start_sve(&children[num_children++], sve_vls[j], i); + + for (j = 0; j < sme_vl_count; j++) { + start_ssve(&children[num_children++], sme_vls[j], i); + start_za(&children[num_children++], sme_vls[j], i); + } + } + + for (;;) { + /* Did we get a signal asking us to exit? */ + if (terminate) + break; + + /* + * Timeout is counted in seconds with no output, the + * tests print during startup then are silent when + * running so this should ensure they all ran enough + * to install the signal handler, this is especially + * useful in emulation where we will both be slow and + * likely to have a large set of VLs. + */ + ret = epoll_wait(epoll_fd, &ev, 1, 1000); + if (ret < 0) { + if (errno == EINTR) + continue; + ksft_exit_fail_msg("epoll_wait() failed: %s (%d)\n", + strerror(errno), errno); + } + + /* Output? */ + if (ret == 1) { + child_output(ev.data.ptr, ev.events, false); + continue; + } + + /* Otherwise epoll_wait() timed out */ + + for (i = 0; i < num_children; i++) + child_tickle(&children[i]); + + /* Negative timeout means run indefinitely */ + if (timeout < 0) + continue; + if (--timeout == 0) + break; + } + + ksft_print_msg("Finishing up...\n"); + + for (i = 0; i < tests; i++) + child_stop(&children[i]); + + drain_output(false); + + for (i = 0; i < tests; i++) + child_cleanup(&children[i]); + + drain_output(true); + + ksft_print_cnts(); + + return 0; +} From b95b07d1c4d48e462a6d2e7b1b11f03f8626bf11 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:57:27 +0100 Subject: [PATCH 009/126] kselftest/arm64: Tighten up validation of ZA signal context Currently we accept any size for the ZA signal context that the shared code will accept which means we don't verify that any data is present. Since we have enabled ZA we know that there must be data so strengthen the check to only accept a signal frame with data, and while we're at it since we enabled ZA but did not set any data we know that ZA must contain zeros, confirm that. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829155728.854947-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- .../selftests/arm64/signal/testcases/za_regs.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c index b94e4f99fcac..2514cb7de525 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -52,6 +52,8 @@ static void setup_za_regs(void) asm volatile(".inst 0xd503457f" : : : ); } +static char zeros[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)]; + static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, unsigned int vl) { @@ -87,10 +89,22 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, return 1; } - /* The actual size validation is done in get_current_context() */ + if (head->size != ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(vl))) { + fprintf(stderr, "ZA context size %u, expected %lu\n", + head->size, ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(vl))); + return 1; + } + fprintf(stderr, "Got expected size %u and VL %d\n", head->size, za->vl); + /* We didn't load any data into ZA so it should be all zeros */ + if (memcmp(zeros, (char *)za + ZA_SIG_REGS_OFFSET, + ZA_SIG_REGS_SIZE(sve_vq_from_vl(za->vl))) != 0) { + fprintf(stderr, "ZA data invalid\n"); + return 1; + } + return 0; } From 0f40caf7ebfe3d4773fc37bc842690233d03c7c9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:57:28 +0100 Subject: [PATCH 010/126] kselftest/arm64: Add a test for signal frames with ZA disabled When ZA is disabled there should be no register data in the ZA signal frame, add a test case which confirms that this is the case. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829155728.854947-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- .../arm64/signal/testcases/za_no_regs.c | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/za_no_regs.c diff --git a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c new file mode 100644 index 000000000000..4d6f94b6178f --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Verify that the ZA register context in signal frames is set up as + * expected. + */ + +#include +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 128]; +} context; +static unsigned int vls[SVE_VQ_MAX]; +unsigned int nvls = 0; + +static bool sme_get_vls(struct tdescr *td) +{ + int vq, vl; + + /* + * Enumerate up to SME_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SME_SET_VL, vq * 16); + if (vl == -1) + return false; + + vl &= PR_SME_VL_LEN_MASK; + + /* Skip missing VLs */ + vq = sve_vq_from_vl(vl); + + vls[nvls++] = vl; + } + + /* We need at least one VL */ + if (nvls < 1) { + fprintf(stderr, "Only %d VL supported\n", nvls); + return false; + } + + return true; +} + +static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, + unsigned int vl) +{ + size_t offset; + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + struct za_context *za; + + fprintf(stderr, "Testing VL %d\n", vl); + + if (prctl(PR_SME_SET_VL, vl) != vl) { + fprintf(stderr, "Failed to set VL\n"); + return 1; + } + + /* + * Get a signal context which should have a SVE frame and registers + * in it. + */ + if (!get_current_context(td, &context.uc, sizeof(context))) + return 1; + + head = get_header(head, ZA_MAGIC, GET_BUF_RESV_SIZE(context), &offset); + if (!head) { + fprintf(stderr, "No ZA context\n"); + return 1; + } + + za = (struct za_context *)head; + if (za->vl != vl) { + fprintf(stderr, "Got VL %d, expected %d\n", za->vl, vl); + return 1; + } + + if (head->size != ZA_SIG_REGS_OFFSET) { + fprintf(stderr, "Context size %u, expected %lu\n", + head->size, ZA_SIG_REGS_OFFSET); + return 1; + } + + /* The actual size validation is done in get_current_context() */ + fprintf(stderr, "Got expected size %u and VL %d\n", + head->size, za->vl); + + return 0; +} + +static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + int i; + + for (i = 0; i < nvls; i++) { + if (do_one_sme_vl(td, si, uc, vls[i])) + return 1; + } + + td->pass = 1; + + return 0; +} + +struct tdescr tde = { + .name = "ZA registers - ZA disabled", + .descr = "Check ZA context with ZA disabled", + .feats_required = FEAT_SME, + .timeout = 3, + .init = sme_get_vls, + .run = sme_regs, +}; From 647d0933809a3e894a9cf989bc32e28244cb2c5d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:06:54 +0100 Subject: [PATCH 011/126] kselftest/arm64: Enumerate SME rather than SVE vector lengths for za_regs The za_regs signal test was enumerating the SVE vector lengths rather than the SME vector lengths through cut'n'paste error when determining what to test. Enumerate the SME vector lengths instead. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829160703.874492-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/signal/testcases/za_regs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c index 2514cb7de525..6f0c1582f541 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -22,10 +22,10 @@ static bool sme_get_vls(struct tdescr *td) int vq, vl; /* - * Enumerate up to SVE_VQ_MAX vector lengths + * Enumerate up to SME_VQ_MAX vector lengths */ for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SVE_SET_VL, vq * 16); + vl = prctl(PR_SME_SET_VL, vq * 16); if (vl == -1) return false; From e40422c94e6a3b64d1092dcdbe1aeb4011ac3d69 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:06:55 +0100 Subject: [PATCH 012/126] kselftest/arm64: Validate signal ucontext in place In handle_input_signal_copyctx() we use ASSERT_GOOD_CONTEXT() to validate that the context we are saving meets expectations however we do this on the saved copy rather than on the actual signal context passed in. This breaks validation of EXTRA_CONTEXT since we attempt to validate the ABI requirement that the additional space supplied is immediately after the termination record in the standard context which will not be the case after it has been copied to another location. Fix this by doing the validation before we copy. Note that nothing actually looks inside the EXTRA_CONTEXT at present. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829160703.874492-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- .../testing/selftests/arm64/signal/test_signals_utils.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index b588d10afd5b..a54dc1b6f35c 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -165,12 +165,15 @@ static bool handle_signal_ok(struct tdescr *td, } static bool handle_signal_copyctx(struct tdescr *td, - siginfo_t *si, void *uc) + siginfo_t *si, void *uc_in) { + ucontext_t *uc = uc_in; + + ASSERT_GOOD_CONTEXT(uc); + /* Mangling PC to avoid loops on original BRK instr */ - ((ucontext_t *)uc)->uc_mcontext.pc += 4; + uc->uc_mcontext.pc += 4; memcpy(td->live_uc, uc, td->live_sz); - ASSERT_GOOD_CONTEXT(td->live_uc); td->live_uc_valid = 1; fprintf(stderr, "GOOD CONTEXT grabbed from sig_copyctx handler\n"); From 5c152c2f66f9368394b89ac90dc7483476ef7b88 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:06:56 +0100 Subject: [PATCH 013/126] kselftest/arm64: Fix validatation termination record after EXTRA_CONTEXT When arm64 signal context data overflows the base struct sigcontext it gets placed in an extra buffer pointed to by a record of type EXTRA_CONTEXT in the base struct sigcontext which is required to be the last record in the base struct sigframe. The current validation code attempts to check this by using GET_RESV_NEXT_HEAD() to step forward from the current record to the next but that is a macro which assumes it is being provided with a struct _aarch64_ctx and uses the size there to skip forward to the next record. Instead validate_extra_context() passes it a struct extra_context which has a separate size field. This compiles but results in us trying to validate a termination record in completely the wrong place, at best failing validation and at worst just segfaulting. Fix this by passing the struct _aarch64_ctx we meant to into the macro. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829160703.874492-4-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/signal/testcases/testcases.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index 84c36bee4d82..d98828cb542b 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -33,7 +33,7 @@ bool validate_extra_context(struct extra_context *extra, char **err) return false; fprintf(stderr, "Validating EXTRA...\n"); - term = GET_RESV_NEXT_HEAD(extra); + term = GET_RESV_NEXT_HEAD(&extra->head); if (!term || term->magic || term->size) { *err = "Missing terminator after EXTRA context"; return false; From b721c8237fe39a0e3b653f47add3156f7cdccef2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:06:57 +0100 Subject: [PATCH 014/126] kselftest/arm64: Fix validation of EXTRA_CONTEXT signal context location Currently in validate_extra_context() we assert both that the extra data pointed to by the EXTRA_CONTEXT is 16 byte aligned and that it immediately follows the struct _aarch64_ctx providing the terminator for the linked list of contexts in the signal frame. Since struct _aarch64_ctx is an 8 byte structure which must be 16 byte aligned these cannot both be true. As documented in sigcontext.h and implemented by the kernel the extra data should be at the next 16 byte aligned address after the terminator so fix the validation to match. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829160703.874492-5-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/signal/testcases/testcases.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index d98828cb542b..16dc916939f9 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -42,7 +42,7 @@ bool validate_extra_context(struct extra_context *extra, char **err) *err = "Extra DATAP misaligned"; else if (extra->size & 0x0fUL) *err = "Extra SIZE misaligned"; - else if (extra->datap != (uint64_t)term + sizeof(*term)) + else if (extra->datap != (uint64_t)term + 0x10UL) *err = "Extra DATAP misplaced (not contiguous)"; if (*err) return false; From 2384cd2b7e68890fcff8598ead37b3eff40aaaa1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:06:58 +0100 Subject: [PATCH 015/126] kselftest/arm64: Remove unneeded protype for validate_extra_context() Nothing outside testcases.c should need to use validate_extra_context(), remove the prototype to ensure nothing does. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829160703.874492-6-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/signal/testcases/testcases.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index 49f1d5de7b5b..b39f538c7be1 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -79,8 +79,6 @@ struct fake_sigframe { bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err); -bool validate_extra_context(struct extra_context *extra, char **err); - struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, size_t resv_sz, size_t *offset); From 1998c823c535643c59135a48242d1f933c379d5b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:06:59 +0100 Subject: [PATCH 016/126] kselftest/arm64: Only validate each signal context once Currently for the more complex signal context types we validate the context specific details the end of the parsing loop validate_reserved() if we've ever seen a context of that type. This is currently merely a bit inefficient but will get a bit awkward when we start parsing extra_context, at which point we will need to reset the head to advance into the extra space that extra_context provides. Instead only do the more detailed checks on each context type the first time we see that context type. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829160703.874492-7-broonie@kernel.org Signed-off-by: Catalin Marinas --- .../arm64/signal/testcases/testcases.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index 16dc916939f9..0b3c9b4b1d39 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -105,6 +105,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) bool terminated = false; size_t offs = 0; int flags = 0; + int new_flags; struct extra_context *extra = NULL; struct sve_context *sve = NULL; struct za_context *za = NULL; @@ -120,6 +121,8 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) return false; } + new_flags = 0; + switch (head->magic) { case 0: if (head->size) @@ -133,7 +136,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) else if (head->size != sizeof(struct fpsimd_context)) *err = "Bad size for fpsimd_context"; - flags |= FPSIMD_CTX; + new_flags |= FPSIMD_CTX; break; case ESR_MAGIC: if (head->size != sizeof(struct esr_context)) @@ -144,14 +147,14 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) *err = "Multiple SVE_MAGIC"; /* Size is validated in validate_sve_context() */ sve = (struct sve_context *)head; - flags |= SVE_CTX; + new_flags |= SVE_CTX; break; case ZA_MAGIC: if (flags & ZA_CTX) *err = "Multiple ZA_MAGIC"; /* Size is validated in validate_za_context() */ za = (struct za_context *)head; - flags |= ZA_CTX; + new_flags |= ZA_CTX; break; case EXTRA_MAGIC: if (flags & EXTRA_CTX) @@ -159,7 +162,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) else if (head->size != sizeof(struct extra_context)) *err = "Bad size for extra_context"; - flags |= EXTRA_CTX; + new_flags |= EXTRA_CTX; extra = (struct extra_context *)head; break; case KSFT_BAD_MAGIC: @@ -192,16 +195,18 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) return false; } - if (flags & EXTRA_CTX) + if (new_flags & EXTRA_CTX) if (!validate_extra_context(extra, err)) return false; - if (flags & SVE_CTX) + if (new_flags & SVE_CTX) if (!validate_sve_context(sve, err)) return false; - if (flags & ZA_CTX) + if (new_flags & ZA_CTX) if (!validate_za_context(za, err)) return false; + flags |= new_flags; + head = GET_RESV_NEXT_HEAD(head); } From ff89a81903d3c206ed90cfe60dce900f3b7be65b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:07:00 +0100 Subject: [PATCH 017/126] kselftest/arm64: Validate contents of EXTRA_CONTEXT blocks Currently in validate_reserved() we check the basic form and contents of an EXTRA_CONTEXT block but do not actually validate anything inside the data block it provides. Extend the validation to do so, when we get to the terminator for the main data block reset and start walking the extra data block instead. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829160703.874492-8-broonie@kernel.org Signed-off-by: Catalin Marinas --- .../arm64/signal/testcases/testcases.c | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index 0b3c9b4b1d39..e1c625b20ac4 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -25,7 +25,8 @@ struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, return found; } -bool validate_extra_context(struct extra_context *extra, char **err) +bool validate_extra_context(struct extra_context *extra, char **err, + void **extra_data, size_t *extra_size) { struct _aarch64_ctx *term; @@ -47,6 +48,9 @@ bool validate_extra_context(struct extra_context *extra, char **err) if (*err) return false; + *extra_data = (void *)extra->datap; + *extra_size = extra->size; + return true; } @@ -111,6 +115,8 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) struct za_context *za = NULL; struct _aarch64_ctx *head = (struct _aarch64_ctx *)uc->uc_mcontext.__reserved; + void *extra_data = NULL; + size_t extra_sz = 0; if (!err) return false; @@ -125,10 +131,20 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) switch (head->magic) { case 0: - if (head->size) + if (head->size) { *err = "Bad size for terminator"; - else + } else if (extra_data) { + /* End of main data, walking the extra data */ + head = extra_data; + resv_sz = extra_sz; + offs = 0; + + extra_data = NULL; + extra_sz = 0; + continue; + } else { terminated = true; + } break; case FPSIMD_MAGIC: if (flags & FPSIMD_CTX) @@ -196,7 +212,8 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) } if (new_flags & EXTRA_CTX) - if (!validate_extra_context(extra, err)) + if (!validate_extra_context(extra, err, + &extra_data, &extra_sz)) return false; if (new_flags & SVE_CTX) if (!validate_sve_context(sve, err)) From 2fa1116bb491edd30814daa0cd0c9d3a76ba4f4d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:07:01 +0100 Subject: [PATCH 018/126] kselftest/arm64: Preserve any EXTRA_CONTEXT in handle_signal_copyctx() When preserving the signal context for later verification by testcases check for and include any EXTRA_CONTEXT block if enough space has been provided. Since the EXTRA_CONTEXT block includes a pointer to the start of the additional data block we need to do at least some fixup on the copied data. For simplicity in users we do this by extending the length of the EXTRA_CONTEXT to include the following termination record, this will cause users to see the extra data as part of the linked list of contexts without needing any special handling. Care will be needed if any specific tests for EXTRA_CONTEXT are added beyond the validation done in ASSERT_GOOD_CONTEXT. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829160703.874492-9-broonie@kernel.org Signed-off-by: Catalin Marinas --- .../arm64/signal/test_signals_utils.c | 50 ++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index a54dc1b6f35c..308e229e58ab 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -168,15 +168,61 @@ static bool handle_signal_copyctx(struct tdescr *td, siginfo_t *si, void *uc_in) { ucontext_t *uc = uc_in; + struct _aarch64_ctx *head; + struct extra_context *extra, *copied_extra; + size_t offset = 0; + size_t to_copy; ASSERT_GOOD_CONTEXT(uc); /* Mangling PC to avoid loops on original BRK instr */ uc->uc_mcontext.pc += 4; - memcpy(td->live_uc, uc, td->live_sz); + + /* + * Check for an preserve any extra data too with fixups. + */ + head = (struct _aarch64_ctx *)uc->uc_mcontext.__reserved; + head = get_header(head, EXTRA_MAGIC, td->live_sz, &offset); + if (head) { + extra = (struct extra_context *)head; + + /* + * The extra buffer must be immediately after the + * extra_context and a 16 byte terminator. Include it + * in the copy, this was previously validated in + * ASSERT_GOOD_CONTEXT(). + */ + to_copy = offset + sizeof(struct extra_context) + 16 + + extra->size; + copied_extra = (struct extra_context *)&(td->live_uc->uc_mcontext.__reserved[offset]); + } else { + copied_extra = NULL; + to_copy = sizeof(ucontext_t); + } + + if (to_copy > td->live_sz) { + fprintf(stderr, + "Not enough space to grab context, %lu/%lu bytes\n", + td->live_sz, to_copy); + return false; + } + + memcpy(td->live_uc, uc, to_copy); + + /* + * If there was any EXTRA_CONTEXT fix up the size to be the + * struct extra_context and the following terminator record, + * this means that the rest of the code does not need to have + * special handling for the record and we don't need to fix up + * datap for the new location. + */ + if (copied_extra) + copied_extra->head.size = sizeof(*copied_extra) + 16; + td->live_uc_valid = 1; fprintf(stderr, - "GOOD CONTEXT grabbed from sig_copyctx handler\n"); + "%lu byte GOOD CONTEXT grabbed from sig_copyctx handler\n", + to_copy); return true; } From 38150a6204c731a4846786682e500d132571fd82 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:07:02 +0100 Subject: [PATCH 019/126] kselftest/arm64: Allow larger buffers in get_signal_context() In order to allow testing of signal contexts that overflow the base signal frame allow callers to pass the buffer size for the user context into get_signal_context(). No functional change. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829160703.874492-10-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/signal/test_signals_utils.h | 5 +++-- .../arm64/signal/testcases/fake_sigreturn_bad_magic.c | 2 +- .../arm64/signal/testcases/fake_sigreturn_bad_size.c | 2 +- .../signal/testcases/fake_sigreturn_bad_size_for_magic0.c | 2 +- .../signal/testcases/fake_sigreturn_duplicated_fpsimd.c | 2 +- .../arm64/signal/testcases/fake_sigreturn_misaligned_sp.c | 2 +- .../arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c | 2 +- .../arm64/signal/testcases/fake_sigreturn_sme_change_vl.c | 2 +- .../arm64/signal/testcases/fake_sigreturn_sve_change_vl.c | 2 +- tools/testing/selftests/arm64/signal/testcases/sme_vl.c | 2 +- tools/testing/selftests/arm64/signal/testcases/ssve_regs.c | 2 +- tools/testing/selftests/arm64/signal/testcases/sve_regs.c | 2 +- tools/testing/selftests/arm64/signal/testcases/sve_vl.c | 2 +- tools/testing/selftests/arm64/signal/testcases/za_regs.c | 2 +- 14 files changed, 16 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index f3aa99ba67bb..222093f51b67 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -56,7 +56,8 @@ static inline bool feats_ok(struct tdescr *td) * at sizeof(ucontext_t). */ static __always_inline bool get_current_context(struct tdescr *td, - ucontext_t *dest_uc) + ucontext_t *dest_uc, + size_t dest_sz) { static volatile bool seen_already; @@ -64,7 +65,7 @@ static __always_inline bool get_current_context(struct tdescr *td, /* it's a genuine invocation..reinit */ seen_already = 0; td->live_uc_valid = 0; - td->live_sz = sizeof(*dest_uc); + td->live_sz = dest_sz; memset(dest_uc, 0x00, td->live_sz); td->live_uc = dest_uc; /* diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c index 8dc600a7d4fd..8c7f00ea9823 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c @@ -21,7 +21,7 @@ static int fake_sigreturn_bad_magic_run(struct tdescr *td, struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; /* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; /* need at least 2*HDR_SZ space: KSFT_BAD_MAGIC + terminator. */ diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c index b3c362100666..1c03f6b638e0 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c @@ -24,7 +24,7 @@ static int fake_sigreturn_bad_size_run(struct tdescr *td, struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; /* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c index a44b88bfc81a..bc22f64b544e 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c @@ -21,7 +21,7 @@ static int fake_sigreturn_bad_size_for_magic0_run(struct tdescr *td, struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; /* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; /* at least HDR_SZ for the badly sized terminator. */ diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c index afe8915f0998..63e3906b631c 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c @@ -21,7 +21,7 @@ static int fake_sigreturn_duplicated_fpsimd_run(struct tdescr *td, struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; /* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; head = get_starting_head(shead, sizeof(struct fpsimd_context) + HDR_SZ, diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c index 1e089e66f9f3..d00625ff12c2 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c @@ -19,7 +19,7 @@ static int fake_sigreturn_misaligned_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc) { /* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; /* Forcing sigframe on misaligned SP (16 + 3) */ diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c index 08ecd8073a1a..f805138cb20d 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c @@ -23,7 +23,7 @@ static int fake_sigreturn_missing_fpsimd_run(struct tdescr *td, struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); /* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c index 7ed762b7202f..ebd5815b54bb 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c @@ -54,7 +54,7 @@ static int fake_sigreturn_ssve_change_vl(struct tdescr *td, struct sve_context *sve; /* Get a signal context with a SME ZA frame in it */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c index 915821375b0a..e2a452190511 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c @@ -56,7 +56,7 @@ static int fake_sigreturn_sve_change_vl(struct tdescr *td, struct sve_context *sve; /* Get a signal context with a SVE frame in it */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c index 13ff3b35cbaf..75f387f2db81 100644 --- a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c @@ -34,7 +34,7 @@ static int sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc) struct za_context *za; /* Get a signal context which should have a ZA frame in it */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c index 9022a6cab4b3..71f14632c524 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c @@ -73,7 +73,7 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, * in it. */ setup_ssve_regs(); - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c index 4b2418aa08a9..4cdedb706786 100644 --- a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c @@ -71,7 +71,7 @@ static int do_one_sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, * in it. */ setup_sve_regs(); - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_vl.c b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c index 92904653add1..aa835acec062 100644 --- a/tools/testing/selftests/arm64/signal/testcases/sve_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c @@ -34,7 +34,7 @@ static int sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc) struct sve_context *sve; /* Get a signal context which should have a SVE frame in it */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c index 6f0c1582f541..a92b8e917683 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -73,7 +73,7 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, * in it. */ setup_za_regs(); - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); From 31ba63426b328f99323ba44ae0f123417e80340e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:07:03 +0100 Subject: [PATCH 020/126] kselftest/arm64: Include larger SVE and SME VLs in signal tests Now that the core utilities for signal testing support handling data in EXTRA_CONTEXT blocks we can test larger SVE and SME VLs which spill over the limits in the base signal context. This is done by defining storage for the context as a union with a ucontext_t and a buffer together with some helpers for getting relevant sizes and offsets like we do for fake_sigframe, this isn't the most lovely code ever but is fairly straightforward to implement and much less invasive to the somewhat unclear and indistinct layers of abstraction in the signal handling test code. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829160703.874492-11-broonie@kernel.org Signed-off-by: Catalin Marinas --- .../arm64/signal/testcases/ssve_regs.c | 25 +++++++------------ .../arm64/signal/testcases/sve_regs.c | 23 +++++++---------- .../arm64/signal/testcases/testcases.h | 7 ++++++ .../arm64/signal/testcases/za_regs.c | 24 ++++++------------ 4 files changed, 33 insertions(+), 46 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c index 71f14632c524..d0a178945b1a 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c @@ -13,7 +13,10 @@ #include "test_signals_utils.h" #include "testcases.h" -struct fake_sigframe sf; +static union { + ucontext_t uc; + char buf[1024 * 64]; +} context; static unsigned int vls[SVE_VQ_MAX]; unsigned int nvls = 0; @@ -55,8 +58,8 @@ static void setup_ssve_regs(void) static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, unsigned int vl) { - size_t resv_sz, offset; - struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); + size_t offset; + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); struct sve_context *ssve; int ret; @@ -73,11 +76,11 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, * in it. */ setup_ssve_regs(); - if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) + if (!get_current_context(td, &context.uc, sizeof(context))) return 1; - resv_sz = GET_SF_RESV_SIZE(sf); - head = get_header(head, SVE_MAGIC, resv_sz, &offset); + head = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context), + &offset); if (!head) { fprintf(stderr, "No SVE context\n"); return 1; @@ -101,16 +104,6 @@ static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc) int i; for (i = 0; i < nvls; i++) { - /* - * TODO: the signal test helpers can't currently cope - * with signal frames bigger than struct sigcontext, - * skip VLs that will trigger that. - */ - if (vls[i] > 64) { - printf("Skipping VL %u due to stack size\n", vls[i]); - continue; - } - if (do_one_sme_vl(td, si, uc, vls[i])) return 1; } diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c index 4cdedb706786..8b16eabbb769 100644 --- a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c @@ -13,7 +13,10 @@ #include "test_signals_utils.h" #include "testcases.h" -struct fake_sigframe sf; +static union { + ucontext_t uc; + char buf[1024 * 64]; +} context; static unsigned int vls[SVE_VQ_MAX]; unsigned int nvls = 0; @@ -55,8 +58,8 @@ static void setup_sve_regs(void) static int do_one_sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, unsigned int vl) { - size_t resv_sz, offset; - struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); + size_t offset; + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); struct sve_context *sve; fprintf(stderr, "Testing VL %d\n", vl); @@ -71,11 +74,11 @@ static int do_one_sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, * in it. */ setup_sve_regs(); - if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) + if (!get_current_context(td, &context.uc, sizeof(context))) return 1; - resv_sz = GET_SF_RESV_SIZE(sf); - head = get_header(head, SVE_MAGIC, resv_sz, &offset); + head = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context), + &offset); if (!head) { fprintf(stderr, "No SVE context\n"); return 1; @@ -99,14 +102,6 @@ static int sve_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc) int i; for (i = 0; i < nvls; i++) { - /* - * TODO: the signal test helpers can't currently cope - * with signal frames bigger than struct sigcontext, - * skip VLs that will trigger that. - */ - if (vls[i] > 64) - continue; - if (do_one_sve_vl(td, si, uc, vls[i])) return 1; } diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index b39f538c7be1..040afded0b76 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -30,6 +30,13 @@ #define GET_SF_RESV_SIZE(sf) \ sizeof((sf).uc.uc_mcontext.__reserved) +#define GET_BUF_RESV_HEAD(buf) \ + (struct _aarch64_ctx *)(&(buf).uc.uc_mcontext.__reserved) + +#define GET_BUF_RESV_SIZE(buf) \ + (sizeof(buf) - sizeof(buf.uc) + \ + sizeof((buf).uc.uc_mcontext.__reserved)) + #define GET_UCP_RESV_SIZE(ucp) \ sizeof((ucp)->uc_mcontext.__reserved) diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c index a92b8e917683..ea45acb115d5 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -13,7 +13,10 @@ #include "test_signals_utils.h" #include "testcases.h" -struct fake_sigframe sf; +static union { + ucontext_t uc; + char buf[1024 * 128]; +} context; static unsigned int vls[SVE_VQ_MAX]; unsigned int nvls = 0; @@ -57,8 +60,8 @@ static char zeros[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)]; static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, unsigned int vl) { - size_t resv_sz, offset; - struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); + size_t offset; + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); struct za_context *za; fprintf(stderr, "Testing VL %d\n", vl); @@ -73,11 +76,10 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, * in it. */ setup_za_regs(); - if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) + if (!get_current_context(td, &context.uc, sizeof(context))) return 1; - resv_sz = GET_SF_RESV_SIZE(sf); - head = get_header(head, ZA_MAGIC, resv_sz, &offset); + head = get_header(head, ZA_MAGIC, GET_BUF_RESV_SIZE(context), &offset); if (!head) { fprintf(stderr, "No ZA context\n"); return 1; @@ -113,16 +115,6 @@ static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc) int i; for (i = 0; i < nvls; i++) { - /* - * TODO: the signal test helpers can't currently cope - * with signal frames bigger than struct sigcontext, - * skip VLs that will trigger that. - */ - if (vls[i] > 32) { - printf("Skipping VL %u due to stack size\n", vls[i]); - continue; - } - if (do_one_sme_vl(td, si, uc, vls[i])) return 1; } From bb7852c168fe02fddbbc5cf26dc6fb779e6aad38 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:25:00 +0100 Subject: [PATCH 021/126] kselftest/arm64: Correct buffer allocation for SVE Z registers The buffer used for verifying SVE Z registers allocated enough space for 16 maximally sized registers rather than 32 due to using the macro for the number of P registers. In practice this didn't matter since for historical reasons the maximum VQ defined in the ABI is greater the architectural maximum so we will always allocate more space than is needed even with emulated platforms implementing the architectural maximum. Still, we should use the right define. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220829162502.886816-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/syscall-abi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index b632bfe9e022..95229fa73232 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -113,8 +113,8 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, } static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)]; -uint8_t z_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; -uint8_t z_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; +uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; +uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) From af3ce550a6590551a88383382983301c431e3153 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 17:25:02 +0100 Subject: [PATCH 022/126] kselftest/arm64: Enforce actual ABI for SVE syscalls Currently syscall-abi permits the bits in Z registers not shared with the V registers as well as all of the predicate registers to be preserved on syscall but the actual implementation has always cleared them and our documentation has now been updated to make that the documented ABI so update the syscall-abi test to match. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220829162502.886816-4-broonie@kernel.org Signed-off-by: Catalin Marinas --- .../testing/selftests/arm64/abi/syscall-abi.c | 55 +++++++++++++------ 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index 95229fa73232..dd7ebe536d05 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -112,6 +112,8 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, return errors; } +#define SVE_Z_SHARED_BYTES (128 / 8) + static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)]; uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; @@ -133,22 +135,39 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, if (!sve_vl) return 0; - /* - * After a syscall the low 128 bits of the Z registers should - * be preserved and the rest be zeroed or preserved, except if - * we were in streaming mode in which case the low 128 bits may - * also be cleared by the transition out of streaming mode. - */ for (i = 0; i < SVE_NUM_ZREGS; i++) { - void *in = &z_in[reg_size * i]; - void *out = &z_out[reg_size * i]; + uint8_t *in = &z_in[reg_size * i]; + uint8_t *out = &z_out[reg_size * i]; - if ((memcmp(in, out, SVE_VQ_BYTES) != 0) && - !((svcr & SVCR_SM_MASK) && - memcmp(z_zero, out, SVE_VQ_BYTES) == 0)) { - ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n", - cfg->name, sve_vl, i); - errors++; + if (svcr & SVCR_SM_MASK) { + /* + * In streaming mode the whole register should + * be cleared by the transition out of + * streaming mode. + */ + if (memcmp(z_zero, out, reg_size) != 0) { + ksft_print_msg("%s SVE VL %d Z%d non-zero\n", + cfg->name, sve_vl, i); + errors++; + } + } else { + /* + * For standard SVE the low 128 bits should be + * preserved and any additional bits cleared. + */ + if (memcmp(in, out, SVE_Z_SHARED_BYTES) != 0) { + ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n", + cfg->name, sve_vl, i); + errors++; + } + + if (reg_size > SVE_Z_SHARED_BYTES && + (memcmp(z_zero, out + SVE_Z_SHARED_BYTES, + reg_size - SVE_Z_SHARED_BYTES) != 0)) { + ksft_print_msg("%s SVE VL %d Z%d high bits non-zero\n", + cfg->name, sve_vl, i); + errors++; + } } } @@ -176,9 +195,9 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl, if (!sve_vl) return 0; - /* After a syscall the P registers should be preserved or zeroed */ + /* After a syscall the P registers should be zeroed */ for (i = 0; i < SVE_NUM_PREGS * reg_size; i++) - if (p_out[i] && (p_in[i] != p_out[i])) + if (p_out[i]) errors++; if (errors) ksft_print_msg("%s SVE VL %d predicate registers non-zero\n", @@ -226,9 +245,9 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) return 0; - /* After a syscall the P registers should be preserved or zeroed */ + /* After a syscall FFR should be zeroed */ for (i = 0; i < reg_size; i++) - if (ffr_out[i] && (ffr_in[i] != ffr_out[i])) + if (ffr_out[i]) errors++; if (errors) ksft_print_msg("%s SVE VL %d FFR non-zero\n", From 537addee1e8e843a00f6cb0f656af19655b13107 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 7 Sep 2022 18:09:02 +0100 Subject: [PATCH 023/126] kselftest/arm64: Fix spelling misakes of signal names There are a couple of spelling mistakes of signame names. Fix them. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20220907170902.687340-1-colin.i.king@gmail.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-stress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index 01cef1962ab5..a5c0ebef2419 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -247,7 +247,7 @@ static void handle_child_signal(int sig, siginfo_t *info, void *context) } if (!found) - ksft_print_msg("SIGCHILD for unknown PID %d with status %d\n", + ksft_print_msg("SIGCHLD for unknown PID %d with status %d\n", info->si_pid, info->si_status); } @@ -457,7 +457,7 @@ int main(int argc, char **argv) strerror(errno), errno); ret = sigaction(SIGTERM, &sa, NULL); if (ret < 0) - ksft_print_msg("Failed to install SIGTEM handler: %s (%d)\n", + ksft_print_msg("Failed to install SIGTERM handler: %s (%d)\n", strerror(errno), errno); sa.sa_sigaction = handle_child_signal; ret = sigaction(SIGCHLD, &sa, NULL); From a04054362e4a9f48a51a742215c2ae1b8bfd04b5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:53:58 +0100 Subject: [PATCH 024/126] arm64/sysreg: Remove stray SMIDR_EL1 defines SMIDR_EL1 was converted to automatic generation but some of the constants for fields in it were mistakenly left, remove them. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 818df938a7ad..62376ef4a4f1 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -445,10 +445,6 @@ #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) -#define SMIDR_EL1_IMPLEMENTER_SHIFT 24 -#define SMIDR_EL1_SMPS_SHIFT 15 -#define SMIDR_EL1_AFFINITY_SHIFT 0 - #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) From 4c8b18af25be2e87b7b62bec397de2cf160555fa Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:53:59 +0100 Subject: [PATCH 025/126] arm64/sysreg: Describe ID_AA64SMFR0_EL1.SMEVer as an enumeration As with the corresponding SVE field ID_AA64ZFR0_EL1.SVEVer and other ID register fields the SMEVer field should be identified as an enumeration but it is currently described as a plain field (most likely due to there presently being only one possible value). Update it to be an enumeration as one would expect. No functional change. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/tools/sysreg | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 9ae483ec1e56..185bc5b0faf7 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -98,7 +98,9 @@ Enum 63 FA64 0b1 IMP EndEnum Res0 62:60 -Field 59:56 SMEver +Enum 59:56 SMEver + 0b0000 IMP +EndEnum Enum 55:52 I16I64 0b0000 NI 0b1111 IMP From d9b230f644196b2986501ecc45c2b2a41075040d Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Mon, 5 Sep 2022 23:54:00 +0100 Subject: [PATCH 026/126] arm64: cache: Remove unused CTR_CACHE_MINLINE_MASK A recent change renamed CTR_DMINLINE_SHIFT to CTR_EL0_DminLine_SHIFT but didn't fully update CTR_CACHE_MINLINE_MASK. As CTR_CACHE_MINLINE_MASK is not used anywhere anyway, just remove it. Signed-off-by: Kristina Martsenko Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-4-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cache.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 34256bda0da9..c0b178d1bb4f 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -45,10 +45,6 @@ static inline unsigned int arch_slab_minalign(void) #define arch_slab_minalign() arch_slab_minalign() #endif -#define CTR_CACHE_MINLINE_MASK \ - (0xf << CTR_EL0_DMINLINE_SHIFT | \ - CTR_EL0_IMINLINE_MASK << CTR_EL0_IMINLINE_SHIFT) - #define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr) #define ICACHEF_ALIASING 0 From 2d987e64e8c756ffcbace4a598444297df28b8a1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:01 +0100 Subject: [PATCH 027/126] arm64/sysreg: Add _EL1 into ID_AA64MMFR0_EL1 definition names Normally we include the full register name in the defines for fields within registers but this has not been followed for ID registers. In preparation for automatic generation of defines add the _EL1s into the defines for ID_AA64MMFR0_EL1 to follow the convention. No functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-5-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 4 +- arch/arm64/include/asm/cpufeature.h | 38 +++---- arch/arm64/include/asm/el2_setup.h | 2 +- arch/arm64/include/asm/kvm_pgtable.h | 6 +- arch/arm64/include/asm/sysreg.h | 100 +++++++++--------- arch/arm64/kernel/cpufeature.c | 34 +++--- arch/arm64/kernel/head.S | 6 +- .../arm64/kvm/hyp/include/nvhe/fixed_config.h | 12 +-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +- arch/arm64/kvm/hyp/pgtable.c | 2 +- arch/arm64/kvm/reset.c | 12 +-- arch/arm64/mm/context.c | 6 +- arch/arm64/mm/init.c | 2 +- drivers/firmware/efi/libstub/arm64-stub.c | 4 +- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 6 +- 15 files changed, 118 insertions(+), 118 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 5846145be523..a6e7061d84e7 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -384,8 +384,8 @@ alternative_cb_end .macro tcr_compute_pa_size, tcr, pos, tmp0, tmp1 mrs \tmp0, ID_AA64MMFR0_EL1 // Narrow PARange to fit the PS field in TCR_ELx - ubfx \tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3 - mov \tmp1, #ID_AA64MMFR0_PARANGE_MAX + ubfx \tmp0, \tmp0, #ID_AA64MMFR0_EL1_PARANGE_SHIFT, #3 + mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_MAX cmp \tmp0, \tmp1 csel \tmp0, \tmp1, \tmp0, hi bfi \tcr, \tmp0, \pos, #3 diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index fd7d75a275f6..96ccf823f46e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -597,8 +597,8 @@ static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val) static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) { - return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 || - cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; + return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGENDEL_SHIFT) == 0x1 || + cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT) == 0x1; } static inline bool id_aa64pfr0_32bit_el1(u64 pfr0) @@ -694,10 +694,10 @@ static inline bool system_supports_4kb_granule(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); val = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_TGRAN4_SHIFT); + ID_AA64MMFR0_EL1_TGRAN4_SHIFT); - return (val >= ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN) && - (val <= ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX); + return (val >= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN) && + (val <= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX); } static inline bool system_supports_64kb_granule(void) @@ -707,10 +707,10 @@ static inline bool system_supports_64kb_granule(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); val = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_TGRAN64_SHIFT); + ID_AA64MMFR0_EL1_TGRAN64_SHIFT); - return (val >= ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN) && - (val <= ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX); + return (val >= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN) && + (val <= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX); } static inline bool system_supports_16kb_granule(void) @@ -720,10 +720,10 @@ static inline bool system_supports_16kb_granule(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); val = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_TGRAN16_SHIFT); + ID_AA64MMFR0_EL1_TGRAN16_SHIFT); - return (val >= ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN) && - (val <= ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX); + return (val >= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN) && + (val <= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX); } static inline bool system_supports_mixed_endian_el0(void) @@ -738,7 +738,7 @@ static inline bool system_supports_mixed_endian(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); val = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_BIGENDEL_SHIFT); + ID_AA64MMFR0_EL1_BIGENDEL_SHIFT); return val == 0x1; } @@ -840,13 +840,13 @@ extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) { switch (parange) { - case ID_AA64MMFR0_PARANGE_32: return 32; - case ID_AA64MMFR0_PARANGE_36: return 36; - case ID_AA64MMFR0_PARANGE_40: return 40; - case ID_AA64MMFR0_PARANGE_42: return 42; - case ID_AA64MMFR0_PARANGE_44: return 44; - case ID_AA64MMFR0_PARANGE_48: return 48; - case ID_AA64MMFR0_PARANGE_52: return 52; + case ID_AA64MMFR0_EL1_PARANGE_32: return 32; + case ID_AA64MMFR0_EL1_PARANGE_36: return 36; + case ID_AA64MMFR0_EL1_PARANGE_40: return 40; + case ID_AA64MMFR0_EL1_PARANGE_42: return 42; + case ID_AA64MMFR0_EL1_PARANGE_44: return 44; + case ID_AA64MMFR0_EL1_PARANGE_48: return 48; + case ID_AA64MMFR0_EL1_PARANGE_52: return 52; /* * A future PE could use a value unknown to the kernel. * However, by the "D10.1.4 Principles of the ID scheme diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 2630faa5bc08..faad9e01e52b 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -132,7 +132,7 @@ /* Disable any fine grained traps */ .macro __init_el2_fgt mrs x1, id_aa64mmfr0_el1 - ubfx x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4 + ubfx x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4 cbz x1, .Lskip_fgt_\@ mov x0, xzr diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 9f339dffbc1a..1b098bd4cd37 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -16,9 +16,9 @@ static inline u64 kvm_get_parange(u64 mmfr0) { u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_PARANGE_SHIFT); - if (parange > ID_AA64MMFR0_PARANGE_MAX) - parange = ID_AA64MMFR0_PARANGE_MAX; + ID_AA64MMFR0_EL1_PARANGE_SHIFT); + if (parange > ID_AA64MMFR0_EL1_PARANGE_MAX) + parange = ID_AA64MMFR0_EL1_PARANGE_MAX; return parange; } diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 62376ef4a4f1..f9af77ab5f98 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -733,53 +733,53 @@ #define ID_AA64PFR1_MTE_ASYMM 0x3 /* id_aa64mmfr0 */ -#define ID_AA64MMFR0_ECV_SHIFT 60 -#define ID_AA64MMFR0_FGT_SHIFT 56 -#define ID_AA64MMFR0_EXS_SHIFT 44 -#define ID_AA64MMFR0_TGRAN4_2_SHIFT 40 -#define ID_AA64MMFR0_TGRAN64_2_SHIFT 36 -#define ID_AA64MMFR0_TGRAN16_2_SHIFT 32 -#define ID_AA64MMFR0_TGRAN4_SHIFT 28 -#define ID_AA64MMFR0_TGRAN64_SHIFT 24 -#define ID_AA64MMFR0_TGRAN16_SHIFT 20 -#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 -#define ID_AA64MMFR0_SNSMEM_SHIFT 12 -#define ID_AA64MMFR0_BIGENDEL_SHIFT 8 -#define ID_AA64MMFR0_ASID_SHIFT 4 -#define ID_AA64MMFR0_PARANGE_SHIFT 0 +#define ID_AA64MMFR0_EL1_ECV_SHIFT 60 +#define ID_AA64MMFR0_EL1_FGT_SHIFT 56 +#define ID_AA64MMFR0_EL1_EXS_SHIFT 44 +#define ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT 40 +#define ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT 36 +#define ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT 32 +#define ID_AA64MMFR0_EL1_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_EL1_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_EL1_TGRAN16_SHIFT 20 +#define ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT 16 +#define ID_AA64MMFR0_EL1_SNSMEM_SHIFT 12 +#define ID_AA64MMFR0_EL1_BIGENDEL_SHIFT 8 +#define ID_AA64MMFR0_EL1_ASID_SHIFT 4 +#define ID_AA64MMFR0_EL1_PARANGE_SHIFT 0 -#define ID_AA64MMFR0_ASID_8 0x0 -#define ID_AA64MMFR0_ASID_16 0x2 +#define ID_AA64MMFR0_EL1_ASID_8 0x0 +#define ID_AA64MMFR0_EL1_ASID_16 0x2 -#define ID_AA64MMFR0_TGRAN4_NI 0xf -#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN 0x0 -#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX 0x7 -#define ID_AA64MMFR0_TGRAN64_NI 0xf -#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN 0x0 -#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX 0x7 -#define ID_AA64MMFR0_TGRAN16_NI 0x0 -#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN 0x1 -#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX 0xf +#define ID_AA64MMFR0_EL1_TGRAN4_NI 0xf +#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_EL1_TGRAN64_NI 0xf +#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_EL1_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1 +#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf -#define ID_AA64MMFR0_PARANGE_32 0x0 -#define ID_AA64MMFR0_PARANGE_36 0x1 -#define ID_AA64MMFR0_PARANGE_40 0x2 -#define ID_AA64MMFR0_PARANGE_42 0x3 -#define ID_AA64MMFR0_PARANGE_44 0x4 -#define ID_AA64MMFR0_PARANGE_48 0x5 -#define ID_AA64MMFR0_PARANGE_52 0x6 +#define ID_AA64MMFR0_EL1_PARANGE_32 0x0 +#define ID_AA64MMFR0_EL1_PARANGE_36 0x1 +#define ID_AA64MMFR0_EL1_PARANGE_40 0x2 +#define ID_AA64MMFR0_EL1_PARANGE_42 0x3 +#define ID_AA64MMFR0_EL1_PARANGE_44 0x4 +#define ID_AA64MMFR0_EL1_PARANGE_48 0x5 +#define ID_AA64MMFR0_EL1_PARANGE_52 0x6 #define ARM64_MIN_PARANGE_BITS 32 -#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0 -#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1 -#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2 -#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7 #ifdef CONFIG_ARM64_PA_BITS_52 -#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52 +#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_52 #else -#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48 +#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48 #endif /* id_aa64mmfr1 */ @@ -951,20 +951,20 @@ #define ID_PFR1_PROGMOD_SHIFT 0 #if defined(CONFIG_ARM64_4K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX -#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX +#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT #elif defined(CONFIG_ARM64_16K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX -#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX +#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT #elif defined(CONFIG_ARM64_64K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX -#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN64_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX +#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT #endif #define MVFR2_FPMISC_SHIFT 4 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index af4de817d712..3f4512267fe4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -316,9 +316,9 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_FGT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_EXS_SHIFT, 4, 0), /* * Page size not being supported at Stage-2 is not fatal. You * just give up KVM if PAGE_SIZE isn't supported there. Go fix @@ -334,9 +334,9 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { * fields are inconsistent across vCPUs, then it isn't worth * trying to bring KVM up. */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_2_SHIFT, 4, 1), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_2_SHIFT, 4, 1), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_2_SHIFT, 4, 1), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT, 4, 1), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT, 4, 1), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT, 4, 1), /* * We already refuse to boot CPUs that don't support our configured * page size, so we can only detect mismatches for a page size other @@ -344,20 +344,20 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { * exist in the wild so, even though we don't like it, we'll have to go * along with it and treat them as non-strict. */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN4_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN4_NI), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN64_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN64_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN16_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN16_NI), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT, 4, 0), /* Linux shouldn't care about secure memory */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ASID_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_SNSMEM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGENDEL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ASID_SHIFT, 4, 0), /* * Differing PARange is fine as long as all peripherals and memory are mapped * within the minimum PARange of all CPUs */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_PARANGE_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2104,7 +2104,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR0_EL1, - .field_pos = ID_AA64MMFR0_ECV_SHIFT, + .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, @@ -2751,7 +2751,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3), #endif /* CONFIG_ARM64_MTE */ - HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), + HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), @@ -3102,7 +3102,7 @@ static void verify_hyp_capabilities(void) /* Verify IPA range */ parange = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_PARANGE_SHIFT); + ID_AA64MMFR0_EL1_PARANGE_SHIFT); ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange); if (ipa_max < get_kvm_ipa_limit()) { pr_crit("CPU%d: IPA range mismatch\n", smp_processor_id()); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index cefe6a73ee54..bffb034d8f73 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -656,10 +656,10 @@ SYM_FUNC_END(__secondary_too_slow) */ SYM_FUNC_START(__enable_mmu) mrs x3, ID_AA64MMFR0_EL1 - ubfx x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4 - cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN + ubfx x3, x3, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4 + cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN b.lt __no_granule_support - cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX + cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX b.gt __no_granule_support phys_to_ttbr x2, x2 msr ttbr0_el1, x2 // load TTBR0 diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index fa6e466ed57f..aac538c34f87 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -74,10 +74,10 @@ * - Non-context synchronizing exception entry and exit */ #define PVM_ID_AA64MMFR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \ ) /* @@ -86,8 +86,8 @@ * - 16-bit ASID */ #define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASID), ID_AA64MMFR0_EL1_ASID_16) \ ) /* diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 99c8d8b73e70..823eb4d03956 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -128,7 +128,7 @@ static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu) u64 mdcr_set = 0; /* Trap Debug Communications Channel registers */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids)) + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids)) mdcr_set |= MDCR_EL2_TDCC; vcpu->arch.mdcr_el2 |= mdcr_set; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 2cb3867eb7c2..cdf8e76b0be1 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -61,7 +61,7 @@ struct kvm_pgtable_walk_data { static bool kvm_phys_is_valid(u64 phys) { - return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX)); + return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX)); } static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 0e08fbe68715..5ae18472205a 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -359,7 +359,7 @@ int kvm_set_ipa_limit(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); parange = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_PARANGE_SHIFT); + ID_AA64MMFR0_EL1_PARANGE_SHIFT); /* * IPA size beyond 48 bits could not be supported * on either 4K or 16K page size. Hence let's cap @@ -367,20 +367,20 @@ int kvm_set_ipa_limit(void) * on the system. */ if (PAGE_SIZE != SZ_64K) - parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48); + parange = min(parange, (unsigned int)ID_AA64MMFR0_EL1_PARANGE_48); /* * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at * Stage-2. If not, things will stop very quickly. */ - switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN_2_SHIFT)) { - case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE: + switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_TGRAN_2_SHIFT)) { + case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE: kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n"); return -EINVAL; - case ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT: + case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT: kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n"); break; - case ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX: + case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX: kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n"); break; default: diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index b8b4cf0bcf39..8f38a5452d05 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -43,17 +43,17 @@ static u32 get_cpu_asid_bits(void) { u32 asid; int fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR0_EL1), - ID_AA64MMFR0_ASID_SHIFT); + ID_AA64MMFR0_EL1_ASID_SHIFT); switch (fld) { default: pr_warn("CPU%d: Unknown ASID size (%d); assuming 8-bit\n", smp_processor_id(), fld); fallthrough; - case ID_AA64MMFR0_ASID_8: + case ID_AA64MMFR0_EL1_ASID_8: asid = 8; break; - case ID_AA64MMFR0_ASID_16: + case ID_AA64MMFR0_EL1_ASID_16: asid = 16; } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index b9af30be813e..4b4651ee47f2 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -360,7 +360,7 @@ void __init arm64_memblock_init(void) extern u16 memstart_offset_seed; u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); int parange = cpuid_feature_extract_unsigned_field( - mmfr0, ID_AA64MMFR0_PARANGE_SHIFT); + mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); s64 range = linear_region_size - BIT(id_aa64mmfr0_parange_to_phys_shift(parange)); diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 577173ee1f83..60973e84d7ab 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -23,8 +23,8 @@ efi_status_t check_platform_features(void) if (IS_ENABLED(CONFIG_ARM64_4K_PAGES)) return EFI_SUCCESS; - tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf; - if (tg < ID_AA64MMFR0_TGRAN_SUPPORTED_MIN || tg > ID_AA64MMFR0_TGRAN_SUPPORTED_MAX) { + tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_EL1_TGRAN_SHIFT) & 0xf; + if (tg < ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN || tg > ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX) { if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) efi_err("This 64 KB granular kernel is not supported by your CPU\n"); else diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 1ef7bbb4acf3..da67a75cdaad 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -150,7 +150,7 @@ static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) } reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); - par = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_PARANGE_SHIFT); + par = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_EL1_PARANGE_SHIFT); tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_IPS, par); cd->ttbr = virt_to_phys(mm->pgd); @@ -425,13 +425,13 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) * addresses larger than what we support. */ reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); - fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_PARANGE_SHIFT); + fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_EL1_PARANGE_SHIFT); oas = id_aa64mmfr0_parange_to_phys_shift(fld); if (smmu->oas < oas) return false; /* We can support bigger ASIDs than the CPU, but not smaller */ - fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_ASID_SHIFT); + fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_EL1_ASID_SHIFT); asid_bits = fld ? 16 : 8; if (smmu->asid_bits < asid_bits) return false; From a957c6be2b88564ed413a03a6009f11b1e5d5806 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:02 +0100 Subject: [PATCH 028/126] arm64/sysreg: Add _EL1 into ID_AA64MMFR2_EL1 definition names Normally we include the full register name in the defines for fields within registers but this has not been followed for ID registers. In preparation for automatic generation of defines add the _EL1s into the defines for ID_AA64MMFR2_EL1 to follow the convention. No functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-6-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 2 +- arch/arm64/include/asm/sysreg.h | 30 ++++++------- arch/arm64/kernel/cpufeature.c | 42 +++++++++---------- arch/arm64/kernel/head.S | 4 +- .../arm64/kvm/hyp/include/nvhe/fixed_config.h | 16 +++---- 5 files changed, 47 insertions(+), 47 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index a6e7061d84e7..0d5ced93c740 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -612,7 +612,7 @@ alternative_endif .macro offset_ttbr1, ttbr, tmp #ifdef CONFIG_ARM64_VA_BITS_52 mrs_s \tmp, SYS_ID_AA64MMFR2_EL1 - and \tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT) + and \tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_LVA_SHIFT) cbnz \tmp, .Lskipoffs_\@ orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET .Lskipoffs_\@ : diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f9af77ab5f98..bb1f9ae5705f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -805,21 +805,21 @@ #define ID_AA64MMFR1_TIDCP1_IMP 1 /* id_aa64mmfr2 */ -#define ID_AA64MMFR2_E0PD_SHIFT 60 -#define ID_AA64MMFR2_EVT_SHIFT 56 -#define ID_AA64MMFR2_BBM_SHIFT 52 -#define ID_AA64MMFR2_TTL_SHIFT 48 -#define ID_AA64MMFR2_FWB_SHIFT 40 -#define ID_AA64MMFR2_IDS_SHIFT 36 -#define ID_AA64MMFR2_AT_SHIFT 32 -#define ID_AA64MMFR2_ST_SHIFT 28 -#define ID_AA64MMFR2_NV_SHIFT 24 -#define ID_AA64MMFR2_CCIDX_SHIFT 20 -#define ID_AA64MMFR2_LVA_SHIFT 16 -#define ID_AA64MMFR2_IESB_SHIFT 12 -#define ID_AA64MMFR2_LSM_SHIFT 8 -#define ID_AA64MMFR2_UAO_SHIFT 4 -#define ID_AA64MMFR2_CNP_SHIFT 0 +#define ID_AA64MMFR2_EL1_E0PD_SHIFT 60 +#define ID_AA64MMFR2_EL1_EVT_SHIFT 56 +#define ID_AA64MMFR2_EL1_BBM_SHIFT 52 +#define ID_AA64MMFR2_EL1_TTL_SHIFT 48 +#define ID_AA64MMFR2_EL1_FWB_SHIFT 40 +#define ID_AA64MMFR2_EL1_IDS_SHIFT 36 +#define ID_AA64MMFR2_EL1_AT_SHIFT 32 +#define ID_AA64MMFR2_EL1_ST_SHIFT 28 +#define ID_AA64MMFR2_EL1_NV_SHIFT 24 +#define ID_AA64MMFR2_EL1_CCIDX_SHIFT 20 +#define ID_AA64MMFR2_EL1_LVA_SHIFT 16 +#define ID_AA64MMFR2_EL1_IESB_SHIFT 12 +#define ID_AA64MMFR2_EL1_LSM_SHIFT 8 +#define ID_AA64MMFR2_EL1_UAO_SHIFT 4 +#define ID_AA64MMFR2_EL1_CNP_SHIFT 0 /* id_aa64dfr0 */ #define ID_AA64DFR0_MTPMU_SHIFT 48 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3f4512267fe4..eb50d52dac1f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -378,21 +378,21 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EVT_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_BBM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_TTL_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IDS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_ST_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_NV_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CCIDX_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_E0PD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_EVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_BBM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_TTL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_FWB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IDS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_AT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_ST_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_NV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CCIDX_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_LVA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IESB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_LSM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_UAO_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CNP_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -1571,7 +1571,7 @@ bool kaslr_requires_kpti(void) if (IS_ENABLED(CONFIG_ARM64_E0PD)) { u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); if (cpuid_feature_extract_unsigned_field(mmfr2, - ID_AA64MMFR2_E0PD_SHIFT)) + ID_AA64MMFR2_EL1_E0PD_SHIFT)) return false; } @@ -2303,7 +2303,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_STAGE2_FWB, .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR2_FWB_SHIFT, + .field_pos = ID_AA64MMFR2_EL1_FWB_SHIFT, .field_width = 4, .min_field_value = 1, .matches = has_cpuid_feature, @@ -2314,7 +2314,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_ARMv8_4_TTL, .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR2_TTL_SHIFT, + .field_pos = ID_AA64MMFR2_EL1_TTL_SHIFT, .field_width = 4, .min_field_value = 1, .matches = has_cpuid_feature, @@ -2380,7 +2380,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_useable_cnp, .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .field_pos = ID_AA64MMFR2_EL1_CNP_SHIFT, .field_width = 4, .min_field_value = 1, .cpu_enable = cpu_enable_cnp, @@ -2499,7 +2499,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_width = 4, - .field_pos = ID_AA64MMFR2_E0PD_SHIFT, + .field_pos = ID_AA64MMFR2_EL1_E0PD_SHIFT, .matches = has_cpuid_feature, .min_field_value = 1, .cpu_enable = cpu_enable_e0pd, @@ -2725,7 +2725,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_EBF16), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), - HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), + HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_EL1_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index bffb034d8f73..d040f57d3496 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -99,7 +99,7 @@ SYM_CODE_START(primary_entry) */ #if VA_BITS > 48 mrs_s x0, SYS_ID_AA64MMFR2_EL1 - tst x0, #0xf << ID_AA64MMFR2_LVA_SHIFT + tst x0, #0xf << ID_AA64MMFR2_EL1_LVA_SHIFT mov x0, #VA_BITS mov x25, #VA_BITS_MIN csel x25, x25, x0, eq @@ -677,7 +677,7 @@ SYM_FUNC_START(__cpu_secondary_check52bitva) b.ne 2f mrs_s x0, SYS_ID_AA64MMFR2_EL1 - and x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT) + and x0, x0, #(0xf << ID_AA64MMFR2_EL1_LVA_SHIFT) cbnz x0, 2f update_early_cpu_boot_status \ diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index aac538c34f87..3dad7b2079ee 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -120,14 +120,14 @@ * - E0PDx mechanism */ #define PVM_ID_AA64MMFR2_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CNP) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \ ) /* From 55adc08d7e6433357f2b3b4fee248ae9da1fe2fa Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:03 +0100 Subject: [PATCH 029/126] arm64/sysreg: Add _EL1 into ID_AA64PFR0_EL1 definition names Normally we include the full register name in the defines for fields within registers but this has not been followed for ID registers. In preparation for automatic generation of defines add the _EL1s into the defines for ID_AA64PFR0_EL1 to follow the convention. No functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-7-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 2 +- arch/arm64/include/asm/cpufeature.h | 12 ++-- arch/arm64/include/asm/el2_setup.h | 4 +- arch/arm64/include/asm/sysreg.h | 50 ++++++------- arch/arm64/kernel/cpufeature.c | 70 +++++++++---------- arch/arm64/kernel/hyp-stub.S | 2 +- arch/arm64/kernel/idreg-override.c | 2 +- arch/arm64/kernel/proton-pack.c | 2 +- .../arm64/kvm/hyp/include/nvhe/fixed_config.h | 16 ++--- arch/arm64/kvm/hyp/nvhe/pkvm.c | 20 +++--- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 8 +-- arch/arm64/kvm/sys_regs.c | 26 +++---- drivers/irqchip/irq-gic-v4.c | 2 +- 13 files changed, 108 insertions(+), 108 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0d5ced93c740..48c7963abaf3 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -524,7 +524,7 @@ alternative_endif */ .macro reset_amuserenr_el0, tmpreg mrs \tmpreg, id_aa64pfr0_el1 // Check ID_AA64PFR0_EL1 - ubfx \tmpreg, \tmpreg, #ID_AA64PFR0_AMU_SHIFT, #4 + ubfx \tmpreg, \tmpreg, #ID_AA64PFR0_EL1_AMU_SHIFT, #4 cbz \tmpreg, .Lskip_\@ // Skip if no AMU present msr_s SYS_AMUSERENR_EL0, xzr // Disable AMU access from EL0 .Lskip_\@: diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 96ccf823f46e..8ba9f1c07432 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -603,21 +603,21 @@ static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) static inline bool id_aa64pfr0_32bit_el1(u64 pfr0) { - u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT); + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL1_SHIFT); - return val == ID_AA64PFR0_ELx_32BIT_64BIT; + return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT; } static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) { - u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT); + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL0_SHIFT); - return val == ID_AA64PFR0_ELx_32BIT_64BIT; + return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT; } static inline bool id_aa64pfr0_sve(u64 pfr0) { - u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT); + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SVE_SHIFT); return val > 0; } @@ -659,7 +659,7 @@ static inline bool supports_csv2p3(int scope) pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); csv2_val = cpuid_feature_extract_unsigned_field(pfr0, - ID_AA64PFR0_CSV2_SHIFT); + ID_AA64PFR0_EL1_CSV2_SHIFT); return csv2_val == 3; } diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index faad9e01e52b..a011c87ec6e3 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -97,7 +97,7 @@ /* GICv3 system register access */ .macro __init_el2_gicv3 mrs x0, id_aa64pfr0_el1 - ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 + ubfx x0, x0, #ID_AA64PFR0_EL1_GIC_SHIFT, #4 cbz x0, .Lskip_gicv3_\@ mrs_s x0, SYS_ICC_SRE_EL2 @@ -162,7 +162,7 @@ msr_s SYS_HFGITR_EL2, xzr mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU - ubfx x1, x1, #ID_AA64PFR0_AMU_SHIFT, #4 + ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4 cbz x1, .Lskip_fgt_\@ msr_s SYS_HAFGRTR_EL2, xzr diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index bb1f9ae5705f..06f93aa9abb1 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -686,32 +686,32 @@ #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) /* id_aa64pfr0 */ -#define ID_AA64PFR0_CSV3_SHIFT 60 -#define ID_AA64PFR0_CSV2_SHIFT 56 -#define ID_AA64PFR0_DIT_SHIFT 48 -#define ID_AA64PFR0_AMU_SHIFT 44 -#define ID_AA64PFR0_MPAM_SHIFT 40 -#define ID_AA64PFR0_SEL2_SHIFT 36 -#define ID_AA64PFR0_SVE_SHIFT 32 -#define ID_AA64PFR0_RAS_SHIFT 28 -#define ID_AA64PFR0_GIC_SHIFT 24 -#define ID_AA64PFR0_ASIMD_SHIFT 20 -#define ID_AA64PFR0_FP_SHIFT 16 -#define ID_AA64PFR0_EL3_SHIFT 12 -#define ID_AA64PFR0_EL2_SHIFT 8 -#define ID_AA64PFR0_EL1_SHIFT 4 -#define ID_AA64PFR0_EL0_SHIFT 0 +#define ID_AA64PFR0_EL1_CSV3_SHIFT 60 +#define ID_AA64PFR0_EL1_CSV2_SHIFT 56 +#define ID_AA64PFR0_EL1_DIT_SHIFT 48 +#define ID_AA64PFR0_EL1_AMU_SHIFT 44 +#define ID_AA64PFR0_EL1_MPAM_SHIFT 40 +#define ID_AA64PFR0_EL1_SEL2_SHIFT 36 +#define ID_AA64PFR0_EL1_SVE_SHIFT 32 +#define ID_AA64PFR0_EL1_RAS_SHIFT 28 +#define ID_AA64PFR0_EL1_GIC_SHIFT 24 +#define ID_AA64PFR0_EL1_ASIMD_SHIFT 20 +#define ID_AA64PFR0_EL1_FP_SHIFT 16 +#define ID_AA64PFR0_EL1_EL3_SHIFT 12 +#define ID_AA64PFR0_EL1_EL2_SHIFT 8 +#define ID_AA64PFR0_EL1_EL1_SHIFT 4 +#define ID_AA64PFR0_EL1_EL0_SHIFT 0 -#define ID_AA64PFR0_AMU 0x1 -#define ID_AA64PFR0_SVE 0x1 -#define ID_AA64PFR0_RAS_V1 0x1 -#define ID_AA64PFR0_RAS_V1P1 0x2 -#define ID_AA64PFR0_FP_NI 0xf -#define ID_AA64PFR0_FP_SUPPORTED 0x0 -#define ID_AA64PFR0_ASIMD_NI 0xf -#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 -#define ID_AA64PFR0_ELx_64BIT_ONLY 0x1 -#define ID_AA64PFR0_ELx_32BIT_64BIT 0x2 +#define ID_AA64PFR0_EL1_AMU 0x1 +#define ID_AA64PFR0_EL1_SVE 0x1 +#define ID_AA64PFR0_EL1_RAS_V1 0x1 +#define ID_AA64PFR0_EL1_RAS_V1P1 0x2 +#define ID_AA64PFR0_EL1_FP_NI 0xf +#define ID_AA64PFR0_EL1_FP_SUPPORTED 0x0 +#define ID_AA64PFR0_EL1_ASIMD_NI 0xf +#define ID_AA64PFR0_EL1_ASIMD_SUPPORTED 0x0 +#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2 /* id_aa64pfr1 */ #define ID_AA64PFR1_SME_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index eb50d52dac1f..3bda767af32d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -243,22 +243,22 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_AMU_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_MPAM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SEL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_DIT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_AMU_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_MPAM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SEL2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), - S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), - S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SVE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_RAS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_GIC_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_ASIMD_SHIFT, 4, ID_AA64PFR0_EL1_ASIMD_NI), + S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY), ARM64_FTR_END, }; @@ -1492,7 +1492,7 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); return cpuid_feature_extract_signed_field(pfr0, - ID_AA64PFR0_FP_SHIFT) < 0; + ID_AA64PFR0_EL1_FP_SHIFT) < 0; } static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, @@ -2093,7 +2093,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, - .field_pos = ID_AA64PFR0_GIC_SHIFT, + .field_pos = ID_AA64PFR0_EL1_GIC_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, @@ -2168,9 +2168,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_32bit_el0, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR0_EL0_SHIFT, + .field_pos = ID_AA64PFR0_EL1_EL0_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, + .min_field_value = ID_AA64PFR0_EL1_ELx_32BIT_64BIT, }, #ifdef CONFIG_KVM { @@ -2180,9 +2180,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR0_EL1_SHIFT, + .field_pos = ID_AA64PFR0_EL1_EL1_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, + .min_field_value = ID_AA64PFR0_EL1_ELx_32BIT_64BIT, }, { .desc = "Protected KVM", @@ -2201,7 +2201,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { * more details. */ .sys_reg = SYS_ID_AA64PFR0_EL1, - .field_pos = ID_AA64PFR0_CSV3_SHIFT, + .field_pos = ID_AA64PFR0_EL1_CSV3_SHIFT, .field_width = 4, .min_field_value = 1, .matches = unmap_kernel_at_el0, @@ -2244,9 +2244,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_SVE, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR0_SVE_SHIFT, + .field_pos = ID_AA64PFR0_EL1_SVE_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR0_SVE, + .min_field_value = ID_AA64PFR0_EL1_SVE, .matches = has_cpuid_feature, .cpu_enable = sve_kernel_enable, }, @@ -2259,9 +2259,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR0_RAS_SHIFT, + .field_pos = ID_AA64PFR0_EL1_RAS_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR0_RAS_V1, + .min_field_value = ID_AA64PFR0_EL1_RAS_V1, .cpu_enable = cpu_clear_disr, }, #endif /* CONFIG_ARM64_RAS_EXTN */ @@ -2278,9 +2278,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_amu, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR0_AMU_SHIFT, + .field_pos = ID_AA64PFR0_EL1_AMU_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR0_AMU, + .min_field_value = ID_AA64PFR0_EL1_AMU, .cpu_enable = cpu_amu_enable, }, #endif /* CONFIG_ARM64_AMU_EXTN */ @@ -2485,7 +2485,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = can_use_gic_priorities, .sys_reg = SYS_ID_AA64PFR0_EL1, - .field_pos = ID_AA64PFR0_GIC_SHIFT, + .field_pos = ID_AA64PFR0_EL1_GIC_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, @@ -2708,11 +2708,11 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), @@ -2727,7 +2727,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_EL1_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_EL1_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 12c7fad02ae5..f0644e945117 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -98,7 +98,7 @@ SYM_CODE_START_LOCAL(elx_sync) SYM_CODE_END(elx_sync) SYM_CODE_START_LOCAL(__finalise_el2) - check_override id_aa64pfr0 ID_AA64PFR0_SVE_SHIFT .Linit_sve .Lskip_sve + check_override id_aa64pfr0 ID_AA64PFR0_EL1_SVE_SHIFT .Linit_sve .Lskip_sve .Linit_sve: /* SVE register access */ mrs x0, cptr_el2 // Disable SVE traps diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 1b0542c69738..7b90a9b4cc0a 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -74,7 +74,7 @@ static const struct ftr_set_desc pfr0 __initconst = { .name = "id_aa64pfr0", .override = &id_aa64pfr0_override, .fields = { - FIELD("sve", ID_AA64PFR0_SVE_SHIFT, pfr0_sve_filter), + FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter), {} }, }; diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 40be3a7c2c53..6ee586b4e235 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -168,7 +168,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void) /* If the CPU has CSV2 set, we're safe */ pfr0 = read_cpuid(ID_AA64PFR0_EL1); - if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) + if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_CSV2_SHIFT)) return SPECTRE_UNAFFECTED; /* Alternatively, we have a list of unaffected CPUs */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 3dad7b2079ee..d94fb45a0e34 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -35,9 +35,9 @@ * - Data Independent Timing */ #define PVM_ID_AA64PFR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_ASIMD) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) \ ) /* @@ -49,11 +49,11 @@ * Supported by KVM */ #define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_V1) \ ) /* diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 823eb4d03956..d1fa03e2a449 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -20,35 +20,35 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) u64 cptr_set = 0; /* Protected KVM does not support AArch32 guests. */ - BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY); - BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY); + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY); + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY); /* * Linux guests assume support for floating-point and Advanced SIMD. Do * not change the trapping behavior for these from the KVM default. */ - BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP), + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP), PVM_ID_AA64PFR0_ALLOW)); - BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD), + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_ASIMD), PVM_ID_AA64PFR0_ALLOW)); /* Trap RAS unless all current versions are supported */ - if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) < - ID_AA64PFR0_RAS_V1P1) { + if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) < + ID_AA64PFR0_EL1_RAS_V1P1) { hcr_set |= HCR_TERR | HCR_TEA; hcr_clear |= HCR_FIEN; } /* Trap AMU */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) { + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) { hcr_clear |= HCR_AMVOFFEN; cptr_set |= CPTR_EL2_TAM; } /* Trap SVE */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids)) + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) cptr_set |= CPTR_EL2_TZ; vcpu->arch.hcr_el2 |= hcr_set; diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index e20fa4475dac..2ebf93336437 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -92,9 +92,9 @@ static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) PVM_ID_AA64PFR0_RESTRICT_UNSIGNED); /* Spectre and Meltdown mitigation in KVM */ - set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), + set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), (u64)kvm->arch.pfr0_csv2); - set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), + set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), (u64)kvm->arch.pfr0_csv3); return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask; @@ -281,8 +281,8 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu, * No support for AArch32 guests, therefore, pKVM has no sanitized copy * of AArch32 feature id registers. */ - BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY); + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_ELx_64BIT_ONLY); return pvm_access_raz_wi(vcpu, p, r); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3234f50b8c4b..cf1fc616e00d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1077,15 +1077,15 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, switch (id) { case SYS_ID_AA64PFR0_EL1: if (!vcpu_has_sve(vcpu)) - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_AMU); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); if (kvm_vgic_global_state.type == VGIC_V3) { - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), 1); } break; case SYS_ID_AA64PFR1_EL1: @@ -1196,21 +1196,21 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, * it doesn't promise more than what is actually provided (the * guest could otherwise be covered in ectoplasmic residue). */ - csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV2_SHIFT); + csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_EL1_CSV2_SHIFT); if (csv2 > 1 || (csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED)) return -EINVAL; /* Same thing for CSV3 */ - csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV3_SHIFT); + csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_EL1_CSV3_SHIFT); if (csv3 > 1 || (csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED)) return -EINVAL; /* We can only differ with CSV[23], and anything else is an error */ val ^= read_id_reg(vcpu, rd, false); - val &= ~((0xFUL << ID_AA64PFR0_CSV2_SHIFT) | - (0xFUL << ID_AA64PFR0_CSV3_SHIFT)); + val &= ~((0xFUL << ID_AA64PFR0_EL1_CSV2_SHIFT) | + (0xFUL << ID_AA64PFR0_EL1_CSV3_SHIFT)); if (val) return -EINVAL; @@ -1825,7 +1825,7 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu, } else { u64 dfr = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT); + u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL1_EL3_SHIFT); p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) | diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index 4ea71b28f9f5..a6277dea4c7a 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -94,7 +94,7 @@ bool gic_cpuif_has_vsgi(void) { unsigned long fld, reg = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64PFR0_GIC_SHIFT); + fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64PFR0_EL1_GIC_SHIFT); return fld >= 0x3; } From 6ca2b9ca459a598b78265477d288fdec8a0fdd6d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:04 +0100 Subject: [PATCH 030/126] arm64/sysreg: Add _EL1 into ID_AA64PFR1_EL1 constant names Our standard is to include the _EL1 in the constant names for registers but we did not do that for ID_AA64PFR1_EL1, update to do so in preparation for conversion to automatic generation. No functional change. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-8-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 6 +-- arch/arm64/include/asm/el2_setup.h | 2 +- arch/arm64/include/asm/sysreg.h | 30 ++++++------- arch/arm64/kernel/cpufeature.c | 42 +++++++++---------- arch/arm64/kernel/hyp-stub.S | 2 +- arch/arm64/kernel/idreg-override.c | 6 +-- .../arm64/kvm/hyp/include/nvhe/fixed_config.h | 4 +- arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 2 +- arch/arm64/kvm/sys_regs.c | 4 +- arch/arm64/mm/mmu.c | 2 +- arch/arm64/mm/proc.S | 4 +- 12 files changed, 53 insertions(+), 53 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8ba9f1c07432..214325a7f627 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -624,16 +624,16 @@ static inline bool id_aa64pfr0_sve(u64 pfr0) static inline bool id_aa64pfr1_sme(u64 pfr1) { - u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT); + u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_SME_SHIFT); return val > 0; } static inline bool id_aa64pfr1_mte(u64 pfr1) { - u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT); + u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT); - return val >= ID_AA64PFR1_MTE; + return val >= ID_AA64PFR1_EL1_MTE; } void __init setup_cpu_features(void); diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index a011c87ec6e3..80ef55b66196 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -149,7 +149,7 @@ mov x0, xzr mrs x1, id_aa64pfr1_el1 - ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 + ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4 cbz x1, .Lset_fgt_\@ /* Disable nVHE traps of TPIDR2 and SMPRI */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 06f93aa9abb1..e72bab4452e9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -714,23 +714,23 @@ #define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2 /* id_aa64pfr1 */ -#define ID_AA64PFR1_SME_SHIFT 24 -#define ID_AA64PFR1_MPAMFRAC_SHIFT 16 -#define ID_AA64PFR1_RASFRAC_SHIFT 12 -#define ID_AA64PFR1_MTE_SHIFT 8 -#define ID_AA64PFR1_SSBS_SHIFT 4 -#define ID_AA64PFR1_BT_SHIFT 0 +#define ID_AA64PFR1_EL1_SME_SHIFT 24 +#define ID_AA64PFR1_EL1_MPAMFRAC_SHIFT 16 +#define ID_AA64PFR1_EL1_RASFRAC_SHIFT 12 +#define ID_AA64PFR1_EL1_MTE_SHIFT 8 +#define ID_AA64PFR1_EL1_SSBS_SHIFT 4 +#define ID_AA64PFR1_EL1_BT_SHIFT 0 -#define ID_AA64PFR1_SSBS_PSTATE_NI 0 -#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 -#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 -#define ID_AA64PFR1_BT_BTI 0x1 -#define ID_AA64PFR1_SME 1 +#define ID_AA64PFR1_EL1_SSBS_PSTATE_NI 0 +#define ID_AA64PFR1_EL1_SSBS_PSTATE_ONLY 1 +#define ID_AA64PFR1_EL1_SSBS_PSTATE_INSNS 2 +#define ID_AA64PFR1_EL1_BT_BTI 0x1 +#define ID_AA64PFR1_EL1_SME 1 -#define ID_AA64PFR1_MTE_NI 0x0 -#define ID_AA64PFR1_MTE_EL0 0x1 -#define ID_AA64PFR1_MTE 0x2 -#define ID_AA64PFR1_MTE_ASYMM 0x3 +#define ID_AA64PFR1_EL1_MTE_NI 0x0 +#define ID_AA64PFR1_EL1_MTE_EL0 0x1 +#define ID_AA64PFR1_EL1_MTE 0x2 +#define ID_AA64PFR1_EL1_MTE_ASYMM 0x3 /* id_aa64mmfr0 */ #define ID_AA64MMFR0_EL1_ECV_SHIFT 60 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3bda767af32d..2e19cbdab50a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -264,14 +264,14 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAMFRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_RASFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, ID_AA64PFR1_MTE_NI), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_SHIFT, 4, ID_AA64PFR1_EL1_MTE_NI), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, ID_AA64PFR1_EL1_SSBS_PSTATE_NI), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_BT_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2367,10 +2367,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, - .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .field_pos = ID_AA64PFR1_EL1_SSBS_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + .min_field_value = ID_AA64PFR1_EL1_SSBS_PSTATE_ONLY, }, #ifdef CONFIG_ARM64_CNP { @@ -2528,9 +2528,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .cpu_enable = bti_enable, .sys_reg = SYS_ID_AA64PFR1_EL1, - .field_pos = ID_AA64PFR1_BT_SHIFT, + .field_pos = ID_AA64PFR1_EL1_BT_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR1_BT_BTI, + .min_field_value = ID_AA64PFR1_EL1_BT_BTI, .sign = FTR_UNSIGNED, }, #endif @@ -2541,9 +2541,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, - .field_pos = ID_AA64PFR1_MTE_SHIFT, + .field_pos = ID_AA64PFR1_EL1_MTE_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR1_MTE, + .min_field_value = ID_AA64PFR1_EL1_MTE, .sign = FTR_UNSIGNED, .cpu_enable = cpu_enable_mte, }, @@ -2553,9 +2553,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, - .field_pos = ID_AA64PFR1_MTE_SHIFT, + .field_pos = ID_AA64PFR1_EL1_MTE_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR1_MTE_ASYMM, + .min_field_value = ID_AA64PFR1_EL1_MTE_ASYMM, .sign = FTR_UNSIGNED, }, #endif /* CONFIG_ARM64_MTE */ @@ -2577,9 +2577,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_SME, .sys_reg = SYS_ID_AA64PFR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR1_SME_SHIFT, + .field_pos = ID_AA64PFR1_EL1_SME_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR1_SME, + .min_field_value = ID_AA64PFR1_EL1_SME, .matches = has_cpuid_feature, .cpu_enable = sme_kernel_enable, }, @@ -2739,24 +2739,24 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F32MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F64MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), #endif #ifdef CONFIG_ARM64_PTR_AUTH HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif #ifdef CONFIG_ARM64_MTE - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3), #endif /* CONFIG_ARM64_MTE */ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), #ifdef CONFIG_ARM64_SME - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index f0644e945117..bce1f5f6b8c9 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -109,7 +109,7 @@ SYM_CODE_START_LOCAL(__finalise_el2) msr_s SYS_ZCR_EL2, x1 // length for EL1. .Lskip_sve: - check_override id_aa64pfr1 ID_AA64PFR1_SME_SHIFT .Linit_sme .Lskip_sme + check_override id_aa64pfr1 ID_AA64PFR1_EL1_SME_SHIFT .Linit_sme .Lskip_sme .Linit_sme: /* SME register access and priority mapping */ mrs x0, cptr_el2 // Disable SME traps diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 7b90a9b4cc0a..8c474915a11d 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -98,9 +98,9 @@ static const struct ftr_set_desc pfr1 __initconst = { .name = "id_aa64pfr1", .override = &id_aa64pfr1_override, .fields = { - FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ), - FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL), - FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter), + FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ), + FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL), + FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter), {} }, }; diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index d94fb45a0e34..fad5406fc71a 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -62,8 +62,8 @@ * - Speculative Store Bypassing */ #define PVM_ID_AA64PFR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \ - ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \ + ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \ + ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \ ) /* diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index d1fa03e2a449..05301d3b3fc2 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -66,7 +66,7 @@ static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu) u64 hcr_clear = 0; /* Memory Tagging: Trap and Treat as Untagged if not supported. */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) { + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) { hcr_set |= HCR_TID5; hcr_clear |= HCR_DCT | HCR_ATA; } diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 2ebf93336437..0f9ac25afdf4 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -106,7 +106,7 @@ static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) u64 allow_mask = PVM_ID_AA64PFR1_ALLOW; if (!kvm_has_mte(kvm)) - allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); + allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); return id_aa64pfr1_el1_sys_val & allow_mask; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index cf1fc616e00d..ff4405a6ea25 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1090,9 +1090,9 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64PFR1_EL1: if (!kvm_has_mte(vcpu->kvm)) - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_SME); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e7ad44585f40..5810eddfb48e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -686,7 +686,7 @@ static bool arm64_early_this_cpu_has_bti(void) pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1); return cpuid_feature_extract_unsigned_field(pfr1, - ID_AA64PFR1_BT_SHIFT); + ID_AA64PFR1_EL1_BT_SHIFT); } /* diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 7837a69524c5..15539da36bc3 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -434,8 +434,8 @@ SYM_FUNC_START(__cpu_setup) * (ID_AA64PFR1_EL1[11:8] > 1). */ mrs x10, ID_AA64PFR1_EL1 - ubfx x10, x10, #ID_AA64PFR1_MTE_SHIFT, #4 - cmp x10, #ID_AA64PFR1_MTE + ubfx x10, x10, #ID_AA64PFR1_EL1_MTE_SHIFT, #4 + cmp x10, #ID_AA64PFR1_EL1_MTE b.lt 1f /* Normal Tagged memory type at the corresponding MAIR index */ From ed7c138d6f82a9e75f27c9ff43262ba8158533b0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:05 +0100 Subject: [PATCH 031/126] arm64/sysreg: Standardise naming of ID_AA64MMFR0_EL1.BigEnd For some reason we refer to ID_AA64MMFR0_EL1.BigEnd as BIGENDEL. Remove the EL from the name, bringing the naming into sync with DDI0487H.a. Due to the large amount of MixedCase in this register which isn't really consistent with either the kernel style or the majority of the architecture the use of upper case is preserved. No functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-9-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 4 ++-- arch/arm64/include/asm/sysreg.h | 2 +- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 214325a7f627..d7b96dc9364b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -597,7 +597,7 @@ static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val) static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) { - return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGENDEL_SHIFT) == 0x1 || + return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGEND_SHIFT) == 0x1 || cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT) == 0x1; } @@ -738,7 +738,7 @@ static inline bool system_supports_mixed_endian(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); val = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_EL1_BIGENDEL_SHIFT); + ID_AA64MMFR0_EL1_BIGEND_SHIFT); return val == 0x1; } diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e72bab4452e9..f1430c77911a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -744,7 +744,7 @@ #define ID_AA64MMFR0_EL1_TGRAN16_SHIFT 20 #define ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT 16 #define ID_AA64MMFR0_EL1_SNSMEM_SHIFT 12 -#define ID_AA64MMFR0_EL1_BIGENDEL_SHIFT 8 +#define ID_AA64MMFR0_EL1_BIGEND_SHIFT 8 #define ID_AA64MMFR0_EL1_ASID_SHIFT 4 #define ID_AA64MMFR0_EL1_PARANGE_SHIFT 0 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2e19cbdab50a..def03583523b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -351,7 +351,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT, 4, 0), /* Linux shouldn't care about secure memory */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_SNSMEM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGENDEL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGEND_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ASID_SHIFT, 4, 0), /* * Differing PARange is fine as long as all peripherals and memory are mapped diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index fad5406fc71a..0ece26707fc0 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -74,7 +74,7 @@ * - Non-context synchronizing exception entry and exit */ #define PVM_ID_AA64MMFR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \ ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \ ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \ ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \ From 07d7d848b96b57eccafeafad023d02c7f627d494 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:06 +0100 Subject: [PATCH 032/126] arm64/sysreg: Standardise naming of ID_AA64MMFR0_EL1.ASIDBits For some reason we refer to ID_AA64MMFR0_EL1.ASIDBits as ASID. Add BITS into the name, bringing the naming into sync with DDI0487H.a. Due to the large amount of MixedCase in this register which isn't really consistent with either the kernel style or the majority of the architecture the use of upper case is preserved. No functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-10-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 6 +++--- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 2 +- arch/arm64/mm/context.c | 6 +++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f1430c77911a..b6cd9996e12b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -745,11 +745,11 @@ #define ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT 16 #define ID_AA64MMFR0_EL1_SNSMEM_SHIFT 12 #define ID_AA64MMFR0_EL1_BIGEND_SHIFT 8 -#define ID_AA64MMFR0_EL1_ASID_SHIFT 4 +#define ID_AA64MMFR0_EL1_ASIDBITS_SHIFT 4 #define ID_AA64MMFR0_EL1_PARANGE_SHIFT 0 -#define ID_AA64MMFR0_EL1_ASID_8 0x0 -#define ID_AA64MMFR0_EL1_ASID_16 0x2 +#define ID_AA64MMFR0_EL1_ASIDBITS_8 0x0 +#define ID_AA64MMFR0_EL1_ASIDBITS_16 0x2 #define ID_AA64MMFR0_EL1_TGRAN4_NI 0xf #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index def03583523b..ba44f67c5544 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -352,7 +352,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { /* Linux shouldn't care about secure memory */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_SNSMEM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGEND_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ASID_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ASIDBITS_SHIFT, 4, 0), /* * Differing PARange is fine as long as all peripherals and memory are mapped * within the minimum PARange of all CPUs diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 0ece26707fc0..0c2e474d0c9e 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -87,7 +87,7 @@ */ #define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASID), ID_AA64MMFR0_EL1_ASID_16) \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \ ) /* diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 8f38a5452d05..e1e0dca01839 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -43,17 +43,17 @@ static u32 get_cpu_asid_bits(void) { u32 asid; int fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR0_EL1), - ID_AA64MMFR0_EL1_ASID_SHIFT); + ID_AA64MMFR0_EL1_ASIDBITS_SHIFT); switch (fld) { default: pr_warn("CPU%d: Unknown ASID size (%d); assuming 8-bit\n", smp_processor_id(), fld); fallthrough; - case ID_AA64MMFR0_EL1_ASID_8: + case ID_AA64MMFR0_EL1_ASIDBITS_8: asid = 8; break; - case ID_AA64MMFR0_EL1_ASID_16: + case ID_AA64MMFR0_EL1_ASIDBITS_16: asid = 16; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index da67a75cdaad..5968a568aae2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -431,7 +431,7 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) return false; /* We can support bigger ASIDs than the CPU, but not smaller */ - fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_EL1_ASID_SHIFT); + fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_EL1_ASIDBITS_SHIFT); asid_bits = fld ? 16 : 8; if (smmu->asid_bits < asid_bits) return false; From 6fcd019359028f3c6477508b329d69e27f41d895 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Mon, 5 Sep 2022 23:54:07 +0100 Subject: [PATCH 033/126] arm64/sysreg: Standardise naming for ID_AA64MMFR1_EL1 fields In preparation for converting the ID_AA64MMFR1_EL1 system register defines to automatic generation, rename them to follow the conventions used by other automatically generated registers: * Add _EL1 in the register name. * Rename fields to match the names in the ARM ARM: * LOR -> LO * HPD -> HPDS * VHE -> VH * HADBS -> HAFDBS * SPECSEI -> SpecSEI * VMIDBITS -> VMIDBits There should be no functional change as a result of this patch. Signed-off-by: Kristina Martsenko Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-11-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 8 ++--- arch/arm64/include/asm/el2_setup.h | 2 +- arch/arm64/include/asm/sysreg.h | 36 +++++++++---------- arch/arm64/kernel/cpufeature.c | 36 +++++++++---------- arch/arm64/kernel/hyp-stub.S | 4 +-- arch/arm64/kernel/idreg-override.c | 2 +- arch/arm64/kernel/proton-pack.c | 2 +- .../arm64/kvm/hyp/include/nvhe/fixed_config.h | 12 +++---- arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +- arch/arm64/kvm/sys_regs.c | 2 +- 10 files changed, 53 insertions(+), 53 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index d7b96dc9364b..5fc43f7f3ed6 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -868,14 +868,14 @@ static inline bool cpu_has_hw_af(void) mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); return cpuid_feature_extract_unsigned_field(mmfr1, - ID_AA64MMFR1_HADBS_SHIFT); + ID_AA64MMFR1_EL1_HAFDBS_SHIFT); } static inline bool cpu_has_pan(void) { u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); return cpuid_feature_extract_unsigned_field(mmfr1, - ID_AA64MMFR1_PAN_SHIFT); + ID_AA64MMFR1_EL1_PAN_SHIFT); } #ifdef CONFIG_ARM64_AMU_EXTN @@ -896,8 +896,8 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) int vmid_bits; vmid_bits = cpuid_feature_extract_unsigned_field(mmfr1, - ID_AA64MMFR1_VMIDBITS_SHIFT); - if (vmid_bits == ID_AA64MMFR1_VMIDBITS_16) + ID_AA64MMFR1_EL1_VMIDBits_SHIFT); + if (vmid_bits == ID_AA64MMFR1_EL1_VMIDBits_16) return 16; /* diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 80ef55b66196..b6e9bea7c9ec 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -83,7 +83,7 @@ /* LORegions */ .macro __init_el2_lor mrs x1, id_aa64mmfr1_el1 - ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 + ubfx x0, x1, #ID_AA64MMFR1_EL1_LO_SHIFT, 4 cbz x0, .Lskip_lor_\@ msr_s SYS_LORC_EL1, xzr .Lskip_lor_\@: diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b6cd9996e12b..410b628fbb67 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -783,26 +783,26 @@ #endif /* id_aa64mmfr1 */ -#define ID_AA64MMFR1_ECBHB_SHIFT 60 -#define ID_AA64MMFR1_TIDCP1_SHIFT 52 -#define ID_AA64MMFR1_HCX_SHIFT 40 -#define ID_AA64MMFR1_AFP_SHIFT 44 -#define ID_AA64MMFR1_ETS_SHIFT 36 -#define ID_AA64MMFR1_TWED_SHIFT 32 -#define ID_AA64MMFR1_XNX_SHIFT 28 -#define ID_AA64MMFR1_SPECSEI_SHIFT 24 -#define ID_AA64MMFR1_PAN_SHIFT 20 -#define ID_AA64MMFR1_LOR_SHIFT 16 -#define ID_AA64MMFR1_HPD_SHIFT 12 -#define ID_AA64MMFR1_VHE_SHIFT 8 -#define ID_AA64MMFR1_VMIDBITS_SHIFT 4 -#define ID_AA64MMFR1_HADBS_SHIFT 0 +#define ID_AA64MMFR1_EL1_ECBHB_SHIFT 60 +#define ID_AA64MMFR1_EL1_TIDCP1_SHIFT 52 +#define ID_AA64MMFR1_EL1_HCX_SHIFT 40 +#define ID_AA64MMFR1_EL1_AFP_SHIFT 44 +#define ID_AA64MMFR1_EL1_ETS_SHIFT 36 +#define ID_AA64MMFR1_EL1_TWED_SHIFT 32 +#define ID_AA64MMFR1_EL1_XNX_SHIFT 28 +#define ID_AA64MMFR1_EL1_SpecSEI_SHIFT 24 +#define ID_AA64MMFR1_EL1_PAN_SHIFT 20 +#define ID_AA64MMFR1_EL1_LO_SHIFT 16 +#define ID_AA64MMFR1_EL1_HPDS_SHIFT 12 +#define ID_AA64MMFR1_EL1_VH_SHIFT 8 +#define ID_AA64MMFR1_EL1_VMIDBits_SHIFT 4 +#define ID_AA64MMFR1_EL1_HAFDBS_SHIFT 0 -#define ID_AA64MMFR1_VMIDBITS_8 0 -#define ID_AA64MMFR1_VMIDBITS_16 2 +#define ID_AA64MMFR1_EL1_VMIDBits_8 0 +#define ID_AA64MMFR1_EL1_VMIDBits_16 2 -#define ID_AA64MMFR1_TIDCP1_NI 0 -#define ID_AA64MMFR1_TIDCP1_IMP 1 +#define ID_AA64MMFR1_EL1_TIDCP1_NI 0 +#define ID_AA64MMFR1_EL1_TIDCP1_IMP 1 /* id_aa64mmfr2 */ #define ID_AA64MMFR2_EL1_E0PD_SHIFT 60 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ba44f67c5544..534819afadd5 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -362,18 +362,18 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TIDCP1_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_XNX_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_SPECSEI_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VHE_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HADBS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TWED_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_XNX_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1_SpecSEI_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_PAN_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_LO_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HPDS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_VH_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_VMIDBits_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2116,7 +2116,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, - .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .field_pos = ID_AA64MMFR1_EL1_PAN_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, @@ -2130,7 +2130,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, - .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .field_pos = ID_AA64MMFR1_EL1_PAN_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 3, @@ -2344,7 +2344,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HW_DBM, .sys_reg = SYS_ID_AA64MMFR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR1_HADBS_SHIFT, + .field_pos = ID_AA64MMFR1_EL1_HAFDBS_SHIFT, .field_width = 4, .min_field_value = 2, .matches = has_hw_dbm, @@ -2614,9 +2614,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64MMFR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR1_TIDCP1_SHIFT, + .field_pos = ID_AA64MMFR1_EL1_TIDCP1_SHIFT, .field_width = 4, - .min_field_value = ID_AA64MMFR1_TIDCP1_IMP, + .min_field_value = ID_AA64MMFR1_EL1_TIDCP1_IMP, .matches = has_cpuid_feature, .cpu_enable = cpu_trap_el0_impdef, }, @@ -2752,7 +2752,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3), #endif /* CONFIG_ARM64_MTE */ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), - HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), + HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), #ifdef CONFIG_ARM64_SME diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index bce1f5f6b8c9..2ee18c860f2a 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -142,7 +142,7 @@ SYM_CODE_START_LOCAL(__finalise_el2) msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? - ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4 + ubfx x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 cbz x1, .Lskip_sme mrs_s x1, SYS_HCRX_EL2 @@ -157,7 +157,7 @@ SYM_CODE_START_LOCAL(__finalise_el2) tbnz x1, #0, 1f // Needs to be VHE capable, obviously - check_override id_aa64mmfr1 ID_AA64MMFR1_VHE_SHIFT 2f 1f + check_override id_aa64mmfr1 ID_AA64MMFR1_EL1_VH_SHIFT 2f 1f 1: mov_q x0, HVC_STUB_ERR eret diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 8c474915a11d..95133765ed29 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -50,7 +50,7 @@ static const struct ftr_set_desc mmfr1 __initconst = { .name = "id_aa64mmfr1", .override = &id_aa64mmfr1_override, .fields = { - FIELD("vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter), + FIELD("vh", ID_AA64MMFR1_EL1_VH_SHIFT, mmfr1_vh_filter), {} }, }; diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 6ee586b4e235..fe3bc4c1c5ac 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -945,7 +945,7 @@ static bool supports_ecbhb(int scope) mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); return cpuid_feature_extract_unsigned_field(mmfr1, - ID_AA64MMFR1_ECBHB_SHIFT); + ID_AA64MMFR1_EL1_ECBHB_SHIFT); } bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 0c2e474d0c9e..1653299ff8f8 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -100,12 +100,12 @@ * - Enhanced Translation Synchronization */ #define PVM_ID_AA64MMFR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \ ) /* diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 05301d3b3fc2..b92ecdd6bdab 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -143,7 +143,7 @@ static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu) u64 hcr_set = 0; /* Trap LOR */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids)) + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids)) hcr_set |= HCR_TLOR; vcpu->arch.hcr_el2 |= hcr_set; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ff4405a6ea25..fa61793467a7 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -273,7 +273,7 @@ static bool trap_loregion(struct kvm_vcpu *vcpu, u64 val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); u32 sr = reg_to_encoding(r); - if (!(val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))) { + if (!(val & (0xfUL << ID_AA64MMFR1_EL1_LO_SHIFT))) { kvm_inject_undefined(vcpu); return false; } From 8f40baded4a14ef56da0b73e028117b146ca4584 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:08 +0100 Subject: [PATCH 034/126] arm64/sysreg: Standardise naming for ID_AA64MMFR2_EL1.VARange The kernel refers to ID_AA64MMFR2_EL1.VARange as LVA. In preparation for automatic generation of defines for the system registers bring the naming used by the kernel in sync with that of DDI0487H.a. No functional change. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-12-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 2 +- arch/arm64/include/asm/sysreg.h | 2 +- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/kernel/head.S | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 48c7963abaf3..c1fc5f7bb978 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -612,7 +612,7 @@ alternative_endif .macro offset_ttbr1, ttbr, tmp #ifdef CONFIG_ARM64_VA_BITS_52 mrs_s \tmp, SYS_ID_AA64MMFR2_EL1 - and \tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_LVA_SHIFT) + and \tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT) cbnz \tmp, .Lskipoffs_\@ orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET .Lskipoffs_\@ : diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 410b628fbb67..c80f1f7a10f1 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -815,7 +815,7 @@ #define ID_AA64MMFR2_EL1_ST_SHIFT 28 #define ID_AA64MMFR2_EL1_NV_SHIFT 24 #define ID_AA64MMFR2_EL1_CCIDX_SHIFT 20 -#define ID_AA64MMFR2_EL1_LVA_SHIFT 16 +#define ID_AA64MMFR2_EL1_VARange_SHIFT 16 #define ID_AA64MMFR2_EL1_IESB_SHIFT 12 #define ID_AA64MMFR2_EL1_LSM_SHIFT 8 #define ID_AA64MMFR2_EL1_UAO_SHIFT 4 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 534819afadd5..f927b4451613 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -388,7 +388,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_ST_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_NV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CCIDX_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_LVA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_VARange_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_LSM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_UAO_SHIFT, 4, 0), diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index d040f57d3496..b5accf53a153 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -99,7 +99,7 @@ SYM_CODE_START(primary_entry) */ #if VA_BITS > 48 mrs_s x0, SYS_ID_AA64MMFR2_EL1 - tst x0, #0xf << ID_AA64MMFR2_EL1_LVA_SHIFT + tst x0, #0xf << ID_AA64MMFR2_EL1_VARange_SHIFT mov x0, #VA_BITS mov x25, #VA_BITS_MIN csel x25, x25, x0, eq @@ -677,7 +677,7 @@ SYM_FUNC_START(__cpu_secondary_check52bitva) b.ne 2f mrs_s x0, SYS_ID_AA64MMFR2_EL1 - and x0, x0, #(0xf << ID_AA64MMFR2_EL1_LVA_SHIFT) + and x0, x0, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT) cbnz x0, 2f update_early_cpu_boot_status \ From ca951862ad3e6fa33d8ba8220b80f3fdc496821c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:09 +0100 Subject: [PATCH 035/126] arm64/sysreg: Standardise naming for ID_AA64MMFR2_EL1.CnP The kernel refers to ID_AA64MMFR2_EL1.CnP as CNP. In preparation for automatic generation of defines for the system registers bring the naming used by the kernel in sync with that of DDI0487H.a. No functional change. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-13-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 2 +- arch/arm64/kernel/cpufeature.c | 4 ++-- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c80f1f7a10f1..7795a043a8ff 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -819,7 +819,7 @@ #define ID_AA64MMFR2_EL1_IESB_SHIFT 12 #define ID_AA64MMFR2_EL1_LSM_SHIFT 8 #define ID_AA64MMFR2_EL1_UAO_SHIFT 4 -#define ID_AA64MMFR2_EL1_CNP_SHIFT 0 +#define ID_AA64MMFR2_EL1_CnP_SHIFT 0 /* id_aa64dfr0 */ #define ID_AA64DFR0_MTPMU_SHIFT 48 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f927b4451613..2de9b28ee84d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -392,7 +392,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_LSM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_UAO_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CNP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CnP_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2380,7 +2380,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_useable_cnp, .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR2_EL1_CNP_SHIFT, + .field_pos = ID_AA64MMFR2_EL1_CnP_SHIFT, .field_width = 4, .min_field_value = 1, .cpu_enable = cpu_enable_cnp, diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 1653299ff8f8..0ba290e1a791 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -120,7 +120,7 @@ * - E0PDx mechanism */ #define PVM_ID_AA64MMFR2_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CNP) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \ From 4f8456c3199dab2436dab1f8ec17cba853fa1060 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:10 +0100 Subject: [PATCH 036/126] arm64/sysreg: Standardise naming for ID_AA64PFR0_EL1 constants We generally refer to the baseline feature implemented as _IMP so in preparation for automatic generation of register defines update those for ID_AA64PFR0_EL1 to reflect this. In the case of ASIMD we don't actually use the define so just remove it. No functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-14-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 9 ++++----- arch/arm64/kernel/cpufeature.c | 8 ++++---- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7795a043a8ff..c2fffb863f08 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -702,14 +702,13 @@ #define ID_AA64PFR0_EL1_EL1_SHIFT 4 #define ID_AA64PFR0_EL1_EL0_SHIFT 0 -#define ID_AA64PFR0_EL1_AMU 0x1 -#define ID_AA64PFR0_EL1_SVE 0x1 -#define ID_AA64PFR0_EL1_RAS_V1 0x1 +#define ID_AA64PFR0_EL1_AMU_IMP 0x1 +#define ID_AA64PFR0_EL1_SVE_IMP 0x1 +#define ID_AA64PFR0_EL1_RAS_IMP 0x1 #define ID_AA64PFR0_EL1_RAS_V1P1 0x2 #define ID_AA64PFR0_EL1_FP_NI 0xf -#define ID_AA64PFR0_EL1_FP_SUPPORTED 0x0 +#define ID_AA64PFR0_EL1_FP_IMP 0x0 #define ID_AA64PFR0_EL1_ASIMD_NI 0xf -#define ID_AA64PFR0_EL1_ASIMD_SUPPORTED 0x0 #define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2de9b28ee84d..43afa9a1cd73 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2246,7 +2246,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL1_SVE_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR0_EL1_SVE, + .min_field_value = ID_AA64PFR0_EL1_SVE_IMP, .matches = has_cpuid_feature, .cpu_enable = sve_kernel_enable, }, @@ -2261,7 +2261,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL1_RAS_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR0_EL1_RAS_V1, + .min_field_value = ID_AA64PFR0_EL1_RAS_IMP, .cpu_enable = cpu_clear_disr, }, #endif /* CONFIG_ARM64_RAS_EXTN */ @@ -2280,7 +2280,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL1_AMU_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR0_EL1_AMU, + .min_field_value = ID_AA64PFR0_EL1_AMU_IMP, .cpu_enable = cpu_amu_enable, }, #endif /* CONFIG_ARM64_AMU_EXTN */ @@ -2727,7 +2727,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_EL1_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_EL1_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_EL1_SVE_IMP, CAP_HWCAP, KERNEL_HWCAP_SVE), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 0ba290e1a791..6200d53600ba 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -53,7 +53,7 @@ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_V1) \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \ ) /* From 5620b4b0371569b9ba65b756cd6d5fc6af47ddd6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:11 +0100 Subject: [PATCH 037/126] arm64/sysreg: Standardise naming for ID_AA64PFR0_EL1.AdvSIMD constants The architecture refers to the register field identifying advanced SIMD as AdvSIMD but the kernel refers to it as ASIMD. Use the architecture's naming. No functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-15-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 4 ++-- arch/arm64/kernel/cpufeature.c | 6 +++--- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 2 +- arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c2fffb863f08..78087b1a3ca4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -695,7 +695,7 @@ #define ID_AA64PFR0_EL1_SVE_SHIFT 32 #define ID_AA64PFR0_EL1_RAS_SHIFT 28 #define ID_AA64PFR0_EL1_GIC_SHIFT 24 -#define ID_AA64PFR0_EL1_ASIMD_SHIFT 20 +#define ID_AA64PFR0_EL1_AdvSIMD_SHIFT 20 #define ID_AA64PFR0_EL1_FP_SHIFT 16 #define ID_AA64PFR0_EL1_EL3_SHIFT 12 #define ID_AA64PFR0_EL1_EL2_SHIFT 8 @@ -708,7 +708,7 @@ #define ID_AA64PFR0_EL1_RAS_V1P1 0x2 #define ID_AA64PFR0_EL1_FP_NI 0xf #define ID_AA64PFR0_EL1_FP_IMP 0x0 -#define ID_AA64PFR0_EL1_ASIMD_NI 0xf +#define ID_AA64PFR0_EL1_AdvSIMD_NI 0xf #define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 43afa9a1cd73..1610b35229e4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -253,7 +253,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SVE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_RAS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_GIC_SHIFT, 4, 0), - S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_ASIMD_SHIFT, 4, ID_AA64PFR0_EL1_ASIMD_NI), + S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, ID_AA64PFR0_EL1_AdvSIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0), @@ -2710,8 +2710,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 6200d53600ba..07edfc7524c9 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -36,7 +36,7 @@ */ #define PVM_ID_AA64PFR0_ALLOW (\ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_ASIMD) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) \ ) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index b92ecdd6bdab..fc3e32709ba2 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -31,7 +31,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) */ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP), PVM_ID_AA64PFR0_ALLOW)); - BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_ASIMD), + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD), PVM_ID_AA64PFR0_ALLOW)); /* Trap RAS unless all current versions are supported */ From 53275da8dccc8f8140f24e2634c5d61e3206307d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:12 +0100 Subject: [PATCH 038/126] arm64/sysreg: Standardise naming for SSBS feature enumeration In preparation for conversion to automatic generation refresh the names given to the items in the SSBS feature enumeration to reflect our standard pattern for naming, corresponding to the architecture feature names they reflect. No functional change. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-16-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 10 +++++----- arch/arm64/kernel/cpufeature.c | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 78087b1a3ca4..d6df8fe8e61e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -720,11 +720,11 @@ #define ID_AA64PFR1_EL1_SSBS_SHIFT 4 #define ID_AA64PFR1_EL1_BT_SHIFT 0 -#define ID_AA64PFR1_EL1_SSBS_PSTATE_NI 0 -#define ID_AA64PFR1_EL1_SSBS_PSTATE_ONLY 1 -#define ID_AA64PFR1_EL1_SSBS_PSTATE_INSNS 2 -#define ID_AA64PFR1_EL1_BT_BTI 0x1 -#define ID_AA64PFR1_EL1_SME 1 +#define ID_AA64PFR1_EL1_SSBS_NI 0 +#define ID_AA64PFR1_EL1_SSBS_IMP 1 +#define ID_AA64PFR1_EL1_SSBS_SSBS2 2 +#define ID_AA64PFR1_EL1_BT_BTI 0x1 +#define ID_AA64PFR1_EL1_SME 1 #define ID_AA64PFR1_EL1_MTE_NI 0x0 #define ID_AA64PFR1_EL1_MTE_EL0 0x1 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1610b35229e4..7e58cb5b9185 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -269,7 +269,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_RASFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_SHIFT, 4, ID_AA64PFR1_EL1_MTE_NI), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, ID_AA64PFR1_EL1_SSBS_PSTATE_NI), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, ID_AA64PFR1_EL1_SSBS_NI), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_BT_SHIFT, 4, 0), ARM64_FTR_END, @@ -2370,7 +2370,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR1_EL1_SSBS_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64PFR1_EL1_SSBS_PSTATE_ONLY, + .min_field_value = ID_AA64PFR1_EL1_SSBS_IMP, }, #ifdef CONFIG_ARM64_CNP { @@ -2739,7 +2739,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F32MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F64MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SSBS_SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), #endif From 2e75b393ff2e45a32e9621e1b27cd7854122c1c8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:13 +0100 Subject: [PATCH 039/126] arm64/sysreg: Standardise naming for MTE feature enumeration In preparation for conversion to automatic generation refresh the names given to the items in the MTE feture enumeration to reflect our standard pattern for naming, corresponding to the architecture feature names they reflect. No functional change. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-17-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/sysreg.h | 6 +++--- arch/arm64/kernel/cpufeature.c | 8 ++++---- arch/arm64/mm/proc.S | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5fc43f7f3ed6..79bb9e58d9c6 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -633,7 +633,7 @@ static inline bool id_aa64pfr1_mte(u64 pfr1) { u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT); - return val >= ID_AA64PFR1_EL1_MTE; + return val >= ID_AA64PFR1_EL1_MTE_MTE2; } void __init setup_cpu_features(void); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d6df8fe8e61e..385242a6e380 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -727,9 +727,9 @@ #define ID_AA64PFR1_EL1_SME 1 #define ID_AA64PFR1_EL1_MTE_NI 0x0 -#define ID_AA64PFR1_EL1_MTE_EL0 0x1 -#define ID_AA64PFR1_EL1_MTE 0x2 -#define ID_AA64PFR1_EL1_MTE_ASYMM 0x3 +#define ID_AA64PFR1_EL1_MTE_IMP 0x1 +#define ID_AA64PFR1_EL1_MTE_MTE2 0x2 +#define ID_AA64PFR1_EL1_MTE_MTE3 0x3 /* id_aa64mmfr0 */ #define ID_AA64MMFR0_EL1_ECV_SHIFT 60 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7e58cb5b9185..2afc0a852359 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2543,7 +2543,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_EL1_MTE_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR1_EL1_MTE, + .min_field_value = ID_AA64PFR1_EL1_MTE_MTE2, .sign = FTR_UNSIGNED, .cpu_enable = cpu_enable_mte, }, @@ -2555,7 +2555,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_EL1_MTE_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR1_EL1_MTE_ASYMM, + .min_field_value = ID_AA64PFR1_EL1_MTE_MTE3, .sign = FTR_UNSIGNED, }, #endif /* CONFIG_ARM64_MTE */ @@ -2748,8 +2748,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif #ifdef CONFIG_ARM64_MTE - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE_MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE_MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3), #endif /* CONFIG_ARM64_MTE */ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 15539da36bc3..5f7784ee6044 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -435,7 +435,7 @@ SYM_FUNC_START(__cpu_setup) */ mrs x10, ID_AA64PFR1_EL1 ubfx x10, x10, #ID_AA64PFR1_EL1_MTE_SHIFT, #4 - cmp x10, #ID_AA64PFR1_EL1_MTE + cmp x10, #ID_AA64PFR1_EL1_MTE_MTE2 b.lt 1f /* Normal Tagged memory type at the corresponding MAIR index */ From cf7fdbbe83a765e8ad2e0a556ec2c4e675bc3893 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:14 +0100 Subject: [PATCH 040/126] arm64/sysreg: Standardise naming of ID_AA64PFR1_EL1 fractional version fields The naming for fractional versions fields in ID_AA64PFR1_EL1 does not align with that in the architecture, lacking underscores and using upper case where the architecture uses lower case. In preparation for automatic generation of defines bring the code in sync with the architecture, no functional change. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-18-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 4 ++-- arch/arm64/kernel/cpufeature.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 385242a6e380..aa1e970eddd5 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -714,8 +714,8 @@ /* id_aa64pfr1 */ #define ID_AA64PFR1_EL1_SME_SHIFT 24 -#define ID_AA64PFR1_EL1_MPAMFRAC_SHIFT 16 -#define ID_AA64PFR1_EL1_RASFRAC_SHIFT 12 +#define ID_AA64PFR1_EL1_MPAM_frac_SHIFT 16 +#define ID_AA64PFR1_EL1_RAS_frac_SHIFT 12 #define ID_AA64PFR1_EL1_MTE_SHIFT 8 #define ID_AA64PFR1_EL1_SSBS_SHIFT 4 #define ID_AA64PFR1_EL1_BT_SHIFT 0 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2afc0a852359..636f6b207ef6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -265,8 +265,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAMFRAC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_RASFRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_RAS_frac_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_SHIFT, 4, ID_AA64PFR1_EL1_MTE_NI), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, ID_AA64PFR1_EL1_SSBS_NI), From 514e9b2aed04e86c93363b69318e23b19fb9ef78 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:15 +0100 Subject: [PATCH 041/126] arm64/sysreg: Standardise naming of ID_AA64PFR1_EL1 BTI enumeration In preparation for automatic generation of constants update the define for BTI being implemented to the convention we are using, no functional change. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-19-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 2 +- arch/arm64/kernel/cpufeature.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index aa1e970eddd5..05401a9b4709 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -723,7 +723,7 @@ #define ID_AA64PFR1_EL1_SSBS_NI 0 #define ID_AA64PFR1_EL1_SSBS_IMP 1 #define ID_AA64PFR1_EL1_SSBS_SSBS2 2 -#define ID_AA64PFR1_EL1_BT_BTI 0x1 +#define ID_AA64PFR1_EL1_BT_IMP 0x1 #define ID_AA64PFR1_EL1_SME 1 #define ID_AA64PFR1_EL1_MTE_NI 0x0 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 636f6b207ef6..5bf158750e02 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2530,7 +2530,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_EL1_BT_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR1_EL1_BT_BTI, + .min_field_value = ID_AA64PFR1_EL1_BT_IMP, .sign = FTR_UNSIGNED, }, #endif @@ -2741,7 +2741,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #endif HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SSBS_SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_BT_IMP, CAP_HWCAP, KERNEL_HWCAP_BTI), #endif #ifdef CONFIG_ARM64_PTR_AUTH HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), From ed9075201c5a8e89afa969e2bbe947a5ea9c4620 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:16 +0100 Subject: [PATCH 042/126] arm64/sysreg: Standardise naming of ID_AA64PFR1_EL1 SME enumeration In preparation for automatic generation of constants update the define for SME being implemented to the convention we are using, no functional change. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-20-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 2 +- arch/arm64/kernel/cpufeature.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 05401a9b4709..99a8c433db23 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -724,7 +724,7 @@ #define ID_AA64PFR1_EL1_SSBS_IMP 1 #define ID_AA64PFR1_EL1_SSBS_SSBS2 2 #define ID_AA64PFR1_EL1_BT_IMP 0x1 -#define ID_AA64PFR1_EL1_SME 1 +#define ID_AA64PFR1_EL1_SME_IMP 1 #define ID_AA64PFR1_EL1_MTE_NI 0x0 #define ID_AA64PFR1_EL1_MTE_IMP 0x1 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5bf158750e02..c2e42feb3e1a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2579,7 +2579,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR1_EL1_SME_SHIFT, .field_width = 4, - .min_field_value = ID_AA64PFR1_EL1_SME, + .min_field_value = ID_AA64PFR1_EL1_SME_IMP, .matches = has_cpuid_feature, .cpu_enable = sme_kernel_enable, }, @@ -2756,7 +2756,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), #ifdef CONFIG_ARM64_SME - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME, CAP_HWCAP, KERNEL_HWCAP_SME), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME_IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), From cfaa32108aeaf2f4137e4eb01c82e6d37a732b07 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:17 +0100 Subject: [PATCH 043/126] arm64/sysreg: Convert HCRX_EL2 to automatic generation Convert HCRX_EL2 to be automatically generated as per DDI04187H.a, n functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-21-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 4 ---- arch/arm64/tools/sysreg | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 99a8c433db23..74690363ae39 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -533,7 +533,6 @@ #define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) #define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) -#define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2) #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) @@ -1023,9 +1022,6 @@ #define TRFCR_ELx_ExTRE BIT(1) #define TRFCR_ELx_E0TRE BIT(0) -/* HCRX_EL2 definitions */ -#define HCRX_EL2_SMPME_MASK (1 << 5) - /* GIC Hypervisor interface registers */ /* ICH_MISR_EL2 bit definitions */ #define ICH_MISR_EOI (1 << 0) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 185bc5b0faf7..746d4d40133e 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -516,6 +516,22 @@ Sysreg ZCR_EL2 3 4 1 2 0 Fields ZCR_ELx EndSysreg +Sysreg HCRX_EL2 3 4 1 2 2 +Res0 63:12 +Field 11 MSCEn +Field 10 MCE2 +Field 9 CMOW +Field 8 VFNMI +Field 7 VINMI +Field 6 TALLINT +Field 5 SMPME +Field 4 FGTnXS +Field 3 FnXS +Field 2 EnASR +Field 1 EnALS +Field 0 EnAS0 +EndSysreg + Sysreg SMPRIMAP_EL2 3 4 1 2 5 Field 63:60 P15 Field 59:56 P14 From 0b7ed4d8f59c252c7b0339947f69da6770979c0a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:18 +0100 Subject: [PATCH 044/126] arm64/sysreg: Convert ID_AA64MMFR0_EL1 to automatic generation Automatically generate most of the defines for ID_AA64MMFR0_EL1 mostly as per DDI0487H.a. Due to the large amount of MixedCase in this register which isn't really consistent with either the kernel style or the majority of the architecture the use of upper case is preserved. We also leave in place a number of min/max/default value definitions which don't flow from the architecture definitions. No functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-22-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 30 -------------- arch/arm64/tools/sysreg | 73 +++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 30 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 74690363ae39..787d9fa3c8e0 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -199,7 +199,6 @@ #define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) #define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) -#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) #define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) @@ -731,42 +730,13 @@ #define ID_AA64PFR1_EL1_MTE_MTE3 0x3 /* id_aa64mmfr0 */ -#define ID_AA64MMFR0_EL1_ECV_SHIFT 60 -#define ID_AA64MMFR0_EL1_FGT_SHIFT 56 -#define ID_AA64MMFR0_EL1_EXS_SHIFT 44 -#define ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT 40 -#define ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT 36 -#define ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT 32 -#define ID_AA64MMFR0_EL1_TGRAN4_SHIFT 28 -#define ID_AA64MMFR0_EL1_TGRAN64_SHIFT 24 -#define ID_AA64MMFR0_EL1_TGRAN16_SHIFT 20 -#define ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT 16 -#define ID_AA64MMFR0_EL1_SNSMEM_SHIFT 12 -#define ID_AA64MMFR0_EL1_BIGEND_SHIFT 8 -#define ID_AA64MMFR0_EL1_ASIDBITS_SHIFT 4 -#define ID_AA64MMFR0_EL1_PARANGE_SHIFT 0 - -#define ID_AA64MMFR0_EL1_ASIDBITS_8 0x0 -#define ID_AA64MMFR0_EL1_ASIDBITS_16 0x2 - -#define ID_AA64MMFR0_EL1_TGRAN4_NI 0xf #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7 -#define ID_AA64MMFR0_EL1_TGRAN64_NI 0xf #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7 -#define ID_AA64MMFR0_EL1_TGRAN16_NI 0x0 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf -#define ID_AA64MMFR0_EL1_PARANGE_32 0x0 -#define ID_AA64MMFR0_EL1_PARANGE_36 0x1 -#define ID_AA64MMFR0_EL1_PARANGE_40 0x2 -#define ID_AA64MMFR0_EL1_PARANGE_42 0x3 -#define ID_AA64MMFR0_EL1_PARANGE_44 0x4 -#define ID_AA64MMFR0_EL1_PARANGE_48 0x5 -#define ID_AA64MMFR0_EL1_PARANGE_52 0x6 - #define ARM64_MIN_PARANGE_BITS 32 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 746d4d40133e..c1d800c0d4d5 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -315,6 +315,79 @@ Enum 3:0 WFxT EndEnum EndSysreg +Sysreg ID_AA64MMFR0_EL1 3 0 0 7 0 +Enum 63:60 ECV + 0b0000 NI + 0b0001 IMP + 0b0010 CNTPOFF +EndEnum +Enum 59:56 FGT + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 55:48 +Enum 47:44 EXS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 43:40 TGRAN4_2 + 0b0000 TGRAN4 + 0b0001 NI + 0b0010 IMP + 0b0011 52_BIT +EndEnum +Enum 39:36 TGRAN64_2 + 0b0000 TGRAN64 + 0b0001 NI + 0b0010 IMP +EndEnum +Enum 35:32 TGRAN16_2 + 0b0000 TGRAN16 + 0b0001 NI + 0b0010 IMP + 0b0011 52_BIT +EndEnum +Enum 31:28 TGRAN4 + 0b0000 IMP + 0b0001 52_BIT + 0b1111 NI +EndEnum +Enum 27:24 TGRAN64 + 0b0000 IMP + 0b1111 NI +EndEnum +Enum 23:20 TGRAN16 + 0b0000 NI + 0b0001 IMP + 0b0010 52_BIT +EndEnum +Enum 19:16 BIGENDEL0 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 SNSMEM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 BIGEND + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 ASIDBITS + 0b0000 8 + 0b0010 16 +EndEnum +Enum 3:0 PARANGE + 0b0000 32 + 0b0001 36 + 0b0010 40 + 0b0011 42 + 0b0100 44 + 0b0101 48 + 0b0110 52 +EndEnum +EndSysreg + Sysreg SCTLR_EL1 3 0 1 0 0 Field 63 TIDCP Field 62 SPINMASK From 7d751b313dd9b4cb3a33812cf827decb48352d0b Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Mon, 5 Sep 2022 23:54:19 +0100 Subject: [PATCH 045/126] arm64/sysreg: Convert ID_AA64MMFR1_EL1 to automatic generation Convert ID_AA64MMFR1_EL1 to be automatically generated as per DDI0487H.a plus ECBHB which was RES0 in DDI0487H.a but has been subsequently defined and is already present in mainline. No functional changes. Signed-off-by: Kristina Martsenko Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-23-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 23 ----------- arch/arm64/tools/sysreg | 71 +++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 23 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 787d9fa3c8e0..5dcd8dff53b3 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -199,7 +199,6 @@ #define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) #define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) -#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) #define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) @@ -750,28 +749,6 @@ #define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48 #endif -/* id_aa64mmfr1 */ -#define ID_AA64MMFR1_EL1_ECBHB_SHIFT 60 -#define ID_AA64MMFR1_EL1_TIDCP1_SHIFT 52 -#define ID_AA64MMFR1_EL1_HCX_SHIFT 40 -#define ID_AA64MMFR1_EL1_AFP_SHIFT 44 -#define ID_AA64MMFR1_EL1_ETS_SHIFT 36 -#define ID_AA64MMFR1_EL1_TWED_SHIFT 32 -#define ID_AA64MMFR1_EL1_XNX_SHIFT 28 -#define ID_AA64MMFR1_EL1_SpecSEI_SHIFT 24 -#define ID_AA64MMFR1_EL1_PAN_SHIFT 20 -#define ID_AA64MMFR1_EL1_LO_SHIFT 16 -#define ID_AA64MMFR1_EL1_HPDS_SHIFT 12 -#define ID_AA64MMFR1_EL1_VH_SHIFT 8 -#define ID_AA64MMFR1_EL1_VMIDBits_SHIFT 4 -#define ID_AA64MMFR1_EL1_HAFDBS_SHIFT 0 - -#define ID_AA64MMFR1_EL1_VMIDBits_8 0 -#define ID_AA64MMFR1_EL1_VMIDBits_16 2 - -#define ID_AA64MMFR1_EL1_TIDCP1_NI 0 -#define ID_AA64MMFR1_EL1_TIDCP1_IMP 1 - /* id_aa64mmfr2 */ #define ID_AA64MMFR2_EL1_E0PD_SHIFT 60 #define ID_AA64MMFR2_EL1_EVT_SHIFT 56 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index c1d800c0d4d5..f56758d7dc04 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -388,6 +388,77 @@ Enum 3:0 PARANGE EndEnum EndSysreg +Sysreg ID_AA64MMFR1_EL1 3 0 0 7 1 +Enum 63:60 ECBHB + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 59:56 CMOW + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 55:52 TIDCP1 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 51:48 nTLBPA + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 47:44 AFP + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 43:40 HCX + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 39:36 ETS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 35:32 TWED + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 31:28 XNX + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 SpecSEI + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 PAN + 0b0000 NI + 0b0001 IMP + 0b0010 PAN2 + 0b0011 PAN3 +EndEnum +Enum 19:16 LO + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 HPDS + 0b0000 NI + 0b0001 IMP + 0b0010 HPDS2 +EndEnum +Enum 11:8 VH + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 VMIDBits + 0b0000 8 + 0b0010 16 +EndEnum +Enum 3:0 HAFDBS + 0b0000 NI + 0b0001 AF + 0b0010 DBM +EndEnum +EndSysreg + Sysreg SCTLR_EL1 3 0 1 0 0 Field 63 TIDCP Field 62 SPINMASK From cfa3a6c55b61a062afa1ccd8bca45fd270dd3d0f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:20 +0100 Subject: [PATCH 046/126] arm64/sysreg: Convert ID_AA64MMFR2_EL1 to automatic generation Convert ID_AA64MMFR2_EL1 defines to automatic generation as per DDI0487H.a, no functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-24-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 19 ---------- arch/arm64/tools/sysreg | 67 +++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5dcd8dff53b3..62c5c596b18f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -199,8 +199,6 @@ #define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) #define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) -#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) - #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) #define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) @@ -749,23 +747,6 @@ #define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48 #endif -/* id_aa64mmfr2 */ -#define ID_AA64MMFR2_EL1_E0PD_SHIFT 60 -#define ID_AA64MMFR2_EL1_EVT_SHIFT 56 -#define ID_AA64MMFR2_EL1_BBM_SHIFT 52 -#define ID_AA64MMFR2_EL1_TTL_SHIFT 48 -#define ID_AA64MMFR2_EL1_FWB_SHIFT 40 -#define ID_AA64MMFR2_EL1_IDS_SHIFT 36 -#define ID_AA64MMFR2_EL1_AT_SHIFT 32 -#define ID_AA64MMFR2_EL1_ST_SHIFT 28 -#define ID_AA64MMFR2_EL1_NV_SHIFT 24 -#define ID_AA64MMFR2_EL1_CCIDX_SHIFT 20 -#define ID_AA64MMFR2_EL1_VARange_SHIFT 16 -#define ID_AA64MMFR2_EL1_IESB_SHIFT 12 -#define ID_AA64MMFR2_EL1_LSM_SHIFT 8 -#define ID_AA64MMFR2_EL1_UAO_SHIFT 4 -#define ID_AA64MMFR2_EL1_CnP_SHIFT 0 - /* id_aa64dfr0 */ #define ID_AA64DFR0_MTPMU_SHIFT 48 #define ID_AA64DFR0_TRBE_SHIFT 44 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index f56758d7dc04..711e8055397b 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -459,6 +459,73 @@ Enum 3:0 HAFDBS EndEnum EndSysreg +Sysreg ID_AA64MMFR2_EL1 3 0 0 7 2 +Enum 63:60 E0PD + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 59:56 EVT + 0b0000 NI + 0b0001 IMP + 0b0010 TTLBxS +EndEnum +Enum 55:52 BBM + 0b0000 0 + 0b0001 1 + 0b0010 2 +EndEnum +Enum 51:48 TTL + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 47:44 +Enum 43:40 FWB + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 39:36 IDS + 0b0000 0x0 + 0b0001 0x18 +EndEnum +Enum 35:32 AT + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 31:28 ST + 0b0000 39 + 0b0001 48_47 +EndEnum +Enum 27:24 NV + 0b0000 NI + 0b0001 IMP + 0b0010 NV2 +EndEnum +Enum 23:20 CCIDX + 0b0000 32 + 0b0001 64 +EndEnum +Enum 19:16 VARange + 0b0000 48 + 0b0001 52 +EndEnum +Enum 15:12 IESB + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 LSM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 UAO + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 CnP + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg SCTLR_EL1 3 0 1 0 0 Field 63 TIDCP Field 62 SPINMASK From cea08f2bf406416c9f54366e1328f2ce329bf4fd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:21 +0100 Subject: [PATCH 047/126] arm64/sysreg: Convert ID_AA64PFR0_EL1 to automatic generation Automatically generate the constants for ID_AA64PFR0_EL1 as per DDI0487I.a, no functional changes. The generic defines for the ELx fields are left in place as they remain useful. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-25-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 24 ----------- arch/arm64/tools/sysreg | 76 +++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 24 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 62c5c596b18f..2f032ea7e7e8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -190,7 +190,6 @@ #define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) #define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) -#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) #define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) #define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) @@ -681,29 +680,6 @@ #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) /* id_aa64pfr0 */ -#define ID_AA64PFR0_EL1_CSV3_SHIFT 60 -#define ID_AA64PFR0_EL1_CSV2_SHIFT 56 -#define ID_AA64PFR0_EL1_DIT_SHIFT 48 -#define ID_AA64PFR0_EL1_AMU_SHIFT 44 -#define ID_AA64PFR0_EL1_MPAM_SHIFT 40 -#define ID_AA64PFR0_EL1_SEL2_SHIFT 36 -#define ID_AA64PFR0_EL1_SVE_SHIFT 32 -#define ID_AA64PFR0_EL1_RAS_SHIFT 28 -#define ID_AA64PFR0_EL1_GIC_SHIFT 24 -#define ID_AA64PFR0_EL1_AdvSIMD_SHIFT 20 -#define ID_AA64PFR0_EL1_FP_SHIFT 16 -#define ID_AA64PFR0_EL1_EL3_SHIFT 12 -#define ID_AA64PFR0_EL1_EL2_SHIFT 8 -#define ID_AA64PFR0_EL1_EL1_SHIFT 4 -#define ID_AA64PFR0_EL1_EL0_SHIFT 0 - -#define ID_AA64PFR0_EL1_AMU_IMP 0x1 -#define ID_AA64PFR0_EL1_SVE_IMP 0x1 -#define ID_AA64PFR0_EL1_RAS_IMP 0x1 -#define ID_AA64PFR0_EL1_RAS_V1P1 0x2 -#define ID_AA64PFR0_EL1_FP_NI 0xf -#define ID_AA64PFR0_EL1_FP_IMP 0x0 -#define ID_AA64PFR0_EL1_AdvSIMD_NI 0xf #define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 711e8055397b..e0b990369a4a 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -46,6 +46,82 @@ # feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration # item ACCDATA) though it may be more taseful to do something else. +Sysreg ID_AA64PFR0_EL1 3 0 0 4 0 +Enum 63:60 CSV3 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 59:56 CSV2 + 0b0000 NI + 0b0001 IMP + 0b0010 CSV2_2 + 0b0011 CSV2_3 +EndEnum +Enum 55:52 RME + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 51:48 DIT + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 47:44 AMU + 0b0000 NI + 0b0001 IMP + 0b0010 V1P1 +EndEnum +Enum 43:40 MPAM + 0b0000 0 + 0b0001 1 +EndEnum +Enum 39:36 SEL2 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 35:32 SVE + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 31:28 RAS + 0b0000 NI + 0b0001 IMP + 0b0010 V1P1 +EndEnum +Enum 27:24 GIC + 0b0000 NI + 0b0001 IMP + 0b0010 V4P1 +EndEnum +Enum 23:20 AdvSIMD + 0b0000 IMP + 0b0001 FP16 + 0b1111 NI +EndEnum +Enum 19:16 FP + 0b0000 IMP + 0b0001 FP16 + 0b1111 NI +EndEnum +Enum 15:12 EL3 + 0b0000 NI + 0b0001 IMP + 0b0010 AARCH32 +EndEnum +Enum 11:8 EL2 + 0b0000 NI + 0b0001 IMP + 0b0010 AARCH32 +EndEnum +Enum 7:4 EL1 + 0b0001 IMP + 0b0010 AARCH32 +EndEnum +Enum 3:0 EL0 + 0b0001 IMP + 0b0010 AARCH32 +EndEnum +EndSysreg + Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4 Res0 63:60 Enum 59:56 F64MM From ef4ba5a635bfbd98c0893430ddfc9baf9fbccee6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:22 +0100 Subject: [PATCH 048/126] arm64/sysreg: Convert ID_AA64PFR1_EL1 to automatic generation Convert ID_AA64PFR1_EL1 to be automatically generated as per DDI04187H.a, no functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-26-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 21 --------------- arch/arm64/tools/sysreg | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 2f032ea7e7e8..e78d9dc1024d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -190,8 +190,6 @@ #define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) #define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) -#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) - #define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) #define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) @@ -683,25 +681,6 @@ #define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2 -/* id_aa64pfr1 */ -#define ID_AA64PFR1_EL1_SME_SHIFT 24 -#define ID_AA64PFR1_EL1_MPAM_frac_SHIFT 16 -#define ID_AA64PFR1_EL1_RAS_frac_SHIFT 12 -#define ID_AA64PFR1_EL1_MTE_SHIFT 8 -#define ID_AA64PFR1_EL1_SSBS_SHIFT 4 -#define ID_AA64PFR1_EL1_BT_SHIFT 0 - -#define ID_AA64PFR1_EL1_SSBS_NI 0 -#define ID_AA64PFR1_EL1_SSBS_IMP 1 -#define ID_AA64PFR1_EL1_SSBS_SSBS2 2 -#define ID_AA64PFR1_EL1_BT_IMP 0x1 -#define ID_AA64PFR1_EL1_SME_IMP 1 - -#define ID_AA64PFR1_EL1_MTE_NI 0x0 -#define ID_AA64PFR1_EL1_MTE_IMP 0x1 -#define ID_AA64PFR1_EL1_MTE_MTE2 0x2 -#define ID_AA64PFR1_EL1_MTE_MTE3 0x3 - /* id_aa64mmfr0 */ #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index e0b990369a4a..ca821f9279aa 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -122,6 +122,51 @@ Enum 3:0 EL0 EndEnum EndSysreg +Sysreg ID_AA64PFR1_EL1 3 0 0 4 1 +Res0 63:40 +Enum 39:36 NMI + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 35:32 CSV2_frac + 0b0000 NI + 0b0001 CSV2_1p1 + 0b0010 CSV2_1p2 +EndEnum +Enum 31:28 RNDR_trap + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 SME + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 23:20 +Enum 19:16 MPAM_frac + 0b0000 MINOR_0 + 0b0001 MINOR_1 +EndEnum +Enum 15:12 RAS_frac + 0b0000 NI + 0b0001 RASv1p1 +EndEnum +Enum 11:8 MTE + 0b0000 NI + 0b0001 IMP + 0b0010 MTE2 + 0b0011 MTE3 +EndEnum +Enum 7:4 SSBS + 0b0000 NI + 0b0001 IMP + 0b0010 SSBS2 +EndEnum +Enum 3:0 BT + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4 Res0 63:60 Enum 59:56 F64MM From 0a45f3980db0446febd21ae6dff475060fd39a39 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:23 +0100 Subject: [PATCH 049/126] arm64/sysreg: Convert TIPDR_EL1 to automatic generation Convert TPIDR_EL1 to automatic generation as per DDI0487H.a, no functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-27-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 2 -- arch/arm64/tools/sysreg | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e78d9dc1024d..61a24737d517 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -429,8 +429,6 @@ #define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) #define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) -#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) - #define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7) #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index ca821f9279aa..644ad47e28e5 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -774,6 +774,10 @@ Sysreg CONTEXTIDR_EL1 3 0 13 0 1 Fields CONTEXTIDR_ELx EndSysreg +Sysreg TPIDR_EL1 3 0 13 0 4 +Field 63:0 ThreadID +EndSysreg + Sysreg CLIDR_EL1 3 1 0 0 1 Res0 63:47 Field 46:33 Ttypen From b1179b75e9a83cfa80accb402dd15ffa6b352080 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:24 +0100 Subject: [PATCH 050/126] arm64/sysreg: Convert SCXTNUM_EL1 to automatic generation Convert SCXTNUM_EL1 to automatic generation as per DDI0487H.a, no functional changes. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-28-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 2 -- arch/arm64/tools/sysreg | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 61a24737d517..c7876363c6e5 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -429,8 +429,6 @@ #define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) #define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) -#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7) - #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 644ad47e28e5..6326dca99f3c 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -778,6 +778,10 @@ Sysreg TPIDR_EL1 3 0 13 0 4 Field 63:0 ThreadID EndSysreg +Sysreg SCXTNUM_EL1 3 0 13 0 7 +Field 63:0 SoftwareContextNumber +EndSysreg + Sysreg CLIDR_EL1 3 1 0 0 1 Res0 63:47 Field 46:33 Ttypen From 3e9ae1ce508b8d69762abd1b8b9d9f97d6715b9b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2022 23:54:25 +0100 Subject: [PATCH 051/126] arm64/sysreg: Add defintion for ALLINT The FEAT_NMI extension adds a new system register ALLINT for controlling NMI related interrupt masking, add a definition of this register as per DDI0487H.a. Signed-off-by: Mark Brown Reviewed-by: Kristina Martsenko Link: https://lore.kernel.org/r/20220905225425.1871461-29-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/tools/sysreg | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 6326dca99f3c..5bccf7712ec3 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -761,6 +761,12 @@ Sysreg SMCR_EL1 3 0 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg ALLINT 3 0 4 3 0 +Res0 63:14 +Field 13 ALLINT +Res0 12:0 +EndSysreg + Sysreg FAR_EL1 3 0 6 0 0 Field 63:0 ADDR EndSysreg From b3adc3844e1dedd05fa8d09633eaa8ddc5ddcece Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 30 Aug 2022 11:48:31 +0100 Subject: [PATCH 052/126] arm64: module: move find_section to header Move it to the header so that the implementation can be shared by the alternatives code. Signed-off-by: Joey Gouly Cc: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220830104833.34636-2-joey.gouly@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/module.h | 15 +++++++++++++++ arch/arm64/kernel/module.c | 15 --------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 4e7fa2623896..22f1be47cc92 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -65,4 +65,19 @@ static inline bool plt_entry_is_initialized(const struct plt_entry *e) return e->adrp || e->add || e->br; } +static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; + } + + return NULL; +} + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f2d4bb14bfab..76b41e4ca9fa 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -476,21 +476,6 @@ overflow: return -ENOEXEC; } -static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - const char *name) -{ - const Elf_Shdr *s, *se; - const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { - if (strcmp(name, secstrs + s->sh_name) == 0) - return s; - } - - return NULL; -} - static inline void __init_plt(struct plt_entry *plt, unsigned long addr) { *plt = get_plt_entry(addr, plt); From 4e3bca8f7cdd3b658ee7ad700fdce95b5e13a441 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 30 Aug 2022 11:48:32 +0100 Subject: [PATCH 053/126] arm64: alternative: patch alternatives in the vDSO Make it possible to use alternatives in the vDSO, so that better implementations can be used if possible. Signed-off-by: Joey Gouly Cc: Will Deacon Cc: Vincenzo Frascino Cc: Mark Rutland Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220830104833.34636-3-joey.gouly@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/vdso.h | 3 +++ arch/arm64/kernel/alternative.c | 28 ++++++++++++++++++++++++++++ arch/arm64/kernel/vdso.c | 3 --- arch/arm64/kernel/vdso/vdso.lds.S | 7 +++++++ 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index f99dcb94b438..b4ae32109932 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -26,6 +26,9 @@ (void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \ }) +extern char vdso_start[], vdso_end[]; +extern char vdso32_start[], vdso32_end[]; + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_H */ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 9bcaa5eacf16..a97775963f35 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -10,11 +10,14 @@ #include #include +#include #include #include #include #include +#include #include +#include #include #define __ALT_PTR(a, f) ((void *)&(a)->f + (a)->f) @@ -192,6 +195,30 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu } } +void apply_alternatives_vdso(void) +{ + struct alt_region region; + const struct elf64_hdr *hdr; + const struct elf64_shdr *shdr; + const struct elf64_shdr *alt; + DECLARE_BITMAP(all_capabilities, ARM64_NPATCHABLE); + + bitmap_fill(all_capabilities, ARM64_NPATCHABLE); + + hdr = (struct elf64_hdr *)vdso_start; + shdr = (void *)hdr + hdr->e_shoff; + alt = find_section(hdr, shdr, ".altinstructions"); + if (!alt) + return; + + region = (struct alt_region){ + .begin = (void *)hdr + alt->sh_offset, + .end = (void *)hdr + alt->sh_offset + alt->sh_size, + }; + + __apply_alternatives(®ion, false, &all_capabilities[0]); +} + /* * We might be patching the stop_machine state machine, so implement a * really simple polling protocol here. @@ -225,6 +252,7 @@ static int __apply_alternatives_multi_stop(void *unused) void __init apply_alternatives_all(void) { + apply_alternatives_vdso(); /* better not try code patching on a live SMP system */ stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask); } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index a61fc4f989b3..ac93a2ee9c07 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -29,9 +29,6 @@ #include #include -extern char vdso_start[], vdso_end[]; -extern char vdso32_start[], vdso32_end[]; - enum vdso_abi { VDSO_ABI_AA64, VDSO_ABI_AA32, diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index e69fb4aaaf3e..6028f1fe2d1c 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -48,6 +48,13 @@ SECTIONS PROVIDE (_etext = .); PROVIDE (etext = .); + . = ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .dynamic : { *(.dynamic) } :text :dynamic .rela.dyn : ALIGN(8) { *(.rela .rela*) } From 9025cebf12d1763de36d5e09e2b0a1e4f9b13b28 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 30 Aug 2022 11:48:33 +0100 Subject: [PATCH 054/126] arm64: vdso: use SYS_CNTVCTSS_EL0 for gettimeofday If FEAT_ECV is implemented, the self-synchronized counter CNTVCTSS_EL0 can be used, removing the need for an ISB. Signed-off-by: Joey Gouly Cc: Will Deacon Cc: Vincenzo Frascino Cc: Mark Rutland Cc: Andre Przywara Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220830104833.34636-4-joey.gouly@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/vdso/gettimeofday.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 4f7a629df81f..764d13e2916c 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -7,8 +7,10 @@ #ifndef __ASSEMBLY__ +#include #include #include +#include #define VDSO_HAS_CLOCK_GETRES 1 @@ -78,11 +80,20 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return 0; /* - * This isb() is required to prevent that the counter value + * If FEAT_ECV is available, use the self-synchronizing counter. + * Otherwise the isb is required to prevent that the counter value * is speculated. - */ - isb(); - asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); + */ + asm volatile( + ALTERNATIVE("isb\n" + "mrs %0, cntvct_el0", + "nop\n" + __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (res) + : + : "memory"); + arch_counter_enforce_ordering(res); return res; From 16283c54a6c79b589001763421738db96de3ae4e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 1 Sep 2022 14:06:38 +0100 Subject: [PATCH 055/126] arm64: stacktrace: fix kerneldoc comments Many of the comment blocks in the arm64 stacktrace code are *almost* kerneldoc, but not quite. Convert them to kerneldoc, as was presumably originally intended. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Cc: Fuad Tabba Cc: Kalesh Singh Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20220901130646.1316937-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace/common.h | 28 +++++++++++++--------- arch/arm64/include/asm/stacktrace/nvhe.h | 4 ++-- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h index f58eb944c46f..b8b20ef5aa5d 100644 --- a/arch/arm64/include/asm/stacktrace/common.h +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -38,9 +38,8 @@ struct stack_info { enum stack_type type; }; -/* - * A snapshot of a frame record or fp/lr register values, along with some - * accounting information necessary for robust unwinding. +/** + * struct unwind_state - state used for robust unwinding. * * @fp: The fp value in the frame record (or the real fp) * @pc: The lr value in the frame record (or the real lr) @@ -113,27 +112,34 @@ static inline void unwind_init_common(struct unwind_state *state, state->prev_type = STACK_TYPE_UNKNOWN; } -/* - * stack_trace_translate_fp_fn() - Translates a non-kernel frame pointer to - * a kernel address. +/** + * typedef stack_trace_translate_fp_fn() - Translates a non-kernel frame + * pointer to a kernel address. * * @fp: the frame pointer to be updated to its kernel address. * @type: the stack type associated with frame pointer @fp * - * Returns true and success and @fp is updated to the corresponding - * kernel virtual address; otherwise returns false. + * Return: true if the VA can be translated, false otherwise. + * + * Upon success @fp is updated to the corresponding kernel virtual address. */ typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp, enum stack_type type); -/* - * on_accessible_stack_fn() - Check whether a stack range is on any - * of the possible stacks. +/** + * typedef on_accessible_stack_fn() - Check whether a stack range is on any of + * the possible stacks. * * @tsk: task whose stack is being unwound * @sp: stack address being checked * @size: size of the stack range being checked * @info: stack unwinding context + * + * Return: true if the stack range is accessible, false otherwise. + * + * Upon success @info is updated with information for the relevant stack. + * + * Upon failure @info is updated with the UNKNOWN stack. */ typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk, unsigned long sp, unsigned long size, diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h index d5527b600390..25ab83a315a7 100644 --- a/arch/arm64/include/asm/stacktrace/nvhe.h +++ b/arch/arm64/include/asm/stacktrace/nvhe.h @@ -20,8 +20,8 @@ #include -/* - * kvm_nvhe_unwind_init - Start an unwind from the given nVHE HYP fp and pc +/** + * kvm_nvhe_unwind_init() - Start an unwind from the given nVHE HYP fp and pc * * @state : unwind_state to initialize * @fp : frame pointer at which to start the unwinding. From bc8d75212d735ac9624c1d3532ad371ec9e570ae Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 1 Sep 2022 14:06:39 +0100 Subject: [PATCH 056/126] arm64: stacktrace: simplify unwind_next_common() Currently unwind_next_common() takes a pointer to a stack_info which is only ever used within unwind_next_common(). Make it a local variable and simplify callers. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh Reviewed-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Cc: Fuad Tabba Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20220901130646.1316937-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace/common.h | 12 ++++++------ arch/arm64/kernel/stacktrace.c | 3 +-- arch/arm64/kvm/hyp/nvhe/stacktrace.c | 4 +--- arch/arm64/kvm/stacktrace.c | 4 +--- 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h index b8b20ef5aa5d..0fbae7c78b70 100644 --- a/arch/arm64/include/asm/stacktrace/common.h +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -146,27 +146,27 @@ typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk, struct stack_info *info); static inline int unwind_next_common(struct unwind_state *state, - struct stack_info *info, on_accessible_stack_fn accessible, stack_trace_translate_fp_fn translate_fp) { + struct stack_info info; unsigned long fp = state->fp, kern_fp = fp; struct task_struct *tsk = state->task; if (fp & 0x7) return -EINVAL; - if (!accessible(tsk, fp, 16, info)) + if (!accessible(tsk, fp, 16, &info)) return -EINVAL; - if (test_bit(info->type, state->stacks_done)) + if (test_bit(info.type, state->stacks_done)) return -EINVAL; /* * If fp is not from the current address space perform the necessary * translation before dereferencing it to get the next fp. */ - if (translate_fp && !translate_fp(&kern_fp, info->type)) + if (translate_fp && !translate_fp(&kern_fp, info.type)) return -EINVAL; /* @@ -183,7 +183,7 @@ static inline int unwind_next_common(struct unwind_state *state, * stack to another, it's never valid to unwind back to that first * stack. */ - if (info->type == state->prev_type) { + if (info.type == state->prev_type) { if (fp <= state->prev_fp) return -EINVAL; } else { @@ -197,7 +197,7 @@ static inline int unwind_next_common(struct unwind_state *state, state->fp = READ_ONCE(*(unsigned long *)(kern_fp)); state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8)); state->prev_fp = fp; - state->prev_type = info->type; + state->prev_type = info.type; return 0; } diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ce190ee18a20..056fb045d0e0 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -103,14 +103,13 @@ static int notrace unwind_next(struct unwind_state *state) { struct task_struct *tsk = state->task; unsigned long fp = state->fp; - struct stack_info info; int err; /* Final frame; nothing to unwind */ if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) return -ENOENT; - err = unwind_next_common(state, &info, on_accessible_stack, NULL); + err = unwind_next_common(state, on_accessible_stack, NULL); if (err) return err; diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c index 58f645ad66bc..50a1fa6b5c04 100644 --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -71,9 +71,7 @@ static bool on_accessible_stack(const struct task_struct *tsk, static int unwind_next(struct unwind_state *state) { - struct stack_info info; - - return unwind_next_common(state, &info, on_accessible_stack, NULL); + return unwind_next_common(state, on_accessible_stack, NULL); } static void notrace unwind(struct unwind_state *state, diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c index 949d19d603fb..b9cf551d9d31 100644 --- a/arch/arm64/kvm/stacktrace.c +++ b/arch/arm64/kvm/stacktrace.c @@ -97,9 +97,7 @@ static bool on_accessible_stack(const struct task_struct *tsk, static int unwind_next(struct unwind_state *state) { - struct stack_info info; - - return unwind_next_common(state, &info, on_accessible_stack, + return unwind_next_common(state, on_accessible_stack, kvm_nvhe_stack_kern_va); } From b532ab5f23389e2141d8b586608f6435f83d5ecd Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 1 Sep 2022 14:06:40 +0100 Subject: [PATCH 057/126] arm64: stacktrace: rename unwind_next_common() -> unwind_next_frame_record() The unwind_next_common() function unwinds a single frame record. There are other unwind steps (e.g. unwinding through trampolines) which are handled in the regular kernel unwinder, and in future there may be other common unwind helpers. Clarify the purpose of unwind_next_common() by renaming it to unwind_next_frame_record(). At the same time, add commentary, and delete the redundant comment at the top of asm/stacktrace/common.h. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh Reviewed-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Cc: Fuad Tabba Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20220901130646.1316937-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace/common.h | 23 ++++++++++++---------- arch/arm64/kernel/stacktrace.c | 2 +- arch/arm64/kvm/hyp/nvhe/stacktrace.c | 2 +- arch/arm64/kvm/stacktrace.c | 4 ++-- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h index 0fbae7c78b70..a74fa301fe95 100644 --- a/arch/arm64/include/asm/stacktrace/common.h +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -2,13 +2,6 @@ /* * Common arm64 stack unwinder code. * - * To implement a new arm64 stack unwinder: - * 1) Include this header - * - * 2) Call into unwind_next_common() from your top level unwind - * function, passing it the validation and translation callbacks - * (though the later can be NULL if no translation is required). - * * See: arch/arm64/kernel/stacktrace.c for the reference implementation. * * Copyright (C) 2012 ARM Ltd. @@ -145,9 +138,19 @@ typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk, unsigned long sp, unsigned long size, struct stack_info *info); -static inline int unwind_next_common(struct unwind_state *state, - on_accessible_stack_fn accessible, - stack_trace_translate_fp_fn translate_fp) +/** + * unwind_next_frame_record() - Unwind to the next frame record. + * + * @state: the current unwind state. + * @accessible: determines whether the frame record is accessible + * @translate_fp: translates the fp prior to access (may be NULL) + * + * Return: 0 upon success, an error code otherwise. + */ +static inline int +unwind_next_frame_record(struct unwind_state *state, + on_accessible_stack_fn accessible, + stack_trace_translate_fp_fn translate_fp) { struct stack_info info; unsigned long fp = state->fp, kern_fp = fp; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 056fb045d0e0..4c8865e495fe 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -109,7 +109,7 @@ static int notrace unwind_next(struct unwind_state *state) if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) return -ENOENT; - err = unwind_next_common(state, on_accessible_stack, NULL); + err = unwind_next_frame_record(state, on_accessible_stack, NULL); if (err) return err; diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c index 50a1fa6b5c04..579b46aa9a55 100644 --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -71,7 +71,7 @@ static bool on_accessible_stack(const struct task_struct *tsk, static int unwind_next(struct unwind_state *state) { - return unwind_next_common(state, on_accessible_stack, NULL); + return unwind_next_frame_record(state, on_accessible_stack, NULL); } static void notrace unwind(struct unwind_state *state, diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c index b9cf551d9d31..b69c18a26567 100644 --- a/arch/arm64/kvm/stacktrace.c +++ b/arch/arm64/kvm/stacktrace.c @@ -97,8 +97,8 @@ static bool on_accessible_stack(const struct task_struct *tsk, static int unwind_next(struct unwind_state *state) { - return unwind_next_common(state, on_accessible_stack, - kvm_nvhe_stack_kern_va); + return unwind_next_frame_record(state, on_accessible_stack, + kvm_nvhe_stack_kern_va); } static void unwind(struct unwind_state *state, From 75758d511432c129db39b50dd3c108e65dd1a2b1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 1 Sep 2022 14:06:41 +0100 Subject: [PATCH 058/126] arm64: stacktrace: move SDEI stack helpers to stacktrace code For clarity and ease of maintenance, it would be helpful for all the stack helpers to be in the same place. Move the SDEI stack helpers into the stacktrace code where all the other stack helpers live. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh Reviewed-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Cc: Fuad Tabba Cc: James Morse Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20220901130646.1316937-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sdei.h | 17 ------------ arch/arm64/include/asm/stacktrace.h | 40 ++++++++++++++++++++++++++++- arch/arm64/kernel/sdei.c | 32 ----------------------- arch/arm64/kernel/stacktrace.c | 11 ++++++-- 4 files changed, 48 insertions(+), 52 deletions(-) diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h index 7bea1d705dd6..4292d9bafb9d 100644 --- a/arch/arm64/include/asm/sdei.h +++ b/arch/arm64/include/asm/sdei.h @@ -43,22 +43,5 @@ unsigned long do_sdei_event(struct pt_regs *regs, unsigned long sdei_arch_get_entry_point(int conduit); #define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x) -struct stack_info; - -bool _on_sdei_stack(unsigned long sp, unsigned long size, - struct stack_info *info); -static inline bool on_sdei_stack(unsigned long sp, unsigned long size, - struct stack_info *info) -{ - if (!IS_ENABLED(CONFIG_VMAP_STACK)) - return false; - if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) - return false; - if (in_nmi()) - return _on_sdei_stack(sp, size, info); - - return false; -} - #endif /* __ASSEMBLY__ */ #endif /* __ASM_SDEI_H */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 6ebdcdff77f5..fa2df1ea22eb 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -54,7 +54,45 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size, } #else static inline bool on_overflow_stack(unsigned long sp, unsigned long size, - struct stack_info *info) { return false; } + struct stack_info *info) +{ + return false; +} +#endif + +#if defined(CONFIG_ARM_SDE_INTERFACE) && defined(CONFIG_VMAP_STACK) +DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); +DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); + +static inline bool on_sdei_normal_stack(unsigned long sp, unsigned long size, + struct stack_info *info) +{ + unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); + unsigned long high = low + SDEI_STACK_SIZE; + + return on_stack(sp, size, low, high, STACK_TYPE_SDEI_NORMAL, info); +} + +static inline bool on_sdei_critical_stack(unsigned long sp, unsigned long size, + struct stack_info *info) +{ + unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); + unsigned long high = low + SDEI_STACK_SIZE; + + return on_stack(sp, size, low, high, STACK_TYPE_SDEI_CRITICAL, info); +} +#else +static inline bool on_sdei_normal_stack(unsigned long sp, unsigned long size, + struct stack_info *info) +{ + return false; +} + +static inline bool on_sdei_critical_stack(unsigned long sp, unsigned long size, + struct stack_info *info) +{ + return false; +} #endif #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index d20620a1c51a..d56e170e1ca7 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -162,38 +162,6 @@ static int init_sdei_scs(void) return err; } -static bool on_sdei_normal_stack(unsigned long sp, unsigned long size, - struct stack_info *info) -{ - unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); - unsigned long high = low + SDEI_STACK_SIZE; - - return on_stack(sp, size, low, high, STACK_TYPE_SDEI_NORMAL, info); -} - -static bool on_sdei_critical_stack(unsigned long sp, unsigned long size, - struct stack_info *info) -{ - unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); - unsigned long high = low + SDEI_STACK_SIZE; - - return on_stack(sp, size, low, high, STACK_TYPE_SDEI_CRITICAL, info); -} - -bool _on_sdei_stack(unsigned long sp, unsigned long size, struct stack_info *info) -{ - if (!IS_ENABLED(CONFIG_VMAP_STACK)) - return false; - - if (on_sdei_critical_stack(sp, size, info)) - return true; - - if (on_sdei_normal_stack(sp, size, info)) - return true; - - return false; -} - unsigned long sdei_arch_get_entry_point(int conduit) { /* diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 4c8865e495fe..edf9edca2055 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -86,8 +86,15 @@ static bool on_accessible_stack(const struct task_struct *tsk, return true; if (on_overflow_stack(sp, size, info)) return true; - if (on_sdei_stack(sp, size, info)) - return true; + + if (IS_ENABLED(CONFIG_VMAP_STACK) && + IS_ENABLED(CONFIG_ARM_SDE_INTERFACE) && + in_nmi()) { + if (on_sdei_critical_stack(sp, size, info)) + return true; + if (on_sdei_normal_stack(sp, size, info)) + return true; + } return false; } From 36f9a8793c16da01dffe0718b66c884933b06b98 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 1 Sep 2022 14:06:42 +0100 Subject: [PATCH 059/126] arm64: stacktrace: add stackinfo_on_stack() helper Factor the core predicate out of on_stack() into a helper which can be used on a pre-populated stack_info. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh Reviewed-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Cc: Fuad Tabba Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20220901130646.1316937-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace/common.h | 29 ++++++++++++++++------ 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h index a74fa301fe95..81c21378b1ac 100644 --- a/arch/arm64/include/asm/stacktrace/common.h +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -65,21 +65,34 @@ struct unwind_state { struct task_struct *task; }; +static inline bool stackinfo_on_stack(const struct stack_info *info, + unsigned long sp, unsigned long size) +{ + if (!info->low) + return false; + + if (sp < info->low || sp + size < sp || sp + size > info->high) + return false; + + return true; +} + static inline bool on_stack(unsigned long sp, unsigned long size, unsigned long low, unsigned long high, enum stack_type type, struct stack_info *info) { - if (!low) + struct stack_info tmp = { + .low = low, + .high = high, + .type = type, + }; + + if (!stackinfo_on_stack(&tmp, sp, size)) return false; - if (sp < low || sp + size < sp || sp + size > high) - return false; + if (info) + *info = tmp; - if (info) { - info->low = low; - info->high = high; - info->type = type; - } return true; } From d1f684e46bbd43eac5c6fb00906c57425d7022a6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 1 Sep 2022 14:06:43 +0100 Subject: [PATCH 060/126] arm64: stacktrace: rework stack boundary discovery In subsequent patches we'll want to acquire the stack boundaries ahead-of-time, and we'll need to be able to acquire the relevant stack_info regardless of whether we have an object the happens to be on the stack. This patch replaces the on_XXX_stack() helpers with stackinfo_get_XXX() helpers, with the caller being responsible for the checking whether an object is on a relevant stack. For the moment this is moved into the on_accessible_stack() functions, making these slightly larger; subsequent patches will remove the on_accessible_stack() functions and simplify the logic. The on_irq_stack() and on_task_stack() helpers are kept as these are used by IRQ entry sequences and stackleak respectively. As they're only used as predicates, the stack_info pointer parameter is removed in both cases. As the on_accessible_stack() functions are always passed a non-NULL info pointer, these now update info unconditionally. When updating the type to STACK_TYPE_UNKNOWN, the low/high bounds are also modified, but as these will not be consumed this should have no adverse affect. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh Reviewed-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Cc: Fuad Tabba Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20220901130646.1316937-7-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/include/asm/stacktrace.h | 78 +++++++++++++--------- arch/arm64/include/asm/stacktrace/common.h | 28 +++----- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/stacktrace.c | 61 +++++++++++------ arch/arm64/kvm/hyp/nvhe/stacktrace.c | 37 +++++++--- arch/arm64/kvm/stacktrace.c | 37 +++++++--- 7 files changed, 151 insertions(+), 94 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 86eb0bfe3b38..61883518fc50 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -410,7 +410,7 @@ long get_tagged_addr_ctrl(struct task_struct *task); * The top of the current task's task stack */ #define current_top_of_stack() ((unsigned long)current->stack + THREAD_SIZE) -#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1, NULL)) +#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1)) #endif /* __ASSEMBLY__ */ #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index fa2df1ea22eb..aad0c6258721 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -22,77 +22,91 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); -static inline bool on_irq_stack(unsigned long sp, unsigned long size, - struct stack_info *info) +static inline struct stack_info stackinfo_get_irq(void) { unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); unsigned long high = low + IRQ_STACK_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_IRQ, info); + return (struct stack_info) { + .low = low, + .high = high, + .type = STACK_TYPE_IRQ, + }; } -static inline bool on_task_stack(const struct task_struct *tsk, - unsigned long sp, unsigned long size, - struct stack_info *info) +static inline bool on_irq_stack(unsigned long sp, unsigned long size) +{ + struct stack_info info = stackinfo_get_irq(); + return stackinfo_on_stack(&info, sp, size); +} + +static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk) { unsigned long low = (unsigned long)task_stack_page(tsk); unsigned long high = low + THREAD_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_TASK, info); + return (struct stack_info) { + .low = low, + .high = high, + .type = STACK_TYPE_TASK, + }; +} + +static inline bool on_task_stack(const struct task_struct *tsk, + unsigned long sp, unsigned long size) +{ + struct stack_info info = stackinfo_get_task(tsk); + return stackinfo_on_stack(&info, sp, size); } #ifdef CONFIG_VMAP_STACK DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); -static inline bool on_overflow_stack(unsigned long sp, unsigned long size, - struct stack_info *info) +static inline struct stack_info stackinfo_get_overflow(void) { unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack); unsigned long high = low + OVERFLOW_STACK_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info); + return (struct stack_info) { + .low = low, + .high = high, + .type = STACK_TYPE_OVERFLOW, + }; } #else -static inline bool on_overflow_stack(unsigned long sp, unsigned long size, - struct stack_info *info) -{ - return false; -} +#define stackinfo_get_overflow() stackinfo_get_unknown() #endif #if defined(CONFIG_ARM_SDE_INTERFACE) && defined(CONFIG_VMAP_STACK) DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); -static inline bool on_sdei_normal_stack(unsigned long sp, unsigned long size, - struct stack_info *info) +static inline struct stack_info stackinfo_get_sdei_normal(void) { unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); unsigned long high = low + SDEI_STACK_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_SDEI_NORMAL, info); + return (struct stack_info) { + .low = low, + .high = high, + .type = STACK_TYPE_SDEI_NORMAL, + }; } -static inline bool on_sdei_critical_stack(unsigned long sp, unsigned long size, - struct stack_info *info) +static inline struct stack_info stackinfo_get_sdei_critical(void) { unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); unsigned long high = low + SDEI_STACK_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_SDEI_CRITICAL, info); + return (struct stack_info) { + .low = low, + .high = high, + .type = STACK_TYPE_SDEI_CRITICAL, + }; } #else -static inline bool on_sdei_normal_stack(unsigned long sp, unsigned long size, - struct stack_info *info) -{ - return false; -} - -static inline bool on_sdei_critical_stack(unsigned long sp, unsigned long size, - struct stack_info *info) -{ - return false; -} +#define stackinfo_get_sdei_normal() stackinfo_get_unknown() +#define stackinfo_get_sdei_critical() stackinfo_get_unknown() #endif #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h index 81c21378b1ac..3c3bda34bb87 100644 --- a/arch/arm64/include/asm/stacktrace/common.h +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -65,6 +65,15 @@ struct unwind_state { struct task_struct *task; }; +static inline struct stack_info stackinfo_get_unknown(void) +{ + return (struct stack_info) { + .low = 0, + .high = 0, + .type = STACK_TYPE_UNKNOWN, + }; +} + static inline bool stackinfo_on_stack(const struct stack_info *info, unsigned long sp, unsigned long size) { @@ -77,25 +86,6 @@ static inline bool stackinfo_on_stack(const struct stack_info *info, return true; } -static inline bool on_stack(unsigned long sp, unsigned long size, - unsigned long low, unsigned long high, - enum stack_type type, struct stack_info *info) -{ - struct stack_info tmp = { - .low = low, - .high = high, - .type = type, - }; - - if (!stackinfo_on_stack(&tmp, sp, size)) - return false; - - if (info) - *info = tmp; - - return true; -} - static inline void unwind_init_common(struct unwind_state *state, struct task_struct *task) { diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index eb7c08dfb834..00c05da7112c 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -121,7 +121,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || - on_irq_stack(addr, sizeof(unsigned long), NULL); + on_irq_stack(addr, sizeof(unsigned long)); } /** diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index edf9edca2055..ca56fd732c2a 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -67,36 +67,55 @@ static inline void unwind_init_from_task(struct unwind_state *state, state->pc = thread_saved_pc(task); } -/* - * We can only safely access per-cpu stacks from current in a non-preemptible - * context. - */ static bool on_accessible_stack(const struct task_struct *tsk, unsigned long sp, unsigned long size, struct stack_info *info) { - if (info) - info->type = STACK_TYPE_UNKNOWN; + struct stack_info tmp; - if (on_task_stack(tsk, sp, size, info)) - return true; + tmp = stackinfo_get_task(tsk); + if (stackinfo_on_stack(&tmp, sp, size)) + goto found; + + /* + * We can only safely access per-cpu stacks when unwinding the current + * task in a non-preemptible context. + */ if (tsk != current || preemptible()) - return false; - if (on_irq_stack(sp, size, info)) - return true; - if (on_overflow_stack(sp, size, info)) - return true; + goto not_found; - if (IS_ENABLED(CONFIG_VMAP_STACK) && - IS_ENABLED(CONFIG_ARM_SDE_INTERFACE) && - in_nmi()) { - if (on_sdei_critical_stack(sp, size, info)) - return true; - if (on_sdei_normal_stack(sp, size, info)) - return true; - } + tmp = stackinfo_get_irq(); + if (stackinfo_on_stack(&tmp, sp, size)) + goto found; + tmp = stackinfo_get_overflow(); + if (stackinfo_on_stack(&tmp, sp, size)) + goto found; + + /* + * We can only safely access SDEI stacks which unwinding the current + * task in an NMI context. + */ + if (!IS_ENABLED(CONFIG_VMAP_STACK) || + !IS_ENABLED(CONFIG_ARM_SDE_INTERFACE) || + !in_nmi()) + goto not_found; + + tmp = stackinfo_get_sdei_normal(); + if (stackinfo_on_stack(&tmp, sp, size)) + goto found; + + tmp = stackinfo_get_sdei_critical(); + if (stackinfo_on_stack(&tmp, sp, size)) + goto found; + +not_found: + *info = stackinfo_get_unknown(); return false; + +found: + *info = tmp; + return true; } /* diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c index 579b46aa9a55..5da0d44f61b7 100644 --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -39,34 +39,51 @@ static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc) DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace); -static bool on_overflow_stack(unsigned long sp, unsigned long size, - struct stack_info *info) +static struct stack_info stackinfo_get_overflow(void) { unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack); unsigned long high = low + OVERFLOW_STACK_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info); + return (struct stack_info) { + .low = low, + .high = high, + .type = STACK_TYPE_OVERFLOW, + }; } -static bool on_hyp_stack(unsigned long sp, unsigned long size, - struct stack_info *info) +static struct stack_info stackinfo_get_hyp(void) { struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); unsigned long high = params->stack_hyp_va; unsigned long low = high - PAGE_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_HYP, info); + return (struct stack_info) { + .low = low, + .high = high, + .type = STACK_TYPE_HYP, + }; } static bool on_accessible_stack(const struct task_struct *tsk, unsigned long sp, unsigned long size, struct stack_info *info) { - if (info) - info->type = STACK_TYPE_UNKNOWN; + struct stack_info tmp; - return (on_overflow_stack(sp, size, info) || - on_hyp_stack(sp, size, info)); + tmp = stackinfo_get_overflow(); + if (stackinfo_on_stack(&tmp, sp, size)) + goto found; + + tmp = stackinfo_get_hyp(); + if (stackinfo_on_stack(&tmp, sp, size)) + goto found; + + *info = stackinfo_get_unknown(); + return false; + +found: + *info = tmp; + return true; } static int unwind_next(struct unwind_state *state) diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c index b69c18a26567..26927344a263 100644 --- a/arch/arm64/kvm/stacktrace.c +++ b/arch/arm64/kvm/stacktrace.c @@ -62,37 +62,54 @@ static bool kvm_nvhe_stack_kern_va(unsigned long *addr, return true; } -static bool on_overflow_stack(unsigned long sp, unsigned long size, - struct stack_info *info) +static struct stack_info stackinfo_get_overflow(void) { struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base; unsigned long high = low + OVERFLOW_STACK_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info); + return (struct stack_info) { + .low = low, + .high = high, + .type = STACK_TYPE_OVERFLOW, + }; } -static bool on_hyp_stack(unsigned long sp, unsigned long size, - struct stack_info *info) +static struct stack_info stackinfo_get_hyp(void) { struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); unsigned long low = (unsigned long)stacktrace_info->stack_base; unsigned long high = low + PAGE_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_HYP, info); + return (struct stack_info) { + .low = low, + .high = high, + .type = STACK_TYPE_HYP, + }; } static bool on_accessible_stack(const struct task_struct *tsk, unsigned long sp, unsigned long size, struct stack_info *info) { - if (info) - info->type = STACK_TYPE_UNKNOWN; + struct stack_info tmp; - return (on_overflow_stack(sp, size, info) || - on_hyp_stack(sp, size, info)); + tmp = stackinfo_get_overflow(); + if (stackinfo_on_stack(&tmp, sp, size)) + goto found; + + tmp = stackinfo_get_hyp(); + if (stackinfo_on_stack(&tmp, sp, size)) + goto found; + + *info = stackinfo_get_unknown(); + return false; + +found: + *info = tmp; + return true; } static int unwind_next(struct unwind_state *state) From bd8abd68836b5c2b668afc4fb46d85d687779dec Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 1 Sep 2022 14:06:44 +0100 Subject: [PATCH 061/126] arm64: stacktrace: remove stack type from fp translator In subsequent patches we'll remove the stack_type enum, and move the FP translation logic out of the raw FP unwind code. In preparation for doing so, this patch removes the type parameter from the FP translation callback, and modifies kvm_nvhe_stack_kern_va() to determine the relevant stack directly. So that kvm_nvhe_stack_kern_va() can use the stackinfo_*() helpers, these are moved earlier in the file, but are not modified in any way. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh Reviewed-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Cc: Fuad Tabba Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20220901130646.1316937-8-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace/common.h | 6 +- arch/arm64/kvm/stacktrace.c | 96 ++++++++++++---------- 2 files changed, 56 insertions(+), 46 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h index 3c3bda34bb87..2f972441f572 100644 --- a/arch/arm64/include/asm/stacktrace/common.h +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -113,14 +113,12 @@ static inline void unwind_init_common(struct unwind_state *state, * pointer to a kernel address. * * @fp: the frame pointer to be updated to its kernel address. - * @type: the stack type associated with frame pointer @fp * * Return: true if the VA can be translated, false otherwise. * * Upon success @fp is updated to the corresponding kernel virtual address. */ -typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp, - enum stack_type type); +typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp); /** * typedef on_accessible_stack_fn() - Check whether a stack range is on any of @@ -172,7 +170,7 @@ unwind_next_frame_record(struct unwind_state *state, * If fp is not from the current address space perform the necessary * translation before dereferencing it to get the next fp. */ - if (translate_fp && !translate_fp(&kern_fp, info.type)) + if (translate_fp && !translate_fp(&kern_fp)) return -EINVAL; /* diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c index 26927344a263..7658c5db47c1 100644 --- a/arch/arm64/kvm/stacktrace.c +++ b/arch/arm64/kvm/stacktrace.c @@ -21,47 +21,6 @@ #include -/* - * kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to a kernel VAs - * - * The nVHE hypervisor stack is mapped in the flexible 'private' VA range, to - * allow for guard pages below the stack. Consequently, the fixed offset address - * translation macros won't work here. - * - * The kernel VA is calculated as an offset from the kernel VA of the hypervisor - * stack base. - * - * Returns true on success and updates @addr to its corresponding kernel VA; - * otherwise returns false. - */ -static bool kvm_nvhe_stack_kern_va(unsigned long *addr, - enum stack_type type) -{ - struct kvm_nvhe_stacktrace_info *stacktrace_info; - unsigned long hyp_base, kern_base, hyp_offset; - - stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); - - switch (type) { - case STACK_TYPE_HYP: - kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page); - hyp_base = (unsigned long)stacktrace_info->stack_base; - break; - case STACK_TYPE_OVERFLOW: - kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack); - hyp_base = (unsigned long)stacktrace_info->overflow_stack_base; - break; - default: - return false; - } - - hyp_offset = *addr - hyp_base; - - *addr = kern_base + hyp_offset; - - return true; -} - static struct stack_info stackinfo_get_overflow(void) { struct kvm_nvhe_stacktrace_info *stacktrace_info @@ -90,6 +49,59 @@ static struct stack_info stackinfo_get_hyp(void) }; } +/* + * kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to a kernel VAs + * + * The nVHE hypervisor stack is mapped in the flexible 'private' VA range, to + * allow for guard pages below the stack. Consequently, the fixed offset address + * translation macros won't work here. + * + * The kernel VA is calculated as an offset from the kernel VA of the hypervisor + * stack base. + * + * Returns true on success and updates @addr to its corresponding kernel VA; + * otherwise returns false. + */ +static bool kvm_nvhe_stack_kern_va(unsigned long *addr, unsigned long size) +{ + struct kvm_nvhe_stacktrace_info *stacktrace_info; + unsigned long hyp_base, kern_base, hyp_offset; + struct stack_info stack; + + stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); + + stack = stackinfo_get_hyp(); + if (stackinfo_on_stack(&stack, *addr, size)) { + kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page); + hyp_base = (unsigned long)stacktrace_info->stack_base; + goto found; + } + + stack = stackinfo_get_overflow(); + if (stackinfo_on_stack(&stack, *addr, size)) { + kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack); + hyp_base = (unsigned long)stacktrace_info->overflow_stack_base; + goto found; + } + + return false; + +found: + hyp_offset = *addr - hyp_base; + + *addr = kern_base + hyp_offset; + + return true; +} + +/* + * Convert a KVN nVHE HYP frame record address to a kernel VA + */ +static bool kvm_nvhe_stack_kern_record_va(unsigned long *addr) +{ + return kvm_nvhe_stack_kern_va(addr, 16); +} + static bool on_accessible_stack(const struct task_struct *tsk, unsigned long sp, unsigned long size, struct stack_info *info) @@ -115,7 +127,7 @@ found: static int unwind_next(struct unwind_state *state) { return unwind_next_frame_record(state, on_accessible_stack, - kvm_nvhe_stack_kern_va); + kvm_nvhe_stack_kern_record_va); } static void unwind(struct unwind_state *state, From 8df137300d1964c3810991aa2fe17a105348b647 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 1 Sep 2022 14:06:45 +0100 Subject: [PATCH 062/126] arm64: stacktrace: track all stack boundaries explicitly Currently we call an on_accessible_stack() callback for each step of the unwinder, requiring redundant work to be performed in the core of the unwind loop (e.g. disabling preemption around accesses to per-cpu variables containing stack boundaries). To prevent unwind loops which go through a stack multiple times, we have to track the set of unwound stacks, requiring a stack_type enum which needs to cater for all the stacks of all possible callees. To prevent loops within a stack, we must track the prior FP values. This patch reworks the unwinder to minimize the work in the core of the unwinder, and to remove the need for the stack_type enum. The set of accessible stacks (and their boundaries) are determined at the start of the unwind, and the current stack is tracked during the unwind, with completed stacks removed from the set of accessible stacks. This makes the boundary checks more accurate (e.g. detecting overlapped frame records), and removes the need for separate tracking of the prior FP and visited stacks. Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh Reviewed-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Cc: Fuad Tabba Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20220901130646.1316937-9-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace.h | 5 - arch/arm64/include/asm/stacktrace/common.h | 164 ++++++++++----------- arch/arm64/kernel/stacktrace.c | 91 +++++------- arch/arm64/kvm/hyp/nvhe/stacktrace.c | 35 ++--- arch/arm64/kvm/stacktrace.c | 36 ++--- 5 files changed, 132 insertions(+), 199 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index aad0c6258721..5a0edb064ea4 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -30,7 +30,6 @@ static inline struct stack_info stackinfo_get_irq(void) return (struct stack_info) { .low = low, .high = high, - .type = STACK_TYPE_IRQ, }; } @@ -48,7 +47,6 @@ static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk return (struct stack_info) { .low = low, .high = high, - .type = STACK_TYPE_TASK, }; } @@ -70,7 +68,6 @@ static inline struct stack_info stackinfo_get_overflow(void) return (struct stack_info) { .low = low, .high = high, - .type = STACK_TYPE_OVERFLOW, }; } #else @@ -89,7 +86,6 @@ static inline struct stack_info stackinfo_get_sdei_normal(void) return (struct stack_info) { .low = low, .high = high, - .type = STACK_TYPE_SDEI_NORMAL, }; } @@ -101,7 +97,6 @@ static inline struct stack_info stackinfo_get_sdei_critical(void) return (struct stack_info) { .low = low, .high = high, - .type = STACK_TYPE_SDEI_CRITICAL, }; } #else diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h index 2f972441f572..638008f48597 100644 --- a/arch/arm64/include/asm/stacktrace/common.h +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -9,26 +9,12 @@ #ifndef __ASM_STACKTRACE_COMMON_H #define __ASM_STACKTRACE_COMMON_H -#include -#include #include #include -enum stack_type { - STACK_TYPE_UNKNOWN, - STACK_TYPE_TASK, - STACK_TYPE_IRQ, - STACK_TYPE_OVERFLOW, - STACK_TYPE_SDEI_NORMAL, - STACK_TYPE_SDEI_CRITICAL, - STACK_TYPE_HYP, - __NR_STACK_TYPES -}; - struct stack_info { unsigned long low; unsigned long high; - enum stack_type type; }; /** @@ -37,32 +23,27 @@ struct stack_info { * @fp: The fp value in the frame record (or the real fp) * @pc: The lr value in the frame record (or the real lr) * - * @stacks_done: Stacks which have been entirely unwound, for which it is no - * longer valid to unwind to. - * - * @prev_fp: The fp that pointed to this frame record, or a synthetic value - * of 0. This is used to ensure that within a stack, each - * subsequent frame record is at an increasing address. - * @prev_type: The type of stack this frame record was on, or a synthetic - * value of STACK_TYPE_UNKNOWN. This is used to detect a - * transition from one stack to another. - * * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance * associated with the most recently encountered replacement lr * value. * * @task: The task being unwound. + * + * @stack: The stack currently being unwound. + * @stacks: An array of stacks which can be unwound. + * @nr_stacks: The number of stacks in @stacks. */ struct unwind_state { unsigned long fp; unsigned long pc; - DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES); - unsigned long prev_fp; - enum stack_type prev_type; #ifdef CONFIG_KRETPROBES struct llist_node *kr_cur; #endif struct task_struct *task; + + struct stack_info stack; + struct stack_info *stacks; + int nr_stacks; }; static inline struct stack_info stackinfo_get_unknown(void) @@ -70,7 +51,6 @@ static inline struct stack_info stackinfo_get_unknown(void) return (struct stack_info) { .low = 0, .high = 0, - .type = STACK_TYPE_UNKNOWN, }; } @@ -94,18 +74,7 @@ static inline void unwind_init_common(struct unwind_state *state, state->kr_cur = NULL; #endif - /* - * Prime the first unwind. - * - * In unwind_next() we'll check that the FP points to a valid stack, - * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be - * treated as a transition to whichever stack that happens to be. The - * prev_fp value won't be used, but we set it to 0 such that it is - * definitely not an accessible stack address. - */ - bitmap_zero(state->stacks_done, __NR_STACK_TYPES); - state->prev_fp = 0; - state->prev_type = STACK_TYPE_UNKNOWN; + state->stack = stackinfo_get_unknown(); } /** @@ -120,51 +89,94 @@ static inline void unwind_init_common(struct unwind_state *state, */ typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp); +static struct stack_info *unwind_find_next_stack(const struct unwind_state *state, + unsigned long sp, + unsigned long size) +{ + for (int i = 0; i < state->nr_stacks; i++) { + struct stack_info *info = &state->stacks[i]; + + if (stackinfo_on_stack(info, sp, size)) + return info; + } + + return NULL; +} + /** - * typedef on_accessible_stack_fn() - Check whether a stack range is on any of - * the possible stacks. + * unwind_consume_stack() - Check if an object is on an accessible stack, + * updating stack boundaries so that future unwind steps cannot consume this + * object again. * - * @tsk: task whose stack is being unwound - * @sp: stack address being checked - * @size: size of the stack range being checked - * @info: stack unwinding context + * @state: the current unwind state. + * @sp: the base address of the object. + * @size: the size of the object. * - * Return: true if the stack range is accessible, false otherwise. - * - * Upon success @info is updated with information for the relevant stack. - * - * Upon failure @info is updated with the UNKNOWN stack. + * Return: 0 upon success, an error code otherwise. */ -typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk, - unsigned long sp, unsigned long size, - struct stack_info *info); +static inline int unwind_consume_stack(struct unwind_state *state, + unsigned long sp, + unsigned long size) +{ + struct stack_info *next; + + if (stackinfo_on_stack(&state->stack, sp, size)) + goto found; + + next = unwind_find_next_stack(state, sp, size); + if (!next) + return -EINVAL; + + /* + * Stack transitions are strictly one-way, and once we've + * transitioned from one stack to another, it's never valid to + * unwind back to the old stack. + * + * Remove the current stack from the list of stacks so that it cannot + * be found on a subsequent transition. + * + * Note that stacks can nest in several valid orders, e.g. + * + * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL + * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW + * HYP -> OVERFLOW + * + * ... so we do not check the specific order of stack + * transitions. + */ + state->stack = *next; + *next = stackinfo_get_unknown(); + +found: + /* + * Future unwind steps can only consume stack above this frame record. + * Update the current stack to start immediately above it. + */ + state->stack.low = sp + size; + return 0; +} /** * unwind_next_frame_record() - Unwind to the next frame record. * * @state: the current unwind state. - * @accessible: determines whether the frame record is accessible * @translate_fp: translates the fp prior to access (may be NULL) * * Return: 0 upon success, an error code otherwise. */ static inline int unwind_next_frame_record(struct unwind_state *state, - on_accessible_stack_fn accessible, stack_trace_translate_fp_fn translate_fp) { - struct stack_info info; unsigned long fp = state->fp, kern_fp = fp; - struct task_struct *tsk = state->task; + int err; if (fp & 0x7) return -EINVAL; - if (!accessible(tsk, fp, 16, &info)) - return -EINVAL; - - if (test_bit(info.type, state->stacks_done)) - return -EINVAL; + err = unwind_consume_stack(state, fp, 16); + if (err) + return err; /* * If fp is not from the current address space perform the necessary @@ -174,34 +186,10 @@ unwind_next_frame_record(struct unwind_state *state, return -EINVAL; /* - * As stacks grow downward, any valid record on the same stack must be - * at a strictly higher address than the prior record. - * - * Stacks can nest in several valid orders, e.g. - * - * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL - * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW - * HYP -> OVERFLOW - * - * ... but the nesting itself is strict. Once we transition from one - * stack to another, it's never valid to unwind back to that first - * stack. - */ - if (info.type == state->prev_type) { - if (fp <= state->prev_fp) - return -EINVAL; - } else { - __set_bit(state->prev_type, state->stacks_done); - } - - /* - * Record this frame record's values and location. The prev_fp and - * prev_type are only meaningful to the next unwind_next() invocation. + * Record this frame record's values. */ state->fp = READ_ONCE(*(unsigned long *)(kern_fp)); state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8)); - state->prev_fp = fp; - state->prev_type = info.type; return 0; } diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ca56fd732c2a..9c8820f24262 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -67,57 +67,6 @@ static inline void unwind_init_from_task(struct unwind_state *state, state->pc = thread_saved_pc(task); } -static bool on_accessible_stack(const struct task_struct *tsk, - unsigned long sp, unsigned long size, - struct stack_info *info) -{ - struct stack_info tmp; - - tmp = stackinfo_get_task(tsk); - if (stackinfo_on_stack(&tmp, sp, size)) - goto found; - - /* - * We can only safely access per-cpu stacks when unwinding the current - * task in a non-preemptible context. - */ - if (tsk != current || preemptible()) - goto not_found; - - tmp = stackinfo_get_irq(); - if (stackinfo_on_stack(&tmp, sp, size)) - goto found; - - tmp = stackinfo_get_overflow(); - if (stackinfo_on_stack(&tmp, sp, size)) - goto found; - - /* - * We can only safely access SDEI stacks which unwinding the current - * task in an NMI context. - */ - if (!IS_ENABLED(CONFIG_VMAP_STACK) || - !IS_ENABLED(CONFIG_ARM_SDE_INTERFACE) || - !in_nmi()) - goto not_found; - - tmp = stackinfo_get_sdei_normal(); - if (stackinfo_on_stack(&tmp, sp, size)) - goto found; - - tmp = stackinfo_get_sdei_critical(); - if (stackinfo_on_stack(&tmp, sp, size)) - goto found; - -not_found: - *info = stackinfo_get_unknown(); - return false; - -found: - *info = tmp; - return true; -} - /* * Unwind from one frame record (A) to the next frame record (B). * @@ -135,7 +84,7 @@ static int notrace unwind_next(struct unwind_state *state) if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) return -ENOENT; - err = unwind_next_frame_record(state, on_accessible_stack, NULL); + err = unwind_next_frame_record(state, NULL); if (err) return err; @@ -215,11 +164,47 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) barrier(); } +/* + * Per-cpu stacks are only accessible when unwinding the current task in a + * non-preemptible context. + */ +#define STACKINFO_CPU(name) \ + ({ \ + ((task == current) && !preemptible()) \ + ? stackinfo_get_##name() \ + : stackinfo_get_unknown(); \ + }) + +/* + * SDEI stacks are only accessible when unwinding the current task in an NMI + * context. + */ +#define STACKINFO_SDEI(name) \ + ({ \ + ((task == current) && in_nmi()) \ + ? stackinfo_get_sdei_##name() \ + : stackinfo_get_unknown(); \ + }) + noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { - struct unwind_state state; + struct stack_info stacks[] = { + stackinfo_get_task(task), + STACKINFO_CPU(irq), +#if defined(CONFIG_VMAP_STACK) + STACKINFO_CPU(overflow), +#endif +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE) + STACKINFO_SDEI(normal), + STACKINFO_SDEI(critical), +#endif + }; + struct unwind_state state = { + .stacks = stacks, + .nr_stacks = ARRAY_SIZE(stacks), + }; if (regs) { if (task != current) diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c index 5da0d44f61b7..08e1325ead73 100644 --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -47,7 +47,6 @@ static struct stack_info stackinfo_get_overflow(void) return (struct stack_info) { .low = low, .high = high, - .type = STACK_TYPE_OVERFLOW, }; } @@ -60,35 +59,12 @@ static struct stack_info stackinfo_get_hyp(void) return (struct stack_info) { .low = low, .high = high, - .type = STACK_TYPE_HYP, }; } -static bool on_accessible_stack(const struct task_struct *tsk, - unsigned long sp, unsigned long size, - struct stack_info *info) -{ - struct stack_info tmp; - - tmp = stackinfo_get_overflow(); - if (stackinfo_on_stack(&tmp, sp, size)) - goto found; - - tmp = stackinfo_get_hyp(); - if (stackinfo_on_stack(&tmp, sp, size)) - goto found; - - *info = stackinfo_get_unknown(); - return false; - -found: - *info = tmp; - return true; -} - static int unwind_next(struct unwind_state *state) { - return unwind_next_frame_record(state, on_accessible_stack, NULL); + return unwind_next_frame_record(state, NULL); } static void notrace unwind(struct unwind_state *state, @@ -144,7 +120,14 @@ static bool pkvm_save_backtrace_entry(void *arg, unsigned long where) */ static void pkvm_save_backtrace(unsigned long fp, unsigned long pc) { - struct unwind_state state; + struct stack_info stacks[] = { + stackinfo_get_overflow(), + stackinfo_get_hyp(), + }; + struct unwind_state state = { + .stacks = stacks, + .nr_stacks = ARRAY_SIZE(stacks), + }; int idx = 0; kvm_nvhe_unwind_init(&state, fp, pc); diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c index 7658c5db47c1..0b4703945780 100644 --- a/arch/arm64/kvm/stacktrace.c +++ b/arch/arm64/kvm/stacktrace.c @@ -31,7 +31,6 @@ static struct stack_info stackinfo_get_overflow(void) return (struct stack_info) { .low = low, .high = high, - .type = STACK_TYPE_OVERFLOW, }; } @@ -45,7 +44,6 @@ static struct stack_info stackinfo_get_hyp(void) return (struct stack_info) { .low = low, .high = high, - .type = STACK_TYPE_HYP, }; } @@ -102,32 +100,9 @@ static bool kvm_nvhe_stack_kern_record_va(unsigned long *addr) return kvm_nvhe_stack_kern_va(addr, 16); } -static bool on_accessible_stack(const struct task_struct *tsk, - unsigned long sp, unsigned long size, - struct stack_info *info) -{ - struct stack_info tmp; - - tmp = stackinfo_get_overflow(); - if (stackinfo_on_stack(&tmp, sp, size)) - goto found; - - tmp = stackinfo_get_hyp(); - if (stackinfo_on_stack(&tmp, sp, size)) - goto found; - - *info = stackinfo_get_unknown(); - return false; - -found: - *info = tmp; - return true; -} - static int unwind_next(struct unwind_state *state) { - return unwind_next_frame_record(state, on_accessible_stack, - kvm_nvhe_stack_kern_record_va); + return unwind_next_frame_record(state, kvm_nvhe_stack_kern_record_va); } static void unwind(struct unwind_state *state, @@ -185,7 +160,14 @@ static void kvm_nvhe_dump_backtrace_end(void) static void hyp_dump_backtrace(unsigned long hyp_offset) { struct kvm_nvhe_stacktrace_info *stacktrace_info; - struct unwind_state state; + struct stack_info stacks[] = { + stackinfo_get_overflow(), + stackinfo_get_hyp(), + }; + struct unwind_state state = { + .stacks = stacks, + .nr_stacks = ARRAY_SIZE(stacks), + }; stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); From 4b5e694e25ca2cbb5c97694a7d2a473f35099829 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 1 Sep 2022 14:06:46 +0100 Subject: [PATCH 063/126] arm64: stacktrace: track hyp stacks in unwinder's address space Currently unwind_next_frame_record() has an optional callback to convert the address space of the FP. This is necessary for the NVHE unwinder, which tracks the stacks in the hyp VA space, but accesses the frame records in the kernel VA space. This is a bit unfortunate since it clutters unwind_next_frame_record(), which will get in the way of future rework. Instead, this patch changes the NVHE unwinder to track the stacks in the kernel's VA space and translate to FP prior to calling unwind_next_frame_record(). This removes the need for the translate_fp() callback, as all unwinders consistently track stacks in the native address space of the unwinder. At the same time, this patch consolidates the generation of the stack addresses behind the stackinfo_get_*() helpers. Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh Reviewed-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Cc: Fuad Tabba Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20220901130646.1316937-10-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace/common.h | 29 ++-------- arch/arm64/kernel/stacktrace.c | 2 +- arch/arm64/kvm/hyp/nvhe/stacktrace.c | 2 +- arch/arm64/kvm/stacktrace.c | 62 ++++++++++++++-------- 4 files changed, 46 insertions(+), 49 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h index 638008f48597..508f734de46e 100644 --- a/arch/arm64/include/asm/stacktrace/common.h +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -77,18 +77,6 @@ static inline void unwind_init_common(struct unwind_state *state, state->stack = stackinfo_get_unknown(); } -/** - * typedef stack_trace_translate_fp_fn() - Translates a non-kernel frame - * pointer to a kernel address. - * - * @fp: the frame pointer to be updated to its kernel address. - * - * Return: true if the VA can be translated, false otherwise. - * - * Upon success @fp is updated to the corresponding kernel virtual address. - */ -typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp); - static struct stack_info *unwind_find_next_stack(const struct unwind_state *state, unsigned long sp, unsigned long size) @@ -160,15 +148,13 @@ found: * unwind_next_frame_record() - Unwind to the next frame record. * * @state: the current unwind state. - * @translate_fp: translates the fp prior to access (may be NULL) * * Return: 0 upon success, an error code otherwise. */ static inline int -unwind_next_frame_record(struct unwind_state *state, - stack_trace_translate_fp_fn translate_fp) +unwind_next_frame_record(struct unwind_state *state) { - unsigned long fp = state->fp, kern_fp = fp; + unsigned long fp = state->fp; int err; if (fp & 0x7) @@ -178,18 +164,11 @@ unwind_next_frame_record(struct unwind_state *state, if (err) return err; - /* - * If fp is not from the current address space perform the necessary - * translation before dereferencing it to get the next fp. - */ - if (translate_fp && !translate_fp(&kern_fp)) - return -EINVAL; - /* * Record this frame record's values. */ - state->fp = READ_ONCE(*(unsigned long *)(kern_fp)); - state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8)); + state->fp = READ_ONCE(*(unsigned long *)(fp)); + state->pc = READ_ONCE(*(unsigned long *)(fp + 8)); return 0; } diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 9c8820f24262..634279b3b03d 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -84,7 +84,7 @@ static int notrace unwind_next(struct unwind_state *state) if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) return -ENOENT; - err = unwind_next_frame_record(state, NULL); + err = unwind_next_frame_record(state); if (err) return err; diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c index 08e1325ead73..ed6b58b19cfa 100644 --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -64,7 +64,7 @@ static struct stack_info stackinfo_get_hyp(void) static int unwind_next(struct unwind_state *state) { - return unwind_next_frame_record(state, NULL); + return unwind_next_frame_record(state); } static void notrace unwind(struct unwind_state *state, diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c index 0b4703945780..3ace5b75813b 100644 --- a/arch/arm64/kvm/stacktrace.c +++ b/arch/arm64/kvm/stacktrace.c @@ -34,6 +34,17 @@ static struct stack_info stackinfo_get_overflow(void) }; } +static struct stack_info stackinfo_get_overflow_kern_va(void) +{ + unsigned long low = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack); + unsigned long high = low + OVERFLOW_STACK_SIZE; + + return (struct stack_info) { + .low = low, + .high = high, + }; +} + static struct stack_info stackinfo_get_hyp(void) { struct kvm_nvhe_stacktrace_info *stacktrace_info @@ -47,6 +58,17 @@ static struct stack_info stackinfo_get_hyp(void) }; } +static struct stack_info stackinfo_get_hyp_kern_va(void) +{ + unsigned long low = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page); + unsigned long high = low + PAGE_SIZE; + + return (struct stack_info) { + .low = low, + .high = high, + }; +} + /* * kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to a kernel VAs * @@ -62,33 +84,22 @@ static struct stack_info stackinfo_get_hyp(void) */ static bool kvm_nvhe_stack_kern_va(unsigned long *addr, unsigned long size) { - struct kvm_nvhe_stacktrace_info *stacktrace_info; - unsigned long hyp_base, kern_base, hyp_offset; - struct stack_info stack; + struct stack_info stack_hyp, stack_kern; - stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); - - stack = stackinfo_get_hyp(); - if (stackinfo_on_stack(&stack, *addr, size)) { - kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page); - hyp_base = (unsigned long)stacktrace_info->stack_base; + stack_hyp = stackinfo_get_hyp(); + stack_kern = stackinfo_get_hyp_kern_va(); + if (stackinfo_on_stack(&stack_hyp, *addr, size)) goto found; - } - stack = stackinfo_get_overflow(); - if (stackinfo_on_stack(&stack, *addr, size)) { - kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack); - hyp_base = (unsigned long)stacktrace_info->overflow_stack_base; + stack_hyp = stackinfo_get_overflow(); + stack_kern = stackinfo_get_overflow_kern_va(); + if (stackinfo_on_stack(&stack_hyp, *addr, size)) goto found; - } return false; found: - hyp_offset = *addr - hyp_base; - - *addr = kern_base + hyp_offset; - + *addr = *addr - stack_hyp.low + stack_kern.low; return true; } @@ -102,7 +113,14 @@ static bool kvm_nvhe_stack_kern_record_va(unsigned long *addr) static int unwind_next(struct unwind_state *state) { - return unwind_next_frame_record(state, kvm_nvhe_stack_kern_record_va); + /* + * The FP is in the hypervisor VA space. Convert it to the kernel VA + * space so it can be unwound by the regular unwind functions. + */ + if (!kvm_nvhe_stack_kern_record_va(&state->fp)) + return -EINVAL; + + return unwind_next_frame_record(state); } static void unwind(struct unwind_state *state, @@ -161,8 +179,8 @@ static void hyp_dump_backtrace(unsigned long hyp_offset) { struct kvm_nvhe_stacktrace_info *stacktrace_info; struct stack_info stacks[] = { - stackinfo_get_overflow(), - stackinfo_get_hyp(), + stackinfo_get_overflow_kern_va(), + stackinfo_get_hyp_kern_va(), }; struct unwind_state state = { .stacks = stacks, From b2c3ccbd0011bb3b51d0fec24cb3a5812b1ec8ea Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 17 Aug 2022 16:59:13 +0100 Subject: [PATCH 064/126] arm64: atomics: remove LL/SC trampolines When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in a fragment of code being generated in a subsection without a clear association with its caller. A trampoline in the caller branches to the LL/SC atomic with with a direct branch, and the atomic directly branches back into its trampoline. This breaks backtracing, as any PC within the out-of-line fragment will be symbolized as an offset from the nearest prior symbol (which may not be the function using the atomic), and since the atomic returns with a direct branch, the caller's PC may be missing from the backtrace. For example, with secondary_start_kernel() hacked to contain atomic_inc(NULL), the resulting exception can be reported as being taken from cpus_are_stuck_in_kernel(): | Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 | Mem abort info: | ESR = 0x0000000096000004 | EC = 0x25: DABT (current EL), IL = 32 bits | SET = 0, FnV = 0 | EA = 0, S1PTW = 0 | FSC = 0x04: level 0 translation fault | Data abort info: | ISV = 0, ISS = 0x00000004 | CM = 0, WnR = 0 | [0000000000000000] user address but active_mm is swapper | Internal error: Oops: 96000004 [#1] PREEMPT SMP | Modules linked in: | CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3 | Hardware name: linux,dummy-virt (DT) | pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : cpus_are_stuck_in_kernel+0xa4/0x120 | lr : secondary_start_kernel+0x164/0x170 | sp : ffff80000a4cbe90 | x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000 | x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008 | x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0 | x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030 | x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72 | x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000 | x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100 | x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000 | Call trace: | cpus_are_stuck_in_kernel+0xa4/0x120 | __secondary_switched+0xb0/0xb4 | Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01) | ---[ end trace 0000000000000000 ]--- This is confusing and hinders debugging, and will be problematic for CONFIG_LIVEPATCH as these cases cannot be unwound reliably. This is very similar to recent issues with out-of-line exception fixups, which were removed in commits: 35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body") 4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body") 139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body") When the trampolines were introduced in commit: addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics") The rationale was to improve icache performance by grouping the LL/SC atomics together. This has never been measured, and this theoretical benefit is outweighed by other factors: * As the subsections are collapsed into sections at object file granularity, these are spread out throughout the kernel and can share cachelines with unrelated code regardless. * GCC 12.1.0 has been observed to place the trampoline out-of-line in specialised __ll_sc_*() functions, introducing more branching than was intended. * Removing the trampolines has been observed to shrink a defconfig kernel Image by 64KiB when building with GCC 12.1.0. This patch removes the LL/SC trampolines, meaning that the LL/SC atomics will be inlined into their callers (or placed in out-of line functions using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC atomics are always called in an unlikely branch, and will be placed in a cold portion of the function, so this should have minimal impact to the hot paths. Other than the improved backtracing, there should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Will Deacon Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic_ll_sc.h | 40 ++++++--------------------- 1 file changed, 9 insertions(+), 31 deletions(-) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index fe0db8d416fb..906e2d8c254c 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -12,19 +12,6 @@ #include -#ifdef CONFIG_ARM64_LSE_ATOMICS -#define __LL_SC_FALLBACK(asm_ops) \ -" b 3f\n" \ -" .subsection 1\n" \ -"3:\n" \ -asm_ops "\n" \ -" b 4f\n" \ -" .previous\n" \ -"4:\n" -#else -#define __LL_SC_FALLBACK(asm_ops) asm_ops -#endif - #ifndef CONFIG_CC_HAS_K_CONSTRAINT #define K #endif @@ -43,12 +30,11 @@ __ll_sc_atomic_##op(int i, atomic_t *v) \ int result; \ \ asm volatile("// atomic_" #op "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ldxr %w0, %2\n" \ " " #asm_op " %w0, %w0, %w3\n" \ " stxr %w1, %w0, %2\n" \ - " cbnz %w1, 1b\n") \ + " cbnz %w1, 1b\n" \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i)); \ } @@ -61,13 +47,12 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \ int result; \ \ asm volatile("// atomic_" #op "_return" #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ld" #acq "xr %w0, %2\n" \ " " #asm_op " %w0, %w0, %w3\n" \ " st" #rel "xr %w1, %w0, %2\n" \ " cbnz %w1, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -83,13 +68,12 @@ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \ int val, result; \ \ asm volatile("// atomic_fetch_" #op #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %3\n" \ "1: ld" #acq "xr %w0, %3\n" \ " " #asm_op " %w1, %w0, %w4\n" \ " st" #rel "xr %w2, %w1, %3\n" \ " cbnz %w2, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -142,12 +126,11 @@ __ll_sc_atomic64_##op(s64 i, atomic64_t *v) \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ldxr %0, %2\n" \ " " #asm_op " %0, %0, %3\n" \ " stxr %w1, %0, %2\n" \ - " cbnz %w1, 1b") \ + " cbnz %w1, 1b" \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i)); \ } @@ -160,13 +143,12 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "_return" #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ld" #acq "xr %0, %2\n" \ " " #asm_op " %0, %0, %3\n" \ " st" #rel "xr %w1, %0, %2\n" \ " cbnz %w1, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -182,13 +164,12 @@ __ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \ unsigned long tmp; \ \ asm volatile("// atomic64_fetch_" #op #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %3\n" \ "1: ld" #acq "xr %0, %3\n" \ " " #asm_op " %1, %0, %4\n" \ " st" #rel "xr %w2, %1, %3\n" \ " cbnz %w2, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -240,7 +221,6 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_dec_if_positive\n" - __LL_SC_FALLBACK( " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" " subs %0, %0, #1\n" @@ -248,7 +228,7 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v) " stlxr %w1, %0, %2\n" " cbnz %w1, 1b\n" " dmb ish\n" - "2:") + "2:" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : : "cc", "memory"); @@ -274,7 +254,6 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \ old = (u##sz)old; \ \ asm volatile( \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %[v]\n" \ "1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ @@ -282,7 +261,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \ " st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \ " cbnz %w[tmp], 1b\n" \ " " #mb "\n" \ - "2:") \ + "2:" \ : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \ [v] "+Q" (*(u##sz *)ptr) \ : [old] __stringify(constraint) "r" (old), [new] "r" (new) \ @@ -326,7 +305,6 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \ unsigned long tmp, ret; \ \ asm volatile("// __cmpxchg_double" #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ldxp %0, %1, %2\n" \ " eor %0, %0, %3\n" \ @@ -336,7 +314,7 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \ " st" #rel "xp %w0, %5, %6, %2\n" \ " cbnz %w0, 1b\n" \ " " #mb "\n" \ - "2:") \ + "2:" \ : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \ : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ : cl); \ From 78f6f5c994ed22a35ce1cd3ec9aeda8e2fa328e6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 17 Aug 2022 16:59:14 +0100 Subject: [PATCH 065/126] arm64: atomic: always inline the assembly The __lse_*() and __ll_sc_*() atomic implementations are marked as inline rather than __always_inline, permitting a compiler to generate out-of-line versions, which may be instrumented. We marked the atomic wrappers as __always_inline in commit: c35a824c31834d94 ("arm64: make atomic helpers __always_inline") ... but did not think to do the same for the underlying implementations. If the compiler were to out-of-line an LSE or LL/SC atomic, this could break noinstr code. Ensure this doesn't happen by marking the underlying implementations as __always_inline. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Will Deacon Link: https://lore.kernel.org/r/20220817155914.3975112-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic_ll_sc.h | 18 +++++------ arch/arm64/include/asm/atomic_lse.h | 46 +++++++++++++++++---------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 906e2d8c254c..0890e4f568fb 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -23,7 +23,7 @@ */ #define ATOMIC_OP(op, asm_op, constraint) \ -static inline void \ +static __always_inline void \ __ll_sc_atomic_##op(int i, atomic_t *v) \ { \ unsigned long tmp; \ @@ -40,7 +40,7 @@ __ll_sc_atomic_##op(int i, atomic_t *v) \ } #define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\ -static inline int \ +static __always_inline int \ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \ { \ unsigned long tmp; \ @@ -61,7 +61,7 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \ } #define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \ -static inline int \ +static __always_inline int \ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \ { \ unsigned long tmp; \ @@ -119,7 +119,7 @@ ATOMIC_OPS(andnot, bic, ) #undef ATOMIC_OP #define ATOMIC64_OP(op, asm_op, constraint) \ -static inline void \ +static __always_inline void \ __ll_sc_atomic64_##op(s64 i, atomic64_t *v) \ { \ s64 result; \ @@ -136,7 +136,7 @@ __ll_sc_atomic64_##op(s64 i, atomic64_t *v) \ } #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\ -static inline long \ +static __always_inline long \ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \ { \ s64 result; \ @@ -157,7 +157,7 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \ } #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\ -static inline long \ +static __always_inline long \ __ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \ { \ s64 result, val; \ @@ -214,7 +214,7 @@ ATOMIC64_OPS(andnot, bic, ) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -static inline s64 +static __always_inline s64 __ll_sc_atomic64_dec_if_positive(atomic64_t *v) { s64 result; @@ -237,7 +237,7 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v) } #define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint) \ -static inline u##sz \ +static __always_inline u##sz \ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \ unsigned long old, \ u##sz new) \ @@ -295,7 +295,7 @@ __CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L) #undef __CMPXCHG_CASE #define __CMPXCHG_DBL(name, mb, rel, cl) \ -static inline long \ +static __always_inline long \ __ll_sc__cmpxchg_double##name(unsigned long old1, \ unsigned long old2, \ unsigned long new1, \ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 5d460f6b7675..52075e93de6c 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -11,7 +11,8 @@ #define __ASM_ATOMIC_LSE_H #define ATOMIC_OP(op, asm_op) \ -static inline void __lse_atomic_##op(int i, atomic_t *v) \ +static __always_inline void \ +__lse_atomic_##op(int i, atomic_t *v) \ { \ asm volatile( \ __LSE_PREAMBLE \ @@ -25,7 +26,7 @@ ATOMIC_OP(or, stset) ATOMIC_OP(xor, steor) ATOMIC_OP(add, stadd) -static inline void __lse_atomic_sub(int i, atomic_t *v) +static __always_inline void __lse_atomic_sub(int i, atomic_t *v) { __lse_atomic_add(-i, v); } @@ -33,7 +34,8 @@ static inline void __lse_atomic_sub(int i, atomic_t *v) #undef ATOMIC_OP #define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \ -static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \ +static __always_inline int \ +__lse_atomic_fetch_##op##name(int i, atomic_t *v) \ { \ int old; \ \ @@ -63,7 +65,8 @@ ATOMIC_FETCH_OPS(add, ldadd) #undef ATOMIC_FETCH_OPS #define ATOMIC_FETCH_OP_SUB(name) \ -static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v) \ +static __always_inline int \ +__lse_atomic_fetch_sub##name(int i, atomic_t *v) \ { \ return __lse_atomic_fetch_add##name(-i, v); \ } @@ -76,12 +79,14 @@ ATOMIC_FETCH_OP_SUB( ) #undef ATOMIC_FETCH_OP_SUB #define ATOMIC_OP_ADD_SUB_RETURN(name) \ -static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \ +static __always_inline int \ +__lse_atomic_add_return##name(int i, atomic_t *v) \ { \ return __lse_atomic_fetch_add##name(i, v) + i; \ } \ \ -static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \ +static __always_inline int \ +__lse_atomic_sub_return##name(int i, atomic_t *v) \ { \ return __lse_atomic_fetch_sub(i, v) - i; \ } @@ -93,13 +98,14 @@ ATOMIC_OP_ADD_SUB_RETURN( ) #undef ATOMIC_OP_ADD_SUB_RETURN -static inline void __lse_atomic_and(int i, atomic_t *v) +static __always_inline void __lse_atomic_and(int i, atomic_t *v) { return __lse_atomic_andnot(~i, v); } #define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ -static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v) \ +static __always_inline int \ +__lse_atomic_fetch_and##name(int i, atomic_t *v) \ { \ return __lse_atomic_fetch_andnot##name(~i, v); \ } @@ -112,7 +118,8 @@ ATOMIC_FETCH_OP_AND( , al, "memory") #undef ATOMIC_FETCH_OP_AND #define ATOMIC64_OP(op, asm_op) \ -static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \ +static __always_inline void \ +__lse_atomic64_##op(s64 i, atomic64_t *v) \ { \ asm volatile( \ __LSE_PREAMBLE \ @@ -126,7 +133,7 @@ ATOMIC64_OP(or, stset) ATOMIC64_OP(xor, steor) ATOMIC64_OP(add, stadd) -static inline void __lse_atomic64_sub(s64 i, atomic64_t *v) +static __always_inline void __lse_atomic64_sub(s64 i, atomic64_t *v) { __lse_atomic64_add(-i, v); } @@ -134,7 +141,8 @@ static inline void __lse_atomic64_sub(s64 i, atomic64_t *v) #undef ATOMIC64_OP #define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ -static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\ +static __always_inline long \ +__lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \ { \ s64 old; \ \ @@ -164,7 +172,8 @@ ATOMIC64_FETCH_OPS(add, ldadd) #undef ATOMIC64_FETCH_OPS #define ATOMIC64_FETCH_OP_SUB(name) \ -static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \ +static __always_inline long \ +__lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \ { \ return __lse_atomic64_fetch_add##name(-i, v); \ } @@ -177,12 +186,14 @@ ATOMIC64_FETCH_OP_SUB( ) #undef ATOMIC64_FETCH_OP_SUB #define ATOMIC64_OP_ADD_SUB_RETURN(name) \ -static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\ +static __always_inline long \ +__lse_atomic64_add_return##name(s64 i, atomic64_t *v) \ { \ return __lse_atomic64_fetch_add##name(i, v) + i; \ } \ \ -static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)\ +static __always_inline long \ +__lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \ { \ return __lse_atomic64_fetch_sub##name(i, v) - i; \ } @@ -194,13 +205,14 @@ ATOMIC64_OP_ADD_SUB_RETURN( ) #undef ATOMIC64_OP_ADD_SUB_RETURN -static inline void __lse_atomic64_and(s64 i, atomic64_t *v) +static __always_inline void __lse_atomic64_and(s64 i, atomic64_t *v) { return __lse_atomic64_andnot(~i, v); } #define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ -static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ +static __always_inline long \ +__lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ { \ return __lse_atomic64_fetch_andnot##name(~i, v); \ } @@ -212,7 +224,7 @@ ATOMIC64_FETCH_OP_AND( , al, "memory") #undef ATOMIC64_FETCH_OP_AND -static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v) +static __always_inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v) { unsigned long tmp; From 8eb858c44b98e0326bb32fca34ae671995cd73bb Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 15 Aug 2022 20:47:39 +0800 Subject: [PATCH 066/126] arm64: run softirqs on the per-CPU IRQ stack Currently arm64 supports per-CPU IRQ stack, but softirqs are still handled in the task context. Since any call to local_bh_enable() at any level in the task's call stack may trigger a softirq processing run, which could potentially cause a task stack overflow if the combined stack footprints exceed the stack's size, let's run these softirqs on the IRQ stack as well. Signed-off-by: Qi Zheng Reviewed-by: Arnd Bergmann Acked-by: Will Deacon Link: https://lore.kernel.org/r/20220815124739.15948-1-zhengqi.arch@bytedance.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/irq.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b17d6861a3ad..6f1ecf4538f7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -230,6 +230,7 @@ config ARM64 select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT select TRACE_IRQFLAGS_NMI_SUPPORT + select HAVE_SOFTIRQ_ON_OWN_STACK help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index bda49430c9ea..38dbd3828f13 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -21,7 +21,9 @@ #include #include #include +#include #include +#include /* Only access this in an NMI enter/exit */ DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts); @@ -71,6 +73,18 @@ static void init_irq_stacks(void) } #endif +#ifndef CONFIG_PREEMPT_RT +static void ____do_softirq(struct pt_regs *regs) +{ + __do_softirq(); +} + +void do_softirq_own_stack(void) +{ + call_on_irq_stack(NULL, ____do_softirq); +} +#endif + static void default_handle_irq(struct pt_regs *regs) { panic("IRQ taken without a root IRQ handler\n"); From 877ace9eab7de032f954533afd5d1ecd0cf62eaf Mon Sep 17 00:00:00 2001 From: Liu Song Date: Fri, 26 Aug 2022 19:40:50 +0800 Subject: [PATCH 067/126] arm64: spectre: increase parameters that can be used to turn off bhb mitigation individually In our environment, it was found that the mitigation BHB has a great impact on the benchmark performance. For example, in the lmbench test, the "process fork && exit" test performance drops by 20%. So it is necessary to have the ability to turn off the mitigation individually through cmdline, thus avoiding having to compile the kernel by adjusting the config. Signed-off-by: Liu Song Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1661514050-22263-1-git-send-email-liusong@linux.alibaba.com Signed-off-by: Catalin Marinas --- Documentation/admin-guide/kernel-parameters.txt | 5 +++++ arch/arm64/kernel/proton-pack.c | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 426fa892d311..a1e88e90e94f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3207,6 +3207,7 @@ spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86,PPC] ssbd=force-off [ARM64] + nospectre_bhb [ARM64] l1tf=off [X86] mds=off [X86] tsx_async_abort=off [X86] @@ -3631,6 +3632,10 @@ vulnerability. System may allow data leaks with this option. + nospectre_bhb [ARM64] Disable all mitigations for Spectre-BHB (branch + history injection) vulnerability. System may allow data leaks + with this option. + nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 40be3a7c2c53..bd1690335bb9 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -988,6 +988,14 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) isb(); } +static bool __read_mostly __nospectre_bhb; +static int __init parse_spectre_bhb_param(char *str) +{ + __nospectre_bhb = true; + return 0; +} +early_param("nospectre_bhb", parse_spectre_bhb_param); + void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) { bp_hardening_cb_t cpu_cb; @@ -1001,7 +1009,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) /* No point mitigating Spectre-BHB alone. */ } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); - } else if (cpu_mitigations_off()) { + } else if (cpu_mitigations_off() || __nospectre_bhb) { pr_info_once("spectre-bhb mitigation disabled by command line option\n"); } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { state = SPECTRE_MITIGATED; From e92072237e6c8497cad5e9d2f2e3c44bb9d26aef Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Sun, 11 Sep 2022 12:44:23 +0800 Subject: [PATCH 068/126] arm64: support huge vmalloc mappings As commit 559089e0a93d ("vmalloc: replace VM_NO_HUGE_VMAP with VM_ALLOW_HUGE_VMAP"), the use of hugepage mappings for vmalloc is an opt-in strategy, so it is saftly to support huge vmalloc mappings on arm64, for now, it is used in kvmalloc() and alloc_large_system_hash(). Signed-off-by: Kefeng Wang Link: https://lore.kernel.org/r/20220911044423.139229-1-wangkefeng.wang@huawei.com Signed-off-by: Catalin Marinas --- Documentation/admin-guide/kernel-parameters.txt | 2 +- arch/arm64/Kconfig | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a1e88e90e94f..dc3a939da2c0 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3614,7 +3614,7 @@ nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings. - nohugevmalloc [PPC] Disable kernel huge vmalloc mappings. + nohugevmalloc [KNL,X86,PPC,ARM64] Disable kernel huge vmalloc mappings. nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6f1ecf4538f7..7dbcf2076385 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -149,6 +149,7 @@ config ARM64 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE select HAVE_ARCH_COMPILER_H + select HAVE_ARCH_HUGE_VMALLOC select HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE From b502c87d2a26c349acbc231ff2acd6f17147926b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 13 Sep 2022 11:17:28 +0100 Subject: [PATCH 069/126] arm64: report EL1 UNDEFs better If an UNDEFINED exception is taken from EL1, and do_undefinstr() doesn't find any suitable undef_hook, it will call: BUG_ON(!user_mode(regs)) ... and the kernel will report a failure witin do_undefinstr() rather than reporting the original context that the UNDEFINED exception was taken from. The pt_regs and ESR value reported within the BUG() handler will be from within do_undefinstr() and the code dump will be for the BRK in BUG_ON(), which isn't sufficient to debug the cause of the original exception. This patch makes the reporting better by having do_undefinstr() call die() directly in this case to report the original context from which the UNDEFINED exception was taken. Prior to this patch, an undefined instruction is reported as: | kernel BUG at arch/arm64/kernel/traps.c:497! | Internal error: Oops - BUG: 0 [#1] PREEMPT SMP | Modules linked in: | CPU: 0 PID: 0 Comm: swapper Not tainted 5.19.0-rc3-00127-geff044f1b04e-dirty #3 | Hardware name: linux,dummy-virt (DT) | pstate: 000000c5 (nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : do_undefinstr+0x28c/0x2ac | lr : do_undefinstr+0x298/0x2ac | sp : ffff800009f63bc0 | x29: ffff800009f63bc0 x28: ffff800009f73c00 x27: ffff800009644a70 | x26: ffff8000096778a8 x25: 0000000000000040 x24: 0000000000000000 | x23: 00000000800000c5 x22: ffff800009894060 x21: ffff800009f63d90 | x20: 0000000000000000 x19: ffff800009f63c40 x18: 0000000000000006 | x17: 0000000000403000 x16: 00000000bfbfd000 x15: ffff800009f63830 | x14: ffffffffffffffff x13: 0000000000000000 x12: 0000000000000019 | x11: 0101010101010101 x10: 0000000000161b98 x9 : 0000000000000000 | x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 | x5 : ffff800009f761d0 x4 : 0000000000000000 x3 : ffff80000a2b80f8 | x2 : 0000000000000000 x1 : ffff800009f73c00 x0 : 00000000800000c5 | Call trace: | do_undefinstr+0x28c/0x2ac | el1_undef+0x2c/0x4c | el1h_64_sync_handler+0x84/0xd0 | el1h_64_sync+0x64/0x68 | setup_arch+0x550/0x598 | start_kernel+0x88/0x6ac | __primary_switched+0xb8/0xc0 | Code: 17ffff95 a9425bf5 17ffffb8 a9025bf5 (d4210000) With this patch applied, an undefined instruction is reported as: | Internal error: Oops - Undefined instruction: 0 [#1] PREEMPT SMP | Modules linked in: | CPU: 0 PID: 0 Comm: swapper Not tainted 5.19.0-rc3-00128-gf27cfcc80e52-dirty #5 | Hardware name: linux,dummy-virt (DT) | pstate: 800000c5 (Nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : setup_arch+0x550/0x598 | lr : setup_arch+0x50c/0x598 | sp : ffff800009f63d90 | x29: ffff800009f63d90 x28: 0000000081000200 x27: ffff800009644a70 | x26: ffff8000096778c8 x25: 0000000000000040 x24: 0000000000000000 | x23: 0000000000000100 x22: ffff800009f69a58 x21: ffff80000a2b80b8 | x20: 0000000000000000 x19: 0000000000000000 x18: 0000000000000006 | x17: 0000000000403000 x16: 00000000bfbfd000 x15: ffff800009f63830 | x14: ffffffffffffffff x13: 0000000000000000 x12: 0000000000000019 | x11: 0101010101010101 x10: 0000000000161b98 x9 : 0000000000000000 | x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 | x5 : 0000000000000008 x4 : 0000000000000010 x3 : 0000000000000000 | x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 | Call trace: | setup_arch+0x550/0x598 | start_kernel+0x88/0x6ac | __primary_switched+0xb8/0xc0 | Code: b4000080 90ffed80 912ac000 97db745f (00000000) Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Cc: Alexandru Elisei Cc: Amit Daniel Kachhap Cc: James Morse Cc: Will Deacon Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20220913101732.3925290-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b7fed33981f7..eac4f7a83175 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -494,7 +494,9 @@ void do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; - BUG_ON(!user_mode(regs)); + if (!user_mode(regs)) + die("Oops - Undefined instruction", regs, 0); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } NOKPROBE_SYMBOL(do_undefinstr); From 18906ff9af6517c20763ed63dab602a4150794f7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 13 Sep 2022 11:17:29 +0100 Subject: [PATCH 070/126] arm64: die(): pass 'err' as long Recently, we reworked a lot of code to consistentlt pass ESR_ELx as a 64-bit quantity. However, we missed that this can be passed into die() and __die() as the 'err' parameter where it is truncated to a 32-bit int. As notify_die() already takes 'err' as a long, this patch changes die() and __die() to also take 'err' as a long, ensuring that the full value of ESR_ELx is retained. At the same time, die() is updated to consistently log 'err' as a zero-padded 64-bit quantity. Subsequent patches will pass the ESR_ELx value to die() for a number of exceptions. Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Reviewed-by: Anshuman Khandual Cc: Alexandru Elisei Cc: Amit Daniel Kachhap Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20220913101732.3925290-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/system_misc.h | 2 +- arch/arm64/kernel/traps.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 0eb7709422e2..c34344256762 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -18,7 +18,7 @@ struct pt_regs; -void die(const char *msg, struct pt_regs *regs, int err); +void die(const char *msg, struct pt_regs *regs, long err); struct siginfo; void arm64_notify_die(const char *str, struct pt_regs *regs, diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index eac4f7a83175..da8ffef6f7c5 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -180,12 +180,12 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) #define S_SMP " SMP" -static int __die(const char *str, int err, struct pt_regs *regs) +static int __die(const char *str, long err, struct pt_regs *regs) { static int die_counter; int ret; - pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n", + pr_emerg("Internal error: %s: %016lx [#%d]" S_PREEMPT S_SMP "\n", str, err, ++die_counter); /* trap and error numbers are mostly meaningless on ARM */ @@ -206,7 +206,7 @@ static DEFINE_RAW_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. */ -void die(const char *str, struct pt_regs *regs, int err) +void die(const char *str, struct pt_regs *regs, long err) { int ret; unsigned long flags; From 0f2cb928a1547ae8f89e80a4b8df2c6c02ae5f96 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 13 Sep 2022 11:17:30 +0100 Subject: [PATCH 071/126] arm64: consistently pass ESR_ELx to die() Currently, bug_handler() and kasan_handler() call die() with '0' as the 'err' value, whereas die_kernel_fault() passes the ESR_ELx value. For consistency, this patch ensures we always pass the ESR_ELx value to die(). As this is only called for exceptions taken from kernel mode, there should be no user-visible change as a result of this patch. For UNDEFINED exceptions, I've had to modify do_undefinstr() and its callers to pass the ESR_ELx value. In all cases the ESR_ELx value had already been read and was available. Signed-off-by: Mark Rutland Cc: Mark Brown Cc: Alexandru Elisei Cc: Amit Daniel Kachhap Cc: James Morse Cc: Will Deacon Reviewed-by: Anshuman Khandual Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20220913101732.3925290-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/exception.h | 2 +- arch/arm64/kernel/entry-common.c | 14 +++++++------- arch/arm64/kernel/traps.c | 14 +++++++------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d94aecff9690..28c0275666e1 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -58,7 +58,7 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs, asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs); void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs); -void do_undefinstr(struct pt_regs *regs); +void do_undefinstr(struct pt_regs *regs, unsigned long esr); void do_bti(struct pt_regs *regs); void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index c75ca36b4a49..3dd17ef6d6d8 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -379,11 +379,11 @@ static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr) exit_to_kernel_mode(regs); } -static void noinstr el1_undef(struct pt_regs *regs) +static void noinstr el1_undef(struct pt_regs *regs, unsigned long esr) { enter_from_kernel_mode(regs); local_daif_inherit(regs); - do_undefinstr(regs); + do_undefinstr(regs, esr); local_daif_mask(); exit_to_kernel_mode(regs); } @@ -425,7 +425,7 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) break; case ESR_ELx_EC_SYS64: case ESR_ELx_EC_UNKNOWN: - el1_undef(regs); + el1_undef(regs, esr); break; case ESR_ELx_EC_BREAKPT_CUR: case ESR_ELx_EC_SOFTSTP_CUR: @@ -582,11 +582,11 @@ static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } -static void noinstr el0_undef(struct pt_regs *regs) +static void noinstr el0_undef(struct pt_regs *regs, unsigned long esr) { enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); - do_undefinstr(regs); + do_undefinstr(regs, esr); exit_to_user_mode(regs); } @@ -670,7 +670,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) el0_pc(regs, esr); break; case ESR_ELx_EC_UNKNOWN: - el0_undef(regs); + el0_undef(regs, esr); break; case ESR_ELx_EC_BTI: el0_bti(regs); @@ -788,7 +788,7 @@ asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_CP14_MR: case ESR_ELx_EC_CP14_LS: case ESR_ELx_EC_CP14_64: - el0_undef(regs); + el0_undef(regs, esr); break; case ESR_ELx_EC_CP15_32: case ESR_ELx_EC_CP15_64: diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index da8ffef6f7c5..05999cde870a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -485,7 +485,7 @@ void arm64_notify_segfault(unsigned long addr) force_signal_inject(SIGSEGV, code, addr, 0); } -void do_undefinstr(struct pt_regs *regs) +void do_undefinstr(struct pt_regs *regs, unsigned long esr) { /* check for AArch32 breakpoint instructions */ if (!aarch32_break_handler(regs)) @@ -495,7 +495,7 @@ void do_undefinstr(struct pt_regs *regs) return; if (!user_mode(regs)) - die("Oops - Undefined instruction", regs, 0); + die("Oops - Undefined instruction", regs, esr); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } @@ -760,7 +760,7 @@ void do_cp15instr(unsigned long esr, struct pt_regs *regs) hook_base = cp15_64_hooks; break; default: - do_undefinstr(regs); + do_undefinstr(regs, esr); return; } @@ -775,7 +775,7 @@ void do_cp15instr(unsigned long esr, struct pt_regs *regs) * EL0. Fall back to our usual undefined instruction handler * so that we handle these consistently. */ - do_undefinstr(regs); + do_undefinstr(regs, esr); } NOKPROBE_SYMBOL(do_cp15instr); #endif @@ -795,7 +795,7 @@ void do_sysinstr(unsigned long esr, struct pt_regs *regs) * back to our usual undefined instruction handler so that we handle * these consistently. */ - do_undefinstr(regs); + do_undefinstr(regs, esr); } NOKPROBE_SYMBOL(do_sysinstr); @@ -972,7 +972,7 @@ static int bug_handler(struct pt_regs *regs, unsigned long esr) { switch (report_bug(regs->pc, regs)) { case BUG_TRAP_TYPE_BUG: - die("Oops - BUG", regs, 0); + die("Oops - BUG", regs, esr); break; case BUG_TRAP_TYPE_WARN: @@ -1040,7 +1040,7 @@ static int kasan_handler(struct pt_regs *regs, unsigned long esr) * This is something that might be fixed at some point in the future. */ if (!recover) - die("Oops - KASAN", regs, 0); + die("Oops - KASAN", regs, esr); /* If thread survives, skip over the brk instruction and continue: */ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); From a1fafa3b24a70461bbf3e5c0770893feb0a49292 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 13 Sep 2022 11:17:31 +0100 Subject: [PATCH 072/126] arm64: rework FPAC exception handling If an FPAC exception is taken from EL1, the entry code will call do_ptrauth_fault(), where due to: BUG_ON(!user_mode(regs)) ... the kernel will report a problem within do_ptrauth_fault() rather than reporting the original context the FPAC exception was taken from. The pt_regs and ESR value reported will be from within do_ptrauth_fault() and the code dump will be for the BRK in BUG_ON(), which isn't sufficient to debug the cause of the original exception. This patch makes the reporting better by having separate EL0 and EL1 FPAC exception handlers, with the latter calling die() directly to report the original context the FPAC exception was taken from. Note that we only need to prevent kprobes of the EL1 FPAC handler, since the EL0 FPAC handler cannot be called recursively. For consistency with do_el0_svc*(), I've named the split functions do_el{0,1}_fpac() rather than do_el{0,1}_ptrauth_fault(). I've also clarified the comment to not imply there are casues other than FPAC exceptions. Prior to this patch FPAC exceptions are reported as: | kernel BUG at arch/arm64/kernel/traps.c:517! | Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP | Modules linked in: | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc3-00130-g9c8a180a1cdf-dirty #12 | Hardware name: FVP Base RevC (DT) | pstate: 00400009 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : do_ptrauth_fault+0x3c/0x40 | lr : el1_fpac+0x34/0x54 | sp : ffff80000a3bbc80 | x29: ffff80000a3bbc80 x28: ffff0008001d8000 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: 0000000020400009 x22: ffff800008f70fa4 x21: ffff80000a3bbe00 | x20: 0000000072000000 x19: ffff80000a3bbcb0 x18: fffffbfffda37000 | x17: 3120676e696d7573 x16: 7361202c6e6f6974 x15: 0000000081a90000 | x14: 0040000000000041 x13: 0040000000000001 x12: ffff000001a90000 | x11: fffffbfffda37480 x10: 0068000000000703 x9 : 0001000080000000 | x8 : 0000000000090000 x7 : 0068000000000f03 x6 : 0060000000000783 | x5 : ffff80000a3bbcb0 x4 : ffff0008001d8000 x3 : 0000000072000000 | x2 : 0000000000000000 x1 : 0000000020400009 x0 : ffff80000a3bbcb0 | Call trace: | do_ptrauth_fault+0x3c/0x40 | el1h_64_sync_handler+0xc4/0xd0 | el1h_64_sync+0x64/0x68 | test_pac+0x8/0x10 | smp_init+0x7c/0x8c | kernel_init_freeable+0x128/0x28c | kernel_init+0x28/0x13c | ret_from_fork+0x10/0x20 | Code: 97fffe5e a8c17bfd d50323bf d65f03c0 (d4210000) With this patch applied FPAC exceptions are reported as: | Internal error: Oops - FPAC: 0000000072000000 [#1] PREEMPT SMP | Modules linked in: | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc3-00132-g78846e1c4757-dirty #11 | Hardware name: FVP Base RevC (DT) | pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : test_pac+0x8/0x10 | lr : 0x0 | sp : ffff80000a3bbe00 | x29: ffff80000a3bbe00 x28: 0000000000000000 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: ffff80000a2c8000 x22: 0000000000000000 x21: 0000000000000000 | x20: ffff8000099fa5b0 x19: ffff80000a007000 x18: fffffbfffda37000 | x17: 3120676e696d7573 x16: 7361202c6e6f6974 x15: 0000000081a90000 | x14: 0040000000000041 x13: 0040000000000001 x12: ffff000001a90000 | x11: fffffbfffda37480 x10: 0068000000000703 x9 : 0001000080000000 | x8 : 0000000000090000 x7 : 0068000000000f03 x6 : 0060000000000783 | x5 : ffff80000a2c6000 x4 : ffff0008001d8000 x3 : ffff800009f88378 | x2 : 0000000000000000 x1 : 0000000080210000 x0 : ffff000001a90000 | Call trace: | test_pac+0x8/0x10 | smp_init+0x7c/0x8c | kernel_init_freeable+0x128/0x28c | kernel_init+0x28/0x13c | ret_from_fork+0x10/0x20 | Code: d50323bf d65f03c0 d503233f aa1f03fe (d50323bf) Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Reviewed-by: Anshuman Khandual Cc: Alexandru Elisei Cc: Amit Daniel Kachhap Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20220913101732.3925290-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/exception.h | 3 ++- arch/arm64/kernel/entry-common.c | 4 ++-- arch/arm64/kernel/traps.c | 18 +++++++++++------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 28c0275666e1..3f5141dc3341 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -72,7 +72,8 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr); void do_cp15instr(unsigned long esr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); -void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr); +void do_el0_fpac(struct pt_regs *regs, unsigned long esr); +void do_el1_fpac(struct pt_regs *regs, unsigned long esr); void do_serror(struct pt_regs *regs, unsigned long esr); void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 3dd17ef6d6d8..f334951d38e3 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -402,7 +402,7 @@ static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) { enter_from_kernel_mode(regs); local_daif_inherit(regs); - do_ptrauth_fault(regs, esr); + do_el1_fpac(regs, esr); local_daif_mask(); exit_to_kernel_mode(regs); } @@ -629,7 +629,7 @@ static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) { enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); - do_ptrauth_fault(regs, esr); + do_el0_fpac(regs, esr); exit_to_user_mode(regs); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 05999cde870a..6cec65fbf8b8 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -508,16 +508,20 @@ void do_bti(struct pt_regs *regs) } NOKPROBE_SYMBOL(do_bti); -void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr) +void do_el0_fpac(struct pt_regs *regs, unsigned long esr) { - /* - * Unexpected FPAC exception or pointer authentication failure in - * the kernel: kill the task before it does any more harm. - */ - BUG_ON(!user_mode(regs)); force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr); } -NOKPROBE_SYMBOL(do_ptrauth_fault); + +void do_el1_fpac(struct pt_regs *regs, unsigned long esr) +{ + /* + * Unexpected FPAC exception in the kernel: kill the task before it + * does any more harm. + */ + die("Oops - FPAC", regs, esr); +} +NOKPROBE_SYMBOL(do_el1_fpac) #define __user_cache_maint(insn, address, res) \ if (address >= TASK_SIZE_MAX) { \ From 830a2a4d853f2c4a1e4606aa03341b7f273b0e9b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 13 Sep 2022 11:17:32 +0100 Subject: [PATCH 073/126] arm64: rework BTI exception handling If a BTI exception is taken from EL1, the entry code will treat this as an unhandled exception and will panic() the kernel. This is inconsistent with the way we handle FPAC exceptions, which have a dedicated handler and only necessarily kill the thread from which the exception was taken from, and we don't log all the information that could be relevant to debug the issue. The code in do_bti() has: BUG_ON(!user_mode(regs)); ... and it seems like the intent was to call this for EL1 BTI exceptions, as with FPAC, but this was omitted due to an oversight. This patch adds separate EL0 and EL1 BTI exception handlers, with the latter calling die() directly to report the original context the BTI exception was taken from. This matches our handling of FPAC exceptions. Prior to this patch, a BTI failure is reported as: | Unhandled 64-bit el1h sync exception on CPU0, ESR 0x0000000034000002 -- BTI | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc3-00131-g7d937ff0221d-dirty #9 | Hardware name: linux,dummy-virt (DT) | pstate: 20400809 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=-c) | pc : test_bti_callee+0x4/0x10 | lr : test_bti_caller+0x1c/0x28 | sp : ffff80000800bdf0 | x29: ffff80000800bdf0 x28: 0000000000000000 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: ffff80000a2b8000 x22: 0000000000000000 x21: 0000000000000000 | x20: ffff8000099fa5b0 x19: ffff800009ff7000 x18: fffffbfffda37000 | x17: 3120676e696d7573 x16: 7361202c6e6f6974 x15: 0000000041a90000 | x14: 0040000000000041 x13: 0040000000000001 x12: ffff000001a90000 | x11: fffffbfffda37480 x10: 0068000000000703 x9 : 0001000040000000 | x8 : 0000000000090000 x7 : 0068000000000f03 x6 : 0060000000000f83 | x5 : ffff80000a2b6000 x4 : ffff0000028d0000 x3 : ffff800009f78378 | x2 : 0000000000000000 x1 : 0000000040210000 x0 : ffff8000080257e4 | Kernel panic - not syncing: Unhandled exception | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc3-00131-g7d937ff0221d-dirty #9 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace.part.0+0xcc/0xe0 | show_stack+0x18/0x5c | dump_stack_lvl+0x64/0x80 | dump_stack+0x18/0x34 | panic+0x170/0x360 | arm64_exit_nmi.isra.0+0x0/0x80 | el1h_64_sync_handler+0x64/0xd0 | el1h_64_sync+0x64/0x68 | test_bti_callee+0x4/0x10 | smp_cpus_done+0xb0/0xbc | smp_init+0x7c/0x8c | kernel_init_freeable+0x128/0x28c | kernel_init+0x28/0x13c | ret_from_fork+0x10/0x20 With this patch applied, a BTI failure is reported as: | Internal error: Oops - BTI: 0000000034000002 [#1] PREEMPT SMP | Modules linked in: | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc3-00132-g0ad98265d582-dirty #8 | Hardware name: linux,dummy-virt (DT) | pstate: 20400809 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=-c) | pc : test_bti_callee+0x4/0x10 | lr : test_bti_caller+0x1c/0x28 | sp : ffff80000800bdf0 | x29: ffff80000800bdf0 x28: 0000000000000000 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: ffff80000a2b8000 x22: 0000000000000000 x21: 0000000000000000 | x20: ffff8000099fa5b0 x19: ffff800009ff7000 x18: fffffbfffda37000 | x17: 3120676e696d7573 x16: 7361202c6e6f6974 x15: 0000000041a90000 | x14: 0040000000000041 x13: 0040000000000001 x12: ffff000001a90000 | x11: fffffbfffda37480 x10: 0068000000000703 x9 : 0001000040000000 | x8 : 0000000000090000 x7 : 0068000000000f03 x6 : 0060000000000f83 | x5 : ffff80000a2b6000 x4 : ffff0000028d0000 x3 : ffff800009f78378 | x2 : 0000000000000000 x1 : 0000000040210000 x0 : ffff800008025804 | Call trace: | test_bti_callee+0x4/0x10 | smp_cpus_done+0xb0/0xbc | smp_init+0x7c/0x8c | kernel_init_freeable+0x128/0x28c | kernel_init+0x28/0x13c | ret_from_fork+0x10/0x20 | Code: d50323bf d53cd040 d65f03c0 d503233f (d50323bf) Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Reviewed-by: Anshuman Khandual Cc: Alexandru Elisei Cc: Amit Daniel Kachhap Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20220913101732.3925290-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/exception.h | 3 ++- arch/arm64/kernel/entry-common.c | 14 +++++++++++++- arch/arm64/kernel/traps.c | 10 +++++++--- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 3f5141dc3341..0278a58abe69 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -59,7 +59,8 @@ asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs); void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs, unsigned long esr); -void do_bti(struct pt_regs *regs); +void do_el0_bti(struct pt_regs *regs); +void do_el1_bti(struct pt_regs *regs, unsigned long esr); void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, struct pt_regs *regs); void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index f334951d38e3..9173fad279af 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -388,6 +388,15 @@ static void noinstr el1_undef(struct pt_regs *regs, unsigned long esr) exit_to_kernel_mode(regs); } +static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr) +{ + enter_from_kernel_mode(regs); + local_daif_inherit(regs); + do_el1_bti(regs, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); +} + static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -427,6 +436,9 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_UNKNOWN: el1_undef(regs, esr); break; + case ESR_ELx_EC_BTI: + el1_bti(regs, esr); + break; case ESR_ELx_EC_BREAKPT_CUR: case ESR_ELx_EC_SOFTSTP_CUR: case ESR_ELx_EC_WATCHPT_CUR: @@ -594,7 +606,7 @@ static void noinstr el0_bti(struct pt_regs *regs) { enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); - do_bti(regs); + do_el0_bti(regs); exit_to_user_mode(regs); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 6cec65fbf8b8..54b5ba135b97 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -501,12 +501,16 @@ void do_undefinstr(struct pt_regs *regs, unsigned long esr) } NOKPROBE_SYMBOL(do_undefinstr); -void do_bti(struct pt_regs *regs) +void do_el0_bti(struct pt_regs *regs) { - BUG_ON(!user_mode(regs)); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } -NOKPROBE_SYMBOL(do_bti); + +void do_el1_bti(struct pt_regs *regs, unsigned long esr) +{ + die("Oops - BTI", regs, esr); +} +NOKPROBE_SYMBOL(do_el1_bti); void do_el0_fpac(struct pt_regs *regs, unsigned long esr) { From c0357a73fa4a96d8ed9ee46e9927d9fcbc9d0828 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 10 Sep 2022 17:33:49 +0100 Subject: [PATCH 074/126] arm64/sysreg: Align field names in ID_AA64DFR0_EL1 with architecture The naming scheme the architecture uses for the fields in ID_AA64DFR0_EL1 does not align well with kernel conventions, using as it does a lot of MixedCase in various arrangements. In preparation for automatically generating the defines for this register rename the defines used to match what is in the architecture. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220910163354.860255-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 2 +- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/el2_setup.h | 8 +++--- arch/arm64/include/asm/hw_breakpoint.h | 4 +-- arch/arm64/include/asm/sysreg.h | 36 +++++++++++++------------- arch/arm64/kernel/cpufeature.c | 14 +++++----- arch/arm64/kernel/debug-monitors.c | 2 +- arch/arm64/kernel/perf_event.c | 8 +++--- arch/arm64/kvm/debug.c | 4 +-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 12 ++++----- arch/arm64/kvm/pmu-emul.c | 16 ++++++------ arch/arm64/kvm/sys_regs.c | 16 ++++++------ 12 files changed, 62 insertions(+), 62 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index c1fc5f7bb978..71222f2b8a5a 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -512,7 +512,7 @@ alternative_endif */ .macro reset_pmuserenr_el0, tmpreg mrs \tmpreg, id_aa64dfr0_el1 - sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4 + sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVer_SHIFT, #4 cmp \tmpreg, #1 // Skip if no PMU present b.lt 9000f msr pmuserenr_el0, xzr // Disable PMU access from EL0 diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 79bb9e58d9c6..e3b63967c8a9 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -553,7 +553,7 @@ cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap) u64 mask = GENMASK_ULL(field + 3, field); /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */ - if (val == ID_AA64DFR0_PMUVER_IMP_DEF) + if (val == ID_AA64DFR0_PMUVer_IMP_DEF) val = 0; if (val > cap) { diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b6e9bea7c9ec..03af4278bc23 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -40,7 +40,7 @@ .macro __init_el2_debug mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 + sbfx x0, x1, #ID_AA64DFR0_PMUVer_SHIFT, #4 cmp x0, #1 b.lt .Lskip_pmu_\@ // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps @@ -49,7 +49,7 @@ csel x2, xzr, x0, lt // all PMU counters from EL1 /* Statistical profiling */ - ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 + ubfx x0, x1, #ID_AA64DFR0_PMSVer_SHIFT, #4 cbz x0, .Lskip_spe_\@ // Skip if SPE not present mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2, @@ -65,7 +65,7 @@ .Lskip_spe_\@: /* Trace buffer */ - ubfx x0, x1, #ID_AA64DFR0_TRBE_SHIFT, #4 + ubfx x0, x1, #ID_AA64DFR0_TraceBuffer_SHIFT, #4 cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present mrs_s x0, SYS_TRBIDR_EL1 @@ -137,7 +137,7 @@ mov x0, xzr mrs x1, id_aa64dfr0_el1 - ubfx x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 + ubfx x1, x1, #ID_AA64DFR0_PMSVer_SHIFT, #4 cmp x1, #3 b.lt .Lset_debug_fgt_\@ /* Disable PMSNEVFR_EL1 read and write traps */ diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index bc7aaed4b34e..d667c03d5f5e 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -142,7 +142,7 @@ static inline int get_num_brps(void) u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_BRPS_SHIFT); + ID_AA64DFR0_BRPs_SHIFT); } /* Determine number of WRP registers available. */ @@ -151,7 +151,7 @@ static inline int get_num_wrps(void) u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_WRPS_SHIFT); + ID_AA64DFR0_WRPs_SHIFT); } #endif /* __ASM_BREAKPOINT_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c7876363c6e5..b1e9e4d3d964 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -700,26 +700,26 @@ /* id_aa64dfr0 */ #define ID_AA64DFR0_MTPMU_SHIFT 48 -#define ID_AA64DFR0_TRBE_SHIFT 44 -#define ID_AA64DFR0_TRACE_FILT_SHIFT 40 -#define ID_AA64DFR0_DOUBLELOCK_SHIFT 36 -#define ID_AA64DFR0_PMSVER_SHIFT 32 -#define ID_AA64DFR0_CTX_CMPS_SHIFT 28 -#define ID_AA64DFR0_WRPS_SHIFT 20 -#define ID_AA64DFR0_BRPS_SHIFT 12 -#define ID_AA64DFR0_PMUVER_SHIFT 8 -#define ID_AA64DFR0_TRACEVER_SHIFT 4 -#define ID_AA64DFR0_DEBUGVER_SHIFT 0 +#define ID_AA64DFR0_TraceBuffer_SHIFT 44 +#define ID_AA64DFR0_TraceFilt_SHIFT 40 +#define ID_AA64DFR0_DoubleLock_SHIFT 36 +#define ID_AA64DFR0_PMSVer_SHIFT 32 +#define ID_AA64DFR0_CTX_CMPs_SHIFT 28 +#define ID_AA64DFR0_WRPs_SHIFT 20 +#define ID_AA64DFR0_BRPs_SHIFT 12 +#define ID_AA64DFR0_PMUVer_SHIFT 8 +#define ID_AA64DFR0_TraceVer_SHIFT 4 +#define ID_AA64DFR0_DebugVer_SHIFT 0 -#define ID_AA64DFR0_PMUVER_8_0 0x1 -#define ID_AA64DFR0_PMUVER_8_1 0x4 -#define ID_AA64DFR0_PMUVER_8_4 0x5 -#define ID_AA64DFR0_PMUVER_8_5 0x6 -#define ID_AA64DFR0_PMUVER_8_7 0x7 -#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf +#define ID_AA64DFR0_PMUVer_8_0 0x1 +#define ID_AA64DFR0_PMUVer_8_1 0x4 +#define ID_AA64DFR0_PMUVer_8_4 0x5 +#define ID_AA64DFR0_PMUVer_8_5 0x6 +#define ID_AA64DFR0_PMUVer_8_7 0x7 +#define ID_AA64DFR0_PMUVer_IMP_DEF 0xf -#define ID_AA64DFR0_PMSVER_8_2 0x1 -#define ID_AA64DFR0_PMSVER_8_3 0x2 +#define ID_AA64DFR0_PMSVer_8_2 0x1 +#define ID_AA64DFR0_PMSVer_8_3 0x2 #define ID_DFR0_PERFMON_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c2e42feb3e1a..c694d6cbf82d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -434,17 +434,17 @@ static const struct arm64_ftr_bits ftr_id_mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_DOUBLELOCK_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_DoubleLock_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVer_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPs_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPs_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPs_SHIFT, 4, 0), /* * We can instantiate multiple PMU instances with different levels * of support. */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVer_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DebugVer_SHIFT, 4, 0x6), ARM64_FTR_END, }; diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index bf9fe71589bc..572c8ac2873c 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -28,7 +28,7 @@ u8 debug_monitors_arch(void) { return cpuid_feature_extract_unsigned_field(read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1), - ID_AA64DFR0_DEBUGVER_SHIFT); + ID_AA64DFR0_DebugVer_SHIFT); } /* diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cb69ff1e6138..83f7a8980623 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -390,7 +390,7 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = { */ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) { - return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5); + return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVer_8_5); } static inline bool armv8pmu_event_has_user_read(struct perf_event *event) @@ -1145,8 +1145,8 @@ static void __armv8pmu_probe_pmu(void *info) dfr0 = read_sysreg(id_aa64dfr0_el1); pmuver = cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_PMUVER_SHIFT); - if (pmuver == ID_AA64DFR0_PMUVER_IMP_DEF || pmuver == 0) + ID_AA64DFR0_PMUVer_SHIFT); + if (pmuver == ID_AA64DFR0_PMUVer_IMP_DEF || pmuver == 0) return; cpu_pmu->pmuver = pmuver; @@ -1172,7 +1172,7 @@ static void __armv8pmu_probe_pmu(void *info) pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); /* store PMMIR_EL1 register for sysfs */ - if (pmuver >= ID_AA64DFR0_PMUVER_8_4 && (pmceid_raw[1] & BIT(31))) + if (pmuver >= ID_AA64DFR0_PMUVer_8_4 && (pmceid_raw[1] & BIT(31))) cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); else cpu_pmu->reg_pmmir = 0; diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 0b28d7db7c76..14878cd55c53 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -295,12 +295,12 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu) * If SPE is present on this CPU and is available at current EL, * we may need to check if the host state needs to be saved. */ - if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVER_SHIFT) && + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVer_SHIFT) && !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT))) vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE); /* Check if we have TRBE implemented and available at the host */ - if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRBE_SHIFT) && + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TraceBuffer_SHIFT) && !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG)) vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE); } diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index fc3e32709ba2..ba1327fac03f 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -86,32 +86,32 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) u64 cptr_set = 0; /* Trap/constrain PMU */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) { + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVer), feature_ids)) { mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR; mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME | MDCR_EL2_HPMN_MASK; } /* Trap Debug */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids)) + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DebugVer), feature_ids)) mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE; /* Trap OS Double Lock */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids)) + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DoubleLock), feature_ids)) mdcr_set |= MDCR_EL2_TDOSA; /* Trap SPE */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) { + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVer), feature_ids)) { mdcr_set |= MDCR_EL2_TPMS; mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; } /* Trap Trace Filter */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids)) + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TraceFilt), feature_ids)) mdcr_set |= MDCR_EL2_TTRF; /* Trap Trace */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids)) + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TraceVer), feature_ids)) cptr_set |= CPTR_EL2_TTA; vcpu->arch.mdcr_el2 |= mdcr_set; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 11c43bed5f97..331478c67dca 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -33,12 +33,12 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) pmuver = kvm->arch.arm_pmu->pmuver; switch (pmuver) { - case ID_AA64DFR0_PMUVER_8_0: + case ID_AA64DFR0_PMUVer_8_0: return GENMASK(9, 0); - case ID_AA64DFR0_PMUVER_8_1: - case ID_AA64DFR0_PMUVER_8_4: - case ID_AA64DFR0_PMUVER_8_5: - case ID_AA64DFR0_PMUVER_8_7: + case ID_AA64DFR0_PMUVer_8_1: + case ID_AA64DFR0_PMUVer_8_4: + case ID_AA64DFR0_PMUVer_8_5: + case ID_AA64DFR0_PMUVer_8_7: return GENMASK(15, 0); default: /* Shouldn't be here, just for sanity */ WARN_ONCE(1, "Unknown PMU version %d\n", pmuver); @@ -774,7 +774,7 @@ void kvm_host_pmu_init(struct arm_pmu *pmu) { struct arm_pmu_entry *entry; - if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF) + if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVer_IMP_DEF) return; mutex_lock(&arm_pmus_lock); @@ -828,7 +828,7 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void) if (event->pmu) { pmu = to_arm_pmu(event->pmu); if (pmu->pmuver == 0 || - pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF) + pmu->pmuver == ID_AA64DFR0_PMUVer_IMP_DEF) pmu = NULL; } @@ -856,7 +856,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled * as RAZ */ - if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_4) + if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVer_8_4) val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32); base = 32; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index fa61793467a7..626d7769e23d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1110,14 +1110,14 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), 6); + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DebugVer); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_DebugVer), 6); /* Limit guests to PMUv3 for ARMv8.4 */ val = cpuid_feature_cap_perfmon_field(val, - ID_AA64DFR0_PMUVER_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0); + ID_AA64DFR0_PMUVer_SHIFT, + kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVer_8_4 : 0); /* Hide SPE from guests */ - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER); + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVer); break; case SYS_ID_DFR0_EL1: /* Limit guests to PMUv3 for ARMv8.4 */ @@ -1827,9 +1827,9 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu, u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL1_EL3_SHIFT); - p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | - (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) | - (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) + p->regval = ((((dfr >> ID_AA64DFR0_WRPs_SHIFT) & 0xf) << 28) | + (((dfr >> ID_AA64DFR0_BRPs_SHIFT) & 0xf) << 24) | + (((dfr >> ID_AA64DFR0_CTX_CMPs_SHIFT) & 0xf) << 20) | (6 << 16) | (1 << 15) | (el3 << 14) | (el3 << 12)); return true; } From fcf37b38ff2282ef3dc6ba1966c83b29e5734edd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 10 Sep 2022 17:33:50 +0100 Subject: [PATCH 075/126] arm64/sysreg: Add _EL1 into ID_AA64DFR0_EL1 definition names Normally we include the full register name in the defines for fields within registers but this has not been followed for ID registers. In preparation for automatic generation of defines add the _EL1s into the defines for ID_AA64DFR0_EL1 to follow the convention. No functional changes. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220910163354.860255-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 2 +- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/el2_setup.h | 8 +++--- arch/arm64/include/asm/hw_breakpoint.h | 4 +-- arch/arm64/include/asm/sysreg.h | 38 +++++++++++++------------- arch/arm64/kernel/cpufeature.c | 14 +++++----- arch/arm64/kernel/debug-monitors.c | 2 +- arch/arm64/kernel/perf_event.c | 8 +++--- arch/arm64/kvm/debug.c | 4 +-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 12 ++++---- arch/arm64/kvm/pmu-emul.c | 16 +++++------ arch/arm64/kvm/sys_regs.c | 16 +++++------ 12 files changed, 63 insertions(+), 63 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 71222f2b8a5a..cf8e72e733de 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -512,7 +512,7 @@ alternative_endif */ .macro reset_pmuserenr_el0, tmpreg mrs \tmpreg, id_aa64dfr0_el1 - sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVer_SHIFT, #4 + sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 cmp \tmpreg, #1 // Skip if no PMU present b.lt 9000f msr pmuserenr_el0, xzr // Disable PMU access from EL0 diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index e3b63967c8a9..ff06e6fb5939 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -553,7 +553,7 @@ cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap) u64 mask = GENMASK_ULL(field + 3, field); /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */ - if (val == ID_AA64DFR0_PMUVer_IMP_DEF) + if (val == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) val = 0; if (val > cap) { diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 03af4278bc23..668569adf4d3 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -40,7 +40,7 @@ .macro __init_el2_debug mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_PMUVer_SHIFT, #4 + sbfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 cmp x0, #1 b.lt .Lskip_pmu_\@ // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps @@ -49,7 +49,7 @@ csel x2, xzr, x0, lt // all PMU counters from EL1 /* Statistical profiling */ - ubfx x0, x1, #ID_AA64DFR0_PMSVer_SHIFT, #4 + ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 cbz x0, .Lskip_spe_\@ // Skip if SPE not present mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2, @@ -65,7 +65,7 @@ .Lskip_spe_\@: /* Trace buffer */ - ubfx x0, x1, #ID_AA64DFR0_TraceBuffer_SHIFT, #4 + ubfx x0, x1, #ID_AA64DFR0_EL1_TraceBuffer_SHIFT, #4 cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present mrs_s x0, SYS_TRBIDR_EL1 @@ -137,7 +137,7 @@ mov x0, xzr mrs x1, id_aa64dfr0_el1 - ubfx x1, x1, #ID_AA64DFR0_PMSVer_SHIFT, #4 + ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 cmp x1, #3 b.lt .Lset_debug_fgt_\@ /* Disable PMSNEVFR_EL1 read and write traps */ diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index d667c03d5f5e..fa4c6ff3aa9b 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -142,7 +142,7 @@ static inline int get_num_brps(void) u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_BRPs_SHIFT); + ID_AA64DFR0_EL1_BRPs_SHIFT); } /* Determine number of WRP registers available. */ @@ -151,7 +151,7 @@ static inline int get_num_wrps(void) u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_WRPs_SHIFT); + ID_AA64DFR0_EL1_WRPs_SHIFT); } #endif /* __ASM_BREAKPOINT_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b1e9e4d3d964..a9544561397d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -699,27 +699,27 @@ #endif /* id_aa64dfr0 */ -#define ID_AA64DFR0_MTPMU_SHIFT 48 -#define ID_AA64DFR0_TraceBuffer_SHIFT 44 -#define ID_AA64DFR0_TraceFilt_SHIFT 40 -#define ID_AA64DFR0_DoubleLock_SHIFT 36 -#define ID_AA64DFR0_PMSVer_SHIFT 32 -#define ID_AA64DFR0_CTX_CMPs_SHIFT 28 -#define ID_AA64DFR0_WRPs_SHIFT 20 -#define ID_AA64DFR0_BRPs_SHIFT 12 -#define ID_AA64DFR0_PMUVer_SHIFT 8 -#define ID_AA64DFR0_TraceVer_SHIFT 4 -#define ID_AA64DFR0_DebugVer_SHIFT 0 +#define ID_AA64DFR0_EL1_MTPMU_SHIFT 48 +#define ID_AA64DFR0_EL1_TraceBuffer_SHIFT 44 +#define ID_AA64DFR0_EL1_TraceFilt_SHIFT 40 +#define ID_AA64DFR0_EL1_DoubleLock_SHIFT 36 +#define ID_AA64DFR0_EL1_PMSVer_SHIFT 32 +#define ID_AA64DFR0_EL1_CTX_CMPs_SHIFT 28 +#define ID_AA64DFR0_EL1_WRPs_SHIFT 20 +#define ID_AA64DFR0_EL1_BRPs_SHIFT 12 +#define ID_AA64DFR0_EL1_PMUVer_SHIFT 8 +#define ID_AA64DFR0_EL1_TraceVer_SHIFT 4 +#define ID_AA64DFR0_EL1_DebugVer_SHIFT 0 -#define ID_AA64DFR0_PMUVer_8_0 0x1 -#define ID_AA64DFR0_PMUVer_8_1 0x4 -#define ID_AA64DFR0_PMUVer_8_4 0x5 -#define ID_AA64DFR0_PMUVer_8_5 0x6 -#define ID_AA64DFR0_PMUVer_8_7 0x7 -#define ID_AA64DFR0_PMUVer_IMP_DEF 0xf +#define ID_AA64DFR0_EL1_PMUVer_8_0 0x1 +#define ID_AA64DFR0_EL1_PMUVer_8_1 0x4 +#define ID_AA64DFR0_EL1_PMUVer_8_4 0x5 +#define ID_AA64DFR0_EL1_PMUVer_8_5 0x6 +#define ID_AA64DFR0_EL1_PMUVer_8_7 0x7 +#define ID_AA64DFR0_EL1_PMUVer_IMP_DEF 0xf -#define ID_AA64DFR0_PMSVer_8_2 0x1 -#define ID_AA64DFR0_PMSVer_8_3 0x2 +#define ID_AA64DFR0_EL1_PMSVer_8_2 0x1 +#define ID_AA64DFR0_EL1_PMSVer_8_3 0x2 #define ID_DFR0_PERFMON_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c694d6cbf82d..c22732a6908b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -434,17 +434,17 @@ static const struct arm64_ftr_bits ftr_id_mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_DoubleLock_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVer_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPs_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPs_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPs_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_DoubleLock_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_PMSVer_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_CTX_CMPs_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_WRPs_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_BRPs_SHIFT, 4, 0), /* * We can instantiate multiple PMU instances with different levels * of support. */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVer_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DebugVer_SHIFT, 4, 0x6), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_EL1_PMUVer_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_EL1_DebugVer_SHIFT, 4, 0x6), ARM64_FTR_END, }; diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 572c8ac2873c..3da09778267e 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -28,7 +28,7 @@ u8 debug_monitors_arch(void) { return cpuid_feature_extract_unsigned_field(read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1), - ID_AA64DFR0_DebugVer_SHIFT); + ID_AA64DFR0_EL1_DebugVer_SHIFT); } /* diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 83f7a8980623..8b878837d8f1 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -390,7 +390,7 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = { */ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) { - return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVer_8_5); + return (cpu_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_8_5); } static inline bool armv8pmu_event_has_user_read(struct perf_event *event) @@ -1145,8 +1145,8 @@ static void __armv8pmu_probe_pmu(void *info) dfr0 = read_sysreg(id_aa64dfr0_el1); pmuver = cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_PMUVer_SHIFT); - if (pmuver == ID_AA64DFR0_PMUVer_IMP_DEF || pmuver == 0) + ID_AA64DFR0_EL1_PMUVer_SHIFT); + if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || pmuver == 0) return; cpu_pmu->pmuver = pmuver; @@ -1172,7 +1172,7 @@ static void __armv8pmu_probe_pmu(void *info) pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); /* store PMMIR_EL1 register for sysfs */ - if (pmuver >= ID_AA64DFR0_PMUVer_8_4 && (pmceid_raw[1] & BIT(31))) + if (pmuver >= ID_AA64DFR0_EL1_PMUVer_8_4 && (pmceid_raw[1] & BIT(31))) cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); else cpu_pmu->reg_pmmir = 0; diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 14878cd55c53..3f7563d768e2 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -295,12 +295,12 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu) * If SPE is present on this CPU and is available at current EL, * we may need to check if the host state needs to be saved. */ - if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVer_SHIFT) && + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT))) vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE); /* Check if we have TRBE implemented and available at the host */ - if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TraceBuffer_SHIFT) && + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) && !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG)) vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE); } diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index ba1327fac03f..85d3b7ae720f 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -86,32 +86,32 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) u64 cptr_set = 0; /* Trap/constrain PMU */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVer), feature_ids)) { + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) { mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR; mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME | MDCR_EL2_HPMN_MASK; } /* Trap Debug */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DebugVer), feature_ids)) + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids)) mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE; /* Trap OS Double Lock */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DoubleLock), feature_ids)) + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids)) mdcr_set |= MDCR_EL2_TDOSA; /* Trap SPE */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVer), feature_ids)) { + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) { mdcr_set |= MDCR_EL2_TPMS; mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; } /* Trap Trace Filter */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TraceFilt), feature_ids)) + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids)) mdcr_set |= MDCR_EL2_TTRF; /* Trap Trace */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TraceVer), feature_ids)) + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids)) cptr_set |= CPTR_EL2_TTA; vcpu->arch.mdcr_el2 |= mdcr_set; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 331478c67dca..7122b5387de6 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -33,12 +33,12 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) pmuver = kvm->arch.arm_pmu->pmuver; switch (pmuver) { - case ID_AA64DFR0_PMUVer_8_0: + case ID_AA64DFR0_EL1_PMUVer_8_0: return GENMASK(9, 0); - case ID_AA64DFR0_PMUVer_8_1: - case ID_AA64DFR0_PMUVer_8_4: - case ID_AA64DFR0_PMUVer_8_5: - case ID_AA64DFR0_PMUVer_8_7: + case ID_AA64DFR0_EL1_PMUVer_8_1: + case ID_AA64DFR0_EL1_PMUVer_8_4: + case ID_AA64DFR0_EL1_PMUVer_8_5: + case ID_AA64DFR0_EL1_PMUVer_8_7: return GENMASK(15, 0); default: /* Shouldn't be here, just for sanity */ WARN_ONCE(1, "Unknown PMU version %d\n", pmuver); @@ -774,7 +774,7 @@ void kvm_host_pmu_init(struct arm_pmu *pmu) { struct arm_pmu_entry *entry; - if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVer_IMP_DEF) + if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) return; mutex_lock(&arm_pmus_lock); @@ -828,7 +828,7 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void) if (event->pmu) { pmu = to_arm_pmu(event->pmu); if (pmu->pmuver == 0 || - pmu->pmuver == ID_AA64DFR0_PMUVer_IMP_DEF) + pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) pmu = NULL; } @@ -856,7 +856,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled * as RAZ */ - if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVer_8_4) + if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_8_4) val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32); base = 32; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 626d7769e23d..d4546a5b0ea5 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1110,14 +1110,14 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DebugVer); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_DebugVer), 6); + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6); /* Limit guests to PMUv3 for ARMv8.4 */ val = cpuid_feature_cap_perfmon_field(val, - ID_AA64DFR0_PMUVer_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVer_8_4 : 0); + ID_AA64DFR0_EL1_PMUVer_SHIFT, + kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_8_4 : 0); /* Hide SPE from guests */ - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVer); + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); break; case SYS_ID_DFR0_EL1: /* Limit guests to PMUv3 for ARMv8.4 */ @@ -1827,9 +1827,9 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu, u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL1_EL3_SHIFT); - p->regval = ((((dfr >> ID_AA64DFR0_WRPs_SHIFT) & 0xf) << 28) | - (((dfr >> ID_AA64DFR0_BRPs_SHIFT) & 0xf) << 24) | - (((dfr >> ID_AA64DFR0_CTX_CMPs_SHIFT) & 0xf) << 20) + p->regval = ((((dfr >> ID_AA64DFR0_EL1_WRPs_SHIFT) & 0xf) << 28) | + (((dfr >> ID_AA64DFR0_EL1_BRPs_SHIFT) & 0xf) << 24) | + (((dfr >> ID_AA64DFR0_EL1_CTX_CMPs_SHIFT) & 0xf) << 20) | (6 << 16) | (1 << 15) | (el3 << 14) | (el3 << 12)); return true; } From 121a8fc088f13c64d9f3c9b3e7faa4c246e0a32c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 10 Sep 2022 17:33:51 +0100 Subject: [PATCH 076/126] arm64/sysreg: Use feature numbering for PMU and SPE revisions Currently the kernel refers to the versions of the PMU and SPE features by the version of the architecture where those features were updated but the ARM refers to them using the FEAT_ names for the features. To improve consistency and help with updating for newer features and since v9 will make our current naming scheme a bit more confusing update the macros identfying features to use the FEAT_ based scheme. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220910163354.860255-4-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 14 +++++++------- arch/arm64/kernel/perf_event.c | 4 ++-- arch/arm64/kvm/pmu-emul.c | 12 ++++++------ arch/arm64/kvm/sys_regs.c | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index a9544561397d..aea3ec657c3f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -711,15 +711,15 @@ #define ID_AA64DFR0_EL1_TraceVer_SHIFT 4 #define ID_AA64DFR0_EL1_DebugVer_SHIFT 0 -#define ID_AA64DFR0_EL1_PMUVer_8_0 0x1 -#define ID_AA64DFR0_EL1_PMUVer_8_1 0x4 -#define ID_AA64DFR0_EL1_PMUVer_8_4 0x5 -#define ID_AA64DFR0_EL1_PMUVer_8_5 0x6 -#define ID_AA64DFR0_EL1_PMUVer_8_7 0x7 +#define ID_AA64DFR0_EL1_PMUVer_IMP 0x1 +#define ID_AA64DFR0_EL1_PMUVer_V3P1 0x4 +#define ID_AA64DFR0_EL1_PMUVer_V3P4 0x5 +#define ID_AA64DFR0_EL1_PMUVer_V3P5 0x6 +#define ID_AA64DFR0_EL1_PMUVer_V3P7 0x7 #define ID_AA64DFR0_EL1_PMUVer_IMP_DEF 0xf -#define ID_AA64DFR0_EL1_PMSVer_8_2 0x1 -#define ID_AA64DFR0_EL1_PMSVer_8_3 0x2 +#define ID_AA64DFR0_EL1_PMSVer_IMP 0x1 +#define ID_AA64DFR0_EL1_PMSVer_V1P1 0x2 #define ID_DFR0_PERFMON_SHIFT 24 diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 8b878837d8f1..7b0643fe2f13 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -390,7 +390,7 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = { */ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) { - return (cpu_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_8_5); + return (cpu_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5); } static inline bool armv8pmu_event_has_user_read(struct perf_event *event) @@ -1172,7 +1172,7 @@ static void __armv8pmu_probe_pmu(void *info) pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); /* store PMMIR_EL1 register for sysfs */ - if (pmuver >= ID_AA64DFR0_EL1_PMUVer_8_4 && (pmceid_raw[1] & BIT(31))) + if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31))) cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); else cpu_pmu->reg_pmmir = 0; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 7122b5387de6..0003c7d37533 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -33,12 +33,12 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) pmuver = kvm->arch.arm_pmu->pmuver; switch (pmuver) { - case ID_AA64DFR0_EL1_PMUVer_8_0: + case ID_AA64DFR0_EL1_PMUVer_IMP: return GENMASK(9, 0); - case ID_AA64DFR0_EL1_PMUVer_8_1: - case ID_AA64DFR0_EL1_PMUVer_8_4: - case ID_AA64DFR0_EL1_PMUVer_8_5: - case ID_AA64DFR0_EL1_PMUVer_8_7: + case ID_AA64DFR0_EL1_PMUVer_V3P1: + case ID_AA64DFR0_EL1_PMUVer_V3P4: + case ID_AA64DFR0_EL1_PMUVer_V3P5: + case ID_AA64DFR0_EL1_PMUVer_V3P7: return GENMASK(15, 0); default: /* Shouldn't be here, just for sanity */ WARN_ONCE(1, "Unknown PMU version %d\n", pmuver); @@ -856,7 +856,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled * as RAZ */ - if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_8_4) + if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4) val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32); base = 32; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d4546a5b0ea5..2ef1121ab844 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1115,7 +1115,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, /* Limit guests to PMUv3 for ARMv8.4 */ val = cpuid_feature_cap_perfmon_field(val, ID_AA64DFR0_EL1_PMUVer_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_8_4 : 0); + kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_V3P4 : 0); /* Hide SPE from guests */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); break; From e62a2d2610f0e6cf803027e3803c822140a2a407 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 10 Sep 2022 17:33:52 +0100 Subject: [PATCH 077/126] arm64/sysreg: Convert ID_AA64FDR0_EL1 to automatic generation Convert ID_AA64DFR0_EL1 to automatic generation as per DDI0487I.a, no functional changes. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220910163354.860255-5-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 24 ------------- arch/arm64/tools/sysreg | 63 +++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 24 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index aea3ec657c3f..943def0d28f2 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -190,7 +190,6 @@ #define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) #define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) -#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) #define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) #define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) @@ -698,29 +697,6 @@ #define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48 #endif -/* id_aa64dfr0 */ -#define ID_AA64DFR0_EL1_MTPMU_SHIFT 48 -#define ID_AA64DFR0_EL1_TraceBuffer_SHIFT 44 -#define ID_AA64DFR0_EL1_TraceFilt_SHIFT 40 -#define ID_AA64DFR0_EL1_DoubleLock_SHIFT 36 -#define ID_AA64DFR0_EL1_PMSVer_SHIFT 32 -#define ID_AA64DFR0_EL1_CTX_CMPs_SHIFT 28 -#define ID_AA64DFR0_EL1_WRPs_SHIFT 20 -#define ID_AA64DFR0_EL1_BRPs_SHIFT 12 -#define ID_AA64DFR0_EL1_PMUVer_SHIFT 8 -#define ID_AA64DFR0_EL1_TraceVer_SHIFT 4 -#define ID_AA64DFR0_EL1_DebugVer_SHIFT 0 - -#define ID_AA64DFR0_EL1_PMUVer_IMP 0x1 -#define ID_AA64DFR0_EL1_PMUVer_V3P1 0x4 -#define ID_AA64DFR0_EL1_PMUVer_V3P4 0x5 -#define ID_AA64DFR0_EL1_PMUVer_V3P5 0x6 -#define ID_AA64DFR0_EL1_PMUVer_V3P7 0x7 -#define ID_AA64DFR0_EL1_PMUVer_IMP_DEF 0xf - -#define ID_AA64DFR0_EL1_PMSVer_IMP 0x1 -#define ID_AA64DFR0_EL1_PMSVer_V1P1 0x2 - #define ID_DFR0_PERFMON_SHIFT 24 #define ID_DFR0_PERFMON_8_0 0x3 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 5bccf7712ec3..e552805c7501 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -252,6 +252,69 @@ EndEnum Res0 31:0 EndSysreg +Sysreg ID_AA64DFR0_EL1 3 0 0 5 0 +Enum 63:60 HPMN0 + 0b0000 UNPREDICTABLE + 0b0001 DEF +EndEnum +Res0 59:56 +Enum 55:52 BRBE + 0b0000 NI + 0b0001 IMP + 0b0010 BRBE_V1P1 +EndEnum +Enum 51:48 MTPMU + 0b0000 NI_IMPDEF + 0b0001 IMP + 0b1111 NI +EndEnum +Enum 47:44 TraceBuffer + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 43:40 TraceFilt + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 39:36 DoubleLock + 0b0000 IMP + 0b1111 NI +EndEnum +Enum 35:32 PMSVer + 0b0000 NI + 0b0001 IMP + 0b0010 V1P1 + 0b0011 V1P2 + 0b0100 V1P3 +EndEnum +Field 31:28 CTX_CMPs +Res0 27:24 +Field 23:20 WRPs +Res0 19:16 +Field 15:12 BRPs +Enum 11:8 PMUVer + 0b0000 NI + 0b0001 IMP + 0b0100 V3P1 + 0b0101 V3P4 + 0b0110 V3P5 + 0b0111 V3P7 + 0b1000 V3P8 + 0b1111 IMP_DEF +EndEnum +Enum 7:4 TraceVer + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 DebugVer + 0b0110 IMP + 0b0111 VHE + 0b1000 V8P2 + 0b1001 V8P4 + 0b1010 V8P8 +EndEnum +EndSysreg + Sysreg ID_AA64ISAR0_EL1 3 0 0 6 0 Enum 63:60 RNDR 0b0000 NI From c65c617806ed490cf0ed09a151c627e41ce6e0c6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 10 Sep 2022 17:33:53 +0100 Subject: [PATCH 078/126] arm64/sysreg: Convert ID_AA64DFR1_EL1 to automatic generation Convert ID_AA64FDR1_EL1 to automatic generation as per DDI0487I.a, no functional changes. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220910163354.860255-6-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 2 -- arch/arm64/tools/sysreg | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 943def0d28f2..9a7d84d39b0b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -190,8 +190,6 @@ #define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) #define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) -#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) - #define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) #define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index e552805c7501..076766b6faf0 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -315,6 +315,10 @@ Enum 3:0 DebugVer EndEnum EndSysreg +Sysreg ID_AA64DFR1_EL1 3 0 0 5 1 +Res0 63:0 +EndSysreg + Sysreg ID_AA64ISAR0_EL1 3 0 0 6 0 Enum 63:60 RNDR 0b0000 NI From 10453bf149c9539c446574932f00ea50438cede5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 10 Sep 2022 17:33:54 +0100 Subject: [PATCH 079/126] arm64/sysreg: Convert ID_AA64AFRn_EL1 to automatic generation Convert ID_AA64AFRn_EL1 to automatic generation as per DDI0487I.a, no functional changes. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220910163354.860255-7-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 3 --- arch/arm64/tools/sysreg | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9a7d84d39b0b..debc1c0b2b7f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -190,9 +190,6 @@ #define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) #define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) -#define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) -#define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) - #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) #define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 076766b6faf0..7f1fb36f208c 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -319,6 +319,22 @@ Sysreg ID_AA64DFR1_EL1 3 0 0 5 1 Res0 63:0 EndSysreg +Sysreg ID_AA64AFR0_EL1 3 0 0 5 4 +Res0 63:32 +Field 31:28 IMPDEF7 +Field 27:24 IMPDEF6 +Field 23:20 IMPDEF5 +Field 19:16 IMPDEF4 +Field 15:12 IMPDEF3 +Field 11:8 IMPDEF2 +Field 7:4 IMPDEF1 +Field 3:0 IMPDEF0 +EndSysreg + +Sysreg ID_AA64AFR1_EL1 3 0 0 5 5 +Res0 63:0 +EndSysreg + Sysreg ID_AA64ISAR0_EL1 3 0 0 6 0 Enum 63:60 RNDR 0b0000 NI From 78d2b1976b515a9f24a9784c8415b285a092dd3c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Sep 2022 15:10:59 +0100 Subject: [PATCH 080/126] kselftest/arm64: Add missing newline in hwcap output Clean up the output of the test by adding a missing newline, the fix had been done locally but didn't make it into the applied version. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220913141101.151400-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/hwcap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 8b5c646cea63..04e4a81dd159 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -151,7 +151,7 @@ int main(void) have_cpuinfo = cpuinfo_present(hwcap->cpuinfo); if (have_hwcap) - ksft_print_msg("%s present", hwcap->name); + ksft_print_msg("%s present\n", hwcap->name); ksft_test_result(have_hwcap == have_cpuinfo, "cpuinfo_match_%s\n", hwcap->name); From 859a9d51a268f176b6b42b2c0fba997967a198ed Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Sep 2022 15:11:00 +0100 Subject: [PATCH 081/126] kselftest/arm64: Add SVE 2 to the tested hwcaps Include SVE 2 and the various subfeatures it adds in the set of hwcaps we check for. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220913141101.151400-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/hwcap.c | 136 ++++++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 04e4a81dd159..6c91b4a0987e 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -45,6 +45,66 @@ static void sve_sigill(void) asm volatile(".inst 0x04bf5000" : : : "x0"); } +static void sve2_sigill(void) +{ + /* SQABS Z0.b, P0/M, Z0.B */ + asm volatile(".inst 0x4408A000" : : : "z0"); +} + +static void sveaes_sigill(void) +{ + /* AESD z0.b, z0.b, z0.b */ + asm volatile(".inst 0x4522e400" : : : "z0"); +} + +static void svepmull_sigill(void) +{ + /* PMULLB Z0.Q, Z0.D, Z0.D */ + asm volatile(".inst 0x45006800" : : : "z0"); +} + +static void svebitperm_sigill(void) +{ + /* BDEP Z0.B, Z0.B, Z0.B */ + asm volatile(".inst 0x4500b400" : : : "z0"); +} + +static void svesha3_sigill(void) +{ + /* EOR3 Z0.D, Z0.D, Z0.D, Z0.D */ + asm volatile(".inst 0x4203800" : : : "z0"); +} + +static void svesm4_sigill(void) +{ + /* SM4E Z0.S, Z0.S, Z0.S */ + asm volatile(".inst 0x4523e000" : : : "z0"); +} + +static void svei8mm_sigill(void) +{ + /* USDOT Z0.S, Z0.B, Z0.B[0] */ + asm volatile(".inst 0x44a01800" : : : "z0"); +} + +static void svef32mm_sigill(void) +{ + /* FMMLA Z0.S, Z0.S, Z0.S */ + asm volatile(".inst 0x64a0e400" : : : "z0"); +} + +static void svef64mm_sigill(void) +{ + /* FMMLA Z0.D, Z0.D, Z0.D */ + asm volatile(".inst 0x64e0e400" : : : "z0"); +} + +static void svebf16_sigill(void) +{ + /* BFCVT Z0.H, P0/M, Z0.S */ + asm volatile(".inst 0x658aa000" : : : "z0"); +} + static const struct hwcap_data { const char *name; unsigned long at_hwcap; @@ -69,6 +129,82 @@ static const struct hwcap_data { .sigill_fn = sve_sigill, .sigill_reliable = true, }, + { + .name = "SVE 2", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVE2, + .cpuinfo = "sve2", + .sigill_fn = sve2_sigill, + }, + { + .name = "SVE AES", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVEAES, + .cpuinfo = "sveaes", + .sigill_fn = sveaes_sigill, + }, + { + .name = "SVE2 PMULL", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVEPMULL, + .cpuinfo = "svepmull", + .sigill_fn = svepmull_sigill, + }, + { + .name = "SVE2 BITPERM", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVEBITPERM, + .cpuinfo = "svebitperm", + .sigill_fn = svebitperm_sigill, + }, + { + .name = "SVE2 SHA3", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVESHA3, + .cpuinfo = "svesha3", + .sigill_fn = svesha3_sigill, + }, + { + .name = "SVE2 SM4", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVESM4, + .cpuinfo = "svesm4", + .sigill_fn = svesm4_sigill, + }, + { + .name = "SVE2 I8MM", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVEI8MM, + .cpuinfo = "svei8mm", + .sigill_fn = svei8mm_sigill, + }, + { + .name = "SVE2 F32MM", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVEF32MM, + .cpuinfo = "svef32mm", + .sigill_fn = svef32mm_sigill, + }, + { + .name = "SVE2 F64MM", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVEF64MM, + .cpuinfo = "svef64mm", + .sigill_fn = svef64mm_sigill, + }, + { + .name = "SVE2 BF16", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVEBF16, + .cpuinfo = "svebf16", + .sigill_fn = svebf16_sigill, + }, + { + .name = "SVE2 EBF16", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVE_EBF16, + .cpuinfo = "sveebf16", + }, }; static bool seen_sigill; From ef939f30510b43e90f1fba30477b678107faeae7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Sep 2022 15:11:01 +0100 Subject: [PATCH 082/126] kselftest/arm64: Add hwcap test for RNG Validate the RNG hwcap and make sure we don't generate a SIGILL reading RNDR when it is reported. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220913141101.151400-4-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/hwcap.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 6c91b4a0987e..d173e41f2123 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -33,6 +33,11 @@ */ typedef void (*sigill_fn)(void); +static void rng_sigill(void) +{ + asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0"); +} + static void sme_sigill(void) { /* RDSVL x0, #0 */ @@ -113,6 +118,13 @@ static const struct hwcap_data { sigill_fn sigill_fn; bool sigill_reliable; } hwcaps[] = { + { + .name = "RNG", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_RNG, + .cpuinfo = "rng", + .sigill_fn = rng_sigill, + }, { .name = "SME", .at_hwcap = AT_HWCAP2, From 237405ebef580a7352a52129b2465c117145eafa Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 9 Sep 2022 17:59:36 +0100 Subject: [PATCH 083/126] arm64: cpufeature: Force HWCAP to be based on the sysreg visible to user-space arm64 advertises hardware features to user-space via HWCAPs, and by emulating access to the CPUs id registers. The cpufeature code has a sanitised system-wide view of an id register, and a sanitised user-space view of an id register, where some features use their 'safe' value instead of the hardware value. It is currently possible for a HWCAP to be advertised where the user-space view of the id register does not show the feature as supported. Erratum workaround need to remove both the HWCAP, and the feature from the user-space view of the id register. This involves duplicating the code, and spreading it over cpufeature.c and cpu_errata.c. Make the HWCAP code use the user-space view of id registers. This ensures the values never diverge, and allows erratum workaround to remove HWCAP by modifying the user-space view of the id register. Signed-off-by: James Morse Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20220909165938.3931307-2-james.morse@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 41 ++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index af4de817d712..b6a4e97805a4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1401,17 +1401,40 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) return val >= entry->min_field_value; } +static u64 +read_scoped_sysreg(const struct arm64_cpu_capabilities *entry, int scope) +{ + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + return read_sanitised_ftr_reg(entry->sys_reg); + else + return __read_sysreg_by_encoding(entry->sys_reg); +} + +static bool +has_user_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) +{ + int mask; + struct arm64_ftr_reg *regp; + u64 val = read_scoped_sysreg(entry, scope); + + regp = get_arm64_ftr_reg(entry->sys_reg); + if (!regp) + return false; + + mask = cpuid_feature_extract_unsigned_field_width(regp->user_mask, + entry->field_pos, + entry->field_width); + if (!mask) + return false; + + return feature_matches(val, entry); +} + static bool has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) { - u64 val; - - WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); - if (scope == SCOPE_SYSTEM) - val = read_sanitised_ftr_reg(entry->sys_reg); - else - val = __read_sysreg_by_encoding(entry->sys_reg); - + u64 val = read_scoped_sysreg(entry, scope); return feature_matches(val, entry); } @@ -2624,7 +2647,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { }; #define HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \ - .matches = has_cpuid_feature, \ + .matches = has_user_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ .field_width = width, \ From 445c953e4a84b2942bdb5c0b5aff571c903680fe Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 9 Sep 2022 17:59:37 +0100 Subject: [PATCH 084/126] arm64: cpufeature: Expose get_arm64_ftr_reg() outside cpufeature.c get_arm64_ftr_reg() returns the properties of a system register based on its instruction encoding. This is needed by erratum workaround in cpu_errata.c to modify the user-space visible view of id registers. Signed-off-by: James Morse Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20220909165938.3931307-3-james.morse@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 2 ++ arch/arm64/kernel/cpufeature.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index fd7d75a275f6..5c3317bc06c7 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -907,6 +907,8 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) return 8; } +struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id); + extern struct arm64_ftr_override id_aa64mmfr1_override; extern struct arm64_ftr_override id_aa64pfr0_override; extern struct arm64_ftr_override id_aa64pfr1_override; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b6a4e97805a4..24f43ad02cd3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -750,7 +750,7 @@ static struct arm64_ftr_reg *get_arm64_ftr_reg_nowarn(u32 sys_id) * returns - Upon success, matching ftr_reg entry for id. * - NULL on failure but with an WARN_ON(). */ -static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) +struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) { struct arm64_ftr_reg *reg; From 1bdb0fbb2e27c51a6f311867726462c983a1d9ee Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 9 Sep 2022 17:59:38 +0100 Subject: [PATCH 085/126] arm64: errata: remove BF16 HWCAP due to incorrect result on Cortex-A510 Cortex-A510's erratum #2658417 causes two BF16 instructions to return the wrong result in rare circumstances when a pair of A510 CPUs are using shared neon hardware. The two instructions affected are BFMMLA and VMMLA, support for these is indicated by the BF16 HWCAP. Remove it on affected platforms. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20220909165938.3931307-4-james.morse@arm.com [catalin.marinas@arm.com: add revision to the Kconfig help; remove .type] Signed-off-by: Catalin Marinas --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 13 +++++++++++++ arch/arm64/kernel/cpu_errata.c | 26 ++++++++++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 4 files changed, 42 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index fda97b3fcf01..17d9fc5d14fb 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -110,6 +110,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9fb9fff08c94..526ab76cd233 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -733,6 +733,19 @@ config ARM64_ERRATUM_2077057 If unsure, say Y. +config ARM64_ERRATUM_2658417 + bool "Cortex-A510: 2658417: remove BF16 support due to incorrect result" + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2658417. + Affected Cortex-A510 (r0p0 to r1p1) may produce the wrong result for + BFMMLA or VMMLA instructions in rare circumstances when a pair of + A510 CPUs are using shared neon hardware. As the sharing is not + discoverable by the kernel, hide the BF16 HWCAP to indicate that + user-space should not be using these instructions. + + If unsure, say Y. + config ARM64_ERRATUM_2119858 bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 53b973b6059f..58ca4f6b25d6 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -121,6 +121,22 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); } +static DEFINE_RAW_SPINLOCK(reg_user_mask_modification); +static void __maybe_unused +cpu_clear_bf16_from_user_emulation(const struct arm64_cpu_capabilities *__unused) +{ + struct arm64_ftr_reg *regp; + + regp = get_arm64_ftr_reg(SYS_ID_AA64ISAR1_EL1); + if (!regp) + return; + + raw_spin_lock(®_user_mask_modification); + if (regp->user_mask & ID_AA64ISAR1_EL1_BF16_MASK) + regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK; + raw_spin_unlock(®_user_mask_modification); +} + #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) @@ -691,6 +707,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { CAP_MIDR_RANGE_LIST(broken_aarch32_aes), .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_2658417 + { + .desc = "ARM erratum 2658417", + .capability = ARM64_WORKAROUND_2658417, + /* Cortex-A510 r0p0 - r1p1 */ + ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1), + MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)), + .cpu_enable = cpu_clear_bf16_from_user_emulation, + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 63b2484ce6c3..f553a7cb1b07 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -68,6 +68,7 @@ WORKAROUND_2038923 WORKAROUND_2064142 WORKAROUND_2077057 WORKAROUND_2457168 +WORKAROUND_2658417 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_TRBE_WRITE_OUT_OF_RANGE From 92b4b5619f12770d455ae639ed4e773bdf01aff6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 12 Sep 2022 17:22:03 +0100 Subject: [PATCH 086/126] arm64: cpufeature: make cpus_have_cap() noinstr-safe Currently it isn't safe to use cpus_have_cap() from noinstr code as test_bit() is explicitly instrumented, and were cpus_have_cap() placed out-of-line, cpus_have_cap() itself could be instrumented. Make cpus_have_cap() noinstr safe by marking it __always_inline and using arch_test_bit(). Aside from the prevention of instrumentation, there should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: James Morse Cc: Joey Gouly Cc: Marc Zyngier Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220912162210.3626215-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index fd7d75a275f6..630c337c7774 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -448,11 +448,11 @@ static __always_inline bool system_capabilities_finalized(void) * * Before the capability is detected, this returns false. */ -static inline bool cpus_have_cap(unsigned int num) +static __always_inline bool cpus_have_cap(unsigned int num) { if (num >= ARM64_NCAPS) return false; - return test_bit(num, cpu_hwcaps); + return arch_test_bit(num, cpu_hwcaps); } /* From 34bbfdfb146be3ad485cbe526b2a1e9ce220649c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 12 Sep 2022 17:22:04 +0100 Subject: [PATCH 087/126] arm64: alternatives: kvm: prepare for cap changes The KVM patching callbacks use cpus_have_final_cap() internally within has_vhe(), and subsequent patches will make it invalid to call cpus_have_final_cap() before alternatives patching has completed, and will mean that cpus_have_const_cap() will always fall back to dynamic checks prior to alternatives patching. In preparation for said change, this patch modifies the KVM patching callbacks to use cpus_have_cap() directly. This is not subject to patching, and will dynamically check the cpu_hwcaps array, which is functionally equivalent to the existing behaviour. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: James Morse Cc: Joey Gouly Cc: Marc Zyngier Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220912162210.3626215-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kvm/va_layout.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index acdb7b3cc97d..91b22a014610 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -169,7 +169,7 @@ void __init kvm_update_va_mask(struct alt_instr *alt, * dictates it and we don't have any spare bits in the * address), NOP everything after masking the kernel VA. */ - if (has_vhe() || (!tag_val && i > 0)) { + if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN) || (!tag_val && i > 0)) { updptr[i] = cpu_to_le32(aarch64_insn_gen_nop()); continue; } @@ -193,7 +193,8 @@ void kvm_patch_vector_branch(struct alt_instr *alt, BUG_ON(nr_inst != 4); - if (!cpus_have_const_cap(ARM64_SPECTRE_V3A) || WARN_ON_ONCE(has_vhe())) + if (!cpus_have_cap(ARM64_SPECTRE_V3A) || + WARN_ON_ONCE(cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))) return; /* From 747ad8d557645b8c9adb83334a620426e4c9d60f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 12 Sep 2022 17:22:05 +0100 Subject: [PATCH 088/126] arm64: alternatives: proton-pack: prepare for cap changes The spectre patching callbacks use cpus_have_final_cap(), and subsequent patches will make it invalid to call cpus_have_final_cap() before alternatives patching has completed. In preparation for said change, this patch modifies the spectre patching callbacks use cpus_have_cap(). This is not subject to patching, and will dynamically check the cpu_hwcaps array, which is functionally equivalent to the existing behaviour. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Joey Gouly Cc: Ard Biesheuvel Cc: James Morse Cc: Marc Zyngier Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220912162210.3626215-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/proton-pack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 40be3a7c2c53..8010b2e573fa 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -586,7 +586,7 @@ void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, if (spectre_v4_mitigations_off()) return; - if (cpus_have_final_cap(ARM64_SSBS)) + if (cpus_have_cap(ARM64_SSBS)) return; if (spectre_v4_mitigations_dynamic()) From c5ba03260c7afad6977654d6bc7fcabbcd35c379 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 12 Sep 2022 17:22:06 +0100 Subject: [PATCH 089/126] arm64: alternatives: hoist print out of __apply_alternatives() Printing in the middle of __apply_alternatives() is potentially unsafe and not all that helpful given these days we practically always patch *something*. Hoist the print out of __apply_alternatives(), and add separate prints to __apply_alternatives() and apply_alternatives_all(), which will make it easier to spot if either patching call goes wrong. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: James Morse Cc: Joey Gouly Cc: Marc Zyngier Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220912162210.3626215-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/alternative.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 9bcaa5eacf16..d94d97cb4a0b 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -156,8 +156,6 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu else BUG_ON(alt->alt_len != alt->orig_len); - pr_info_once("patching kernel code\n"); - origptr = ALT_ORIG_PTR(alt); updptr = is_module ? origptr : lm_alias(origptr); nr_inst = alt->orig_len / AARCH64_INSN_SIZE; @@ -225,6 +223,8 @@ static int __apply_alternatives_multi_stop(void *unused) void __init apply_alternatives_all(void) { + pr_info("applying system-wide alternatives\n"); + /* better not try code patching on a live SMP system */ stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask); } @@ -244,6 +244,8 @@ void __init apply_boot_alternatives(void) /* If called on non-boot cpu things could go wrong */ WARN_ON(smp_processor_id() != 0); + pr_info("applying boot alternatives\n"); + __apply_alternatives(®ion, false, &boot_capabilities[0]); } From b723edf3a12a2604cdc0dfcd7cc93d210a815e0c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 12 Sep 2022 17:22:07 +0100 Subject: [PATCH 090/126] arm64: alternatives: make alt_region const We never alter a struct alt_region after creation, and we open-code the bounds of the kernel alternatives region in two functions. The duplication is a bit unfortunate for clarity (and in future we're likely to have more functions altering alternative regions), and to avoid accidents it would be good to make the structure const. This patch adds a shared struct `kernel_alternatives` alt_region for the main kernel image, and marks the alt_regions as const to prevent unintentional modification. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: James Morse Cc: Joey Gouly Cc: Marc Zyngier Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220912162210.3626215-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/alternative.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index d94d97cb4a0b..2e18c9c0f612 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -133,8 +133,9 @@ static void clean_dcache_range_nopatch(u64 start, u64 end) } while (cur += d_size, cur < end); } -static void __nocfi __apply_alternatives(struct alt_region *region, bool is_module, - unsigned long *feature_mask) +static void __nocfi __apply_alternatives(const struct alt_region *region, + bool is_module, + unsigned long *feature_mask) { struct alt_instr *alt; __le32 *origptr, *updptr; @@ -190,17 +191,17 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu } } +static const struct alt_region kernel_alternatives = { + .begin = (struct alt_instr *)__alt_instructions, + .end = (struct alt_instr *)__alt_instructions_end, +}; + /* * We might be patching the stop_machine state machine, so implement a * really simple polling protocol here. */ static int __apply_alternatives_multi_stop(void *unused) { - struct alt_region region = { - .begin = (struct alt_instr *)__alt_instructions, - .end = (struct alt_instr *)__alt_instructions_end, - }; - /* We always have a CPU 0 at this point (__init) */ if (smp_processor_id()) { while (!all_alternatives_applied) @@ -213,7 +214,8 @@ static int __apply_alternatives_multi_stop(void *unused) ARM64_NPATCHABLE); BUG_ON(all_alternatives_applied); - __apply_alternatives(®ion, false, remaining_capabilities); + __apply_alternatives(&kernel_alternatives, false, + remaining_capabilities); /* Barriers provided by the cache flushing */ all_alternatives_applied = 1; } @@ -236,17 +238,13 @@ void __init apply_alternatives_all(void) */ void __init apply_boot_alternatives(void) { - struct alt_region region = { - .begin = (struct alt_instr *)__alt_instructions, - .end = (struct alt_instr *)__alt_instructions_end, - }; - /* If called on non-boot cpu things could go wrong */ WARN_ON(smp_processor_id() != 0); pr_info("applying boot alternatives\n"); - __apply_alternatives(®ion, false, &boot_capabilities[0]); + __apply_alternatives(&kernel_alternatives, false, + &boot_capabilities[0]); } #ifdef CONFIG_MODULES From 4c0bd995d73ed8897650095c7892b132a0bd66a4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 12 Sep 2022 17:22:08 +0100 Subject: [PATCH 091/126] arm64: alternatives: have callbacks take a cap Today, callback alternatives are special-cased within __apply_alternatives(), and are applied alongside patching for system capabilities as ARM64_NCAPS is not part of the boot_capabilities feature mask. This special-casing is less than ideal. Giving special meaning to ARM64_NCAPS for this requires some structures and loops to use ARM64_NCAPS + 1 (AKA ARM64_NPATCHABLE), while others use ARM64_NCAPS. It's also not immediately clear callback alternatives are only applied when applying alternatives for system-wide features. To make this a bit clearer, changes the way that callback alternatives are identified to remove the special-casing of ARM64_NCAPS, and to allow callback alternatives to be associated with a cpucap as with all other alternatives. New cpucaps, ARM64_ALWAYS_BOOT and ARM64_ALWAYS_SYSTEM are added which are always detected alongside boot cpu capabilities and system capabilities respectively. All existing callback alternatives are made to use ARM64_ALWAYS_SYSTEM, and so will be patched at the same point during the boot flow as before. Subsequent patches will make more use of these new cpucaps. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: James Morse Cc: Joey Gouly Cc: Marc Zyngier Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220912162210.3626215-7-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative-macros.h | 18 +++++++++----- arch/arm64/include/asm/assembler.h | 10 ++++---- arch/arm64/include/asm/cpufeature.h | 4 +--- arch/arm64/include/asm/kvm_mmu.h | 5 ++-- arch/arm64/kernel/alternative.c | 26 +++++++++++---------- arch/arm64/kernel/cpufeature.c | 19 +++++++++++++-- arch/arm64/kernel/entry.S | 8 +++---- arch/arm64/kvm/hyp/hyp-entry.S | 4 ++-- arch/arm64/tools/cpucaps | 2 ++ 9 files changed, 60 insertions(+), 36 deletions(-) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 7e157ab6cd50..189c31be163c 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -2,10 +2,16 @@ #ifndef __ASM_ALTERNATIVE_MACROS_H #define __ASM_ALTERNATIVE_MACROS_H +#include + #include #include -#define ARM64_CB_PATCH ARM64_NCAPS +#define ARM64_CB_BIT (UL(1) << 15) + +#if ARM64_NCAPS >= ARM64_CB_BIT +#error "cpucaps have overflown ARM64_CB_BIT" +#endif #ifndef __ASSEMBLY__ @@ -73,8 +79,8 @@ #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) -#define ALTERNATIVE_CB(oldinstr, cb) \ - __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) +#define ALTERNATIVE_CB(oldinstr, feature, cb) \ + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_BIT | (feature), 1, cb) #else #include @@ -82,7 +88,7 @@ .macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len .word \orig_offset - . .word \alt_offset - . - .hword \feature + .hword (\feature) .byte \orig_len .byte \alt_len .endm @@ -141,10 +147,10 @@ 661: .endm -.macro alternative_cb cb +.macro alternative_cb cap, cb .set .Lasm_alt_mode, 0 .pushsection .altinstructions, "a" - altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0 + altinstruction_entry 661f, \cb, ARM64_CB_BIT | \cap, 662f-661f, 0 .popsection 661: .endm diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 5846145be523..d851e2733439 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -293,7 +293,7 @@ alternative_endif alternative_if_not ARM64_KVM_PROTECTED_MODE ASM_BUG() alternative_else_nop_endif -alternative_cb kvm_compute_final_ctr_el0 +alternative_cb ARM64_ALWAYS_SYSTEM, kvm_compute_final_ctr_el0 movz \reg, #0 movk \reg, #0, lsl #16 movk \reg, #0, lsl #32 @@ -877,7 +877,7 @@ alternative_endif .macro __mitigate_spectre_bhb_loop tmp #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY -alternative_cb spectre_bhb_patch_loop_iter +alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_iter mov \tmp, #32 // Patched to correct the immediate alternative_cb_end .Lspectre_bhb_loop\@: @@ -890,7 +890,7 @@ alternative_cb_end .macro mitigate_spectre_bhb_loop tmp #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY -alternative_cb spectre_bhb_patch_loop_mitigation_enable +alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_mitigation_enable b .L_spectre_bhb_loop_done\@ // Patched to NOP alternative_cb_end __mitigate_spectre_bhb_loop \tmp @@ -904,7 +904,7 @@ alternative_cb_end stp x0, x1, [sp, #-16]! stp x2, x3, [sp, #-16]! mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 -alternative_cb smccc_patch_fw_mitigation_conduit +alternative_cb ARM64_ALWAYS_SYSTEM, smccc_patch_fw_mitigation_conduit nop // Patched to SMC/HVC #0 alternative_cb_end ldp x2, x3, [sp], #16 @@ -914,7 +914,7 @@ alternative_cb_end .macro mitigate_spectre_bhb_clear_insn #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY -alternative_cb spectre_bhb_patch_clearbhb +alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_clearbhb /* Patched to NOP when not supported */ clearbhb isb diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 630c337c7774..f5852ad6e884 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -422,9 +422,7 @@ extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; -/* ARM64 CAPS + alternative_cb */ -#define ARM64_NPATCHABLE (ARM64_NCAPS + 1) -extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); +extern DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS); #define for_each_available_cap(cap) \ for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b208da3bebec..7784081088e7 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -63,7 +63,7 @@ * specific registers encoded in the instructions). */ .macro kern_hyp_va reg -alternative_cb kvm_update_va_mask +alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask and \reg, \reg, #1 /* mask with va_mask */ ror \reg, \reg, #1 /* rotate to the first tag bit */ add \reg, \reg, #0 /* insert the low 12 bits of the tag */ @@ -97,7 +97,7 @@ alternative_cb_end hyp_pa \reg, \tmp /* Load kimage_voffset. */ -alternative_cb kvm_get_kimage_voffset +alternative_cb ARM64_ALWAYS_SYSTEM, kvm_get_kimage_voffset movz \tmp, #0 movk \tmp, #0, lsl #16 movk \tmp, #0, lsl #32 @@ -131,6 +131,7 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) "add %0, %0, #0\n" "add %0, %0, #0, lsl 12\n" "ror %0, %0, #63\n", + ARM64_ALWAYS_SYSTEM, kvm_update_va_mask) : "+r" (v)); return v; diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 2e18c9c0f612..9a071a5fcb67 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -21,6 +21,9 @@ #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) +#define ALT_CAP(a) ((a)->cpufeature & ~ARM64_CB_BIT) +#define ALT_HAS_CB(a) ((a)->cpufeature & ARM64_CB_BIT) + /* Volatile, as we may be patching the guts of READ_ONCE() */ static volatile int all_alternatives_applied; @@ -143,16 +146,15 @@ static void __nocfi __apply_alternatives(const struct alt_region *region, for (alt = region->begin; alt < region->end; alt++) { int nr_inst; + int cap = ALT_CAP(alt); - if (!test_bit(alt->cpufeature, feature_mask)) + if (!test_bit(cap, feature_mask)) continue; - /* Use ARM64_CB_PATCH as an unconditional patch */ - if (alt->cpufeature < ARM64_CB_PATCH && - !cpus_have_cap(alt->cpufeature)) + if (!cpus_have_cap(cap)) continue; - if (alt->cpufeature == ARM64_CB_PATCH) + if (ALT_HAS_CB(alt)) BUG_ON(alt->alt_len != 0); else BUG_ON(alt->alt_len != alt->orig_len); @@ -161,10 +163,10 @@ static void __nocfi __apply_alternatives(const struct alt_region *region, updptr = is_module ? origptr : lm_alias(origptr); nr_inst = alt->orig_len / AARCH64_INSN_SIZE; - if (alt->cpufeature < ARM64_CB_PATCH) - alt_cb = patch_alternative; - else + if (ALT_HAS_CB(alt)) alt_cb = ALT_REPL_PTR(alt); + else + alt_cb = patch_alternative; alt_cb(alt, origptr, updptr, nr_inst); @@ -208,10 +210,10 @@ static int __apply_alternatives_multi_stop(void *unused) cpu_relax(); isb(); } else { - DECLARE_BITMAP(remaining_capabilities, ARM64_NPATCHABLE); + DECLARE_BITMAP(remaining_capabilities, ARM64_NCAPS); bitmap_complement(remaining_capabilities, boot_capabilities, - ARM64_NPATCHABLE); + ARM64_NCAPS); BUG_ON(all_alternatives_applied); __apply_alternatives(&kernel_alternatives, false, @@ -254,9 +256,9 @@ void apply_alternatives_module(void *start, size_t length) .begin = start, .end = start + length, }; - DECLARE_BITMAP(all_capabilities, ARM64_NPATCHABLE); + DECLARE_BITMAP(all_capabilities, ARM64_NCAPS); - bitmap_fill(all_capabilities, ARM64_NPATCHABLE); + bitmap_fill(all_capabilities, ARM64_NCAPS); __apply_alternatives(®ion, true, &all_capabilities[0]); } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index af4de817d712..68a0545285a1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -108,8 +108,7 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcaps); static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM64_NCAPS]; -/* Need also bit for ARM64_CB_PATCH */ -DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); +DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS); bool arm64_use_ng_mappings = false; EXPORT_SYMBOL(arm64_use_ng_mappings); @@ -1391,6 +1390,12 @@ u64 __read_sysreg_by_encoding(u32 sys_id) #include +static bool +has_always(const struct arm64_cpu_capabilities *entry, int scope) +{ + return true; +} + static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { @@ -2087,6 +2092,16 @@ cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap) } static const struct arm64_cpu_capabilities arm64_features[] = { + { + .capability = ARM64_ALWAYS_BOOT, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .matches = has_always, + }, + { + .capability = ARM64_ALWAYS_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_always, + }, { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2d73b3e793b2..e28137d64b76 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -114,7 +114,7 @@ * them if required. */ .macro apply_ssbd, state, tmp1, tmp2 -alternative_cb spectre_v4_patch_fw_mitigation_enable +alternative_cb ARM64_ALWAYS_SYSTEM, spectre_v4_patch_fw_mitigation_enable b .L__asm_ssbd_skip\@ // Patched to NOP alternative_cb_end ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 @@ -123,7 +123,7 @@ alternative_cb_end tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state -alternative_cb smccc_patch_fw_mitigation_conduit +alternative_cb ARM64_ALWAYS_SYSTEM, smccc_patch_fw_mitigation_conduit nop // Patched to SMC/HVC #0 alternative_cb_end .L__asm_ssbd_skip\@: @@ -175,7 +175,7 @@ alternative_else_nop_endif .macro mte_set_kernel_gcr, tmp, tmp2 #ifdef CONFIG_KASAN_HW_TAGS -alternative_cb kasan_hw_tags_enable +alternative_cb ARM64_ALWAYS_SYSTEM, kasan_hw_tags_enable b 1f alternative_cb_end mov \tmp, KERNEL_GCR_EL1 @@ -186,7 +186,7 @@ alternative_cb_end .macro mte_set_user_gcr, tsk, tmp, tmp2 #ifdef CONFIG_KASAN_HW_TAGS -alternative_cb kasan_hw_tags_enable +alternative_cb ARM64_ALWAYS_SYSTEM, kasan_hw_tags_enable b 1f alternative_cb_end ldr \tmp, [\tsk, #THREAD_MTE_CTRL] diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 7839d075729b..8f3f93fa119e 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -196,7 +196,7 @@ SYM_CODE_END(__kvm_hyp_vector) sub sp, sp, #(8 * 4) stp x2, x3, [sp, #(8 * 0)] stp x0, x1, [sp, #(8 * 2)] - alternative_cb spectre_bhb_patch_wa3 + alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_wa3 /* Patched to mov WA3 when supported */ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 alternative_cb_end @@ -216,7 +216,7 @@ SYM_CODE_END(__kvm_hyp_vector) mitigate_spectre_bhb_clear_insn .endif .if \indirect != 0 - alternative_cb kvm_patch_vector_branch + alternative_cb ARM64_ALWAYS_SYSTEM, kvm_patch_vector_branch /* * For ARM64_SPECTRE_V3A configurations, these NOPs get replaced with: * diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 63b2484ce6c3..ff882ec175e7 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -2,6 +2,8 @@ # # Internal CPU capabilities constants, keep this list sorted +ALWAYS_BOOT +ALWAYS_SYSTEM BTI # Unreliable: use system_supports_32bit_el0() instead. HAS_32BIT_EL0_DO_NOT_USE From 21fb26bfb01ffe0d9a9a967ffe061092128bbffe Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 12 Sep 2022 17:22:09 +0100 Subject: [PATCH 092/126] arm64: alternatives: add alternative_has_feature_*() Currrently we use a mixture of alternative sequences and static branches to handle features detected at boot time. For ease of maintenance we generally prefer to use static branches in C code, but this has a few downsides: * Each static branch has metadata in the __jump_table section, which is not discarded after features are finalized. This wastes some space, and slows down the patching of other static branches. * The static branches are patched at a different point in time from the alternatives, so changes are not atomic. This leaves a transient period where there could be a mismatch between the behaviour of alternatives and static branches, which could be problematic for some features (e.g. pseudo-NMI). * More (instrumentable) kernel code is executed to patch each static branch, which can be risky when patching certain features (e.g. irqflags management for pseudo-NMI). * When CONFIG_JUMP_LABEL=n, static branches are turned into a load of a flag and a conditional branch. This means it isn't safe to use such static branches in an alternative address space (e.g. the NVHE/PKVM hyp code), where the generated address isn't safe to acccess. To deal with these issues, this patch introduces new alternative_has_feature_*() helpers, which work like static branches but are patched using alternatives. This ensures the patching is performed at the same time as other alternative patching, allows the metadata to be freed after patching, and is safe for use in alternative address spaces. Note that all supported toolchains have asm goto support, and since commit: a0a12c3ed057af57 ("asm goto: eradicate CC_HAS_ASM_GOTO)" ... the CC_HAS_ASM_GOTO Kconfig symbol has been removed, so no feature check is necessary, and we can always make use of asm goto. Additionally, note that: * This has no impact on cpus_have_cap(), which is a dynamic check. * This has no functional impact on cpus_have_const_cap(). The branches are patched slightly later than before this patch, but these branches are not reachable until caps have been finalised. * It is now invalid to use cpus_have_final_cap() in the window between feature detection and patching. All existing uses are only expected after patching anyway, so this should not be a problem. * The LSE atomics will now be enabled during alternatives patching rather than immediately before. As the LL/SC an LSE atomics are functionally equivalent this should not be problematic. When building defconfig with GCC 12.1.0, the resulting Image is 64KiB smaller: | % ls -al Image-* | -rw-r--r-- 1 mark mark 37108224 Aug 23 09:56 Image-after | -rw-r--r-- 1 mark mark 37173760 Aug 23 09:54 Image-before According to bloat-o-meter.pl: | add/remove: 44/34 grow/shrink: 602/1294 up/down: 39692/-61108 (-21416) | Function old new delta | [...] | Total: Before=16618336, After=16596920, chg -0.13% | add/remove: 0/2 grow/shrink: 0/0 up/down: 0/-1296 (-1296) | Data old new delta | arm64_const_caps_ready 16 - -16 | cpu_hwcap_keys 1280 - -1280 | Total: Before=8987120, After=8985824, chg -0.01% | add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0 (0) | RO Data old new delta | Total: Before=18408, After=18408, chg +0.00% Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: James Morse Cc: Joey Gouly Cc: Marc Zyngier Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220912162210.3626215-8-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative-macros.h | 41 +++++++++++++++++++++ arch/arm64/include/asm/cpufeature.h | 7 ++-- arch/arm64/include/asm/lse.h | 5 +-- arch/arm64/kernel/cpufeature.c | 25 ------------- arch/arm64/kernel/image-vars.h | 4 -- 5 files changed, 46 insertions(+), 36 deletions(-) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 189c31be163c..eaba9ec12789 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -213,4 +213,45 @@ alternative_endif #define ALTERNATIVE(oldinstr, newinstr, ...) \ _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1) +#ifndef __ASSEMBLY__ + +#include +#include + +static __always_inline bool +alternative_has_feature_likely(unsigned long feature) +{ + BUILD_BUG_ON(feature >= ARM64_NCAPS); + + asm_volatile_goto( + ALTERNATIVE("b %l[l_no]", "nop", %[feature]) + : + : [feature] "i" (feature) + : + : l_no); + + return true; +l_no: + return false; +} + +static __always_inline bool +alternative_has_feature_unlikely(unsigned long feature) +{ + BUILD_BUG_ON(feature >= ARM64_NCAPS); + + asm_volatile_goto( + ALTERNATIVE("nop", "b %l[l_yes]", %[feature]) + : + : [feature] "i" (feature) + : + : l_yes); + + return false; +l_yes: + return true; +} + +#endif /* __ASSEMBLY__ */ + #endif /* __ASM_ALTERNATIVE_MACROS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f5852ad6e884..ba8a47433b5a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -6,6 +6,7 @@ #ifndef __ASM_CPUFEATURE_H #define __ASM_CPUFEATURE_H +#include #include #include #include @@ -419,8 +420,6 @@ static __always_inline bool is_hyp_code(void) } extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); -extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; -extern struct static_key_false arm64_const_caps_ready; extern DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS); @@ -438,7 +437,7 @@ unsigned long cpu_get_elf_hwcap2(void); static __always_inline bool system_capabilities_finalized(void) { - return static_branch_likely(&arm64_const_caps_ready); + return alternative_has_feature_likely(ARM64_ALWAYS_SYSTEM); } /* @@ -465,7 +464,7 @@ static __always_inline bool __cpus_have_const_cap(int num) { if (num >= ARM64_NCAPS) return false; - return static_branch_unlikely(&cpu_hwcap_keys[num]); + return alternative_has_feature_unlikely(num); } /* diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 29c85810ae69..c503db8e73b0 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -13,14 +13,13 @@ #include #include #include +#include #include #include -extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; - static __always_inline bool system_uses_lse_atomics(void) { - return static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]); + return alternative_has_feature_likely(ARM64_HAS_LSE_ATOMICS); } #define __lse_ll_sc_body(op, ...) \ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 68a0545285a1..d9c3879728bc 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -133,31 +133,12 @@ DEFINE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0); */ static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly; -/* - * Flag to indicate if we have computed the system wide - * capabilities based on the boot time active CPUs. This - * will be used to determine if a new booting CPU should - * go through the verification process to make sure that it - * supports the system capabilities, without using a hotplug - * notifier. This is also used to decide if we could use - * the fast path for checking constant CPU caps. - */ -DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); -EXPORT_SYMBOL(arm64_const_caps_ready); -static inline void finalize_system_capabilities(void) -{ - static_branch_enable(&arm64_const_caps_ready); -} - void dump_cpu_features(void) { /* file-wide pr_fmt adds "CPU features: " prefix */ pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); } -DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); -EXPORT_SYMBOL(cpu_hwcap_keys); - #define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ { \ .sign = SIGNED, \ @@ -2944,9 +2925,6 @@ static void __init enable_cpu_capabilities(u16 scope_mask) if (!cpus_have_cap(num)) continue; - /* Ensure cpus_have_const_cap(num) works */ - static_branch_enable(&cpu_hwcap_keys[num]); - if (boot_scope && caps->cpu_enable) /* * Capabilities with SCOPE_BOOT_CPU scope are finalised @@ -3268,9 +3246,6 @@ void __init setup_cpu_features(void) sme_setup(); minsigstksz_setup(); - /* Advertise that we have computed the system capabilities */ - finalize_system_capabilities(); - /* * Check for sane CTR_EL0.CWG value. */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index afa69e04e75e..118973a6ab05 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -89,10 +89,6 @@ KVM_NVHE_ALIAS(__icache_flags); /* VMID bits set by the KVM VMID allocator */ KVM_NVHE_ALIAS(kvm_arm_vmid_bits); -/* Kernel symbols needed for cpus_have_final/const_caps checks. */ -KVM_NVHE_ALIAS(arm64_const_caps_ready); -KVM_NVHE_ALIAS(cpu_hwcap_keys); - /* Static keys which are set if a vGIC trap should be handled in hyp. */ KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); From d926079f17bf8aa47485b6a55be1fc0175dbd1db Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 12 Sep 2022 17:22:10 +0100 Subject: [PATCH 093/126] arm64: alternatives: add shared NOP callback For each instance of an alternative, the compiler outputs a distinct copy of the alternative instructions into a subsection. As the compiler doesn't have special knowledge of alternatives, it cannot coalesce these to save space. In a defconfig kernel built with GCC 12.1.0, there are approximately 10,000 instances of alternative_has_feature_likely(), where the replacement instruction is always a NOP. As NOPs are position-independent, we don't need a unique copy per alternative sequence. This patch adds a callback to patch an alternative sequence with NOPs, and make use of this in alternative_has_feature_likely(). So that this can be used for other sites in future, this is written to patch multiple instructions up to the original sequence length. For NVHE, an alias is added to image-vars.h. For modules, the callback is exported. Note that as modules are loaded within 2GiB of the kernel, an alt_instr entry in a module can always refer directly to the callback, and no special handling is necessary. When building with GCC 12.1.0, the vmlinux is ~158KiB smaller, though the resulting Image size is unchanged due to alignment constraints and padding: | % ls -al vmlinux-* | -rwxr-xr-x 1 mark mark 134644592 Sep 1 14:52 vmlinux-after | -rwxr-xr-x 1 mark mark 134486232 Sep 1 14:50 vmlinux-before | % ls -al Image-* | -rw-r--r-- 1 mark mark 37108224 Sep 1 14:52 Image-after | -rw-r--r-- 1 mark mark 37108224 Sep 1 14:50 Image-before Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: James Morse Cc: Joey Gouly Cc: Marc Zyngier Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220912162210.3626215-9-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative-macros.h | 2 +- arch/arm64/kernel/alternative.c | 8 ++++++++ arch/arm64/kernel/image-vars.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index eaba9ec12789..4a2a98d6d222 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -224,7 +224,7 @@ alternative_has_feature_likely(unsigned long feature) BUILD_BUG_ON(feature >= ARM64_NCAPS); asm_volatile_goto( - ALTERNATIVE("b %l[l_no]", "nop", %[feature]) + ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops) : : [feature] "i" (feature) : diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 9a071a5fcb67..5a904d4e98ea 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -263,3 +263,11 @@ void apply_alternatives_module(void *start, size_t length) __apply_alternatives(®ion, true, &all_capabilities[0]); } #endif + +noinstr void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + for (int i = 0; i < nr_inst; i++) + updptr[i] = cpu_to_le32(aarch64_insn_gen_nop()); +} +EXPORT_SYMBOL(alt_cb_patch_nops); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 118973a6ab05..4aaa5f3d1f65 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -73,6 +73,7 @@ KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter); KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable); KVM_NVHE_ALIAS(spectre_bhb_patch_wa3); KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb); +KVM_NVHE_ALIAS(alt_cb_patch_nops); /* Global kernel state accessed by nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_vgic_global_state); From db74cd6337d2691ea932e36b84683090f0712ec1 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 19 Sep 2022 09:09:28 -0700 Subject: [PATCH 094/126] arm64/sysreg: Fix a few missed conversions After the conversion to automatically generating the ID_AA64DFR0_EL1 definition names, the build fails in a few different places because some of the definitions were not changed to their new names along the way. Update the names to resolve the build errors. Fixes: c0357a73fa4a ("arm64/sysreg: Align field names in ID_AA64DFR0_EL1 with architecture") Signed-off-by: Nathan Chancellor Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20220919160928.3905780-1-nathan@kernel.org Signed-off-by: Catalin Marinas --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 4 ++-- drivers/hwtracing/coresight/coresight-trbe.h | 3 ++- drivers/perf/arm_spe_pmu.c | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index d39660a3e50c..80fefaba58ee 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -966,7 +966,7 @@ static inline bool cpu_supports_sysreg_trace(void) { u64 dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1); - return ((dfr0 >> ID_AA64DFR0_TRACEVER_SHIFT) & 0xfUL) > 0; + return ((dfr0 >> ID_AA64DFR0_EL1_TraceVer_SHIFT) & 0xfUL) > 0; } static bool etm4_init_sysreg_access(struct etmv4_drvdata *drvdata, @@ -1054,7 +1054,7 @@ static void cpu_detect_trace_filtering(struct etmv4_drvdata *drvdata) u64 trfcr; drvdata->trfcr = 0; - if (!cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRACE_FILT_SHIFT)) + if (!cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceFilt_SHIFT)) return; /* diff --git a/drivers/hwtracing/coresight/coresight-trbe.h b/drivers/hwtracing/coresight/coresight-trbe.h index 30e4d7db4f8e..98ff1b17ad07 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.h +++ b/drivers/hwtracing/coresight/coresight-trbe.h @@ -20,7 +20,8 @@ static inline bool is_trbe_available(void) { u64 aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1); - unsigned int trbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_TRBE_SHIFT); + unsigned int trbe = cpuid_feature_extract_unsigned_field(aa64dfr0, + ID_AA64DFR0_EL1_TraceBuffer_SHIFT); return trbe >= 0b0001; } diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index b65a7d9640e1..6ce05ef4844d 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -674,9 +674,9 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev) static u64 arm_spe_pmsevfr_res0(u16 pmsver) { switch (pmsver) { - case ID_AA64DFR0_PMSVER_8_2: + case ID_AA64DFR0_EL1_PMSVer_IMP: return SYS_PMSEVFR_EL1_RES0_8_2; - case ID_AA64DFR0_PMSVER_8_3: + case ID_AA64DFR0_EL1_PMSVer_V1P1: /* Return the highest version we support in default */ default: return SYS_PMSEVFR_EL1_RES0_8_3; @@ -958,7 +958,7 @@ static void __arm_spe_pmu_dev_probe(void *info) struct device *dev = &spe_pmu->pdev->dev; fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64DFR0_EL1), - ID_AA64DFR0_PMSVER_SHIFT); + ID_AA64DFR0_EL1_PMSVer_SHIFT); if (!fld) { dev_err(dev, "unsupported ID_AA64DFR0_EL1.PMSVer [%d] on CPU %d\n", From 0072dc1b53c39fb7c4cfc5c9e5d5a30622198613 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 20 Sep 2022 15:00:44 +0100 Subject: [PATCH 095/126] arm64: avoid BUILD_BUG_ON() in alternative-macros Nathan reports that the build fails when using clang and LTO: | In file included from kernel/bounds.c:10: | In file included from ./include/linux/page-flags.h:10: | In file included from ./include/linux/bug.h:5: | In file included from ./arch/arm64/include/asm/bug.h:26: | In file included from ./include/asm-generic/bug.h:5: | In file included from ./include/linux/compiler.h:248: | In file included from ./arch/arm64/include/asm/rwonce.h:11: | ./arch/arm64/include/asm/alternative-macros.h:224:2: error: call to undeclared function 'BUILD_BUG_ON'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] | BUILD_BUG_ON(feature >= ARM64_NCAPS); | ^ | ./arch/arm64/include/asm/alternative-macros.h:241:2: error: call to undeclared function 'BUILD_BUG_ON'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] | BUILD_BUG_ON(feature >= ARM64_NCAPS); | ^ | 2 errors generated. ... the problem being that when LTO is enabled, includes , and causes a circular include dependency through . This manifests as BUILD_BUG_ON() not being defined when used within . This patch avoids the problem and simplifies the include dependencies by using compiletime_assert() instead of BUILD_BUG_ON(). Signed-off-by: Mark Rutland Fixes: 21fb26bfb01f ("arm64: alternatives: add alternative_has_feature_*()") Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: http://lore.kernel.org/r/YyigTrxhE3IRPzjs@dev-arch.thelio-3990X Cc: Ard Biesheuvel Cc: James Morse Cc: Joey Gouly Cc: Marc Zyngier Cc: Will Deacon Reviewed-by: Ard Biesheuvel Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220920140044.1709073-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative-macros.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 4a2a98d6d222..966767debaa3 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -215,13 +215,13 @@ alternative_endif #ifndef __ASSEMBLY__ -#include #include static __always_inline bool alternative_has_feature_likely(unsigned long feature) { - BUILD_BUG_ON(feature >= ARM64_NCAPS); + compiletime_assert(feature < ARM64_NCAPS, + "feature must be < ARM64_NCAPS"); asm_volatile_goto( ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops) @@ -238,7 +238,8 @@ l_no: static __always_inline bool alternative_has_feature_unlikely(unsigned long feature) { - BUILD_BUG_ON(feature >= ARM64_NCAPS); + compiletime_assert(feature < ARM64_NCAPS, + "feature must be < ARM64_NCAPS"); asm_volatile_goto( ALTERNATIVE("nop", "b %l[l_yes]", %[feature]) From 120072f48f4ef0b017f65d5efd0548938cb43052 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 19 Sep 2022 17:27:53 +0100 Subject: [PATCH 096/126] arm64: configs: Enable all PMUs provided by Arm The selection of PMUs enabled in the defconfig is currently a bit random and does not include a number of those provided by Arm and present in a fairly wide range of SoCs. Improve coverage and defconfig utility by enabling all the Arm provided PMUs by default. Signed-off-by: Mark Brown Reviewed-by: James Clark Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20220919162753.3079869-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/configs/defconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index d5b2d2dd4904..ab8c014eb314 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1229,8 +1229,14 @@ CONFIG_PHY_UNIPHIER_USB3=y CONFIG_PHY_TEGRA_XUSB=y CONFIG_PHY_AM654_SERDES=m CONFIG_PHY_J721E_WIZ=m +CONFIG_ARM_CCI_PMU=m +CONFIG_ARM_CCN=m +CONFIG_ARM_CMN=m CONFIG_ARM_SMMU_V3_PMU=m +CONFIG_ARM_DSU_PMU=m CONFIG_FSL_IMX8_DDR_PMU=m +CONFIG_ARM_SPE_PMU=m +CONFIG_ARM_DMC620_PMU=m CONFIG_QCOM_L2_PMU=y CONFIG_QCOM_L3_PMU=y CONFIG_HISI_PMU=y From 31dbadcc2828ac8c4608bf2ac6b12b286943e634 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 29 Jun 2022 17:35:24 +0800 Subject: [PATCH 097/126] arm64: defconfig: Enable memory hotplug and hotremove config Let's enable ACPI_HMAT, ACPI_HOTPLUG_MEMORY, MEMORY_HOTPLUG and MEMORY_HOTREMOVE for more test coverage, also there are useful for heterogeneous memory scene. Signed-off-by: Kefeng Wang Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220629093524.34801-1-wangkefeng.wang@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/configs/defconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index ab8c014eb314..ef8d92d42aa9 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -102,6 +102,8 @@ CONFIG_ARM_SCMI_CPUFREQ=y CONFIG_ARM_TEGRA186_CPUFREQ=y CONFIG_QORIQ_CPUFREQ=y CONFIG_ACPI=y +CONFIG_ACPI_HOTPLUG_MEMORY=y +CONFIG_ACPI_HMAT=y CONFIG_ACPI_APEI=y CONFIG_ACPI_APEI_GHES=y CONFIG_ACPI_APEI_PCIEAER=y @@ -126,6 +128,8 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPAT_BRK is not set +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_MEMORY_FAILURE=y CONFIG_TRANSPARENT_HUGEPAGE=y From e2c12540420dd32be43a61533d87050d5fb0bcdf Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 7 Sep 2022 12:02:35 +0100 Subject: [PATCH 098/126] arm64: Enable docker support in defconfig The arm64 defconfig does not support the docker usecase. Enable the missing configuration options. The resulting .config was validated with [1]. ... Generally Necessary: - cgroup hierarchy: properly mounted [/sys/fs/cgroup] - apparmor: enabled and tools installed - CONFIG_NAMESPACES: enabled - CONFIG_NET_NS: enabled - CONFIG_PID_NS: enabled - CONFIG_IPC_NS: enabled - CONFIG_UTS_NS: enabled - CONFIG_CGROUPS: enabled - CONFIG_CGROUP_CPUACCT: enabled - CONFIG_CGROUP_DEVICE: enabled - CONFIG_CGROUP_FREEZER: enabled - CONFIG_CGROUP_SCHED: enabled - CONFIG_CPUSETS: enabled - CONFIG_MEMCG: enabled - CONFIG_KEYS: enabled - CONFIG_VETH: enabled (as module) - CONFIG_BRIDGE: enabled (as module) - CONFIG_BRIDGE_NETFILTER: enabled (as module) - CONFIG_IP_NF_FILTER: enabled (as module) - CONFIG_IP_NF_TARGET_MASQUERADE: enabled (as module) - CONFIG_NETFILTER_XT_MATCH_ADDRTYPE: enabled (as module) - CONFIG_NETFILTER_XT_MATCH_CONNTRACK: enabled (as module) - CONFIG_NETFILTER_XT_MATCH_IPVS: enabled (as module) - CONFIG_NETFILTER_XT_MARK: enabled (as module) - CONFIG_IP_NF_NAT: enabled (as module) - CONFIG_NF_NAT: enabled (as module) - CONFIG_POSIX_MQUEUE: enabled - CONFIG_CGROUP_BPF: enabled ... [1] https://github.com/moby/moby/blob/master/contrib/check-config.sh Cc: Will Deacon Cc: Arnd Bergmann Acked-by: Catalin Marinas Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20220907110235.14708-1-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/configs/defconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index ef8d92d42aa9..ef3467092ded 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -18,6 +18,7 @@ CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y CONFIG_CGROUP_DEVICE=y @@ -143,12 +144,16 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IPV6=m CONFIG_NETFILTER=y +CONFIG_BRIDGE_NETFILTER=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NETFILTER_XT_MARK=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_IP_VS=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m From ecaf4d3f734fe8d10aeda52193d325453e3e84ee Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:49:18 +0100 Subject: [PATCH 099/126] kselftest/arm64: Add test coverage for NT_ARM_TLS In preparation for extending support for NT_ARM_TLS to cover additional TPIDRs add some tests for the existing interface. Do this in a generic ptrace test program to provide a place to collect additional tests in the future. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829154921.837871-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/.gitignore | 1 + tools/testing/selftests/arm64/abi/Makefile | 2 +- tools/testing/selftests/arm64/abi/ptrace.c | 165 +++++++++++++++++++ 3 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/abi/ptrace.c diff --git a/tools/testing/selftests/arm64/abi/.gitignore b/tools/testing/selftests/arm64/abi/.gitignore index b9e54417250d..12607c4580c6 100644 --- a/tools/testing/selftests/arm64/abi/.gitignore +++ b/tools/testing/selftests/arm64/abi/.gitignore @@ -1,2 +1,3 @@ +ptrace syscall-abi tpidr2 diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile index c8d7f2495eb2..445ac2dac4ee 100644 --- a/tools/testing/selftests/arm64/abi/Makefile +++ b/tools/testing/selftests/arm64/abi/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2021 ARM Limited -TEST_GEN_PROGS := syscall-abi tpidr2 +TEST_GEN_PROGS := ptrace syscall-abi tpidr2 include ../../lib.mk diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c new file mode 100644 index 000000000000..a74157dcc4fc --- /dev/null +++ b/tools/testing/selftests/arm64/abi/ptrace.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 ARM Limited. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +#define EXPECTED_TESTS 3 + +#define MAX_TPIDRS 1 + +static bool have_sme(void) +{ + return getauxval(AT_HWCAP2) & HWCAP2_SME; +} + +static void test_tpidr(pid_t child) +{ + uint64_t read_val[MAX_TPIDRS]; + uint64_t write_val[MAX_TPIDRS]; + struct iovec read_iov, write_iov; + int ret; + + read_iov.iov_base = read_val; + write_iov.iov_base = write_val; + + /* Should be able to read a single TPIDR... */ + read_iov.iov_len = sizeof(uint64_t); + ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov); + ksft_test_result(ret == 0, "read_tpidr_one\n"); + + /* ...write a new value.. */ + write_iov.iov_len = sizeof(uint64_t); + write_val[0] = read_val[0]++; + ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov); + ksft_test_result(ret == 0, "write_tpidr_one\n"); + + /* ...then read it back */ + ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov); + ksft_test_result(ret == 0 && write_val[0] == read_val[0], + "verify_tpidr_one\n"); +} + +static int do_child(void) +{ + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + + return EXIT_SUCCESS; +} + +static int do_parent(pid_t child) +{ + int ret = EXIT_FAILURE; + pid_t pid; + int status; + siginfo_t si; + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + perror("wait"); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) + goto disappeared; + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) + goto disappeared; + + ksft_test_result_fail("PTRACE_CONT: %s\n", + strerror(errno)); + goto error; + } + } + + ksft_print_msg("Parent is %d, child is %d\n", getpid(), child); + + test_tpidr(child); + + ret = EXIT_SUCCESS; + +error: + kill(child, SIGKILL); + +disappeared: + return ret; +} + +int main(void) +{ + int ret = EXIT_SUCCESS; + pid_t child; + + srandom(getpid()); + + ksft_print_header(); + + ksft_set_plan(EXPECTED_TESTS); + + child = fork(); + if (!child) + return do_child(); + + if (do_parent(child)) + ret = EXIT_FAILURE; + + ksft_print_cnts(); + + return ret; +} From f285da05c62a429f1978c5520cf069d483a7d9af Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:49:19 +0100 Subject: [PATCH 100/126] arm64/ptrace: Document extension of NT_ARM_TLS to cover TPIDR2_EL0 In order to allow debuggers to discover lazily saved SME state we need to provide access to TPIDR2_EL0, we will extend the existing NT_ARM_TLS used for TPIDR to also include TPIDR2_EL0 as the second register in the regset. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829154921.837871-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arm64/sme.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/arm64/sme.rst b/Documentation/arm64/sme.rst index 937147f58cc5..16d2db4c2e2e 100644 --- a/Documentation/arm64/sme.rst +++ b/Documentation/arm64/sme.rst @@ -331,6 +331,9 @@ The regset data starts with struct user_za_header, containing: been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread when the coredump was generated. +* The NT_ARM_TLS note will be extended to two registers, the second register + will contain TPIDR2_EL0 on systems that support SME and will be read as + zero with writes ignored otherwise. 9. System runtime configuration -------------------------------- From 0027d9c6c7b57b61b82f7275633ba89d0de2d2fd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:49:20 +0100 Subject: [PATCH 101/126] arm64/ptrace: Support access to TPIDR2_EL0 SME introduces an additional EL0 register, TPIDR2_EL0, intended for use by userspace as part of the SME. Provide ptrace access to it through the existing NT_ARM_TLS regset used for TPIDR_EL0 by expanding it to two registers with TPIDR2_EL0 being the second one. Existing programs that query the size of the register set will be able to observe the increased size of the register set. Programs that assume the register set is single register will see no change. On systems that do not support SME TPIDR2_EL0 will read as 0 and writes will be ignored, support for SME should be queried via hwcaps as normal. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829154921.837871-4-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index eb7c08dfb834..0e9764f73d61 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -666,10 +666,18 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, static int tls_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + int ret; + if (target == current) tls_preserve_current_state(); - return membuf_store(&to, target->thread.uw.tp_value); + ret = membuf_store(&to, target->thread.uw.tp_value); + if (system_supports_tpidr2()) + ret = membuf_store(&to, target->thread.tpidr2_el0); + else + ret = membuf_zero(&to, sizeof(u64)); + + return ret; } static int tls_set(struct task_struct *target, const struct user_regset *regset, @@ -677,13 +685,20 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - unsigned long tls = target->thread.uw.tp_value; + unsigned long tls[2]; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); + tls[0] = target->thread.uw.tp_value; + if (system_supports_sme()) + tls[1] = target->thread.tpidr2_el0; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, tls, 0, count); if (ret) return ret; - target->thread.uw.tp_value = tls; + target->thread.uw.tp_value = tls[0]; + if (system_supports_sme()) + target->thread.tpidr2_el0 = tls[1]; + return ret; } @@ -1392,7 +1407,7 @@ static const struct user_regset aarch64_regsets[] = { }, [REGSET_TLS] = { .core_note_type = NT_ARM_TLS, - .n = 1, + .n = 2, .size = sizeof(void *), .align = sizeof(void *), .regset_get = tls_get, From 2ddadec2206ca47a30995d22fa83be575e78f2a2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 29 Aug 2022 16:49:21 +0100 Subject: [PATCH 102/126] kselftest/arm64: Add coverage of TPIDR2_EL0 ptrace interface Extend the ptrace test support for NT_ARM_TLS to cover TPIDR2_EL0 - on systems that support SME the NT_ARM_TLS regset can be up to 2 elements long with the second element containing TPIDR2_EL0. On systems supporting SME we verify that this value can be read and written while on systems that do not support SME we verify correct truncation of reads and writes. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220829154921.837871-5-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/ptrace.c | 82 +++++++++++++++++++++- 1 file changed, 79 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c index a74157dcc4fc..be952511af22 100644 --- a/tools/testing/selftests/arm64/abi/ptrace.c +++ b/tools/testing/selftests/arm64/abi/ptrace.c @@ -20,9 +20,9 @@ #include "../../kselftest.h" -#define EXPECTED_TESTS 3 +#define EXPECTED_TESTS 7 -#define MAX_TPIDRS 1 +#define MAX_TPIDRS 2 static bool have_sme(void) { @@ -34,7 +34,8 @@ static void test_tpidr(pid_t child) uint64_t read_val[MAX_TPIDRS]; uint64_t write_val[MAX_TPIDRS]; struct iovec read_iov, write_iov; - int ret; + bool test_tpidr2 = false; + int ret, i; read_iov.iov_base = read_val; write_iov.iov_base = write_val; @@ -54,6 +55,81 @@ static void test_tpidr(pid_t child) ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov); ksft_test_result(ret == 0 && write_val[0] == read_val[0], "verify_tpidr_one\n"); + + /* If we have TPIDR2 we should be able to read it */ + read_iov.iov_len = sizeof(read_val); + ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov); + if (ret == 0) { + /* If we have SME there should be two TPIDRs */ + if (read_iov.iov_len >= sizeof(read_val)) + test_tpidr2 = true; + + if (have_sme() && test_tpidr2) { + ksft_test_result(test_tpidr2, "count_tpidrs\n"); + } else { + ksft_test_result(read_iov.iov_len % sizeof(uint64_t) == 0, + "count_tpidrs\n"); + } + } else { + ksft_test_result_fail("count_tpidrs\n"); + } + + if (test_tpidr2) { + /* Try to write new values to all known TPIDRs... */ + write_iov.iov_len = sizeof(write_val); + for (i = 0; i < MAX_TPIDRS; i++) + write_val[i] = read_val[i] + 1; + ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov); + + ksft_test_result(ret == 0 && + write_iov.iov_len == sizeof(write_val), + "tpidr2_write\n"); + + /* ...then read them back */ + read_iov.iov_len = sizeof(read_val); + ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov); + + if (have_sme()) { + /* Should read back the written value */ + ksft_test_result(ret == 0 && + read_iov.iov_len >= sizeof(read_val) && + memcmp(read_val, write_val, + sizeof(read_val)) == 0, + "tpidr2_read\n"); + } else { + /* TPIDR2 should read as zero */ + ksft_test_result(ret == 0 && + read_iov.iov_len >= sizeof(read_val) && + read_val[0] == write_val[0] && + read_val[1] == 0, + "tpidr2_read\n"); + } + + /* Writing only TPIDR... */ + write_iov.iov_len = sizeof(uint64_t); + memcpy(write_val, read_val, sizeof(read_val)); + write_val[0] += 1; + ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov); + + if (ret == 0) { + /* ...should leave TPIDR2 untouched */ + read_iov.iov_len = sizeof(read_val); + ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, + &read_iov); + + ksft_test_result(ret == 0 && + read_iov.iov_len >= sizeof(read_val) && + memcmp(read_val, write_val, + sizeof(read_val)) == 0, + "write_tpidr_only\n"); + } else { + ksft_test_result_fail("write_tpidr_only\n"); + } + } else { + ksft_test_result_skip("tpidr2_write\n"); + ksft_test_result_skip("tpidr2_read\n"); + ksft_test_result_skip("write_tpidr_only\n"); + } } static int do_child(void) From a6f92909d6bb59eafa004178983850a1b739e304 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Wed, 14 Sep 2022 10:23:24 +0800 Subject: [PATCH 103/126] docs: perf: Add description for Alibaba's T-Head PMU driver Alibaba's T-Head SoC implements uncore PMU for performance and functional debugging to facilitate system maintenance. Document it to provide guidance on how to use it. Signed-off-by: Shuai Xue Reviewed-by: Jonathan Cameron Reviewed-by: Baolin Wang Link: https://lore.kernel.org/r/20220914022326.88550-2-xueshuai@linux.alibaba.com Signed-off-by: Will Deacon --- .../admin-guide/perf/alibaba_pmu.rst | 100 ++++++++++++++++++ Documentation/admin-guide/perf/index.rst | 1 + 2 files changed, 101 insertions(+) create mode 100644 Documentation/admin-guide/perf/alibaba_pmu.rst diff --git a/Documentation/admin-guide/perf/alibaba_pmu.rst b/Documentation/admin-guide/perf/alibaba_pmu.rst new file mode 100644 index 000000000000..11de998bb480 --- /dev/null +++ b/Documentation/admin-guide/perf/alibaba_pmu.rst @@ -0,0 +1,100 @@ +============================================================= +Alibaba's T-Head SoC Uncore Performance Monitoring Unit (PMU) +============================================================= + +The Yitian 710, custom-built by Alibaba Group's chip development business, +T-Head, implements uncore PMU for performance and functional debugging to +facilitate system maintenance. + +DDR Sub-System Driveway (DRW) PMU Driver +========================================= + +Yitian 710 employs eight DDR5/4 channels, four on each die. Each DDR5 channel +is independent of others to service system memory requests. And one DDR5 +channel is split into two independent sub-channels. The DDR Sub-System Driveway +implements separate PMUs for each sub-channel to monitor various performance +metrics. + +The Driveway PMU devices are named as ali_drw_ with perf. +For example, ali_drw_21000 and ali_drw_21080 are two PMU devices for two +sub-channels of the same channel in die 0. And the PMU device of die 1 is +prefixed with ali_drw_400XXXXX, e.g. ali_drw_40021000. + +Each sub-channel has 36 PMU counters in total, which is classified into +four groups: + +- Group 0: PMU Cycle Counter. This group has one pair of counters + pmu_cycle_cnt_low and pmu_cycle_cnt_high, that is used as the cycle count + based on DDRC core clock. + +- Group 1: PMU Bandwidth Counters. This group has 8 counters that are used + to count the total access number of either the eight bank groups in a + selected rank, or four ranks separately in the first 4 counters. The base + transfer unit is 64B. + +- Group 2: PMU Retry Counters. This group has 10 counters, that intend to + count the total retry number of each type of uncorrectable error. + +- Group 3: PMU Common Counters. This group has 16 counters, that are used + to count the common events. + +For now, the Driveway PMU driver only uses counters in group 0 and group 3. + +The DDR Controller (DDRCTL) and DDR PHY combine to create a complete solution +for connecting an SoC application bus to DDR memory devices. The DDRCTL +receives transactions Host Interface (HIF) which is custom-defined by Synopsys. +These transactions are queued internally and scheduled for access while +satisfying the SDRAM protocol timing requirements, transaction priorities, and +dependencies between the transactions. The DDRCTL in turn issues commands on +the DDR PHY Interface (DFI) to the PHY module, which launches and captures data +to and from the SDRAM. The driveway PMUs have hardware logic to gather +statistics and performance logging signals on HIF, DFI, etc. + +By counting the READ, WRITE and RMW commands sent to the DDRC through the HIF +interface, we could calculate the bandwidth. Example usage of counting memory +data bandwidth:: + + perf stat \ + -e ali_drw_21000/hif_wr/ \ + -e ali_drw_21000/hif_rd/ \ + -e ali_drw_21000/hif_rmw/ \ + -e ali_drw_21000/cycle/ \ + -e ali_drw_21080/hif_wr/ \ + -e ali_drw_21080/hif_rd/ \ + -e ali_drw_21080/hif_rmw/ \ + -e ali_drw_21080/cycle/ \ + -e ali_drw_23000/hif_wr/ \ + -e ali_drw_23000/hif_rd/ \ + -e ali_drw_23000/hif_rmw/ \ + -e ali_drw_23000/cycle/ \ + -e ali_drw_23080/hif_wr/ \ + -e ali_drw_23080/hif_rd/ \ + -e ali_drw_23080/hif_rmw/ \ + -e ali_drw_23080/cycle/ \ + -e ali_drw_25000/hif_wr/ \ + -e ali_drw_25000/hif_rd/ \ + -e ali_drw_25000/hif_rmw/ \ + -e ali_drw_25000/cycle/ \ + -e ali_drw_25080/hif_wr/ \ + -e ali_drw_25080/hif_rd/ \ + -e ali_drw_25080/hif_rmw/ \ + -e ali_drw_25080/cycle/ \ + -e ali_drw_27000/hif_wr/ \ + -e ali_drw_27000/hif_rd/ \ + -e ali_drw_27000/hif_rmw/ \ + -e ali_drw_27000/cycle/ \ + -e ali_drw_27080/hif_wr/ \ + -e ali_drw_27080/hif_rd/ \ + -e ali_drw_27080/hif_rmw/ \ + -e ali_drw_27080/cycle/ -- sleep 10 + +The average DRAM bandwidth can be calculated as follows: + +- Read Bandwidth = perf_hif_rd * DDRC_WIDTH * DDRC_Freq / DDRC_Cycle +- Write Bandwidth = (perf_hif_wr + perf_hif_rmw) * DDRC_WIDTH * DDRC_Freq / DDRC_Cycle + +Here, DDRC_WIDTH = 64 bytes. + +The current driver does not support sampling. So "perf record" is +unsupported. Also attach to a task is unsupported as the events are all +uncore. diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index 9c9ece88ce53..793e1970bc05 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -18,3 +18,4 @@ Performance monitor support xgene-pmu arm_dsu_pmu thunderx2-pmu + alibaba_pmu From cf7b61073e4526caa247616f6fbb174cbd2a5366 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Thu, 18 Aug 2022 11:18:21 +0800 Subject: [PATCH 104/126] drivers/perf: add DDR Sub-System Driveway PMU driver for Yitian 710 SoC Add the DDR Sub-System Driveway Performance Monitoring Unit (PMU) driver support for Alibaba T-Head Yitian 710 SoC chip. Yitian supports DDR5/4 DRAM and targets cloud computing and HPC. Each PMU is registered as a device in /sys/bus/event_source/devices, and users can select event to monitor in each sub-channel, independently. For example, ali_drw_21000 and ali_drw_21080 are two PMU devices for two sub-channels of the same channel in die 0. And the PMU device of die 1 is prefixed with ali_drw_400XXXXX, e.g. ali_drw_40021000. Due to hardware limitation, one of DDRSS Driveway PMU overflow interrupt shares the same irq number with MPAM ERR_IRQ. To register DDRSS PMU and MPAM drivers successfully, add IRQF_SHARED flag. Signed-off-by: Shuai Xue Co-developed-by: Hongbo Yao Signed-off-by: Hongbo Yao Co-developed-by: Neng Chen Signed-off-by: Neng Chen Reviewed-by: Jonathan Cameron Reviewed-by: Baolin Wang Link: https://lore.kernel.org/r/20220818031822.38415-3-xueshuai@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 7 + drivers/perf/Makefile | 1 + drivers/perf/alibaba_uncore_drw_pmu.c | 810 ++++++++++++++++++++++++++ 3 files changed, 818 insertions(+) create mode 100644 drivers/perf/alibaba_uncore_drw_pmu.c diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 1e2d69453771..44c07ea487f4 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -183,6 +183,13 @@ config APPLE_M1_CPU_PMU Provides support for the non-architectural CPU PMUs present on the Apple M1 SoCs and derivatives. +config ALIBABA_UNCORE_DRW_PMU + tristate "Alibaba T-Head Yitian 710 DDR Sub-system Driveway PMU driver" + depends on ARM64 || COMPILE_TEST + help + Support for Driveway PMU events monitoring on Yitian 710 DDR + Sub-system. + source "drivers/perf/hisilicon/Kconfig" config MARVELL_CN10K_DDR_PMU diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 57a279c61df5..050d04ee19dd 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -20,3 +20,4 @@ obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o +obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c new file mode 100644 index 000000000000..82729b874f09 --- /dev/null +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -0,0 +1,810 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Alibaba DDR Sub-System Driveway PMU driver + * + * Copyright (C) 2022 Alibaba Inc + */ + +#define ALI_DRW_PMUNAME "ali_drw" +#define ALI_DRW_DRVNAME ALI_DRW_PMUNAME "_pmu" +#define pr_fmt(fmt) ALI_DRW_DRVNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define ALI_DRW_PMU_COMMON_MAX_COUNTERS 16 +#define ALI_DRW_PMU_TEST_SEL_COMMON_COUNTER_BASE 19 + +#define ALI_DRW_PMU_PA_SHIFT 12 +#define ALI_DRW_PMU_CNT_INIT 0x00000000 +#define ALI_DRW_CNT_MAX_PERIOD 0xffffffff +#define ALI_DRW_PMU_CYCLE_EVT_ID 0x80 + +#define ALI_DRW_PMU_CNT_CTRL 0xC00 +#define ALI_DRW_PMU_CNT_RST BIT(2) +#define ALI_DRW_PMU_CNT_STOP BIT(1) +#define ALI_DRW_PMU_CNT_START BIT(0) + +#define ALI_DRW_PMU_CNT_STATE 0xC04 +#define ALI_DRW_PMU_TEST_CTRL 0xC08 +#define ALI_DRW_PMU_CNT_PRELOAD 0xC0C + +#define ALI_DRW_PMU_CYCLE_CNT_HIGH_MASK GENMASK(23, 0) +#define ALI_DRW_PMU_CYCLE_CNT_LOW_MASK GENMASK(31, 0) +#define ALI_DRW_PMU_CYCLE_CNT_HIGH 0xC10 +#define ALI_DRW_PMU_CYCLE_CNT_LOW 0xC14 + +/* PMU EVENT SEL 0-3 are paired in 32-bit registers on a 4-byte stride */ +#define ALI_DRW_PMU_EVENT_SEL0 0xC68 +/* counter 0-3 use sel0, counter 4-7 use sel1...*/ +#define ALI_DRW_PMU_EVENT_SELn(n) \ + (ALI_DRW_PMU_EVENT_SEL0 + (n / 4) * 0x4) +#define ALI_DRW_PMCOM_CNT_EN BIT(7) +#define ALI_DRW_PMCOM_CNT_EVENT_MASK GENMASK(5, 0) +#define ALI_DRW_PMCOM_CNT_EVENT_OFFSET(n) \ + (8 * (n % 4)) + +/* PMU COMMON COUNTER 0-15, are paired in 32-bit registers on a 4-byte stride */ +#define ALI_DRW_PMU_COMMON_COUNTER0 0xC78 +#define ALI_DRW_PMU_COMMON_COUNTERn(n) \ + (ALI_DRW_PMU_COMMON_COUNTER0 + 0x4 * (n)) + +#define ALI_DRW_PMU_OV_INTR_ENABLE_CTL 0xCB8 +#define ALI_DRW_PMU_OV_INTR_DISABLE_CTL 0xCBC +#define ALI_DRW_PMU_OV_INTR_ENABLE_STATUS 0xCC0 +#define ALI_DRW_PMU_OV_INTR_CLR 0xCC4 +#define ALI_DRW_PMU_OV_INTR_STATUS 0xCC8 +#define ALI_DRW_PMCOM_CNT_OV_INTR_MASK GENMASK(23, 8) +#define ALI_DRW_PMBW_CNT_OV_INTR_MASK GENMASK(7, 0) +#define ALI_DRW_PMU_OV_INTR_MASK GENMASK_ULL(63, 0) + +static int ali_drw_cpuhp_state_num; + +static LIST_HEAD(ali_drw_pmu_irqs); +static DEFINE_MUTEX(ali_drw_pmu_irqs_lock); + +struct ali_drw_pmu_irq { + struct hlist_node node; + struct list_head irqs_node; + struct list_head pmus_node; + int irq_num; + int cpu; + refcount_t refcount; +}; + +struct ali_drw_pmu { + void __iomem *cfg_base; + struct device *dev; + + struct list_head pmus_node; + struct ali_drw_pmu_irq *irq; + int irq_num; + int cpu; + DECLARE_BITMAP(used_mask, ALI_DRW_PMU_COMMON_MAX_COUNTERS); + struct perf_event *events[ALI_DRW_PMU_COMMON_MAX_COUNTERS]; + int evtids[ALI_DRW_PMU_COMMON_MAX_COUNTERS]; + + struct pmu pmu; +}; + +#define to_ali_drw_pmu(p) (container_of(p, struct ali_drw_pmu, pmu)) + +#define DRW_CONFIG_EVENTID GENMASK(7, 0) +#define GET_DRW_EVENTID(event) FIELD_GET(DRW_CONFIG_EVENTID, (event)->attr.config) + +static ssize_t ali_drw_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + + return sprintf(buf, "%s\n", (char *)eattr->var); +} + +/* + * PMU event attributes + */ +static ssize_t ali_drw_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + + return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var); +} + +#define ALI_DRW_PMU_ATTR(_name, _func, _config) \ + (&((struct dev_ext_attribute[]) { \ + { __ATTR(_name, 0444, _func, NULL), (void *)_config } \ + })[0].attr.attr) + +#define ALI_DRW_PMU_FORMAT_ATTR(_name, _config) \ + ALI_DRW_PMU_ATTR(_name, ali_drw_pmu_format_show, (void *)_config) +#define ALI_DRW_PMU_EVENT_ATTR(_name, _config) \ + ALI_DRW_PMU_ATTR(_name, ali_drw_pmu_event_show, (unsigned long)_config) + +static struct attribute *ali_drw_pmu_events_attrs[] = { + ALI_DRW_PMU_EVENT_ATTR(hif_rd_or_wr, 0x0), + ALI_DRW_PMU_EVENT_ATTR(hif_wr, 0x1), + ALI_DRW_PMU_EVENT_ATTR(hif_rd, 0x2), + ALI_DRW_PMU_EVENT_ATTR(hif_rmw, 0x3), + ALI_DRW_PMU_EVENT_ATTR(hif_hi_pri_rd, 0x4), + ALI_DRW_PMU_EVENT_ATTR(dfi_wr_data_cycles, 0x7), + ALI_DRW_PMU_EVENT_ATTR(dfi_rd_data_cycles, 0x8), + ALI_DRW_PMU_EVENT_ATTR(hpr_xact_when_critical, 0x9), + ALI_DRW_PMU_EVENT_ATTR(lpr_xact_when_critical, 0xA), + ALI_DRW_PMU_EVENT_ATTR(wr_xact_when_critical, 0xB), + ALI_DRW_PMU_EVENT_ATTR(op_is_activate, 0xC), + ALI_DRW_PMU_EVENT_ATTR(op_is_rd_or_wr, 0xD), + ALI_DRW_PMU_EVENT_ATTR(op_is_rd_activate, 0xE), + ALI_DRW_PMU_EVENT_ATTR(op_is_rd, 0xF), + ALI_DRW_PMU_EVENT_ATTR(op_is_wr, 0x10), + ALI_DRW_PMU_EVENT_ATTR(op_is_mwr, 0x11), + ALI_DRW_PMU_EVENT_ATTR(op_is_precharge, 0x12), + ALI_DRW_PMU_EVENT_ATTR(precharge_for_rdwr, 0x13), + ALI_DRW_PMU_EVENT_ATTR(precharge_for_other, 0x14), + ALI_DRW_PMU_EVENT_ATTR(rdwr_transitions, 0x15), + ALI_DRW_PMU_EVENT_ATTR(write_combine, 0x16), + ALI_DRW_PMU_EVENT_ATTR(war_hazard, 0x17), + ALI_DRW_PMU_EVENT_ATTR(raw_hazard, 0x18), + ALI_DRW_PMU_EVENT_ATTR(waw_hazard, 0x19), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk0, 0x1A), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk1, 0x1B), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk2, 0x1C), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk3, 0x1D), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk0, 0x1E), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk1, 0x1F), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk2, 0x20), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk3, 0x21), + ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk0, 0x26), + ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk1, 0x27), + ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk2, 0x28), + ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk3, 0x29), + ALI_DRW_PMU_EVENT_ATTR(op_is_refresh, 0x2A), + ALI_DRW_PMU_EVENT_ATTR(op_is_crit_ref, 0x2B), + ALI_DRW_PMU_EVENT_ATTR(op_is_load_mode, 0x2D), + ALI_DRW_PMU_EVENT_ATTR(op_is_zqcl, 0x2E), + ALI_DRW_PMU_EVENT_ATTR(visible_window_limit_reached_rd, 0x30), + ALI_DRW_PMU_EVENT_ATTR(visible_window_limit_reached_wr, 0x31), + ALI_DRW_PMU_EVENT_ATTR(op_is_dqsosc_mpc, 0x34), + ALI_DRW_PMU_EVENT_ATTR(op_is_dqsosc_mrr, 0x35), + ALI_DRW_PMU_EVENT_ATTR(op_is_tcr_mrr, 0x36), + ALI_DRW_PMU_EVENT_ATTR(op_is_zqstart, 0x37), + ALI_DRW_PMU_EVENT_ATTR(op_is_zqlatch, 0x38), + ALI_DRW_PMU_EVENT_ATTR(chi_txreq, 0x39), + ALI_DRW_PMU_EVENT_ATTR(chi_txdat, 0x3A), + ALI_DRW_PMU_EVENT_ATTR(chi_rxdat, 0x3B), + ALI_DRW_PMU_EVENT_ATTR(chi_rxrsp, 0x3C), + ALI_DRW_PMU_EVENT_ATTR(tsz_vio, 0x3D), + ALI_DRW_PMU_EVENT_ATTR(cycle, 0x80), + NULL, +}; + +static struct attribute_group ali_drw_pmu_events_attr_group = { + .name = "events", + .attrs = ali_drw_pmu_events_attrs, +}; + +static struct attribute *ali_drw_pmu_format_attr[] = { + ALI_DRW_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL, +}; + +static const struct attribute_group ali_drw_pmu_format_group = { + .name = "format", + .attrs = ali_drw_pmu_format_attr, +}; + +static ssize_t ali_drw_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(drw_pmu->cpu)); +} + +static struct device_attribute ali_drw_pmu_cpumask_attr = + __ATTR(cpumask, 0444, ali_drw_pmu_cpumask_show, NULL); + +static struct attribute *ali_drw_pmu_cpumask_attrs[] = { + &ali_drw_pmu_cpumask_attr.attr, + NULL, +}; + +static const struct attribute_group ali_drw_pmu_cpumask_attr_group = { + .attrs = ali_drw_pmu_cpumask_attrs, +}; + +static const struct attribute_group *ali_drw_pmu_attr_groups[] = { + &ali_drw_pmu_events_attr_group, + &ali_drw_pmu_cpumask_attr_group, + &ali_drw_pmu_format_group, + NULL, +}; + +/* find a counter for event, then in add func, hw.idx will equal to counter */ +static int ali_drw_get_counter_idx(struct perf_event *event) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + int idx; + + for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; ++idx) { + if (!test_and_set_bit(idx, drw_pmu->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EBUSY; +} + +static u64 ali_drw_pmu_read_counter(struct perf_event *event) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + u64 cycle_high, cycle_low; + + if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID) { + cycle_high = readl(drw_pmu->cfg_base + ALI_DRW_PMU_CYCLE_CNT_HIGH); + cycle_high &= ALI_DRW_PMU_CYCLE_CNT_HIGH_MASK; + cycle_low = readl(drw_pmu->cfg_base + ALI_DRW_PMU_CYCLE_CNT_LOW); + cycle_low &= ALI_DRW_PMU_CYCLE_CNT_LOW_MASK; + return (cycle_high << 32 | cycle_low); + } + + return readl(drw_pmu->cfg_base + + ALI_DRW_PMU_COMMON_COUNTERn(event->hw.idx)); +} + +static void ali_drw_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 delta, prev, now; + + do { + prev = local64_read(&hwc->prev_count); + now = ali_drw_pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev); + + /* handle overflow. */ + delta = now - prev; + if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID) + delta &= ALI_DRW_PMU_OV_INTR_MASK; + else + delta &= ALI_DRW_CNT_MAX_PERIOD; + local64_add(delta, &event->count); +} + +static void ali_drw_pmu_event_set_period(struct perf_event *event) +{ + u64 pre_val; + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + + /* set a preload counter for test purpose */ + writel(ALI_DRW_PMU_TEST_SEL_COMMON_COUNTER_BASE + event->hw.idx, + drw_pmu->cfg_base + ALI_DRW_PMU_TEST_CTRL); + + /* set conunter initial value */ + pre_val = ALI_DRW_PMU_CNT_INIT; + writel(pre_val, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_PRELOAD); + local64_set(&event->hw.prev_count, pre_val); + + /* set sel mode to zero to start test */ + writel(0x0, drw_pmu->cfg_base + ALI_DRW_PMU_TEST_CTRL); +} + +static void ali_drw_pmu_enable_counter(struct perf_event *event) +{ + u32 val, subval, reg, shift; + int counter = event->hw.idx; + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + + reg = ALI_DRW_PMU_EVENT_SELn(counter); + val = readl(drw_pmu->cfg_base + reg); + subval = FIELD_PREP(ALI_DRW_PMCOM_CNT_EN, 1) | + FIELD_PREP(ALI_DRW_PMCOM_CNT_EVENT_MASK, drw_pmu->evtids[counter]); + + shift = ALI_DRW_PMCOM_CNT_EVENT_OFFSET(counter); + val &= ~(GENMASK(7, 0) << shift); + val |= subval << shift; + + writel(val, drw_pmu->cfg_base + reg); +} + +static void ali_drw_pmu_disable_counter(struct perf_event *event) +{ + u32 val, reg, subval, shift; + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + int counter = event->hw.idx; + + reg = ALI_DRW_PMU_EVENT_SELn(counter); + val = readl(drw_pmu->cfg_base + reg); + subval = FIELD_PREP(ALI_DRW_PMCOM_CNT_EN, 0) | + FIELD_PREP(ALI_DRW_PMCOM_CNT_EVENT_MASK, 0); + + shift = ALI_DRW_PMCOM_CNT_EVENT_OFFSET(counter); + val &= ~(GENMASK(7, 0) << shift); + val |= subval << shift; + + writel(val, drw_pmu->cfg_base + reg); +} + +static irqreturn_t ali_drw_pmu_isr(int irq_num, void *data) +{ + struct ali_drw_pmu_irq *irq = data; + struct ali_drw_pmu *drw_pmu; + irqreturn_t ret = IRQ_NONE; + + rcu_read_lock(); + list_for_each_entry_rcu(drw_pmu, &irq->pmus_node, pmus_node) { + unsigned long status, clr_status; + struct perf_event *event; + unsigned int idx; + + for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; idx++) { + event = drw_pmu->events[idx]; + if (!event) + continue; + ali_drw_pmu_disable_counter(event); + } + + /* common counter intr status */ + status = readl(drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_STATUS); + status = FIELD_GET(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, status); + if (status) { + for_each_set_bit(idx, &status, + ALI_DRW_PMU_COMMON_MAX_COUNTERS) { + event = drw_pmu->events[idx]; + if (WARN_ON_ONCE(!event)) + continue; + ali_drw_pmu_event_update(event); + ali_drw_pmu_event_set_period(event); + } + + /* clear common counter intr status */ + clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, 1); + writel(clr_status, + drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR); + } + + for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; idx++) { + event = drw_pmu->events[idx]; + if (!event) + continue; + if (!(event->hw.state & PERF_HES_STOPPED)) + ali_drw_pmu_enable_counter(event); + } + if (status) + ret = IRQ_HANDLED; + } + rcu_read_unlock(); + return ret; +} + +static struct ali_drw_pmu_irq *__ali_drw_pmu_init_irq(struct platform_device + *pdev, int irq_num) +{ + int ret; + struct ali_drw_pmu_irq *irq; + + list_for_each_entry(irq, &ali_drw_pmu_irqs, irqs_node) { + if (irq->irq_num == irq_num + && refcount_inc_not_zero(&irq->refcount)) + return irq; + } + + irq = kzalloc(sizeof(*irq), GFP_KERNEL); + if (!irq) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&irq->pmus_node); + + /* Pick one CPU to be the preferred one to use */ + irq->cpu = smp_processor_id(); + refcount_set(&irq->refcount, 1); + + /* + * FIXME: one of DDRSS Driveway PMU overflow interrupt shares the same + * irq number with MPAM ERR_IRQ. To register DDRSS PMU and MPAM drivers + * successfully, add IRQF_SHARED flag. Howerer, PMU interrupt should not + * share with other component. + */ + ret = devm_request_irq(&pdev->dev, irq_num, ali_drw_pmu_isr, + IRQF_SHARED, dev_name(&pdev->dev), irq); + if (ret < 0) { + dev_err(&pdev->dev, + "Fail to request IRQ:%d ret:%d\n", irq_num, ret); + goto out_free; + } + + ret = irq_set_affinity_hint(irq_num, cpumask_of(irq->cpu)); + if (ret) + goto out_free; + + ret = cpuhp_state_add_instance_nocalls(ali_drw_cpuhp_state_num, + &irq->node); + if (ret) + goto out_free; + + irq->irq_num = irq_num; + list_add(&irq->irqs_node, &ali_drw_pmu_irqs); + + return irq; + +out_free: + kfree(irq); + return ERR_PTR(ret); +} + +static int ali_drw_pmu_init_irq(struct ali_drw_pmu *drw_pmu, + struct platform_device *pdev) +{ + int irq_num; + struct ali_drw_pmu_irq *irq; + + /* Read and init IRQ */ + irq_num = platform_get_irq(pdev, 0); + if (irq_num < 0) + return irq_num; + + mutex_lock(&ali_drw_pmu_irqs_lock); + irq = __ali_drw_pmu_init_irq(pdev, irq_num); + mutex_unlock(&ali_drw_pmu_irqs_lock); + + if (IS_ERR(irq)) + return PTR_ERR(irq); + + drw_pmu->irq = irq; + + mutex_lock(&ali_drw_pmu_irqs_lock); + list_add_rcu(&drw_pmu->pmus_node, &irq->pmus_node); + mutex_unlock(&ali_drw_pmu_irqs_lock); + + return 0; +} + +static void ali_drw_pmu_uninit_irq(struct ali_drw_pmu *drw_pmu) +{ + struct ali_drw_pmu_irq *irq = drw_pmu->irq; + + mutex_lock(&ali_drw_pmu_irqs_lock); + list_del_rcu(&drw_pmu->pmus_node); + + if (!refcount_dec_and_test(&irq->refcount)) { + mutex_unlock(&ali_drw_pmu_irqs_lock); + return; + } + + list_del(&irq->irqs_node); + mutex_unlock(&ali_drw_pmu_irqs_lock); + + WARN_ON(irq_set_affinity_hint(irq->irq_num, NULL)); + cpuhp_state_remove_instance_nocalls(ali_drw_cpuhp_state_num, + &irq->node); + kfree(irq); +} + +static int ali_drw_pmu_event_init(struct perf_event *event) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct perf_event *sibling; + struct device *dev = drw_pmu->pmu.dev; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (is_sampling_event(event)) { + dev_err(dev, "Sampling not supported!\n"); + return -EOPNOTSUPP; + } + + if (event->attach_state & PERF_ATTACH_TASK) { + dev_err(dev, "Per-task counter cannot allocate!\n"); + return -EOPNOTSUPP; + } + + event->cpu = drw_pmu->cpu; + if (event->cpu < 0) { + dev_err(dev, "Per-task mode not supported!\n"); + return -EOPNOTSUPP; + } + + if (event->group_leader != event && + !is_software_event(event->group_leader)) { + dev_err(dev, "driveway only allow one event!\n"); + return -EINVAL; + } + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling != event && !is_software_event(sibling)) { + dev_err(dev, "driveway event not allowed!\n"); + return -EINVAL; + } + } + + /* reset all the pmu counters */ + writel(ALI_DRW_PMU_CNT_RST, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL); + + hwc->idx = -1; + + return 0; +} + +static void ali_drw_pmu_start(struct perf_event *event, int flags) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + + event->hw.state = 0; + + if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID) { + writel(ALI_DRW_PMU_CNT_START, + drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL); + return; + } + + ali_drw_pmu_event_set_period(event); + if (flags & PERF_EF_RELOAD) { + unsigned long prev_raw_count = + local64_read(&event->hw.prev_count); + writel(prev_raw_count, + drw_pmu->cfg_base + ALI_DRW_PMU_CNT_PRELOAD); + } + + ali_drw_pmu_enable_counter(event); + + writel(ALI_DRW_PMU_CNT_START, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL); +} + +static void ali_drw_pmu_stop(struct perf_event *event, int flags) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + + if (event->hw.state & PERF_HES_STOPPED) + return; + + if (GET_DRW_EVENTID(event) != ALI_DRW_PMU_CYCLE_EVT_ID) + ali_drw_pmu_disable_counter(event); + + writel(ALI_DRW_PMU_CNT_STOP, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL); + + ali_drw_pmu_event_update(event); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; +} + +static int ali_drw_pmu_add(struct perf_event *event, int flags) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = -1; + int evtid; + + evtid = GET_DRW_EVENTID(event); + + if (evtid != ALI_DRW_PMU_CYCLE_EVT_ID) { + idx = ali_drw_get_counter_idx(event); + if (idx < 0) + return idx; + drw_pmu->events[idx] = event; + drw_pmu->evtids[idx] = evtid; + } + hwc->idx = idx; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + if (flags & PERF_EF_START) + ali_drw_pmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + + return 0; +} + +static void ali_drw_pmu_del(struct perf_event *event, int flags) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + ali_drw_pmu_stop(event, PERF_EF_UPDATE); + + if (idx >= 0 && idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS) { + drw_pmu->events[idx] = NULL; + drw_pmu->evtids[idx] = 0; + clear_bit(idx, drw_pmu->used_mask); + } + + perf_event_update_userpage(event); +} + +static void ali_drw_pmu_read(struct perf_event *event) +{ + ali_drw_pmu_event_update(event); +} + +static int ali_drw_pmu_probe(struct platform_device *pdev) +{ + struct ali_drw_pmu *drw_pmu; + struct resource *res; + char *name; + int ret; + + drw_pmu = devm_kzalloc(&pdev->dev, sizeof(*drw_pmu), GFP_KERNEL); + if (!drw_pmu) + return -ENOMEM; + + drw_pmu->dev = &pdev->dev; + platform_set_drvdata(pdev, drw_pmu); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + drw_pmu->cfg_base = devm_ioremap_resource(&pdev->dev, res); + if (!drw_pmu->cfg_base) + return -ENOMEM; + + name = devm_kasprintf(drw_pmu->dev, GFP_KERNEL, "ali_drw_%llx", + (u64) (res->start >> ALI_DRW_PMU_PA_SHIFT)); + if (!name) + return -ENOMEM; + + writel(ALI_DRW_PMU_CNT_RST, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL); + + /* enable the generation of interrupt by all common counters */ + writel(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, + drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_ENABLE_CTL); + + /* clearing interrupt status */ + writel(0xffffff, drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR); + + drw_pmu->cpu = smp_processor_id(); + + ret = ali_drw_pmu_init_irq(drw_pmu, pdev); + if (ret) + return ret; + + drw_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = ali_drw_pmu_event_init, + .add = ali_drw_pmu_add, + .del = ali_drw_pmu_del, + .start = ali_drw_pmu_start, + .stop = ali_drw_pmu_stop, + .read = ali_drw_pmu_read, + .attr_groups = ali_drw_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + ret = perf_pmu_register(&drw_pmu->pmu, name, -1); + if (ret) { + dev_err(drw_pmu->dev, "DRW Driveway PMU PMU register failed!\n"); + ali_drw_pmu_uninit_irq(drw_pmu); + } + + return ret; +} + +static int ali_drw_pmu_remove(struct platform_device *pdev) +{ + struct ali_drw_pmu *drw_pmu = platform_get_drvdata(pdev); + + /* disable the generation of interrupt by all common counters */ + writel(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, + drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_DISABLE_CTL); + + ali_drw_pmu_uninit_irq(drw_pmu); + perf_pmu_unregister(&drw_pmu->pmu); + + return 0; +} + +static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct ali_drw_pmu_irq *irq; + struct ali_drw_pmu *drw_pmu; + unsigned int target; + int ret; + cpumask_t node_online_cpus; + + irq = hlist_entry_safe(node, struct ali_drw_pmu_irq, node); + if (cpu != irq->cpu) + return 0; + + ret = cpumask_and(&node_online_cpus, + cpumask_of_node(cpu_to_node(cpu)), cpu_online_mask); + if (ret) + target = cpumask_any_but(&node_online_cpus, cpu); + else + target = cpumask_any_but(cpu_online_mask, cpu); + + if (target >= nr_cpu_ids) + return 0; + + /* We're only reading, but this isn't the place to be involving RCU */ + mutex_lock(&ali_drw_pmu_irqs_lock); + list_for_each_entry(drw_pmu, &irq->pmus_node, pmus_node) + perf_pmu_migrate_context(&drw_pmu->pmu, irq->cpu, target); + mutex_unlock(&ali_drw_pmu_irqs_lock); + + WARN_ON(irq_set_affinity_hint(irq->irq_num, cpumask_of(target))); + irq->cpu = target; + + return 0; +} + +/* + * Due to historical reasons, the HID used in the production environment is + * ARMHD700, so we leave ARMHD700 as Compatible ID. + */ +static const struct acpi_device_id ali_drw_acpi_match[] = { + {"BABA5000", 0}, + {"ARMHD700", 0}, + {} +}; + +MODULE_DEVICE_TABLE(acpi, ali_drw_acpi_match); + +static struct platform_driver ali_drw_pmu_driver = { + .driver = { + .name = "ali_drw_pmu", + .acpi_match_table = ali_drw_acpi_match, + }, + .probe = ali_drw_pmu_probe, + .remove = ali_drw_pmu_remove, +}; + +static int __init ali_drw_pmu_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "ali_drw_pmu:online", + NULL, ali_drw_pmu_offline_cpu); + + if (ret < 0) { + pr_err("DRW Driveway PMU: setup hotplug failed, ret = %d\n", + ret); + return ret; + } + ali_drw_cpuhp_state_num = ret; + + ret = platform_driver_register(&ali_drw_pmu_driver); + if (ret) + cpuhp_remove_multi_state(ali_drw_cpuhp_state_num); + + return ret; +} + +static void __exit ali_drw_pmu_exit(void) +{ + platform_driver_unregister(&ali_drw_pmu_driver); + cpuhp_remove_multi_state(ali_drw_cpuhp_state_num); +} + +module_init(ali_drw_pmu_init); +module_exit(ali_drw_pmu_exit); + +MODULE_AUTHOR("Hongbo Yao "); +MODULE_AUTHOR("Neng Chen "); +MODULE_AUTHOR("Shuai Xue "); +MODULE_DESCRIPTION("Alibaba DDR Sub-System Driveway PMU driver"); +MODULE_LICENSE("GPL v2"); From d813a19e7d2ec0c57477ee114f9b5e9a43cacb76 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Thu, 18 Aug 2022 11:18:22 +0800 Subject: [PATCH 105/126] MAINTAINERS: add maintainers for Alibaba' T-Head PMU driver Add maintainers for Alibaba PMU document and driver Signed-off-by: Shuai Xue Reviewed-by: Baolin Wang Link: https://lore.kernel.org/r/20220818031822.38415-4-xueshuai@linux.alibaba.com Signed-off-by: Will Deacon --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 589517372408..58d0aefead54 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -749,6 +749,12 @@ S: Supported F: drivers/infiniband/hw/erdma F: include/uapi/rdma/erdma-abi.h +ALIBABA PMU DRIVER +M: Shuai Xue +S: Supported +F: Documentation/admin-guide/perf/alibaba_pmu.rst +F: drivers/perf/alibaba_uncore_dwr_pmu.c + ALIENWARE WMI DRIVER L: Dell.Client.Kernel@dell.com S: Maintained From cbb0c02caf4bd98b9e0cd6d7420734b8e9a35703 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 1 Sep 2022 14:26:57 +0100 Subject: [PATCH 106/126] perf: arm64: Add SVE vector granule register to user regs Dwarf based unwinding in a function that pushes SVE registers onto the stack requires the unwinder to know the length of the SVE register to calculate the stack offsets correctly. This was added to the Arm specific Dwarf spec as the VG pseudo register[1]. Add the vector length at position 46 if it's requested by userspace and SVE is supported. If it's not supported then fail to open the event. The vector length must be on each sample because it can be changed at runtime via a prctl or ptrace call. Also by adding it as a register rather than a separate attribute, minimal changes will be required in an unwinder that already indexes into the register list. [1]: https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst Reviewed-by: Mark Brown Signed-off-by: James Clark Link: https://lore.kernel.org/r/20220901132658.1024635-2-james.clark@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/uapi/asm/perf_regs.h | 7 ++++++ arch/arm64/kernel/perf_regs.c | 30 +++++++++++++++++++++++-- drivers/perf/arm_pmu.c | 2 +- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h index d54daafa89e3..0d4b40c78e47 100644 --- a/arch/arm64/include/uapi/asm/perf_regs.h +++ b/arch/arm64/include/uapi/asm/perf_regs.h @@ -37,5 +37,12 @@ enum perf_event_arm_regs { PERF_REG_ARM64_SP, PERF_REG_ARM64_PC, PERF_REG_ARM64_MAX, + + /* Extended/pseudo registers */ + PERF_REG_ARM64_VG = 46, // SVE Vector Granule + PERF_REG_ARM64_EXTENDED_MAX }; + +#define PERF_REG_EXTENDED_MASK (1ULL << PERF_REG_ARM64_VG) + #endif /* _ASM_ARM64_PERF_REGS_H */ diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index f6f58e6265df..b4eece3eb17d 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -9,9 +9,27 @@ #include #include +static u64 perf_ext_regs_value(int idx) +{ + switch (idx) { + case PERF_REG_ARM64_VG: + if (WARN_ON_ONCE(!system_supports_sve())) + return 0; + + /* + * Vector granule is current length in bits of SVE registers + * divided by 64. + */ + return (task_get_sve_vl(current) * 8) / 64; + default: + WARN_ON_ONCE(true); + return 0; + } +} + u64 perf_reg_value(struct pt_regs *regs, int idx) { - if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX)) + if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_EXTENDED_MAX)) return 0; /* @@ -51,6 +69,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) if ((u32)idx == PERF_REG_ARM64_PC) return regs->pc; + if ((u32)idx >= PERF_REG_ARM64_MAX) + return perf_ext_regs_value(idx); + return regs->regs[idx]; } @@ -58,7 +79,12 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) int perf_reg_validate(u64 mask) { - if (!mask || mask & REG_RESERVED) + u64 reserved_mask = REG_RESERVED; + + if (system_supports_sve()) + reserved_mask &= ~(1ULL << PERF_REG_ARM64_VG); + + if (!mask || mask & reserved_mask) return -EINVAL; return 0; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 59d3980b8ca2..3f07df5a7e95 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -894,7 +894,7 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags) * pmu::filter_match callback and pmu::event_init group * validation). */ - .capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS, + .capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS, }; pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] = From 1f2906d1e10ac8b63f06c10b0db4282b8b38c509 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 1 Sep 2022 14:26:58 +0100 Subject: [PATCH 107/126] arm64/sve: Add Perf extensions documentation Document that the VG register is available in Perf samples Signed-off-by: James Clark Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20220901132658.1024635-3-james.clark@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/sve.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/arm64/sve.rst b/Documentation/arm64/sve.rst index 93c2c2990584..8955bf1bf757 100644 --- a/Documentation/arm64/sve.rst +++ b/Documentation/arm64/sve.rst @@ -452,6 +452,24 @@ The regset data starts with struct user_sve_header, containing: * Modifying the system default vector length does not affect the vector length of any existing process or thread that does not make an execve() call. +10. Perf extensions +-------------------------------- + +* The arm64 specific DWARF standard [5] added the VG (Vector Granule) register + at index 46. This register is used for DWARF unwinding when variable length + SVE registers are pushed onto the stack. + +* Its value is equivalent to the current SVE vector length (VL) in bits divided + by 64. + +* The value is included in Perf samples in the regs[46] field if + PERF_SAMPLE_REGS_USER is set and the sample_regs_user mask has bit 46 set. + +* The value is the current value at the time the sample was taken, and it can + change over time. + +* If the system doesn't support SVE when perf_event_open is called with these + settings, the event will fail to open. Appendix A. SVE programmer's model (informative) ================================================= @@ -593,3 +611,5 @@ References http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf http://infocenter.arm.com/help/topic/com.arm.doc.subset.swdev.abi/index.html Procedure Call Standard for the ARM 64-bit Architecture (AArch64) + +[5] https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst From c44094eee32f32f175aadc0efcac449d99b1bbf7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 23 Aug 2022 13:21:11 +0100 Subject: [PATCH 108/126] arm64: dma: Drop cache invalidation from arch_dma_prep_coherent() arch_dma_prep_coherent() is called when preparing a non-cacheable region for a consistent DMA buffer allocation. Since the buffer pages may previously have been written via a cacheable mapping and consequently allocated as dirty cachelines, the purpose of this function is to remove these dirty lines from the cache, writing them back so that the non-coherent device is able to see them. On arm64, this operation can be achieved with a clean to the point of coherency; a subsequent invalidation is not required and serves little purpose in the presence of a cacheable alias (e.g. the linear map), since clean lines can be speculatively fetched back into the cache after the invalidation operation has completed. Relax the cache maintenance in arch_dma_prep_coherent() so that only a clean, and not a clean-and-invalidate operation is performed. Cc: Mark Rutland Cc: Robin Murphy Cc: Christoph Hellwig Cc: Ard Biesheuvel Signed-off-by: Will Deacon Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220823122111.17439-1-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 599cf81f5685..83a512a6ff0d 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -36,7 +36,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) { unsigned long start = (unsigned long)page_address(page); - dcache_clean_inval_poc(start, start + size); + dcache_clean_poc(start, start + size); } #ifdef CONFIG_IOMMU_DMA From 739e49e0fc80990a351961c99a3142094822f040 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 20 Sep 2022 09:49:51 +0800 Subject: [PATCH 109/126] arm64: mm: handle ARM64_KERNEL_USES_PMD_MAPS in vmemmap_populate() Directly check ARM64_SWAPPER_USES_SECTION_MAPS to choose base page or PMD level huge page mapping in vmemmap_populate() to simplify code a bit. Signed-off-by: Kefeng Wang Link: https://lore.kernel.org/r/20220920014951.196191-1-wangkefeng.wang@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e7ad44585f40..69deed27dec8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1180,14 +1180,6 @@ static void free_empty_tables(unsigned long addr, unsigned long end, } #endif -#if !ARM64_KERNEL_USES_PMD_MAPS -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, - struct vmem_altmap *altmap) -{ - WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); - return vmemmap_populate_basepages(start, end, node, altmap); -} -#else /* !ARM64_KERNEL_USES_PMD_MAPS */ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { @@ -1199,6 +1191,10 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, pmd_t *pmdp; WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); + + if (!ARM64_KERNEL_USES_PMD_MAPS) + return vmemmap_populate_basepages(start, end, node, altmap); + do { next = pmd_addr_end(addr, end); @@ -1232,7 +1228,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return 0; } -#endif /* !ARM64_KERNEL_USES_PMD_MAPS */ #ifdef CONFIG_MEMORY_HOTPLUG void vmemmap_free(unsigned long start, unsigned long end, From 973b9e37330656dec719ede508e4dc40e5c2d80c Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 15 Sep 2022 15:20:53 -0700 Subject: [PATCH 110/126] arm64: mte: move register initialization to C If FEAT_MTE2 is disabled via the arm64.nomte command line argument on a CPU that claims to support FEAT_MTE2, the kernel will use Tagged Normal in the MAIR. If we interpret arm64.nomte to mean that the CPU does not in fact implement FEAT_MTE2, setting the system register like this may lead to UNSPECIFIED behavior. Fix it by arranging for MAIR to be set in the C function cpu_enable_mte which is called based on the sanitized version of the system register. There is no need for the rest of the MTE-related system register initialization to happen from assembly, with the exception of TCR_EL1, which must be set to include at least TBI1 because the secondary CPUs access KASan-allocated data structures early. Therefore, make the TCR_EL1 initialization unconditional and move the rest of the initialization to cpu_enable_mte so that we no longer have a dependency on the unsanitized ID register value. Co-developed-by: Evgenii Stepanov Signed-off-by: Peter Collingbourne Signed-off-by: Evgenii Stepanov Suggested-by: Catalin Marinas Reported-by: kernel test robot Fixes: 3b714d24ef17 ("arm64: mte: CPU feature detection and initial sysreg configuration") Cc: # 5.10.x Link: https://lore.kernel.org/r/20220915222053.3484231-1-eugenis@google.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 5 ++++ arch/arm64/kernel/cpufeature.c | 3 +- arch/arm64/kernel/mte.c | 51 ++++++++++++++++++++++++++++++++++ arch/arm64/kernel/suspend.c | 2 ++ arch/arm64/mm/proc.S | 46 ++++-------------------------- 5 files changed, 65 insertions(+), 42 deletions(-) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index aa523591a44e..760c62f8e22f 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -42,7 +42,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); void mte_thread_init_user(void); void mte_thread_switch(struct task_struct *next); +void mte_cpu_setup(void); void mte_suspend_enter(void); +void mte_suspend_exit(void); long set_mte_ctrl(struct task_struct *task, unsigned long arg); long get_mte_ctrl(struct task_struct *task); int mte_ptrace_copy_tags(struct task_struct *child, long request, @@ -72,6 +74,9 @@ static inline void mte_thread_switch(struct task_struct *next) static inline void mte_suspend_enter(void) { } +static inline void mte_suspend_exit(void) +{ +} static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg) { return 0; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index af4de817d712..d7a077b5ccd1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2034,7 +2034,8 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) { sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); - isb(); + + mte_cpu_setup(); /* * Clear the tags in the zero page. This needs to be done via the diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index b2b730233274..aca88470fb69 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -285,6 +285,49 @@ void mte_thread_switch(struct task_struct *next) mte_check_tfsr_el1(); } +void mte_cpu_setup(void) +{ + u64 rgsr; + + /* + * CnP must be enabled only after the MAIR_EL1 register has been set + * up. Inconsistent MAIR_EL1 between CPUs sharing the same TLB may + * lead to the wrong memory type being used for a brief window during + * CPU power-up. + * + * CnP is not a boot feature so MTE gets enabled before CnP, but let's + * make sure that is the case. + */ + BUG_ON(read_sysreg(ttbr0_el1) & TTBR_CNP_BIT); + BUG_ON(read_sysreg(ttbr1_el1) & TTBR_CNP_BIT); + + /* Normal Tagged memory type at the corresponding MAIR index */ + sysreg_clear_set(mair_el1, + MAIR_ATTRIDX(MAIR_ATTR_MASK, MT_NORMAL_TAGGED), + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_TAGGED, + MT_NORMAL_TAGGED)); + + write_sysreg_s(KERNEL_GCR_EL1, SYS_GCR_EL1); + + /* + * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then + * RGSR_EL1.SEED must be non-zero for IRG to produce + * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we + * must initialize it. + */ + rgsr = (read_sysreg(CNTVCT_EL0) & SYS_RGSR_EL1_SEED_MASK) << + SYS_RGSR_EL1_SEED_SHIFT; + if (rgsr == 0) + rgsr = 1 << SYS_RGSR_EL1_SEED_SHIFT; + write_sysreg_s(rgsr, SYS_RGSR_EL1); + + /* clear any pending tag check faults in TFSR*_EL1 */ + write_sysreg_s(0, SYS_TFSR_EL1); + write_sysreg_s(0, SYS_TFSRE0_EL1); + + local_flush_tlb_all(); +} + void mte_suspend_enter(void) { if (!system_supports_mte()) @@ -301,6 +344,14 @@ void mte_suspend_enter(void) mte_check_tfsr_el1(); } +void mte_suspend_exit(void) +{ + if (!system_supports_mte()) + return; + + mte_cpu_setup(); +} + long set_mte_ctrl(struct task_struct *task, unsigned long arg) { u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) & diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 9135fe0f3df5..8b02d310838f 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -43,6 +43,8 @@ void notrace __cpu_suspend_exit(void) { unsigned int cpu = smp_processor_id(); + mte_suspend_exit(); + /* * We are resuming from reset with the idmap active in TTBR0_EL1. * We must uninstall the idmap and restore the expected MMU diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 7837a69524c5..f38bccdd374a 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -48,17 +48,19 @@ #ifdef CONFIG_KASAN_HW_TAGS #define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1 -#else +#elif defined(CONFIG_ARM64_MTE) /* * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on * TBI being enabled at EL1. */ #define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1 +#else +#define TCR_MTE_FLAGS 0 #endif /* * Default MAIR_EL1. MT_NORMAL_TAGGED is initially mapped as Normal memory and - * changed during __cpu_setup to Normal Tagged if the system supports MTE. + * changed during mte_cpu_setup to Normal Tagged if the system supports MTE. */ #define MAIR_EL1_SET \ (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ @@ -426,46 +428,8 @@ SYM_FUNC_START(__cpu_setup) mov_q mair, MAIR_EL1_SET mov_q tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ - TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS + TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS -#ifdef CONFIG_ARM64_MTE - /* - * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported - * (ID_AA64PFR1_EL1[11:8] > 1). - */ - mrs x10, ID_AA64PFR1_EL1 - ubfx x10, x10, #ID_AA64PFR1_MTE_SHIFT, #4 - cmp x10, #ID_AA64PFR1_MTE - b.lt 1f - - /* Normal Tagged memory type at the corresponding MAIR index */ - mov x10, #MAIR_ATTR_NORMAL_TAGGED - bfi mair, x10, #(8 * MT_NORMAL_TAGGED), #8 - - mov x10, #KERNEL_GCR_EL1 - msr_s SYS_GCR_EL1, x10 - - /* - * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then - * RGSR_EL1.SEED must be non-zero for IRG to produce - * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we - * must initialize it. - */ - mrs x10, CNTVCT_EL0 - ands x10, x10, #SYS_RGSR_EL1_SEED_MASK - csinc x10, x10, xzr, ne - lsl x10, x10, #SYS_RGSR_EL1_SEED_SHIFT - msr_s SYS_RGSR_EL1, x10 - - /* clear any pending tag check faults in TFSR*_EL1 */ - msr_s SYS_TFSR_EL1, xzr - msr_s SYS_TFSRE0_EL1, xzr - - /* set the TCR_EL1 bits */ - mov_q x10, TCR_MTE_FLAGS - orr tcr, tcr, x10 -1: -#endif tcr_clear_errata_bits tcr, x9, x5 #ifdef CONFIG_ARM64_VA_BITS_52 From 33060a64901e61f09ea6faffe367551df18a54c3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 7 Sep 2022 12:33:59 +0100 Subject: [PATCH 111/126] kselftest/arm64: Fix typo in hwcap check We use a local variable hwcap to refer to the element of the hwcaps array which we are currently checking. When checking for the relevant hwcap bit being set in testing we were dereferencing hwcaps rather than hwcap in fetching the AT_HWCAP to use, which is perfectly valid C but means we were always checking the bit was set in the hwcap for whichever feature is first in the array. Remove the stray s. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220907113400.12982-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/hwcap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index d173e41f2123..9f1a7b5c6193 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -295,7 +295,7 @@ int main(void) for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { hwcap = &hwcaps[i]; - have_hwcap = getauxval(hwcaps->at_hwcap) & hwcap->hwcap_bit; + have_hwcap = getauxval(hwcap->at_hwcap) & hwcap->hwcap_bit; have_cpuinfo = cpuinfo_present(hwcap->cpuinfo); if (have_hwcap) From aa3e49b606e0796aa766ded2fb7b8c2d36d7cf2f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 22 Sep 2022 22:11:10 +0100 Subject: [PATCH 112/126] arm64: asm/perf_regs.h: Avoid C++-style comment in UAPI header An arm64 'allmodconfig' build fails with GCC due to use of a C++-style comment for the new SVE vector granule 'enum perf_event_arm_regs' entry: | /usr/include/asm/perf_regs.h:42:26: error: C++ style comments are not allowed in ISO C90 Use good ol' /* */ comment syntax to keep things rosey. Link: https://lore.kernel.org/r/632cceb2.170a0220.599ec.0a3a@mx.google.com Fixes: cbb0c02caf4b ("perf: arm64: Add SVE vector granule register to user regs") Signed-off-by: Will Deacon --- arch/arm64/include/uapi/asm/perf_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h index 0d4b40c78e47..86e556429e0e 100644 --- a/arch/arm64/include/uapi/asm/perf_regs.h +++ b/arch/arm64/include/uapi/asm/perf_regs.h @@ -39,7 +39,7 @@ enum perf_event_arm_regs { PERF_REG_ARM64_MAX, /* Extended/pseudo registers */ - PERF_REG_ARM64_VG = 46, // SVE Vector Granule + PERF_REG_ARM64_VG = 46, /* SVE Vector Granule */ PERF_REG_ARM64_EXTENDED_MAX }; From 2305b809be930ce02e095ee7207e8b1be4a5cd75 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 22 Sep 2022 16:10:53 +0100 Subject: [PATCH 113/126] arm64: uaccess: simplify uaccess_mask_ptr() We introduced uaccess pointer masking for arm64 in commit: 4d8efc2d5ee4c9cc ("arm64: Use pointer masking to limit uaccess speculation") Which was intended to prevent speculative uaccesses to kernel memory on CPUs where access permissions were not respected under speculation. At the time, the uaccess primitives were occasionally used to access kernel memory, with the maximum permitted address held in thread_info::addr_limit. Consequently, the address masking needed to take this dynamic limit into account. Subsequently the uaccess primitives were reworked such that they are only used for user memory, and as of commit: 3d2403fd10a1dbb3 ("arm64: uaccess: remove set_fs()") ... the address limit was made a compile-time constant, but the logic was otherwise unchanged. Regardless of the configured VA size or whether TBI is in use, the address space can be divided into three ranges: * The TTBR0 VA range, for which any valid pointer has bit 55 *clear*, and any non-tag bits [63-56] must match bit 55 (i.e. must be clear). * The TTBR1 VA range, for which any valid pointer has bit 55 *set*, and any non-tag bits [63-56] must match bit 55 (i.e. must be set). * The gap between the TTBR0 and TTBR1 ranges, where bit 55 may be set or clear, but any access will result in a fault. As the uaccess primitives are now only used for user memory in the TTBR0 VA range, we can prevent generation of TTBR1 addresses by clearing bit 55, which will either result in a TTBR0 address or a faulting address between the TTBR VA ranges. This is beneficial for code generation as: * We no longer clobber the condition codes. * We no longer burn a register on (TASK_SIZE_MAX - 1). * We no longer need to consume the untagged pointer. When building a defconfig v6.0-rc3 with GCC 12.1.0, this change makes the resulting Image 64KiB smaller. Signed-off-by: Mark Rutland Cc: James Morse Cc: Robin Murphy Cc: Will Deacon Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20220922151053.3520750-1-mark.rutland@arm.com [catalin.marinas@arm.com: remove csdb() as the bit clearing is unconditional] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 2fc9f0861769..5c7b2f9d5913 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -203,9 +203,11 @@ static inline void uaccess_enable_privileged(void) } /* - * Sanitise a uaccess pointer such that it becomes NULL if above the maximum - * user address. In case the pointer is tagged (has the top byte set), untag - * the pointer before checking. + * Sanitize a uaccess pointer such that it cannot reach any kernel address. + * + * Clearing bit 55 ensures the pointer cannot address any portion of the TTBR1 + * address range (i.e. any kernel address), and either the pointer falls within + * the TTBR0 address range or must cause a fault. */ #define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr) static inline void __user *__uaccess_mask_ptr(const void __user *ptr) @@ -213,14 +215,12 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) void __user *safe_ptr; asm volatile( - " bics xzr, %3, %2\n" - " csel %0, %1, xzr, eq\n" - : "=&r" (safe_ptr) - : "r" (ptr), "r" (TASK_SIZE_MAX - 1), - "r" (untagged_addr(ptr)) - : "cc"); + " bic %0, %1, %2\n" + : "=r" (safe_ptr) + : "r" (ptr), + "i" (BIT(55)) + ); - csdb(); return safe_ptr; } From 55c8a987dd73a7ef9e53119f3329620768d4a655 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 28 Sep 2022 16:45:17 +0100 Subject: [PATCH 114/126] kselftest/arm64: Don't enable v8.5 for MTE selftest builds Currently we set -march=armv8.5+memtag when building the MTE selftests, allowing the compiler to emit v8.5 and MTE instructions for anything it generates. This means that we may get code that will generate SIGILLs when run on older systems rather than skipping on non-MTE systems as should be the case. Most toolchains don't select any incompatible instructions but I have seen some reports which suggest that some may be appearing which do so. This is also potentially problematic in that if the compiler chooses to emit any MTE instructions for the C code it may interfere with the MTE usage we are trying to test. Since the only reason we are specifying this option is to allow us to assemble MTE instructions in mte_helper.S we can avoid these issues by moving to using a .arch directive there and adding the -march explicitly to the toolchain support check instead of the generic CFLAGS. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220928154517.173108-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/Makefile | 5 +---- tools/testing/selftests/arm64/mte/mte_helper.S | 2 ++ 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index a5a0744423d8..037046f5784e 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -11,11 +11,8 @@ LDFLAGS += -pthread SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) PROGS := $(patsubst %.c,%,$(SRCS)) -#Add mte compiler option -CFLAGS += -march=armv8.5-a+memtag - #check if the compiler works well -mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) +mte_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.5-a+memtag -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) ifeq ($(mte_cc_support),1) # Generated binaries to be installed by top KSFT script diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S index a02c04cd0aac..a55dbbc56ed1 100644 --- a/tools/testing/selftests/arm64/mte/mte_helper.S +++ b/tools/testing/selftests/arm64/mte/mte_helper.S @@ -3,6 +3,8 @@ #include "mte_def.h" +.arch armv8.5-a+memtag + #define ENTRY(name) \ .globl name ;\ .p2align 2;\ From ba00c2a04fa5431d204a4183062b30372c062aa7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 29 Sep 2022 16:02:27 +0100 Subject: [PATCH 115/126] arm64: fix the build with binutils 2.27 Jon Hunter reports that for some toolchains the build has been broken since commit: 4c0bd995d73ed889 ("arm64: alternatives: have callbacks take a cap") ... with a stream of build-time splats of the form: | CC arch/arm64/kvm/hyp/vhe/debug-sr.o | /tmp/ccY3kbki.s: Assembler messages: | /tmp/ccY3kbki.s:1600: Error: found 'L', expected: ')' | /tmp/ccY3kbki.s:1600: Error: found 'L', expected: ')' | /tmp/ccY3kbki.s:1600: Error: found 'L', expected: ')' | /tmp/ccY3kbki.s:1600: Error: found 'L', expected: ')' | /tmp/ccY3kbki.s:1600: Error: junk at end of line, first unrecognized character | is `L' | /tmp/ccY3kbki.s:1723: Error: found 'L', expected: ')' | /tmp/ccY3kbki.s:1723: Error: found 'L', expected: ')' | /tmp/ccY3kbki.s:1723: Error: found 'L', expected: ')' | /tmp/ccY3kbki.s:1723: Error: found 'L', expected: ')' | /tmp/ccY3kbki.s:1723: Error: junk at end of line, first unrecognized character | is `L' | scripts/Makefile.build:249: recipe for target | 'arch/arm64/kvm/hyp/vhe/debug-sr.o' failed The issue here is that older versions of binutils (up to and including 2.27.0) don't like an 'L' suffix on constants. For plain assembly files, UL() avoids this suffix, but in C files this gets added, and so for inline assembly we can't directly use a constant defined with `UL()`. We could avoid this by passing the constant as an input parameter, but this isn't practical given the way we use the alternative macros. Instead, just open code the constant without the `UL` suffix, and for consistency do this for both the inline assembly macro and the regular assembly macro. Signed-off-by: Mark Rutland Fixes: 4c0bd995d73e ("arm64: alternatives: have callbacks take a cap") Reported-by: Jon Hunter Link: https://lore.kernel.org/linux-arm-kernel/3cecc3a5-30b0-f0bd-c3de-9e09bd21909b@nvidia.com/ Tested-by: Jon Hunter Cc: Ard Biesheuvel Cc: Will Deacon Link: https://lore.kernel.org/r/20220929150227.1028556-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative-macros.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 966767debaa3..b5ac0b85c9bb 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -2,12 +2,18 @@ #ifndef __ASM_ALTERNATIVE_MACROS_H #define __ASM_ALTERNATIVE_MACROS_H +#include #include #include #include -#define ARM64_CB_BIT (UL(1) << 15) +/* + * Binutils 2.27.0 can't handle a 'UL' suffix on constants, so for the assembly + * macros below we must use we must use `(1 << ARM64_CB_SHIFT)`. + */ +#define ARM64_CB_SHIFT 15 +#define ARM64_CB_BIT BIT(ARM64_CB_SHIFT) #if ARM64_NCAPS >= ARM64_CB_BIT #error "cpucaps have overflown ARM64_CB_BIT" @@ -80,7 +86,7 @@ __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) #define ALTERNATIVE_CB(oldinstr, feature, cb) \ - __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_BIT | (feature), 1, cb) + __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (feature), 1, cb) #else #include @@ -150,7 +156,7 @@ .macro alternative_cb cap, cb .set .Lasm_alt_mode, 0 .pushsection .altinstructions, "a" - altinstruction_entry 661f, \cb, ARM64_CB_BIT | \cap, 662f-661f, 0 + altinstruction_entry 661f, \cb, (1 << ARM64_CB_SHIFT) | \cap, 662f-661f, 0 .popsection 661: .endm From 3fb420f56cbf06b6ef8c39b886fed8f8f9aedee3 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Thu, 29 Sep 2022 17:41:32 +0800 Subject: [PATCH 116/126] arm64: module: Make plt_equals_entry() static Since commit 4e69ecf4da1e ("arm64/module: ftrace: deal with place relative nature of PLTs"), plt_equals_entry() is not used outside of module-plts.c, so make it static. Signed-off-by: Li Huafei Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220929094134.99512-2-lihuafei1@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/module.h | 1 - arch/arm64/kernel/module-plts.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 4e7fa2623896..28514b989a0b 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -58,7 +58,6 @@ static inline bool is_forbidden_offset_for_adrp(void *place) } struct plt_entry get_plt_entry(u64 dst, void *pc); -bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b); static inline bool plt_entry_is_initialized(const struct plt_entry *e) { diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index a3d0494f25a9..5a0a8f552a61 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -37,7 +37,8 @@ struct plt_entry get_plt_entry(u64 dst, void *pc) return plt; } -bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b) +static bool plt_entries_equal(const struct plt_entry *a, + const struct plt_entry *b) { u64 p, q; From 5de2291605087b7a0cbc978c1a55dd4916f04e6e Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Thu, 29 Sep 2022 17:41:33 +0800 Subject: [PATCH 117/126] arm64: module: Remove unused plt_entry_is_initialized() Since commit f1a54ae9af0d ("arm64: module/ftrace: intialize PLT at load time"), plt_entry_is_initialized() is unused anymore , so remove it. Signed-off-by: Li Huafei Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220929094134.99512-3-lihuafei1@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/module.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 28514b989a0b..8096d30c5e39 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -59,9 +59,4 @@ static inline bool is_forbidden_offset_for_adrp(void *place) struct plt_entry get_plt_entry(u64 dst, void *pc); -static inline bool plt_entry_is_initialized(const struct plt_entry *e) -{ - return e->adrp || e->add || e->br; -} - #endif /* __ASM_MODULE_H */ From 8cfb08575c6d4585f1ce0deeb189e5c824776b04 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 29 Sep 2022 14:45:25 +0100 Subject: [PATCH 118/126] arm64: ftrace: fix module PLTs with mcount Li Huafei reports that mcount-based ftrace with module PLTs was broken by commit: a6253579977e4c6f ("arm64: ftrace: consistently handle PLTs.") When a module PLTs are used and a module is loaded sufficiently far away from the kernel, we'll create PLTs for any branches which are out-of-range. These are separate from the special ftrace trampoline PLTs, which the module PLT code doesn't directly manipulate. When mcount is in use this is a problem, as each mcount callsite in a module will be initialized to point to a module PLT, but since commit a6253579977e4c6f ftrace_make_nop() will assume that the callsite has been initialized to point to the special ftrace trampoline PLT, and ftrace_find_callable_addr() rejects other cases. This means that when ftrace tries to initialize a callsite via ftrace_make_nop(), the call to ftrace_find_callable_addr() will find that the `_mcount` stub is out-of-range and is not handled by the ftrace PLT, resulting in a splat: | ftrace_test: loading out-of-tree module taints kernel. | ftrace: no module PLT for _mcount | ------------[ ftrace bug ]------------ | ftrace failed to modify | [] 0xffff800029180014 | actual: 44:00:00:94 | Initializing ftrace call sites | ftrace record flags: 2000000 | (0) | expected tramp: ffff80000802eb3c | ------------[ cut here ]------------ | WARNING: CPU: 3 PID: 157 at kernel/trace/ftrace.c:2120 ftrace_bug+0x94/0x270 | Modules linked in: | CPU: 3 PID: 157 Comm: insmod Tainted: G O 6.0.0-rc6-00151-gcd722513a189-dirty #22 | Hardware name: linux,dummy-virt (DT) | pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : ftrace_bug+0x94/0x270 | lr : ftrace_bug+0x21c/0x270 | sp : ffff80000b2bbaf0 | x29: ffff80000b2bbaf0 x28: 0000000000000000 x27: ffff0000c4d38000 | x26: 0000000000000001 x25: ffff800009d7e000 x24: ffff0000c4d86e00 | x23: 0000000002000000 x22: ffff80000a62b000 x21: ffff8000098ebea8 | x20: ffff0000c4d38000 x19: ffff80000aa24158 x18: ffffffffffffffff | x17: 0000000000000000 x16: 0a0d2d2d2d2d2d2d x15: ffff800009aa9118 | x14: 0000000000000000 x13: 6333626532303830 x12: 3030303866666666 | x11: 203a706d61727420 x10: 6465746365707865 x9 : 3362653230383030 | x8 : c0000000ffffefff x7 : 0000000000017fe8 x6 : 000000000000bff4 | x5 : 0000000000057fa8 x4 : 0000000000000000 x3 : 0000000000000001 | x2 : ad2cb14bb5438900 x1 : 0000000000000000 x0 : 0000000000000022 | Call trace: | ftrace_bug+0x94/0x270 | ftrace_process_locs+0x308/0x430 | ftrace_module_init+0x44/0x60 | load_module+0x15b4/0x1ce8 | __do_sys_init_module+0x1ec/0x238 | __arm64_sys_init_module+0x24/0x30 | invoke_syscall+0x54/0x118 | el0_svc_common.constprop.4+0x84/0x100 | do_el0_svc+0x3c/0xd0 | el0_svc+0x1c/0x50 | el0t_64_sync_handler+0x90/0xb8 | el0t_64_sync+0x15c/0x160 | ---[ end trace 0000000000000000 ]--- | ---------test_init----------- Fix this by reverting to the old behaviour of ignoring the old instruction when initialising an mcount callsite in a module, which was the behaviour prior to commit a6253579977e4c6f. Signed-off-by: Mark Rutland Fixes: a6253579977e ("arm64: ftrace: consistently handle PLTs.") Reported-by: Li Huafei Link: https://lore.kernel.org/linux-arm-kernel/20220929094134.99512-1-lihuafei1@huawei.com Cc: Ard Biesheuvel Cc: Will Deacon Link: https://lore.kernel.org/r/20220929134525.798593-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ftrace.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index ea5dc7c90f46..b49ba9a24bcc 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -217,11 +217,26 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long pc = rec->ip; u32 old = 0, new; + new = aarch64_insn_gen_nop(); + + /* + * When using mcount, callsites in modules may have been initalized to + * call an arbitrary module PLT (which redirects to the _mcount stub) + * rather than the ftrace PLT we'll use at runtime (which redirects to + * the ftrace trampoline). We can ignore the old PLT when initializing + * the callsite. + * + * Note: 'mod' is only set at module load time. + */ + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && + IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) { + return aarch64_insn_patch_text_nosync((void *)pc, new); + } + if (!ftrace_find_callable_addr(rec, mod, &addr)) return -EINVAL; old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); - new = aarch64_insn_gen_nop(); return ftrace_modify_code(pc, old, new, true); } From b9dd04a20f81333e4b99662f1bbaf7c9e3a1e137 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Sep 2022 10:48:41 +0300 Subject: [PATCH 119/126] arm64/mm: fold check for KFENCE into can_set_direct_map() KFENCE requires linear map to be mapped at page granularity, so that it is possible to protect/unprotect single pages, just like with rodata_full and DEBUG_PAGEALLOC. Instead of repating can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE) make can_set_direct_map() handle the KFENCE case. This also prevents potential false positives in kernel_page_present() that may return true for non-present page if CONFIG_KFENCE is enabled. Signed-off-by: Mike Rapoport Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20220921074841.382615-1-rppt@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 8 ++------ arch/arm64/mm/pageattr.c | 8 +++++++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 69deed27dec8..e8a48dfd550f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -535,7 +535,7 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)) + if (can_set_direct_map()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* @@ -1542,11 +1542,7 @@ int arch_add_memory(int nid, u64 start, u64 size, VM_BUG_ON(!mhp_range_allowed(start, size, true)); - /* - * KFENCE requires linear map to be mapped at page granularity, so that - * it is possible to protect/unprotect single pages in the KFENCE pool. - */ - if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)) + if (can_set_direct_map()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 64e985eaa52d..d107c3d434e2 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -21,7 +21,13 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED bool can_set_direct_map(void) { - return rodata_full || debug_pagealloc_enabled(); + /* + * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be + * mapped at page granularity, so that it is possible to + * protect/unprotect single pages. + */ + return rodata_full || debug_pagealloc_enabled() || + IS_ENABLED(CONFIG_KFENCE); } static int change_page_range(pte_t *ptep, unsigned long addr, void *data) From 8c6e3657be6b39cd5943041d0a4ab6bd5d0c2258 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Sun, 11 Sep 2022 11:47:47 +0800 Subject: [PATCH 120/126] ARM64: reloc_test: add __init/__exit annotations to module init/exit funcs Add missing __init/__exit annotations to module init/exit funcs. Signed-off-by: Xiu Jianfeng Link: https://lore.kernel.org/r/20220911034747.132098-1-xiujianfeng@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/reloc_test_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c index e87a2b7f20f6..99f2ffe9fc05 100644 --- a/arch/arm64/kernel/reloc_test_core.c +++ b/arch/arm64/kernel/reloc_test_core.c @@ -48,7 +48,7 @@ static struct { { "R_AARCH64_PREL16", relative_data16, (u64)&sym64_rel }, }; -static int reloc_test_init(void) +static int __init reloc_test_init(void) { int i; @@ -67,7 +67,7 @@ static int reloc_test_init(void) return 0; } -static void reloc_test_exit(void) +static void __exit reloc_test_exit(void) { } From c38d381fff26ece271447d558dcb4b42f0ccda51 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 21 Sep 2022 19:13:43 +0100 Subject: [PATCH 121/126] kselftest/arm64: Don't repeat termination handler for fp-stress When fp-stress gets a termination signal it sets a flag telling itself to exit and sends a termination signal to all the children. If the flag is set then don't bother repeating this process, it isn't going to accomplish anything other than consume CPU time which can be an issue when running in emulation. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220921181345.618085-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-stress.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index a5c0ebef2419..4387c3cacaa7 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -255,6 +255,10 @@ static void handle_exit_signal(int sig, siginfo_t *info, void *context) { int i; + /* If we're already exiting then don't signal again */ + if (terminate) + return; + ksft_print_msg("Got signal, exiting...\n"); terminate = true; From dd72dd7cd527ff9313af5866434e698fdbda98ae Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 21 Sep 2022 19:13:44 +0100 Subject: [PATCH 122/126] kselftest/arm64: Flag fp-stress as exiting when we begin finishing up Once we have started exiting the termination handler will have the same effect as what we're already running so set the termination flag at that point. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220921181345.618085-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-stress.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index 4387c3cacaa7..03ce3936220a 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -522,6 +522,7 @@ int main(int argc, char **argv) } ksft_print_msg("Finishing up...\n"); + terminate = true; for (i = 0; i < tests; i++) child_stop(&children[i]); From a711987490a1784c3e3fd6d752a63501c11eb80b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 21 Sep 2022 19:13:45 +0100 Subject: [PATCH 123/126] kselftest/arm64: Handle EINTR while reading data from children Currently we treat any error when reading from the child as a failure and don't read any more output from that child as a result. This ignores the fact that it is valid for read() to return EINTR as the error code if there is a signal pending so we could stop handling the output of children, especially during exit when we will get some SIGCHLD signals delivered to us. Fix this by pulling the read handling out into a separate function which returns a flag if reads should be continued and wrapping it in a loop. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220921181345.618085-4-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-stress.c | 85 ++++++++++++-------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index 03ce3936220a..4e62a9199f97 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -121,55 +121,70 @@ static void child_start(struct child_data *child, const char *program) } } -static void child_output(struct child_data *child, uint32_t events, - bool flush) +static bool child_output_read(struct child_data *child) { char read_data[1024]; char work[1024]; int ret, len, cur_work, cur_read; - if (events & EPOLLIN) { - ret = read(child->stdout, read_data, sizeof(read_data)); - if (ret < 0) { - ksft_print_msg("%s: read() failed: %s (%d)\n", - child->name, strerror(errno), errno); - return; - } - len = ret; + ret = read(child->stdout, read_data, sizeof(read_data)); + if (ret < 0) { + if (errno == EINTR) + return true; - child->output_seen = true; + ksft_print_msg("%s: read() failed: %s (%d)\n", + child->name, strerror(errno), + errno); + return false; + } + len = ret; - /* Pick up any partial read */ - if (child->output) { - strncpy(work, child->output, sizeof(work) - 1); - cur_work = strnlen(work, sizeof(work)); - free(child->output); - child->output = NULL; - } else { - cur_work = 0; - } + child->output_seen = true; - cur_read = 0; - while (cur_read < len) { - work[cur_work] = read_data[cur_read++]; + /* Pick up any partial read */ + if (child->output) { + strncpy(work, child->output, sizeof(work) - 1); + cur_work = strnlen(work, sizeof(work)); + free(child->output); + child->output = NULL; + } else { + cur_work = 0; + } - if (work[cur_work] == '\n') { - work[cur_work] = '\0'; - ksft_print_msg("%s: %s\n", child->name, work); - cur_work = 0; - } else { - cur_work++; - } - } + cur_read = 0; + while (cur_read < len) { + work[cur_work] = read_data[cur_read++]; - if (cur_work) { + if (work[cur_work] == '\n') { work[cur_work] = '\0'; - ret = asprintf(&child->output, "%s", work); - if (ret == -1) - ksft_exit_fail_msg("Out of memory\n"); + ksft_print_msg("%s: %s\n", child->name, work); + cur_work = 0; + } else { + cur_work++; } } + if (cur_work) { + work[cur_work] = '\0'; + ret = asprintf(&child->output, "%s", work); + if (ret == -1) + ksft_exit_fail_msg("Out of memory\n"); + } + + return false; +} + +static void child_output(struct child_data *child, uint32_t events, + bool flush) +{ + bool read_more; + + if (events & EPOLLIN) { + do { + read_more = child_output_read(child); + } while (read_more); + } + if (events & EPOLLHUP) { close(child->stdout); child->stdout = -1; From d56f66d2bd4a7133e7d6a4c814c7582b4a7d1000 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 22 Sep 2022 15:24:00 +0100 Subject: [PATCH 124/126] arm64: defconfig: Add Coresight as module Add Coresight to defconfig so that build errors are caught. CONFIG_CORESIGHT_SOURCE_ETM4X is excluded because it depends on CONFIG_PID_IN_CONTEXTIDR which has a performance cost. Signed-off-by: James Clark Link: https://lore.kernel.org/r/20220922142400.478815-2-james.clark@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/configs/defconfig | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index ef3467092ded..d699933cab45 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1340,4 +1340,12 @@ CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set # CONFIG_FTRACE is not set +CONFIG_CORESIGHT=m +CONFIG_CORESIGHT_LINK_AND_SINK_TMC=m +CONFIG_CORESIGHT_CATU=m +CONFIG_CORESIGHT_SINK_TPIU=m +CONFIG_CORESIGHT_SINK_ETBV10=m +CONFIG_CORESIGHT_STM=m +CONFIG_CORESIGHT_CPU_DEBUG=m +CONFIG_CORESIGHT_CTI=m CONFIG_MEMTEST=y From a0caebbd04602cb7d28f6f316213a915ffab92a2 Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Tue, 27 Sep 2022 10:24:35 +0800 Subject: [PATCH 125/126] arm64/kprobe: Optimize the performance of patching single-step slot Single-step slot would not be used until kprobe is enabled, that means no race condition occurs on it under SMP, hence it is safe to pacth ss slot without stopping machine. Since I and D caches are coherent within single-step slot from aarch64_insn_patch_text_nosync(), hence no need to do it again via flush_icache_range(). Acked-by: Will Deacon Acked-by: Masami Hiramatsu (Google) Signed-off-by: Liao Chang Link: https://lore.kernel.org/r/20220927022435.129965-4-liaochang1@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d1d182320245..c9e4d0720285 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -44,13 +44,28 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { kprobe_opcode_t *addr = p->ainsn.api.insn; - void *addrs[] = {addr, addr + 1}; - u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS}; - /* prepare insn slot */ - aarch64_insn_patch_text(addrs, insns, 2); - - flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE)); + /* + * Prepare insn slot, Mark Rutland points out it depends on a coupe of + * subtleties: + * + * - That the I-cache maintenance for these instructions is complete + * *before* the kprobe BRK is written (and aarch64_insn_patch_text_nosync() + * ensures this, but just omits causing a Context-Synchronization-Event + * on all CPUS). + * + * - That the kprobe BRK results in an exception (and consequently a + * Context-Synchronoization-Event), which ensures that the CPU will + * fetch thesingle-step slot instructions *after* this, ensuring that + * the new instructions are used + * + * It supposes to place ISB after patching to guarantee I-cache maintenance + * is observed on all CPUS, however, single-step slot is installed in + * the BRK exception handler, so it is unnecessary to generate + * Contex-Synchronization-Event via ISB again. + */ + aarch64_insn_patch_text_nosync(addr, p->opcode); + aarch64_insn_patch_text_nosync(addr + 1, BRK64_OPCODE_KPROBES_SS); /* * Needs restoring of return address after stepping xol. From d2995249a2f72333a4ab4922ff3c42a76c023791 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 3 Oct 2022 12:37:59 -0700 Subject: [PATCH 126/126] arm64: alternatives: Use vdso/bits.h instead of linux/bits.h When building with CONFIG_LTO after commit ba00c2a04fa5 ("arm64: fix the build with binutils 2.27"), the following build error occurs: In file included from arch/arm64/kernel/module-plts.c:6: In file included from include/linux/elf.h:6: In file included from arch/arm64/include/asm/elf.h:8: In file included from arch/arm64/include/asm/hwcap.h:9: In file included from arch/arm64/include/asm/cpufeature.h:9: In file included from arch/arm64/include/asm/alternative-macros.h:5: In file included from include/linux/bits.h:22: In file included from include/linux/build_bug.h:5: In file included from include/linux/compiler.h:248: In file included from arch/arm64/include/asm/rwonce.h:71: include/asm-generic/rwonce.h:67:9: error: expected string literal in 'asm' return __READ_ONCE(*(unsigned long *)addr); ^ arch/arm64/include/asm/rwonce.h:43:16: note: expanded from macro '__READ_ONCE' asm volatile(__LOAD_RCPC(b, %w0, %1) \ ^ arch/arm64/include/asm/rwonce.h:17:2: note: expanded from macro '__LOAD_RCPC' ALTERNATIVE( \ ^ Similar to the issue resolved by commit 0072dc1b53c3 ("arm64: avoid BUILD_BUG_ON() in alternative-macros"), there is a circular include dependency through when CONFIG_LTO is enabled due to appearing in the include chain before the contents of , which results in ALTERNATIVE() not getting expanded properly because it has not been defined yet. Avoid this issue by including , which includes the definition of the BIT() macro, instead of , as BIT() is the only macro from bits.h that is relevant to this header. Fixes: ba00c2a04fa5 ("arm64: fix the build with binutils 2.27") Link: https://github.com/ClangBuiltLinux/linux/issues/1728 Signed-off-by: Nathan Chancellor Tested-by: Will Deacon Link: https://lore.kernel.org/r/20221003193759.1141709-1-nathan@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative-macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index b5ac0b85c9bb..3622e9f4fb44 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -2,8 +2,8 @@ #ifndef __ASM_ALTERNATIVE_MACROS_H #define __ASM_ALTERNATIVE_MACROS_H -#include #include +#include #include #include