From 914d52e46490b6599b7f03fad233f4f19bf23cf7 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 4 Jul 2019 16:18:15 +0200 Subject: [PATCH 01/33] s390: implement perf_arch_fetch_caller_regs On s390 bpf_get_stack_raw_tp() returns 0 entries for both kernel and user stacks. While there is no practical unwinding solution for userspace on s390 at this moment, there certainly is a kernel unwinder. However, it is not properly integrated with BPF. In order to start unwinding, bpf_get_stack_raw_tp() obtains the current kernel register values using perf_fetch_caller_regs(), which is not implemented for s390. The actual unwinding then happens by passing those registers to perf_callchain_kernel(). Implement perf_arch_fetch_caller_regs() for s390, where __builtin_frame_address(0) points to back_chain. Signed-off-by: Ilya Leoshkevich Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 4652ffffe0b2..b9da71632827 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -12,6 +12,7 @@ #include #include +#include /* Per-CPU flags for PMU states */ #define PMU_F_RESERVED 0x1000 @@ -73,4 +74,10 @@ struct perf_sf_sde_regs { #define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS) #define SAMPLE_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE) +#define perf_arch_fetch_caller_regs(regs, __ip) do { \ + (regs)->psw.addr = (__ip); \ + (regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) - \ + offsetof(struct stack_frame, back_chain); \ +} while (0) + #endif /* _ASM_S390_PERF_EVENT_H */ From 6733775a92eacd612ac88afa0fd922e4ffeb2bc7 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 20 Nov 2019 11:44:31 +0100 Subject: [PATCH 02/33] s390/zcrypt: handle new reply code FILTERED_BY_HYPERVISOR This patch introduces support for a new architectured reply code 0x8B indicating that a hypervisor layer (if any) has rejected an ap message. Linux may run as a guest on top of a hypervisor like zVM or KVM. So the crypto hardware seen by the ap bus may be restricted by the hypervisor for example only a subset like only clear key crypto requests may be supported. Other requests will be filtered out - rejected by the hypervisor. The new reply code 0x8B will appear in such cases and needs to get recognized by the ap bus and zcrypt device driver zoo. Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_error.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index f34ee41cbed8..4f4dd9d727c9 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -61,6 +61,7 @@ struct error_hdr { #define REP82_ERROR_EVEN_MOD_IN_OPND 0x85 #define REP82_ERROR_RESERVED_FIELD 0x88 #define REP82_ERROR_INVALID_DOMAIN_PENDING 0x8A +#define REP82_ERROR_FILTERED_BY_HYPERVISOR 0x8B #define REP82_ERROR_TRANSPORT_FAIL 0x90 #define REP82_ERROR_PACKET_TRUNCATED 0xA0 #define REP82_ERROR_ZERO_BUFFER_LEN 0xB0 @@ -91,6 +92,7 @@ static inline int convert_error(struct zcrypt_queue *zq, case REP82_ERROR_INVALID_DOMAIN_PRECHECK: case REP82_ERROR_INVALID_DOMAIN_PENDING: case REP82_ERROR_INVALID_SPECIAL_CMD: + case REP82_ERROR_FILTERED_BY_HYPERVISOR: // REP88_ERROR_INVALID_KEY // '82' CEX2A // REP88_ERROR_OPERAND // '84' CEX2A // REP88_ERROR_OPERAND_EVEN_MOD // '85' CEX2A From a2308c11ecbc3471ebb7435ee8075815b1502ef0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 18 Nov 2019 13:09:52 +0100 Subject: [PATCH 03/33] s390/smp,vdso: fix ASCE handling When a secondary CPU is brought up it must initialize its control registers. CPU A which triggers that a secondary CPU B is brought up stores its control register contents into the lowcore of new CPU B, which then loads these values on startup. This is problematic in various ways: the control register which contains the home space ASCE will correctly contain the kernel ASCE; however control registers for primary and secondary ASCEs are initialized with whatever values were present in CPU A. Typically: - the primary ASCE will contain the user process ASCE of the process that triggered onlining of CPU B. - the secondary ASCE will contain the percpu VDSO ASCE of CPU A. Due to lazy ASCE handling we may also end up with other combinations. When then CPU B switches to a different process (!= idle) it will fixup the primary ASCE. However the problem is that the (wrong) ASCE from CPU A was loaded into control register 1: as soon as an ASCE is attached (aka loaded) a CPU is free to generate TLB entries using that address space. Even though it is very unlikey that CPU B will actually generate such entries, this could result in TLB entries of the address space of the process that ran on CPU A. These entries shouldn't exist at all and could cause problems later on. Furthermore the secondary ASCE of CPU B will not be updated correctly. This means that processes may see wrong results or even crash if they access VDSO data on CPU B. The correct VDSO ASCE will eventually be loaded on return to user space as soon as the kernel executed a call to strnlen_user or an atomic futex operation on CPU B. Fix both issues by intializing the to be loaded control register contents with the correct ASCEs and also enforce (re-)loading of the ASCEs upon first context switch and return to user space. Fixes: 0aaba41b58bc ("s390: remove all code using the access register mode") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/smp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6acdcf1d4074..06dddd7c4290 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -262,10 +262,13 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; + lc->user_asce = S390_lowcore.kernel_asce; lc->machine_flags = S390_lowcore.machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; __ctl_store(lc->cregs_save_area, 0, 15); + lc->cregs_save_area[1] = lc->kernel_asce; + lc->cregs_save_area[7] = lc->vdso_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, sizeof(lc->stfle_fac_list)); @@ -844,6 +847,8 @@ static void smp_init_secondary(void) S390_lowcore.last_update_clock = get_tod_clock(); restore_access_regs(S390_lowcore.access_regs_save_area); + set_cpu_flag(CIF_ASCE_PRIMARY); + set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); preempt_disable(); init_cpu_timer(); From 5a5525b0488ce31e19065f8527dbf50266b5b712 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 18 Nov 2019 09:38:37 +0100 Subject: [PATCH 04/33] s390/vdso: fix getcpu getcpu reads the required values for cpu and node with two instructions. This might lead to an inconsistent result if user space gets preempted and migrated to a different CPU between the two instructions. Fix this by using just a single instruction to read both values at once. This is currently rather a theoretical bug, since there is no real NUMA support available (except for NUMA emulation). Reviewed-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/vdso.h | 13 +++++++++++-- arch/s390/kernel/asm-offsets.c | 3 +-- arch/s390/kernel/vdso32/getcpu.S | 4 +--- arch/s390/kernel/vdso64/getcpu.S | 4 +--- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 169d7604eb80..3bcfdeb01395 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -41,8 +41,17 @@ struct vdso_data { struct vdso_per_cpu_data { __u64 ectg_timer_base; __u64 ectg_user_time; - __u32 cpu_nr; - __u32 node_id; + /* + * Note: node_id and cpu_nr must be at adjacent memory locations. + * VDSO userspace must read both values with a single instruction. + */ + union { + __u64 getcpu_val; + struct { + __u32 node_id; + __u32 cpu_nr; + }; + }; }; extern struct vdso_data *vdso_data; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 41ac4ad21311..ce33406cfe83 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -78,8 +78,7 @@ int main(void) OFFSET(__VDSO_TS_END, vdso_data, ts_end); OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); - OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr); - OFFSET(__VDSO_NODE_ID, vdso_per_cpu_data, node_id); + OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); BLANK(); /* constants used by the vdso */ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S index 25515f3fbcea..dc79e169f0ad 100644 --- a/arch/s390/kernel/vdso32/getcpu.S +++ b/arch/s390/kernel/vdso32/getcpu.S @@ -16,10 +16,8 @@ .type __kernel_getcpu,@function __kernel_getcpu: CFI_STARTPROC - la %r4,0 sacf 256 - l %r5,__VDSO_CPU_NR(%r4) - l %r4,__VDSO_NODE_ID(%r4) + lm %r4,%r5,__VDSO_GETCPU_VAL(%r0) sacf 0 ltr %r2,%r2 jz 2f diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S index 2446e9dac8ab..3c04f7328500 100644 --- a/arch/s390/kernel/vdso64/getcpu.S +++ b/arch/s390/kernel/vdso64/getcpu.S @@ -16,10 +16,8 @@ .type __kernel_getcpu,@function __kernel_getcpu: CFI_STARTPROC - la %r4,0 sacf 256 - l %r5,__VDSO_CPU_NR(%r4) - l %r4,__VDSO_NODE_ID(%r4) + lm %r4,%r5,__VDSO_GETCPU_VAL(%r0) sacf 0 ltgr %r2,%r2 jz 2f From c2e06e15ad92bad94b54df257c683f7e715238a1 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 12:08:44 +0100 Subject: [PATCH 05/33] s390: always inline disabled_wait disabled_wait uses _THIS_IP_ and assumes that compiler would inline it. Make sure this assumption is always correct by utilizing __always_inline. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 881fc37c11c6..361ef5eda468 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -310,7 +310,7 @@ void enabled_wait(void); /* * Function to drop a processor into disabled wait state */ -static inline void __noreturn disabled_wait(void) +static __always_inline void __noreturn disabled_wait(void) { psw_t psw; From 7f28dad395243c5026d649136823bbc40029a828 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 12:19:16 +0100 Subject: [PATCH 06/33] s390: disable preemption when switching to nodat stack with CALL_ON_STACK Make sure preemption is disabled when temporary switching to nodat stack with CALL_ON_STACK helper, because nodat stack is per cpu. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/machine_kexec.c | 2 ++ arch/s390/mm/maccess.c | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 444a19125a81..dcaadceaf6ef 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -164,7 +164,9 @@ static bool kdump_csum_valid(struct kimage *image) #ifdef CONFIG_CRASH_DUMP int rc; + preempt_disable(); rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image); + preempt_enable(); return rc == 0; #else return false; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 59ad7997fed1..de7ca4b6718f 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -119,9 +119,15 @@ static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, */ int memcpy_real(void *dest, void *src, size_t count) { - if (S390_lowcore.nodat_stack != 0) - return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, - 3, dest, src, count); + int rc; + + if (S390_lowcore.nodat_stack != 0) { + preempt_disable(); + rc = CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, 3, + dest, src, count); + preempt_enable(); + return rc; + } /* * This is a really early memcpy_real call, the stacks are * not set up yet. Just call _memcpy_real on the early boot From 103b4cca60d2c8c51f1290cc984b7046ccb8b46d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 12:35:34 +0100 Subject: [PATCH 07/33] s390/unwind: unify task is current checks Avoid mixture of task == NULL and task == current meaning the same thing and simply always initialize task with current in unwind_start. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stacktrace.h | 2 +- arch/s390/include/asm/unwind.h | 3 ++- arch/s390/kernel/dumpstack.c | 4 ---- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index fee40212af11..0ae4bbf7779c 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -38,7 +38,7 @@ static inline unsigned long get_stack_pointer(struct task_struct *task, { if (regs) return (unsigned long) kernel_stack_pointer(regs); - if (!task || task == current) + if (task == current) return current_stack_pointer(); return (unsigned long) task->thread.ksp; } diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index eaaefeceef6f..a2d8dd766987 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -61,7 +61,8 @@ static inline void unwind_start(struct unwind_state *state, struct pt_regs *regs, unsigned long sp) { - sp = sp ? : get_stack_pointer(task, regs); + task = task ?: current; + sp = sp ?: get_stack_pointer(task, regs); __unwind_start(state, task, regs, sp); } diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 34bdc60c0b11..fc442aec0d96 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -93,8 +93,6 @@ int get_stack_info(unsigned long sp, struct task_struct *task, if (!sp) goto unknown; - task = task ? : current; - /* Check per-task stack */ if (in_task_stack(sp, task, info)) goto recursion_check; @@ -128,8 +126,6 @@ void show_stack(struct task_struct *task, unsigned long *stack) struct unwind_state state; printk("Call Trace:\n"); - if (!task) - task = current; unwind_for_each_frame(&state, task, NULL, (unsigned long) stack) printk(state.reliable ? " [<%016lx>] %pSR \n" : "([<%016lx>] %pSR)\n", From 7579425777c0d802237e0d59ae395e8cf60723e1 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 12:47:52 +0100 Subject: [PATCH 08/33] s390: correct CALL_ON_STACK back_chain saving Currently CALL_ON_STACK saves r15 as back_chain in the first stack frame of the stack we about to switch to. But if a function which uses CALL_ON_STACK calls other function it allocates a stack frame for a callee. In this case r15 is pointing to a callee stack frame and not a stack frame of function itself. This results in dummy unwinding entry with random sp and ip values. Introduce and utilize current_frame_address macro to get an address of actual function stack frame. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stacktrace.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 0ae4bbf7779c..bb854e33e460 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -62,6 +62,17 @@ struct stack_frame { }; #endif +/* + * Unlike current_stack_pointer() which simply returns current value of %r15 + * current_frame_address() returns function stack frame address, which matches + * %r15 upon function invocation. It may differ from %r15 later if function + * allocates stack for local variables or new stack frame to call other + * functions. + */ +#define current_frame_address() \ + ((unsigned long)__builtin_frame_address(0) - \ + offsetof(struct stack_frame, back_chain)) + #define CALL_ARGS_0() \ register unsigned long r2 asm("2") #define CALL_ARGS_1(arg1) \ @@ -95,18 +106,20 @@ struct stack_frame { #define CALL_ON_STACK(fn, stack, nr, args...) \ ({ \ + unsigned long frame = current_frame_address(); \ CALL_ARGS_##nr(args); \ unsigned long prev; \ \ asm volatile( \ " la %[_prev],0(15)\n" \ " la 15,0(%[_stack])\n" \ - " stg %[_prev],%[_bc](15)\n" \ + " stg %[_frame],%[_bc](15)\n" \ " brasl 14,%[_fn]\n" \ " la 15,0(%[_prev])\n" \ : [_prev] "=&a" (prev), CALL_FMT_##nr \ [_stack] "a" (stack), \ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ + [_frame] "d" (frame), \ [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ r2; \ }) From 7bcaad1f9fac889f5fcd1a383acf7e00d006da41 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 13:12:57 +0100 Subject: [PATCH 09/33] s390: avoid misusing CALL_ON_STACK for task stack setup CALL_ON_STACK is intended to be used for temporary stack switching with potential return to the caller. When CALL_ON_STACK is misused to switch from nodat stack to task stack back_chain information would later lead stack unwinder from task stack into (per cpu) nodat stack which is reused for other purposes. This would yield confusing unwinding result or errors. To avoid that introduce CALL_ON_STACK_NORETURN to be used instead. It makes sure that back_chain is zeroed and unwinder finishes gracefully ending up at task pt_regs. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stacktrace.h | 11 +++++++++++ arch/s390/kernel/setup.c | 9 +-------- arch/s390/kernel/smp.c | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index bb854e33e460..4f3dd1c86c0d 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -124,4 +124,15 @@ struct stack_frame { r2; \ }) +#define CALL_ON_STACK_NORETURN(fn, stack) \ +({ \ + asm volatile( \ + " la 15,0(%[_stack])\n" \ + " xc %[_bc](8,15),%[_bc](15)\n" \ + " brasl 14,%[_fn]\n" \ + ::[_bc] "i" (offsetof(struct stack_frame, back_chain)), \ + [_stack] "a" (stack), [_fn] "X" (fn)); \ + BUG(); \ +}) + #endif /* _ASM_S390_STACKTRACE_H */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 3ff291bc63b7..9cbf490fd162 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -355,7 +355,6 @@ early_initcall(async_stack_realloc); void __init arch_call_rest_init(void) { - struct stack_frame *frame; unsigned long stack; stack = stack_alloc(); @@ -368,13 +367,7 @@ void __init arch_call_rest_init(void) set_task_stack_end_magic(current); stack += STACK_INIT_OFFSET; S390_lowcore.kernel_stack = stack; - frame = (struct stack_frame *) stack; - memset(frame, 0, sizeof(*frame)); - /* Branch to rest_init on the new stack, never returns */ - asm volatile( - " la 15,0(%[_frame])\n" - " jg rest_init\n" - : : [_frame] "a" (frame)); + CALL_ON_STACK_NORETURN(rest_init, stack); } static void __init setup_lowcore_dat_off(void) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 06dddd7c4290..2794cad9312e 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -876,7 +876,7 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid) S390_lowcore.restart_source = -1UL; __ctl_load(S390_lowcore.cregs_save_area, 0, 15); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); - CALL_ON_STACK(smp_init_secondary, S390_lowcore.kernel_stack, 0); + CALL_ON_STACK_NORETURN(smp_init_secondary, S390_lowcore.kernel_stack); } /* Upping and downing of CPUs */ From 67f5593419878798bb306632cdca0698a2dd3cbd Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 15:53:30 +0100 Subject: [PATCH 10/33] s390/unwind: report an error if pt_regs are not on stack If unwinder is looking at pt_regs which is not on stack then something went wrong and an error has to be reported rather than successful unwinding termination. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/unwind_bc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index fa111d3d378f..fd90b6e21663 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -76,7 +76,7 @@ bool unwind_next_frame(struct unwind_state *state) /* No back-chain, look for a pt_regs structure */ sp = state->sp + STACK_FRAME_OVERHEAD; if (!on_stack(info, sp, sizeof(struct pt_regs))) - goto out_stop; + goto out_err; regs = (struct pt_regs *) sp; if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE) goto out_stop; From 97806dfb6f3838ee4b7bc69e6f160d83eadbc74a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 15:58:42 +0100 Subject: [PATCH 11/33] s390/unwind: make reuse_sp default when unwinding pt_regs Currently unwinder yields 2 entries when pt_regs are met: sp="address of pt_regs itself" ip=pt_regs->psw sp=pt_regs->gprs[15] ip="r14 from stack frame pointed by pt_regs->gprs[15]" And neither of those 2 states (combination of sp and ip) ever happened. reuse_sp has been introduced by commit a1d863ac3e10 ("s390/unwind: fix mixing regs and sp"). reuse_sp=true makes unwinder keen to produce the following result, when pt_regs are given (as an arg to unwind_start): sp=pt_regs->gprs[15] ip=pt_regs->psw sp=pt_regs->gprs[15] ip="r14 from stack frame pointed by pt_regs->gprs[15]" The first state is an actual state in which a task was when pt_regs were collected. The second state is marked unreliable and is for debugging purposes to cover the case when a task has been interrupted in between stack frame allocation and writing back_chain - in this case r14 might show an actual caller. Make unwinder behaviour enabled via reuse_sp=true default and drop the special case handling. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/unwind.h | 1 - arch/s390/kernel/unwind_bc.c | 21 +++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index a2d8dd766987..5d6c8fe7a271 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -35,7 +35,6 @@ struct unwind_state { struct task_struct *task; struct pt_regs *regs; unsigned long sp, ip; - bool reuse_sp; int graph_idx; bool reliable; bool error; diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index fd90b6e21663..ac6cfab567d1 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -46,16 +46,7 @@ bool unwind_next_frame(struct unwind_state *state) regs = state->regs; if (unlikely(regs)) { - if (state->reuse_sp) { - sp = state->sp; - state->reuse_sp = false; - } else { - sp = READ_ONCE_NOCHECK(regs->gprs[15]); - if (unlikely(outside_of_stack(state, sp))) { - if (!update_stack_info(state, sp)) - goto out_err; - } - } + sp = state->sp; sf = (struct stack_frame *) sp; ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; @@ -81,6 +72,11 @@ bool unwind_next_frame(struct unwind_state *state) if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE) goto out_stop; ip = READ_ONCE_NOCHECK(regs->psw.addr); + sp = READ_ONCE_NOCHECK(regs->gprs[15]); + if (unlikely(outside_of_stack(state, sp))) { + if (!update_stack_info(state, sp)) + goto out_err; + } reliable = true; } } @@ -107,7 +103,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, { struct stack_info *info = &state->stack_info; unsigned long *mask = &state->stack_mask; - bool reliable, reuse_sp; + bool reliable; struct stack_frame *sf; unsigned long ip; @@ -134,12 +130,10 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, if (regs) { ip = READ_ONCE_NOCHECK(regs->psw.addr); reliable = true; - reuse_sp = true; } else { sf = (struct stack_frame *) sp; ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; - reuse_sp = false; } ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL); @@ -148,6 +142,5 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, state->sp = sp; state->ip = ip; state->reliable = reliable; - state->reuse_sp = reuse_sp; } EXPORT_SYMBOL_GPL(__unwind_start); From cb7948e8c3f18f7ff0ab7d0fa1e6b108d938cdd6 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 17:15:35 +0100 Subject: [PATCH 12/33] s390/head64: correct init_task stack setup Add missing allocation of pt_regs at the bottom of the stack. This makes it consistent with other stack setup cases and also what stack unwinder expects. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/head64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index b9e585f528a6..8b88dbbda7df 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -31,7 +31,7 @@ ENTRY(startup_continue) # larl %r14,init_task stg %r14,__LC_CURRENT - larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD + larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE #ifdef CONFIG_KASAN brasl %r14,kasan_early_init #endif From e76e69611e944ecc38aaf8fe3a7bebdc3c5daf84 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 16:49:13 +0100 Subject: [PATCH 13/33] s390/unwind: stop gracefully at task pt_regs Consider reaching task pt_regs graceful unwinder termination. Task pt_regs itself never contains a valid state to which a task might return within the kernel context (user task pt_regs is a special case). Since we already avoid printing user task pt_regs and in most cases we don't even bother filling task pt_regs psw and r15 with something reasonable simply skip task pt_regs altogether. With this change unwind_error() now accurately represent whether unwinder reached task pt_regs successfully or failed along the way. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/unwind_bc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index ac6cfab567d1..c5ebb8a4cdd6 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -36,6 +36,12 @@ static bool update_stack_info(struct unwind_state *state, unsigned long sp) return true; } +static inline bool is_task_pt_regs(struct unwind_state *state, + struct pt_regs *regs) +{ + return task_pt_regs(state->task) == regs; +} + bool unwind_next_frame(struct unwind_state *state) { struct stack_info *info = &state->stack_info; @@ -69,7 +75,7 @@ bool unwind_next_frame(struct unwind_state *state) if (!on_stack(info, sp, sizeof(struct pt_regs))) goto out_err; regs = (struct pt_regs *) sp; - if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE) + if (is_task_pt_regs(state, regs)) goto out_stop; ip = READ_ONCE_NOCHECK(regs->psw.addr); sp = READ_ONCE_NOCHECK(regs->gprs[15]); From a9f2f6865d784477e1c7b59269d3a384abafd9ca Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 19 Nov 2019 12:30:53 +0100 Subject: [PATCH 14/33] s390/kaslr: store KASLR offset for early dumps The KASLR offset is added to vmcoreinfo in arch_crash_save_vmcoreinfo(), so that it can be found by crash when processing kernel dumps. However, arch_crash_save_vmcoreinfo() is called during a subsys_initcall, so if the kernel crashes before that, we have no vmcoreinfo and no KASLR offset. Fix this by storing the KASLR offset in the lowcore, where the vmcore_info pointer will be stored, and where it can be found by crash. In order to make it distinguishable from a real vmcore_info pointer, mark it as uneven (KASLR offset itself is aligned to THREAD_SIZE). When arch_crash_save_vmcoreinfo() stores the real vmcore_info pointer in the lowcore, it overwrites the KASLR offset. At that point, the KASLR offset is not yet added to vmcoreinfo, so we also need to move the mem_assign_absolute() behind the vmcoreinfo_append_str(). Fixes: b2d24b97b2a9 ("s390/kernel: add support for kernel address space layout randomization (KASLR)") Cc: # v5.2+ Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 5 +++++ arch/s390/kernel/machine_kexec.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index fbd341ea03b8..3b3a11f95269 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -170,6 +170,11 @@ void startup_kernel(void) handle_relocs(__kaslr_offset); if (__kaslr_offset) { + /* + * Save KASLR offset for early dumps, before vmcore_info is set. + * Mark as uneven to distinguish from real vmcore_info pointer. + */ + S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL; /* Clear non-relocated kernel */ if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) memset(img, 0, vmlinux.image_size); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index dcaadceaf6ef..cb8b1cc285c9 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -256,10 +256,10 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); - mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); vmcoreinfo_append_str("SDMA=%lx\n", __sdma); vmcoreinfo_append_str("EDMA=%lx\n", __edma); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); } void machine_shutdown(void) From 532da3de70b207be2b98cd5fb966e3915c8872c3 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 21 Nov 2019 15:46:02 +0100 Subject: [PATCH 15/33] s390/cpum_sf: Replace function name in debug statements Replace hard coded function names in debug statements by the "%s ...", __func__ construct suggested by checkpatch.pl script. Use consistent debug print format of the form variable blank value. Also add leading 0x for all hex values. Print allocated page addresses consistantly as hex numbers with leading 0x. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/cpu_mf.h | 2 +- arch/s390/kernel/perf_cpum_sf.c | 107 +++++++++++++++++--------------- 2 files changed, 57 insertions(+), 52 deletions(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 819803a97c2b..0d90cbeb89b4 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -313,7 +313,7 @@ static inline unsigned long *trailer_entry_ptr(unsigned long v) return (unsigned long *) ret; } -/* Return if the entry in the sample data block table (sdbt) +/* Return true if the entry in the sample data block table (sdbt) * is a link to the next sdbt */ static inline int is_link_entry(unsigned long *s) { diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 69506fdbd9a1..4414094550a4 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -156,8 +156,8 @@ static void free_sampling_buffer(struct sf_buffer *sfb) } } - debug_sprintf_event(sfdbg, 5, "%s freed sdbt %p\n", __func__, - sfb->sdbt); + debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__, + (unsigned long)sfb->sdbt); memset(sfb, 0, sizeof(*sfb)); } @@ -213,9 +213,10 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, */ if (sfb->sdbt != get_next_sdbt(tail)) { debug_sprintf_event(sfdbg, 3, "%s: " - "sampling buffer is not linked: origin %p" - " tail %p\n", __func__, - (void *)sfb->sdbt, (void *)tail); + "sampling buffer is not linked: origin %#lx" + " tail %#lx\n", __func__, + (unsigned long)sfb->sdbt, + (unsigned long)tail); return -EINVAL; } @@ -251,8 +252,8 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, *tail = (unsigned long) sfb->sdbt + 1; sfb->tail = tail; - debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" - " settings: sdbt %lu sdb %lu\n", + debug_sprintf_event(sfdbg, 4, "%s: new buffer" + " settings: sdbt %lu sdb %lu\n", __func__, sfb->num_sdbt, sfb->num_sdb); return rc; } @@ -292,12 +293,13 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); if (rc) { free_sampling_buffer(sfb); - debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " - "realloc_sampling_buffer failed with rc %i\n", rc); + debug_sprintf_event(sfdbg, 4, "%s: " + "realloc_sampling_buffer failed with rc %i\n", + __func__, rc); } else debug_sprintf_event(sfdbg, 4, - "alloc_sampling_buffer: tear %p dear %p\n", - sfb->sdbt, (void *)*sfb->sdbt); + "%s: tear %#lx dear %#lx\n", __func__, + (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt); return rc; } @@ -465,8 +467,8 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, if (num) sfb_account_allocs(num, hwc); - debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow %llu ratio %lu" - " num %lu\n", OVERFLOW_REG(hwc), ratio, num); + debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n", + __func__, OVERFLOW_REG(hwc), ratio, num); OVERFLOW_REG(hwc) = 0; } @@ -504,13 +506,13 @@ static void extend_sampling_buffer(struct sf_buffer *sfb, */ rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); if (rc) - debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " - "failed with rc %i\n", rc); + debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n", + __func__, rc); if (sfb_has_pending_allocs(sfb, hwc)) - debug_sprintf_event(sfdbg, 5, "sfb: extend: " + debug_sprintf_event(sfdbg, 5, "%s: " "req %lu alloc %lu remaining %lu\n", - num, sfb->num_sdb - num_old, + __func__, num, sfb->num_sdb - num_old, sfb_pending_allocs(sfb, hwc)); } @@ -698,9 +700,9 @@ static unsigned long getrate(bool freq, unsigned long sample, */ if (sample_rate_to_freq(si, rate) > sysctl_perf_event_sample_rate) { - debug_sprintf_event(sfdbg, 1, + debug_sprintf_event(sfdbg, 1, "%s: " "Sampling rate exceeds maximum " - "perf sample rate\n"); + "perf sample rate\n", __func__); rate = 0; } } @@ -745,10 +747,9 @@ static int __hw_perf_event_init_rate(struct perf_event *event, attr->sample_period = rate; SAMPL_RATE(hwc) = rate; hw_init_period(hwc, SAMPL_RATE(hwc)); - debug_sprintf_event(sfdbg, 4, "__hw_perf_event_init_rate:" - "cpu:%d period:%#llx freq:%d,%#lx\n", event->cpu, - event->attr.sample_period, event->attr.freq, - SAMPLE_FREQ_MODE(hwc)); + debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n", + __func__, event->cpu, event->attr.sample_period, + event->attr.freq, SAMPLE_FREQ_MODE(hwc)); return 0; } @@ -973,12 +974,11 @@ static void cpumsf_pmu_enable(struct pmu *pmu) /* Load current program parameter */ lpp(&S390_lowcore.lpp); - debug_sprintf_event(sfdbg, 6, "pmu_enable: es %i cs %i ed %i cd %i " - "interval %#lx tear %p dear %p\n", + debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i " + "interval %#lx tear %#lx dear %#lx\n", __func__, cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed, cpuhw->lsctl.cd, cpuhw->lsctl.interval, - (void *) cpuhw->lsctl.tear, - (void *) cpuhw->lsctl.dear); + cpuhw->lsctl.tear, cpuhw->lsctl.dear); } static void cpumsf_pmu_disable(struct pmu *pmu) @@ -1019,8 +1019,8 @@ static void cpumsf_pmu_disable(struct pmu *pmu) cpuhw->lsctl.dear = si.dear; } } else - debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " - "qsi() failed with err %i\n", err); + debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n", + __func__, err); cpuhw->flags &= ~PMU_F_ENABLED; } @@ -1265,9 +1265,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) sampl_overflow += te->overflow; /* Timestamps are valid for full sample-data-blocks only */ - debug_sprintf_event(sfdbg, 6, "%s: sdbt %p " + debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx " "overflow %llu timestamp %#llx\n", - __func__, sdbt, te->overflow, + __func__, (unsigned long)sdbt, te->overflow, (te->f) ? trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and @@ -1312,8 +1312,10 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) sampl_overflow, 1 + num_sdb); if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "%s: " - "overflow stats: sample %llu event %llu\n", - __func__, sampl_overflow, event_overflow); + "overflows: sample %llu event %llu" + " total %llu num_sdb %llu\n", + __func__, sampl_overflow, event_overflow, + OVERFLOW_REG(hwc), num_sdb); } #define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb) @@ -1424,10 +1426,10 @@ static int aux_output_begin(struct perf_output_handle *handle, cpuhw->lsctl.tear = base + offset * sizeof(unsigned long); cpuhw->lsctl.dear = aux->sdb_index[head]; - debug_sprintf_event(sfdbg, 6, "aux_output_begin: " + debug_sprintf_event(sfdbg, 6, "%s: " "head->alert_mark->empty_mark (num_alert, range)" "[%#lx -> %#lx -> %#lx] (%#lx, %#lx) " - "tear index %#lx, tear %#lx dear %#lx\n", + "tear index %#lx, tear %#lx dear %#lx\n", __func__, aux->head, aux->alert_mark, aux->empty_mark, AUX_SDB_NUM_ALERT(aux), range, head / CPUM_SF_SDB_PER_TABLE, @@ -1571,7 +1573,9 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) pr_err("The AUX buffer with %lu pages for the " "diagnostic-sampling mode is full\n", num_sdb); - debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n"); + debug_sprintf_event(sfdbg, 1, + "%s: AUX buffer used up\n", + __func__); break; } if (WARN_ON_ONCE(!aux)) @@ -1594,23 +1598,25 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) perf_aux_output_end(&cpuhw->handle, size); pr_err("Sample data caused the AUX buffer with %lu " "pages to overflow\n", num_sdb); - debug_sprintf_event(sfdbg, 1, "head %#lx range %#lx " - "overflow %#llx\n", + debug_sprintf_event(sfdbg, 1, "%s: head %#lx range %#lx " + "overflow %#llx\n", __func__, aux->head, range, overflow); } else { size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; perf_aux_output_end(&cpuhw->handle, size); - debug_sprintf_event(sfdbg, 6, "head %#lx alert %#lx " + debug_sprintf_event(sfdbg, 6, "%s: head %#lx alert %#lx " "already full, try another\n", + __func__, aux->head, aux->alert_mark); } } if (done) - debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: " + debug_sprintf_event(sfdbg, 6, "%s: aux_reset_buffer " "[%#lx -> %#lx -> %#lx] (%#lx, %#lx)\n", - aux->head, aux->alert_mark, aux->empty_mark, - AUX_SDB_NUM_ALERT(aux), range); + __func__, aux->head, aux->alert_mark, + aux->empty_mark, AUX_SDB_NUM_ALERT(aux), + range); } /* @@ -1633,8 +1639,8 @@ static void aux_buffer_free(void *data) kfree(aux->sdb_index); kfree(aux); - debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free " - "%lu SDBTs\n", num_sdbt); + debug_sprintf_event(sfdbg, 4, "%s: free " + "%lu SDBTs\n", __func__, num_sdbt); } static void aux_sdb_init(unsigned long sdb) @@ -1742,9 +1748,8 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages, */ aux->empty_mark = sfb->num_sdb - 1; - debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs" - " and %lu SDBs\n", - sfb->num_sdbt, sfb->num_sdb); + debug_sprintf_event(sfdbg, 4, "%s: setup %lu SDBTs and %lu SDBs\n", + __func__, sfb->num_sdbt, sfb->num_sdb); return aux; @@ -1797,9 +1802,9 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value) event->attr.sample_period = rate; SAMPL_RATE(&event->hw) = rate; hw_init_period(&event->hw, SAMPL_RATE(&event->hw)); - debug_sprintf_event(sfdbg, 4, "cpumsf_pmu_check_period:" - "cpu:%d value:%#llx period:%#llx freq:%d\n", - event->cpu, value, + debug_sprintf_event(sfdbg, 4, "%s:" + " cpu %d value %#llx period %#llx freq %d\n", + __func__, event->cpu, value, event->attr.sample_period, do_freq); return 0; } @@ -2030,7 +2035,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code, /* Report measurement alerts only for non-PRA codes */ if (alert != CPU_MF_INT_SF_PRA) - debug_sprintf_event(sfdbg, 6, "measurement alert: %#x\n", + debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__, alert); /* Sampling authorization change request */ From c17a7c6ee8177e0da998784c06f37fc093507c5b Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 22 Nov 2019 13:42:55 +0100 Subject: [PATCH 16/33] s390/cpum_sf: Remove unnecessary check for pending SDBs In interrupt handling the function extend_sampling_buffer() is called after checking for a possibly extension. This check is not necessary as the called function itself performs this check again. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_sf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 4414094550a4..dc0ac098a465 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -952,8 +952,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu) * buffer extents */ sfb_account_overflows(cpuhw, hwc); - if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) - extend_sampling_buffer(&cpuhw->sfb, hwc); + extend_sampling_buffer(&cpuhw->sfb, hwc); } /* Rate may be adjusted with ioctl() */ cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw); From 7dd6b199df46e871e6e0d0cd7e4f71dc07dfd53c Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 22 Nov 2019 15:29:54 +0100 Subject: [PATCH 17/33] s390/cpum_sf: Use TEAR_REG macro consistantly The macro TEAR_REG() saves the last used SDBT address in the perf_hw_event structure. This is also done by function hw_reset_registers() which is a one-liner and simply uses macro TEAR_REG(). Remove function hw_reset_registers(), which is only used one time and use macro TEAR_REG() instead. This macro is used throughout the code anyway. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_sf.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index dc0ac098a465..cd9fb45eebd2 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -602,13 +602,6 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period) local64_set(&hwc->period_left, hwc->sample_period); } -static void hw_reset_registers(struct hw_perf_event *hwc, - unsigned long *sdbt_origin) -{ - /* (Re)set to first sample-data-block-table */ - TEAR_REG(hwc) = (unsigned long) sdbt_origin; -} - static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, unsigned long rate) { @@ -1879,7 +1872,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) if (!SAMPL_DIAG_MODE(&event->hw)) { cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; - hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt; } /* Ensure sampling functions are in the disabled state. If disabled, From 247f265fa502e7b17a0cb0cc330e055a36aafce4 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 22 Nov 2019 16:43:15 +0100 Subject: [PATCH 18/33] s390/cpum_sf: Check for SDBT and SDB consistency Each SBDT is located at a 4KB page and contains 512 entries. Each entry of a SDBT points to a SDB, a 4KB page containing sampled data. The last entry is a link to another SDBT page. When an event is created the function sequence executed is: __hw_perf_event_init() +--> allocate_buffers() +--> realloc_sampling_buffers() +---> alloc_sample_data_block() Both functions realloc_sampling_buffers() and alloc_sample_data_block() allocate pages and the allocation can fail. This is handled correctly and all allocated pages are freed and error -ENOMEM is returned to the top calling function. Finally the event is not created. Once the event has been created, the amount of initially allocated SDBT and SDB can be too low. This is detected during measurement interrupt handling, where the amount of lost samples is calculated. If the number of lost samples is too high considering sampling frequency and already allocated SBDs, the number of SDBs is enlarged during the next execution of cpumsf_pmu_enable(). If more SBDs need to be allocated, functions realloc_sampling_buffers() +---> alloc-sample_data_block() are called to allocate more pages. Page allocation may fail and the returned error is ignored. A SDBT and SDB setup already exists. However the modified SDBTs and SDBs might end up in a situation where the first entry of an SDBT does not point to an SDB, but another SDBT, basicly an SBDT without payload. This can not be handled by the interrupt handler, where an SDBT must have at least one entry pointing to an SBD. Add a check to avoid SDBTs with out payload (SDBs) when enlarging the buffer setup. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_sf.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index cd9fb45eebd2..c07fdcd73726 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -193,7 +193,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb, gfp_t gfp_flags) { int i, rc; - unsigned long *new, *tail; + unsigned long *new, *tail, *tail_prev = NULL; if (!sfb->sdbt || !sfb->tail) return -EINVAL; @@ -233,6 +233,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, sfb->num_sdbt++; /* Link current page to tail of chain */ *tail = (unsigned long)(void *) new + 1; + tail_prev = tail; tail = new; } @@ -242,10 +243,22 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, * issue, a new realloc call (if required) might succeed. */ rc = alloc_sample_data_block(tail, gfp_flags); - if (rc) + if (rc) { + /* Undo last SDBT. An SDBT with no SDB at its first + * entry but with an SDBT entry instead can not be + * handled by the interrupt handler code. + * Avoid this situation. + */ + if (tail_prev) { + sfb->num_sdbt--; + free_page((unsigned long) new); + tail = tail_prev; + } break; + } sfb->num_sdb++; tail++; + tail_prev = new = NULL; /* Allocated at least one SBD */ } /* Link sampling buffer to its origin */ From 794b8846dcdc0c6e23bbf4e5283415cab0caa9ac Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 28 Nov 2019 09:30:00 +0100 Subject: [PATCH 19/33] s390/pci: add error message for UID collision When UID checking was turned off during runtime in the underlying hypervisor, a PCI device may be attached with the same UID. This is already detected but happens silently. Add an error message so it can more easily be understood why a device was not added. Reviewed-by: Peter Oberparleiter Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index c7fea9bea8cb..4901f5d1c479 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -659,6 +660,8 @@ static int zpci_alloc_domain(struct zpci_dev *zdev) spin_lock(&zpci_domain_lock); if (test_bit(zdev->domain, zpci_domain)) { spin_unlock(&zpci_domain_lock); + pr_err("Adding PCI function %08x failed because domain %04x is already assigned\n", + zdev->fid, zdev->domain); return -EEXIST; } set_bit(zdev->domain, zpci_domain); From d497b7ec836d2c900993f1c43b2ddff5f8a6b129 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 28 Nov 2019 09:31:52 +0100 Subject: [PATCH 20/33] s390/pci: add error message on device number limit The config option CONFIG_PCI_NR_FUNCTIONS sets a limit on the number of PCI functions we can support. Previously on reaching this limit there was no indication why newly attached devices are not recognized by Linux which could be quite confusing. Thus this patch adds a pr_err() for this case. Reviewed-by: Peter Oberparleiter Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 4901f5d1c479..2e377f2b7b6d 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -673,6 +673,8 @@ static int zpci_alloc_domain(struct zpci_dev *zdev) zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES); if (zdev->domain == ZPCI_NR_DEVICES) { spin_unlock(&zpci_domain_lock); + pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n", + zdev->fid, ZPCI_NR_DEVICES); return -ENOSPC; } set_bit(zdev->domain, zpci_domain); From adcfb8cdc910bdd0b5d52d2ba88103af93dc43d3 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 26 Nov 2019 17:40:04 +0100 Subject: [PATCH 21/33] s390/unwind: always inline get_stack_pointer Always inline get_stack_pointer() to avoid potential problems due to compiler inlining decisions, i.e. getting stack pointer of get_stack_pointer() itself which is later reused. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stacktrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 4f3dd1c86c0d..4725315a9cb1 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -33,8 +33,8 @@ static inline bool on_stack(struct stack_info *info, return addr >= info->begin && addr + len <= info->end; } -static inline unsigned long get_stack_pointer(struct task_struct *task, - struct pt_regs *regs) +static __always_inline unsigned long get_stack_pointer(struct task_struct *task, + struct pt_regs *regs) { if (regs) return (unsigned long) kernel_stack_pointer(regs); From badbf39790798283f2424828e7b7bec3962f1e02 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 17 Oct 2019 15:09:08 +0200 Subject: [PATCH 22/33] s390/unwind: add a test for the internal API unwind_for_each_frame can take at least 8 different sets of parameters. Add a test to make sure they all are handled in a sane way. Reviewed-by: Heiko Carstens Signed-off-by: Ilya Leoshkevich Co-developed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 14 +++ arch/s390/lib/Makefile | 3 + arch/s390/lib/test_unwind.c | 231 ++++++++++++++++++++++++++++++++++++ 3 files changed, 248 insertions(+) create mode 100644 arch/s390/lib/test_unwind.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f0df9e48e651..2528eb9d01fb 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -1018,3 +1018,17 @@ config S390_GUEST the KVM hypervisor. endmenu + +menu "Selftests" + +config S390_UNWIND_SELFTEST + def_tristate n + prompt "Test unwind functions" + help + This option enables s390 specific stack unwinder testing kernel + module. This option is not useful for distributions or general + kernels, but only for kernel developers working on architecture code. + + Say N if you are unsure. + +endmenu diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index d7c218e8b559..28fd66d558ff 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -11,3 +11,6 @@ lib-$(CONFIG_UPROBES) += probes.o # Instrumenting memory accesses to __user data (in different address space) # produce false positives KASAN_SANITIZE_uaccess.o := n + +obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o +CFLAGS_test_unwind.o += -fno-optimize-sibling-calls diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c new file mode 100644 index 000000000000..5636da941f1f --- /dev/null +++ b/arch/s390/lib/test_unwind.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test module for unwind_for_each_frame + */ + +#define pr_fmt(fmt) "test_unwind: " fmt +#include +#include +#include +#include +#include +#include +#include + +#define BT_BUF_SIZE (PAGE_SIZE * 4) + +/* + * To avoid printk line limit split backtrace by lines + */ +static void print_backtrace(char *bt) +{ + char *p; + + while (true) { + p = strsep(&bt, "\n"); + if (!p) + break; + pr_err("%s\n", p); + } +} + +/* + * Calls unwind_for_each_frame(task, regs, sp) and verifies that the result + * contains unwindme_func2 followed by unwindme_func1. + */ +static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, + unsigned long sp) +{ + int frame_count, prev_is_func2, seen_func2_func1; + const int max_frames = 128; + struct unwind_state state; + size_t bt_pos = 0; + int ret = 0; + char *bt; + + bt = kmalloc(BT_BUF_SIZE, GFP_KERNEL); + if (!bt) { + pr_err("failed to allocate backtrace buffer\n"); + return -ENOMEM; + } + /* Unwind. */ + frame_count = 0; + prev_is_func2 = 0; + seen_func2_func1 = 0; + unwind_for_each_frame(&state, task, regs, sp) { + unsigned long addr = unwind_get_return_address(&state); + char sym[KSYM_SYMBOL_LEN]; + + if (!addr || frame_count == max_frames) + break; + sprint_symbol(sym, addr); + if (bt_pos < BT_BUF_SIZE) { + bt_pos += snprintf(bt + bt_pos, BT_BUF_SIZE - bt_pos, "%s\n", sym); + if (bt_pos >= BT_BUF_SIZE) + pr_err("backtrace buffer is too small\n"); + } + frame_count += 1; + if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1")) + seen_func2_func1 = 1; + prev_is_func2 = str_has_prefix(sym, "unwindme_func2"); + } + + /* Check the results. */ + if (!seen_func2_func1) { + pr_err("unwindme_func2 and unwindme_func1 not found\n"); + ret = -EINVAL; + } + if (frame_count == max_frames) { + pr_err("Maximum number of frames exceeded\n"); + ret = -EINVAL; + } + if (ret) + print_backtrace(bt); + kfree(bt); + return ret; +} + +/* State of the task being unwound. */ +struct unwindme { + int flags; + struct completion task_ready; + wait_queue_head_t task_wq; + unsigned long sp; +}; + +/* Values of unwindme.flags. */ +#define UWM_DEFAULT 0x0 +#define UWM_THREAD 0x1 /* Unwind a separate task. */ +#define UWM_REGS 0x2 /* Pass regs to test_unwind(). */ +#define UWM_SP 0x4 /* Pass sp to test_unwind(). */ +#define UWM_CALLER 0x8 /* Unwind starting from caller. */ + +static __always_inline unsigned long get_psw_addr(void) +{ + unsigned long psw_addr; + + asm volatile( + "basr %[psw_addr],0\n" + : [psw_addr] "=d" (psw_addr)); + return psw_addr; +} + +/* This function may or may not appear in the backtrace. */ +static noinline int unwindme_func4(struct unwindme *u) +{ + if (!(u->flags & UWM_CALLER)) + u->sp = current_frame_address(); + if (u->flags & UWM_THREAD) { + complete(&u->task_ready); + wait_event(u->task_wq, kthread_should_park()); + kthread_parkme(); + return 0; + } else { + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + regs.psw.addr = get_psw_addr(); + regs.gprs[15] = current_stack_pointer(); + return test_unwind(NULL, + (u->flags & UWM_REGS) ? ®s : NULL, + (u->flags & UWM_SP) ? u->sp : 0); + } +} + +/* This function may or may not appear in the backtrace. */ +static noinline int unwindme_func3(struct unwindme *u) +{ + u->sp = current_frame_address(); + return unwindme_func4(u); +} + +/* This function must appear in the backtrace. */ +static noinline int unwindme_func2(struct unwindme *u) +{ + return unwindme_func3(u); +} + +/* This function must follow unwindme_func2 in the backtrace. */ +static noinline int unwindme_func1(void *u) +{ + return unwindme_func2((struct unwindme *)u); +} + +/* Spawns a task and passes it to test_unwind(). */ +static int test_unwind_task(struct unwindme *u) +{ + struct task_struct *task; + int ret; + + /* Initialize thread-related fields. */ + init_completion(&u->task_ready); + init_waitqueue_head(&u->task_wq); + + /* + * Start the task and wait until it reaches unwindme_func4() and sleeps + * in (task_ready, unwind_done] range. + */ + task = kthread_run(unwindme_func1, u, "%s", __func__); + if (IS_ERR(task)) { + pr_err("kthread_run() failed\n"); + return PTR_ERR(task); + } + /* + * Make sure task reaches unwindme_func4 before parking it, + * we might park it before kthread function has been executed otherwise + */ + wait_for_completion(&u->task_ready); + kthread_park(task); + /* Unwind. */ + ret = test_unwind(task, NULL, (u->flags & UWM_SP) ? u->sp : 0); + kthread_stop(task); + return ret; +} + +static int test_unwind_flags(int flags) +{ + struct unwindme u; + + u.flags = flags; + if (u.flags & UWM_THREAD) + return test_unwind_task(&u); + else + return unwindme_func1(&u); +} + +static int test_unwind_init(void) +{ + int ret = 0; + +#define TEST(flags) \ +do { \ + pr_info("[ RUN ] " #flags "\n"); \ + if (!test_unwind_flags((flags))) { \ + pr_info("[ OK ] " #flags "\n"); \ + } else { \ + pr_err("[ FAILED ] " #flags "\n"); \ + ret = -EINVAL; \ + } \ +} while (0) + + TEST(UWM_DEFAULT); + TEST(UWM_SP); + TEST(UWM_REGS); + TEST(UWM_SP | UWM_REGS); + TEST(UWM_CALLER | UWM_SP); + TEST(UWM_CALLER | UWM_SP | UWM_REGS); + TEST(UWM_THREAD); + TEST(UWM_THREAD | UWM_SP); + TEST(UWM_THREAD | UWM_CALLER | UWM_SP); +#undef TEST + + return ret; +} + +static void test_unwind_exit(void) +{ +} + +module_init(test_unwind_init); +module_exit(test_unwind_exit); +MODULE_LICENSE("GPL"); From f44fa79b104b56d53d33ae43e69bab98b63d4783 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 17:37:50 +0100 Subject: [PATCH 23/33] s390/test_unwind: require that unwinding ended successfully Currently unwinder test passes if unwinding results contain unwindme_func2 and unwindme_func1 functions. Now that unwinder reports success upon reaching task pt_regs, check that unwinding ended successfully in every test. Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 5636da941f1f..2839f8cb691d 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -71,6 +71,10 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, } /* Check the results. */ + if (unwind_error(&state)) { + pr_err("unwind error\n"); + ret = -EINVAL; + } if (!seen_func2_func1) { pr_err("unwindme_func2 and unwindme_func1 not found\n"); ret = -EINVAL; From 4ac24c092b4eef69b2436ee4d478500dc886e8b5 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 25 Nov 2019 13:34:59 +0100 Subject: [PATCH 24/33] s390: fix register clobbering in CALL_ON_STACK CALL_ON_STACK defines and initializes register variables. Inline assembly which follows might trigger compiler to generate memory access for "stack" argument (e.g. in case of S390_lowcore.nodat_stack). This memory access produces a function call under kasan with outline instrumentation which clobbers registers. Switch "stack" argument in CALL_ON_STACK helper to use memory reference constraint and perform load instead. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stacktrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 4725315a9cb1..ee056f4a4fa3 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -112,12 +112,12 @@ struct stack_frame { \ asm volatile( \ " la %[_prev],0(15)\n" \ - " la 15,0(%[_stack])\n" \ + " lg 15,%[_stack]\n" \ " stg %[_frame],%[_bc](15)\n" \ " brasl 14,%[_fn]\n" \ " la 15,0(%[_prev])\n" \ : [_prev] "=&a" (prev), CALL_FMT_##nr \ - [_stack] "a" (stack), \ + [_stack] "R" (stack), \ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ [_frame] "d" (frame), \ [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ From 7868249fbbc8125b82b83d99d33b23897ae7d9ab Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 18:22:06 +0100 Subject: [PATCH 25/33] s390/test_unwind: add CALL_ON_STACK tests Add CALL_ON_STACK helper testing. Tests make sure that we can unwind from switched stack to original one up to task pt_regs (nodat -> task stack). UWM_SWITCH_STACK could not be used together with UWM_THREAD because get_stack_info explicitly restricts unwinding to task stack if task != current. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 2839f8cb691d..687a6922beda 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -43,7 +43,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, int ret = 0; char *bt; - bt = kmalloc(BT_BUF_SIZE, GFP_KERNEL); + bt = kmalloc(BT_BUF_SIZE, GFP_ATOMIC); if (!bt) { pr_err("failed to allocate backtrace buffer\n"); return -ENOMEM; @@ -98,11 +98,12 @@ struct unwindme { }; /* Values of unwindme.flags. */ -#define UWM_DEFAULT 0x0 -#define UWM_THREAD 0x1 /* Unwind a separate task. */ -#define UWM_REGS 0x2 /* Pass regs to test_unwind(). */ -#define UWM_SP 0x4 /* Pass sp to test_unwind(). */ -#define UWM_CALLER 0x8 /* Unwind starting from caller. */ +#define UWM_DEFAULT 0x0 +#define UWM_THREAD 0x1 /* Unwind a separate task. */ +#define UWM_REGS 0x2 /* Pass regs to test_unwind(). */ +#define UWM_SP 0x4 /* Pass sp to test_unwind(). */ +#define UWM_CALLER 0x8 /* Unwind starting from caller. */ +#define UWM_SWITCH_STACK 0x10 /* Use CALL_ON_STACK. */ static __always_inline unsigned long get_psw_addr(void) { @@ -146,7 +147,16 @@ static noinline int unwindme_func3(struct unwindme *u) /* This function must appear in the backtrace. */ static noinline int unwindme_func2(struct unwindme *u) { - return unwindme_func3(u); + int rc; + + if (u->flags & UWM_SWITCH_STACK) { + preempt_disable(); + rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u); + preempt_enable(); + return rc; + } else { + return unwindme_func3(u); + } } /* This function must follow unwindme_func2 in the backtrace. */ @@ -215,9 +225,11 @@ do { \ TEST(UWM_DEFAULT); TEST(UWM_SP); TEST(UWM_REGS); + TEST(UWM_SWITCH_STACK); TEST(UWM_SP | UWM_REGS); TEST(UWM_CALLER | UWM_SP); TEST(UWM_CALLER | UWM_SP | UWM_REGS); + TEST(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK); TEST(UWM_THREAD); TEST(UWM_THREAD | UWM_SP); TEST(UWM_THREAD | UWM_CALLER | UWM_SP); From 0610154650f161d56a0bef0d9678ae1de7360019 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 18:52:40 +0100 Subject: [PATCH 26/33] s390/test_unwind: print verbose unwinding results Add stack name, sp and reliable information into test unwinding results. Also consider ip outside of kernel text as failure if the state is reported reliable. Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/dumpstack.c | 1 + arch/s390/lib/test_unwind.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index fc442aec0d96..d74e21a23703 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -38,6 +38,7 @@ const char *stack_type_name(enum stack_type type) return "unknown"; } } +EXPORT_SYMBOL_GPL(stack_type_name); static inline bool in_stack(unsigned long sp, struct stack_info *info, enum stack_type type, unsigned long low, diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 687a6922beda..db94e657c056 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -56,11 +56,19 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, unsigned long addr = unwind_get_return_address(&state); char sym[KSYM_SYMBOL_LEN]; - if (!addr || frame_count == max_frames) + if (frame_count++ == max_frames) break; + if (state.reliable && !addr) { + pr_err("unwind state reliable but addr is 0\n"); + return -EINVAL; + } sprint_symbol(sym, addr); if (bt_pos < BT_BUF_SIZE) { - bt_pos += snprintf(bt + bt_pos, BT_BUF_SIZE - bt_pos, "%s\n", sym); + bt_pos += snprintf(bt + bt_pos, BT_BUF_SIZE - bt_pos, + state.reliable ? " [%-7s%px] %pSR\n" : + "([%-7s%px] %pSR)\n", + stack_type_name(state.stack_info.type), + (void *)state.sp, (void *)state.ip); if (bt_pos >= BT_BUF_SIZE) pr_err("backtrace buffer is too small\n"); } From e7409367abe54ad04868552b9d9fe4a56acc753d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 22 Nov 2019 19:18:58 +0100 Subject: [PATCH 27/33] s390/test_unwind: add irq context tests Add unwinding from irq context tests. Unwinder should be able to unwind through irq stack to task stack up to task pt_regs. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 45 +++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index db94e657c056..72fa745281f0 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #define BT_BUF_SIZE (PAGE_SIZE * 4) @@ -100,11 +102,15 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, /* State of the task being unwound. */ struct unwindme { int flags; + int ret; + struct task_struct *task; struct completion task_ready; wait_queue_head_t task_wq; unsigned long sp; }; +static struct unwindme *unwindme; + /* Values of unwindme.flags. */ #define UWM_DEFAULT 0x0 #define UWM_THREAD 0x1 /* Unwind a separate task. */ @@ -112,6 +118,7 @@ struct unwindme { #define UWM_SP 0x4 /* Pass sp to test_unwind(). */ #define UWM_CALLER 0x8 /* Unwind starting from caller. */ #define UWM_SWITCH_STACK 0x10 /* Use CALL_ON_STACK. */ +#define UWM_IRQ 0x20 /* Unwind from irq context. */ static __always_inline unsigned long get_psw_addr(void) { @@ -173,6 +180,34 @@ static noinline int unwindme_func1(void *u) return unwindme_func2((struct unwindme *)u); } +static void unwindme_irq_handler(struct ext_code ext_code, + unsigned int param32, + unsigned long param64) +{ + struct unwindme *u = READ_ONCE(unwindme); + + if (u && u->task == current) { + unwindme = NULL; + u->task = NULL; + u->ret = unwindme_func1(u); + } +} + +static int test_unwind_irq(struct unwindme *u) +{ + preempt_disable(); + if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) { + pr_info("Couldn't reqister external interrupt handler"); + return -1; + } + u->task = current; + unwindme = u; + udelay(1); + unregister_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler); + preempt_enable(); + return u->ret; +} + /* Spawns a task and passes it to test_unwind(). */ static int test_unwind_task(struct unwindme *u) { @@ -211,6 +246,8 @@ static int test_unwind_flags(int flags) u.flags = flags; if (u.flags & UWM_THREAD) return test_unwind_task(&u); + else if (u.flags & UWM_IRQ) + return test_unwind_irq(&u); else return unwindme_func1(&u); } @@ -241,6 +278,14 @@ do { \ TEST(UWM_THREAD); TEST(UWM_THREAD | UWM_SP); TEST(UWM_THREAD | UWM_CALLER | UWM_SP); + TEST(UWM_IRQ); + TEST(UWM_IRQ | UWM_SWITCH_STACK); + TEST(UWM_IRQ | UWM_SP); + TEST(UWM_IRQ | UWM_REGS); + TEST(UWM_IRQ | UWM_SP | UWM_REGS); + TEST(UWM_IRQ | UWM_CALLER | UWM_SP); + TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS); + TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK); #undef TEST return ret; From de6921ccbd0fb2882a1f615a6d3cdfbdcd64532c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 25 Nov 2019 14:07:40 +0100 Subject: [PATCH 28/33] s390/test_unwind: add program check context tests Add unwinding from program check handler tests. Unwinder should be able to unwind through pt_regs stored by program check handler on task stack. Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 47 +++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 72fa745281f0..bda7ac0ddd29 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -119,6 +120,7 @@ static struct unwindme *unwindme; #define UWM_CALLER 0x8 /* Unwind starting from caller. */ #define UWM_SWITCH_STACK 0x10 /* Use CALL_ON_STACK. */ #define UWM_IRQ 0x20 /* Unwind from irq context. */ +#define UWM_PGM 0x40 /* Unwind from program check handler. */ static __always_inline unsigned long get_psw_addr(void) { @@ -130,6 +132,17 @@ static __always_inline unsigned long get_psw_addr(void) return psw_addr; } +#ifdef CONFIG_KPROBES +static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct unwindme *u = unwindme; + + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL, + (u->flags & UWM_SP) ? u->sp : 0); + return 0; +} +#endif + /* This function may or may not appear in the backtrace. */ static noinline int unwindme_func4(struct unwindme *u) { @@ -140,6 +153,34 @@ static noinline int unwindme_func4(struct unwindme *u) wait_event(u->task_wq, kthread_should_park()); kthread_parkme(); return 0; +#ifdef CONFIG_KPROBES + } else if (u->flags & UWM_PGM) { + struct kprobe kp; + int ret; + + unwindme = u; + memset(&kp, 0, sizeof(kp)); + kp.symbol_name = "do_report_trap"; + kp.pre_handler = pgm_pre_handler; + ret = register_kprobe(&kp); + if (ret < 0) { + pr_err("register_kprobe failed %d\n", ret); + return -EINVAL; + } + + /* + * trigger specification exception + */ + asm volatile( + " mvcl %%r1,%%r1\n" + "0: nopr %%r7\n" + EX_TABLE(0b, 0b) + :); + + unregister_kprobe(&kp); + unwindme = NULL; + return u->ret; +#endif } else { struct pt_regs regs; @@ -286,6 +327,12 @@ do { \ TEST(UWM_IRQ | UWM_CALLER | UWM_SP); TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS); TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK); +#ifdef CONFIG_KPROBES + TEST(UWM_PGM); + TEST(UWM_PGM | UWM_SP); + TEST(UWM_PGM | UWM_REGS); + TEST(UWM_PGM | UWM_SP | UWM_REGS); +#endif #undef TEST return ret; From 222ee9087a730b1df08d09baed0d03626e67600f Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Nov 2019 17:37:51 +0100 Subject: [PATCH 29/33] s390/unwind: start unwinding from reliable state A comment in arch/s390/include/asm/unwind.h says: > If 'first_frame' is not zero unwind_start skips unwind frames until it > reaches the specified stack pointer. > The end of the unwinding is indicated with unwind_done, this can be true > right after unwind_start, e.g. with first_frame!=0 that can not be found. > unwind_next_frame skips to the next frame. > Once the unwind is completed unwind_error() can be used to check if there > has been a situation where the unwinder could not correctly understand > the tasks call chain. With this change backchain unwinder now comply with behaviour described. As well as matches orc unwinder implementation. Now unwinder starts from reliable state, i.e. __unwind_start own stack frame is taken or stack frame generated by __switch_to (ksp) - both known to be valid. In case of pt_regs %r15 is better match for pt_regs psw, than sometimes random "sp" caller passed. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/unwind.h | 6 ++--- arch/s390/kernel/unwind_bc.c | 42 ++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index 5d6c8fe7a271..de9006b0cfeb 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -58,11 +58,11 @@ static inline bool unwind_error(struct unwind_state *state) static inline void unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, - unsigned long sp) + unsigned long first_frame) { task = task ?: current; - sp = sp ?: get_stack_pointer(task, regs); - __unwind_start(state, task, regs, sp); + first_frame = first_frame ?: get_stack_pointer(task, regs); + __unwind_start(state, task, regs, first_frame); } static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index c5ebb8a4cdd6..e1371cdf9fa5 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -105,13 +105,11 @@ out_stop: EXPORT_SYMBOL_GPL(unwind_next_frame); void __unwind_start(struct unwind_state *state, struct task_struct *task, - struct pt_regs *regs, unsigned long sp) + struct pt_regs *regs, unsigned long first_frame) { struct stack_info *info = &state->stack_info; - unsigned long *mask = &state->stack_mask; - bool reliable; struct stack_frame *sf; - unsigned long ip; + unsigned long ip, sp; memset(state, 0, sizeof(*state)); state->task = task; @@ -123,23 +121,28 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, return; } + /* Get the instruction pointer from pt_regs or the stack frame */ + if (regs) { + ip = regs->psw.addr; + sp = regs->gprs[15]; + } else if (task == current) { + sp = current_frame_address(); + } else { + sp = task->thread.ksp; + } + /* Get current stack pointer and initialize stack info */ - if (get_stack_info(sp, task, info, mask) != 0 || - !on_stack(info, sp, sizeof(struct stack_frame))) { + if (!update_stack_info(state, sp)) { /* Something is wrong with the stack pointer */ info->type = STACK_TYPE_UNKNOWN; state->error = true; return; } - /* Get the instruction pointer from pt_regs or the stack frame */ - if (regs) { - ip = READ_ONCE_NOCHECK(regs->psw.addr); - reliable = true; - } else { - sf = (struct stack_frame *) sp; + if (!regs) { + /* Stack frame is within valid stack */ + sf = (struct stack_frame *)sp; ip = READ_ONCE_NOCHECK(sf->gprs[8]); - reliable = false; } ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL); @@ -147,6 +150,17 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, /* Update unwind state */ state->sp = sp; state->ip = ip; - state->reliable = reliable; + state->reliable = true; + + if (!first_frame) + return; + /* Skip through the call chain to the specified starting frame */ + while (!unwind_done(state)) { + if (on_stack(&state->stack_info, first_frame, sizeof(struct stack_frame))) { + if (state->sp >= first_frame) + break; + } + unwind_next_frame(state); + } } EXPORT_SYMBOL_GPL(__unwind_start); From bf018ee644897d7982e1b8dd8b15e97db6e1a4da Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Nov 2019 18:12:04 +0100 Subject: [PATCH 30/33] s390/unwind: filter out unreliable bogus %r14 Currently unwinder unconditionally returns %r14 from the first frame pointed by %r15 from pt_regs. A task could be interrupted when a function already allocated this frame (if it needs it) for its callees or to store local variables. In that case this frame would contain random values from stack or values stored there by a callee. As we are only interested in %r14 to get potential return address, skip bogus return addresses which doesn't belong to kernel text. This helps to avoid duplicating filtering logic in unwider users, most of which use unwind_get_return_address() and would choke on bogus 0 address returned by it otherwise. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/unwind_bc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index e1371cdf9fa5..ef42d5f77ce7 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -57,6 +57,11 @@ bool unwind_next_frame(struct unwind_state *state) ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; regs = NULL; + if (!__kernel_text_address(ip)) { + /* skip bogus %r14 */ + state->regs = NULL; + return unwind_next_frame(state); + } } else { sf = (struct stack_frame *) state->sp; sp = READ_ONCE_NOCHECK(sf->back_chain); From be2d11b2a1e86586ace9f6839a159b170b00f2b3 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Wed, 27 Nov 2019 19:35:19 +0100 Subject: [PATCH 31/33] s390/unwind: add stack pointer alignment sanity checks ABI requires SP to be aligned 8 bytes, report unwinding error otherwise. Link: https://lkml.kernel.org/r/20191106095601.29986-5-mbenes@suse.cz Reviewed-by: Heiko Carstens Tested-by: Miroslav Benes Signed-off-by: Miroslav Benes Signed-off-by: Vasily Gorbik --- arch/s390/kernel/dumpstack.c | 4 ++++ arch/s390/kernel/unwind_bc.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index d74e21a23703..d306fe04489a 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -94,6 +94,10 @@ int get_stack_info(unsigned long sp, struct task_struct *task, if (!sp) goto unknown; + /* Sanity check: ABI requires SP to be aligned 8 bytes. */ + if (sp & 0x7) + goto unknown; + /* Check per-task stack */ if (in_task_stack(sp, task, info)) goto recursion_check; diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index ef42d5f77ce7..da2d4d4c5b0e 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -92,6 +92,10 @@ bool unwind_next_frame(struct unwind_state *state) } } + /* Sanity check: ABI requires SP to be aligned 8 bytes. */ + if (sp & 0x7) + goto out_err; + ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp); /* Update unwind state */ From aa137a6d302b5989ed205b7dfb7fe40a8851babc Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Wed, 6 Nov 2019 10:56:01 +0100 Subject: [PATCH 32/33] s390/livepatch: Implement reliable stack tracing for the consistency model The livepatch consistency model requires reliable stack tracing architecture support in order to work properly. In order to achieve this, two main issues have to be solved. First, reliable and consistent call chain backtracing has to be ensured. Second, the unwinder needs to be able to detect stack corruptions and return errors. The "zSeries ELF Application Binary Interface Supplement" says: "The stack pointer points to the first word of the lowest allocated stack frame. If the "back chain" is implemented this word will point to the previously allocated stack frame (towards higher addresses), except for the first stack frame, which shall have a back chain of zero (NULL). The stack shall grow downwards, in other words towards lower addresses." "back chain" is optional. GCC option -mbackchain enables it. Quoting Martin Schwidefsky [1]: "The compiler is called with the -mbackchain option, all normal C function will store the backchain in the function prologue. All functions written in assembler code should do the same, if you find one that does not we should fix that. The end result is that a task that *voluntarily* called schedule() should have a proper backchain at all times. Dependent on the use case this may or may not be enough. Asynchronous interrupts may stop the CPU at the beginning of a function, if kernel preemption is enabled we can end up with a broken backchain. The production kernels for IBM Z are all compiled *without* kernel preemption. So yes, we might get away without the objtool support. On a side-note, we do have a line item to implement the ORC unwinder for the kernel, that includes the objtool support. Once we have that we can drop the -mbackchain option for the kernel build. That gives us a nice little performance benefit. I hope that the change from backchain to the ORC unwinder will not be too hard to implement in the livepatch tools." Since -mbackchain is enabled by default when the kernel is compiled, the call chain backtracing should be currently ensured and objtool should not be necessary for livepatch purposes. Regarding the second issue, stack corruptions and non-reliable states have to be recognized by the unwinder. Mainly it means to detect preemption or page faults, the end of the task stack must be reached, return addresses must be valid text addresses and hacks like function graph tracing and kretprobes must be properly detected. Unwinding a running task's stack is not a problem, because there is a livepatch requirement that every checked task is blocked, except for the current task. Due to that, the implementation can be much simpler compared to the existing non-reliable infrastructure. We can consider a task's kernel/thread stack only and skip the other stacks. [1] 20180912121106.31ffa97c@mschwideX1 [not archived on lore.kernel.org] Link: https://lkml.kernel.org/r/20191106095601.29986-5-mbenes@suse.cz Reviewed-by: Heiko Carstens Tested-by: Miroslav Benes Signed-off-by: Miroslav Benes Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/kernel/stacktrace.c | 43 +++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2528eb9d01fb..367a87c5d7b8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -170,6 +170,7 @@ config S390 select HAVE_PERF_EVENTS select HAVE_RCU_TABLE_FREE select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index f8fc4f8aef9b..fc5419ac64c8 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -9,6 +9,7 @@ #include #include #include +#include void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) @@ -22,3 +23,45 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, break; } } + +/* + * This function returns an error if it detects any unreliable features of the + * stack. Otherwise it guarantees that the stack trace is reliable. + * + * If the task is not 'current', the caller *must* ensure the task is inactive. + */ +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) +{ + struct unwind_state state; + unsigned long addr; + + unwind_for_each_frame(&state, task, NULL, 0) { + if (state.stack_info.type != STACK_TYPE_TASK) + return -EINVAL; + + if (state.regs) + return -EINVAL; + + addr = unwind_get_return_address(&state); + if (!addr) + return -EINVAL; + +#ifdef CONFIG_KPROBES + /* + * Mark stacktraces with kretprobed functions on them + * as unreliable. + */ + if (state.ip == (unsigned long)kretprobe_trampoline) + return -EINVAL; +#endif + + if (!consume_entry(cookie, addr, false)) + return -EINVAL; + } + + /* Check for stack corruption */ + if (unwind_error(&state)) + return -EINVAL; + return 0; +} From 2115fbf7210bd053ba55a95e7ebc366df41aa9cf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 18 Nov 2019 13:59:25 +0100 Subject: [PATCH 33/33] s390: remove compat vdso code Remove compat vdso code, since there is hardly any compat user space left. Still existing compat user space will have to use system calls instead. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 3 - arch/s390/Makefile | 1 - arch/s390/kernel/Makefile | 1 - arch/s390/kernel/vdso.c | 42 +----- arch/s390/kernel/vdso32/.gitignore | 1 - arch/s390/kernel/vdso32/Makefile | 66 --------- arch/s390/kernel/vdso32/clock_getres.S | 44 ------ arch/s390/kernel/vdso32/clock_gettime.S | 179 ----------------------- arch/s390/kernel/vdso32/getcpu.S | 31 ---- arch/s390/kernel/vdso32/gettimeofday.S | 103 ------------- arch/s390/kernel/vdso32/note.S | 13 -- arch/s390/kernel/vdso32/vdso32.lds.S | 142 ------------------ arch/s390/kernel/vdso32/vdso32_wrapper.S | 15 -- 13 files changed, 3 insertions(+), 638 deletions(-) delete mode 100644 arch/s390/kernel/vdso32/.gitignore delete mode 100644 arch/s390/kernel/vdso32/Makefile delete mode 100644 arch/s390/kernel/vdso32/clock_getres.S delete mode 100644 arch/s390/kernel/vdso32/clock_gettime.S delete mode 100644 arch/s390/kernel/vdso32/getcpu.S delete mode 100644 arch/s390/kernel/vdso32/gettimeofday.S delete mode 100644 arch/s390/kernel/vdso32/note.S delete mode 100644 arch/s390/kernel/vdso32/vdso32.lds.S delete mode 100644 arch/s390/kernel/vdso32/vdso32_wrapper.S diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 367a87c5d7b8..d4051e88e625 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -427,9 +427,6 @@ config COMPAT (and some other stuff like libraries and such) is needed for executing 31 bit applications. It is safe to say "Y". -config COMPAT_VDSO - def_bool COMPAT && !CC_IS_CLANG - config SYSVIPC_COMPAT def_bool y if COMPAT && SYSVIPC diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 478b645b20dd..ba8556bb0fb1 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -157,7 +157,6 @@ zfcpdump: vdso_install: $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ - $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@ archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7edbbcd8228a..2b1203cf7be6 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -81,4 +81,3 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o # vdso obj-y += vdso64/ -obj-$(CONFIG_COMPAT_VDSO) += vdso32/ diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index ed1fc08ccea2..bcc9bdb39ba2 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -29,13 +29,6 @@ #include #include -#ifdef CONFIG_COMPAT_VDSO -extern char vdso32_start, vdso32_end; -static void *vdso32_kbase = &vdso32_start; -static unsigned int vdso32_pages; -static struct page **vdso32_pagelist; -#endif - extern char vdso64_start, vdso64_end; static void *vdso64_kbase = &vdso64_start; static unsigned int vdso64_pages; @@ -55,12 +48,6 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, vdso_pagelist = vdso64_pagelist; vdso_pages = vdso64_pages; -#ifdef CONFIG_COMPAT_VDSO - if (vma->vm_mm->context.compat_mm) { - vdso_pagelist = vdso32_pagelist; - vdso_pages = vdso32_pages; - } -#endif if (vmf->pgoff >= vdso_pages) return VM_FAULT_SIGBUS; @@ -76,10 +63,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, unsigned long vdso_pages; vdso_pages = vdso64_pages; -#ifdef CONFIG_COMPAT_VDSO - if (vma->vm_mm->context.compat_mm) - vdso_pages = vdso32_pages; -#endif if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start) return -EINVAL; @@ -209,12 +192,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!vdso_enabled) return 0; + if (is_compat_task()) + return 0; + vdso_pages = vdso64_pages; -#ifdef CONFIG_COMPAT_VDSO - mm->context.compat_mm = is_compat_task(); - if (mm->context.compat_mm) - vdso_pages = vdso32_pages; -#endif /* * vDSO has a problem and was disabled, just don't "enable" it for * the process @@ -267,23 +248,6 @@ static int __init vdso_init(void) int i; vdso_init_data(vdso_data); -#ifdef CONFIG_COMPAT_VDSO - /* Calculate the size of the 32 bit vDSO */ - vdso32_pages = ((&vdso32_end - &vdso32_start - + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; - - /* Make sure pages are in the correct state */ - vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *), - GFP_KERNEL); - BUG_ON(vdso32_pagelist == NULL); - for (i = 0; i < vdso32_pages - 1; i++) { - struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); - get_page(pg); - vdso32_pagelist[i] = pg; - } - vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data); - vdso32_pagelist[vdso32_pages] = NULL; -#endif /* Calculate the size of the 64 bit vDSO */ vdso64_pages = ((&vdso64_end - &vdso64_start diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore deleted file mode 100644 index e45fba9d0ced..000000000000 --- a/arch/s390/kernel/vdso32/.gitignore +++ /dev/null @@ -1 +0,0 @@ -vdso32.lds diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile deleted file mode 100644 index aee9ffbccb54..000000000000 --- a/arch/s390/kernel/vdso32/Makefile +++ /dev/null @@ -1,66 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# List of files in the vdso, has to be asm only for now - -KCOV_INSTRUMENT := n - -obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o - -# Build rules - -targets := $(obj-vdso32) vdso32.so vdso32.so.dbg -obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) - -KBUILD_AFLAGS += -DBUILD_VDSO -KBUILD_CFLAGS += -DBUILD_VDSO - -KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS)) -KBUILD_AFLAGS_31 += -m31 -s - -KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin -KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ - -Wl,--hash-style=both - -$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31) -$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31) - -obj-y += vdso32_wrapper.o -extra-y += vdso32.lds -CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) - -# Disable gcov profiling, ubsan and kasan for VDSO code -GCOV_PROFILE := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n - -# Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so - -# link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE - $(call if_changed,vdso32ld) - -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -# assembly rules for the .S files -$(obj-vdso32): %.o: %.S FORCE - $(call if_changed_dep,vdso32as) - -# actual build commands -quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ -quiet_cmd_vdso32as = VDSO32A $@ - cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< - -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso32.so: $(obj)/vdso32.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso32.so diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S deleted file mode 100644 index eaf9cf1417f6..000000000000 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of clock_getres() for 32 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include -#include -#include -#include - - .text - .align 4 - .globl __kernel_clock_getres - .type __kernel_clock_getres,@function -__kernel_clock_getres: - CFI_STARTPROC - basr %r1,0 - la %r1,4f-.(%r1) - chi %r2,__CLOCK_REALTIME - je 0f - chi %r2,__CLOCK_MONOTONIC - je 0f - la %r1,5f-4f(%r1) - chi %r2,__CLOCK_REALTIME_COARSE - je 0f - chi %r2,__CLOCK_MONOTONIC_COARSE - jne 3f -0: ltr %r3,%r3 - jz 2f /* res == NULL */ -1: l %r0,0(%r1) - xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ - st %r0,4(%r3) /* store tp->tv_usec */ -2: lhi %r2,0 - br %r14 -3: lhi %r1,__NR_clock_getres /* fallback to svc */ - svc 0 - br %r14 - CFI_ENDPROC -4: .long __CLOCK_REALTIME_RES -5: .long __CLOCK_COARSE_RES - .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S deleted file mode 100644 index ada5c11a16e5..000000000000 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ /dev/null @@ -1,179 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of clock_gettime() for 32 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include -#include -#include -#include -#include - - .text - .align 4 - .globl __kernel_clock_gettime - .type __kernel_clock_gettime,@function -__kernel_clock_gettime: - CFI_STARTPROC - ahi %r15,-16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD - basr %r5,0 -0: al %r5,21f-0b(%r5) /* get &_vdso_data */ - chi %r2,__CLOCK_REALTIME_COARSE - je 10f - chi %r2,__CLOCK_REALTIME - je 11f - chi %r2,__CLOCK_MONOTONIC_COARSE - je 9f - chi %r2,__CLOCK_MONOTONIC - jne 19f - - /* CLOCK_MONOTONIC */ -1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 1b - stcke 0(%r15) /* Store TOD clock */ - lm %r0,%r1,1(%r15) - s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - sl %r1,__VDSO_XTIME_STAMP+4(%r5) - brc 3,2f - ahi %r0,-1 -2: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ - lr %r2,%r0 - l %r0,__VDSO_TK_MULT(%r5) - ltr %r1,%r1 - mr %r0,%r0 - jnm 3f - a %r0,__VDSO_TK_MULT(%r5) -3: alr %r0,%r2 - al %r0,__VDSO_WTOM_NSEC(%r5) - al %r1,__VDSO_WTOM_NSEC+4(%r5) - brc 12,5f - ahi %r0,1 -5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - srdl %r0,0(%r2) /* >> tk->shift */ - l %r2,__VDSO_WTOM_SEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 1b - basr %r5,0 -6: ltr %r0,%r0 - jnz 7f - cl %r1,20f-6b(%r5) - jl 8f -7: ahi %r2,1 - sl %r1,20f-6b(%r5) - brc 3,6b - ahi %r0,-1 - j 6b -8: st %r2,0(%r3) /* store tp->tv_sec */ - st %r1,4(%r3) /* store tp->tv_nsec */ - lhi %r2,0 - ahi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - - /* CLOCK_MONOTONIC_COARSE */ - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD -9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 9b - l %r2,__VDSO_WTOM_CRS_SEC+4(%r5) - l %r1,__VDSO_WTOM_CRS_NSEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 9b - j 8b - - /* CLOCK_REALTIME_COARSE */ -10: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 10b - l %r2,__VDSO_XTIME_CRS_SEC+4(%r5) - l %r1,__VDSO_XTIME_CRS_NSEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 10b - j 17f - - /* CLOCK_REALTIME */ -11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 11b - stcke 0(%r15) /* Store TOD clock */ - lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */ - s %r0,1(%r15) /* no - ts_steering_end */ - sl %r1,5(%r15) - brc 3,22f - ahi %r0,-1 -22: ltr %r0,%r0 /* past end of steering? */ - jm 24f - srdl %r0,15 /* 1 per 2^16 */ - tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */ - jz 23f - lcr %r0,%r0 /* negative TOD offset */ - lcr %r1,%r1 - je 23f - ahi %r0,-1 -23: a %r0,1(%r15) /* add TOD timestamp */ - al %r1,5(%r15) - brc 12,25f - ahi %r0,1 - j 25f -24: lm %r0,%r1,1(%r15) /* load TOD timestamp */ -25: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - sl %r1,__VDSO_XTIME_STAMP+4(%r5) - brc 3,12f - ahi %r0,-1 -12: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ - lr %r2,%r0 - l %r0,__VDSO_TK_MULT(%r5) - ltr %r1,%r1 - mr %r0,%r0 - jnm 13f - a %r0,__VDSO_TK_MULT(%r5) -13: alr %r0,%r2 - al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ - al %r1,__VDSO_XTIME_NSEC+4(%r5) - brc 12,14f - ahi %r0,1 -14: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - srdl %r0,0(%r2) /* >> tk->shift */ - l %r2,__VDSO_XTIME_SEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 11b - basr %r5,0 -15: ltr %r0,%r0 - jnz 16f - cl %r1,20f-15b(%r5) - jl 17f -16: ahi %r2,1 - sl %r1,20f-15b(%r5) - brc 3,15b - ahi %r0,-1 - j 15b -17: st %r2,0(%r3) /* store tp->tv_sec */ - st %r1,4(%r3) /* store tp->tv_nsec */ - lhi %r2,0 - ahi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - - /* Fallback to system call */ - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD -19: lhi %r1,__NR_clock_gettime - svc 0 - ahi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - CFI_ENDPROC - -20: .long 1000000000 -21: .long _vdso_data - 0b - .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S deleted file mode 100644 index dc79e169f0ad..000000000000 --- a/arch/s390/kernel/vdso32/getcpu.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of getcpu() for 32 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2016 - * Author(s): Martin Schwidefsky - */ -#include -#include -#include - - .text - .align 4 - .globl __kernel_getcpu - .type __kernel_getcpu,@function -__kernel_getcpu: - CFI_STARTPROC - sacf 256 - lm %r4,%r5,__VDSO_GETCPU_VAL(%r0) - sacf 0 - ltr %r2,%r2 - jz 2f - st %r5,0(%r2) -2: ltr %r3,%r3 - jz 3f - st %r4,0(%r3) -3: lhi %r2,0 - br %r14 - CFI_ENDPROC - .size __kernel_getcpu,.-__kernel_getcpu diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S deleted file mode 100644 index b23063fbc892..000000000000 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of gettimeofday() for 32 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include -#include -#include -#include -#include - - .text - .align 4 - .globl __kernel_gettimeofday - .type __kernel_gettimeofday,@function -__kernel_gettimeofday: - CFI_STARTPROC - ahi %r15,-16 - CFI_ADJUST_CFA_OFFSET 16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD - basr %r5,0 -0: al %r5,13f-0b(%r5) /* get &_vdso_data */ -1: ltr %r3,%r3 /* check if tz is NULL */ - je 2f - mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) -2: ltr %r2,%r2 /* check if tv is NULL */ - je 10f - l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 1b - stcke 0(%r15) /* Store TOD clock */ - lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */ - s %r0,1(%r15) - sl %r1,5(%r15) - brc 3,14f - ahi %r0,-1 -14: ltr %r0,%r0 /* past end of steering? */ - jm 16f - srdl %r0,15 /* 1 per 2^16 */ - tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */ - jz 15f - lcr %r0,%r0 /* negative TOD offset */ - lcr %r1,%r1 - je 15f - ahi %r0,-1 -15: a %r0,1(%r15) /* add TOD timestamp */ - al %r1,5(%r15) - brc 12,17f - ahi %r0,1 - j 17f -16: lm %r0,%r1,1(%r15) /* load TOD timestamp */ -17: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - sl %r1,__VDSO_XTIME_STAMP+4(%r5) - brc 3,3f - ahi %r0,-1 -3: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ - st %r0,0(%r15) - l %r0,__VDSO_TK_MULT(%r5) - ltr %r1,%r1 - mr %r0,%r0 - jnm 4f - a %r0,__VDSO_TK_MULT(%r5) -4: al %r0,0(%r15) - al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ - al %r1,__VDSO_XTIME_NSEC+4(%r5) - brc 12,5f - ahi %r0,1 -5: mvc 0(4,%r15),__VDSO_XTIME_SEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 1b - l %r4,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - srdl %r0,0(%r4) /* >> tk->shift */ - l %r4,0(%r15) /* get tv_sec from stack */ - basr %r5,0 -6: ltr %r0,%r0 - jnz 7f - cl %r1,11f-6b(%r5) - jl 8f -7: ahi %r4,1 - sl %r1,11f-6b(%r5) - brc 3,6b - ahi %r0,-1 - j 6b -8: st %r4,0(%r2) /* store tv->tv_sec */ - ltr %r1,%r1 - m %r0,12f-6b(%r5) - jnm 9f - al %r0,12f-6b(%r5) -9: srl %r0,6 - st %r0,4(%r2) /* store tv->tv_usec */ -10: slr %r2,%r2 - ahi %r15,16 - CFI_ADJUST_CFA_OFFSET -16 - CFI_RESTORE 15 - br %r14 - CFI_ENDPROC -11: .long 1000000000 -12: .long 274877907 -13: .long _vdso_data - 0b - .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S deleted file mode 100644 index db19d0680a0a..000000000000 --- a/arch/s390/kernel/vdso32/note.S +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. - * Here we can supply some information useful to userland. - */ - -#include -#include -#include - -ELFNOTE_START(Linux, 0, "a") - .long LINUX_VERSION_CODE -ELFNOTE_END diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S deleted file mode 100644 index 721c4954cb6e..000000000000 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ /dev/null @@ -1,142 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is the infamous ld script for the 32 bits vdso - * library - */ - -#include -#include - -OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") -OUTPUT_ARCH(s390:31-bit) -ENTRY(_start) - -SECTIONS -{ - . = VDSO32_LBASE + SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note.*) } :text :note - - . = ALIGN(16); - .text : { - *(.text .stub .text.* .gnu.linkonce.t.*) - } :text - PROVIDE(__etext = .); - PROVIDE(_etext = .); - PROVIDE(etext = .); - - /* - * Other stuff is appended to the text segment: - */ - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } - .rodata1 : { *(.rodata1) } - - .dynamic : { *(.dynamic) } :text :dynamic - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } - - .rela.dyn ALIGN(8) : { *(.rela.dyn) } - .got ALIGN(8) : { *(.got .toc) } - - _end = .; - PROVIDE(end = .); - - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the - * beginning of the section so we begin them at 0. - */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* DWARF 3 */ - .debug_pubtypes 0 : { *(.debug_pubtypes) } - .debug_ranges 0 : { *(.debug_ranges) } - .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - - . = ALIGN(PAGE_SIZE); - PROVIDE(_vdso_data = .); - - /DISCARD/ : { - *(.note.GNU-stack) - *(.branch_lt) - *(.data .data.* .gnu.linkonce.d.* .sdata*) - *(.bss .sbss .dynbss .dynsbss) - } -} - -/* - * Very old versions of ld do not recognize this name token; use the constant. - */ -#define PT_GNU_EH_FRAME 0x6474e550 - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - VDSO_VERSION_STRING { - global: - /* - * Has to be there for the kernel to find - */ - __kernel_gettimeofday; - __kernel_clock_gettime; - __kernel_clock_getres; - __kernel_getcpu; - - local: *; - }; -} diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S deleted file mode 100644 index de2fb930471a..000000000000 --- a/arch/s390/kernel/vdso32/vdso32_wrapper.S +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include - - __PAGE_ALIGNED_DATA - - .globl vdso32_start, vdso32_end - .balign PAGE_SIZE -vdso32_start: - .incbin "arch/s390/kernel/vdso32/vdso32.so" - .balign PAGE_SIZE -vdso32_end: - - .previous