From 891f6a726cacbb87e5b06076693ffab53bd378d7 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 21 Jun 2018 14:49:38 +0200 Subject: [PATCH 1/5] s390: Correct register corruption in critical section cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the critical section cleanup we must not mess with r1. For march=z9 or older, larl + ex (instead of exrl) are used with r1 as a temporary register. This can clobber r1 in several interrupt handlers. Fix this by using r11 as a temp register. r11 is being saved by all callers of cleanup_critical. Fixes: 6dd85fbb87 ("s390: move expoline assembler macros to a header") Cc: stable@vger.kernel.org #v4.16 Reported-by: Oliver Kurz Reported-by: Petr Tesařík Signed-off-by: Christian Borntraeger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index f03402efab4b..3891805bfcdd 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1265,7 +1265,7 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs -0: BR_EX %r14 +0: BR_EX %r14,%r11 .align 8 .Lcleanup_table: @@ -1301,7 +1301,7 @@ cleanup_critical: ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - BR_EX %r14 + BR_EX %r14,%r11 #endif .Lcleanup_system_call: From 3284da34a87ab7a527a593f89bbdaf6debe9e713 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 12 Jun 2018 16:47:10 +0200 Subject: [PATCH 2/5] s390/dasd: reduce the default queue depth and nr of hardware queues Reduce the default values for the number of hardware queues and queue depth to significantly reduce the memory footprint of a DASD device. 
The memory consumption per DASD device reduces from approximately 40MB to approximately 1.5MB. This is necessary to build systems with a large number of DASD devices and a reasonable amount of memory. Performance measurements showed that good performance results are possible with the new default values even on systems with lots of CPUs and lots of alias devices. Fixes: e443343e509a ("s390/dasd: blk-mq conversion") Reviewed-by: Jan Hoeppner Reviewed-by: Peter Oberparleiter Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 13 +++++++++++-- drivers/s390/block/dasd_int.h | 8 -------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index d3a38c421503..a9f60d0ee02e 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -41,6 +41,15 @@ #define DASD_DIAG_MOD "dasd_diag_mod" +static unsigned int queue_depth = 32; +static unsigned int nr_hw_queues = 4; + +module_param(queue_depth, uint, 0444); +MODULE_PARM_DESC(queue_depth, "Default queue depth for new DASD devices"); + +module_param(nr_hw_queues, uint, 0444); +MODULE_PARM_DESC(nr_hw_queues, "Default number of hardware queues for new DASD devices"); + /* * SECTION: exported variables of dasd.c */ @@ -3115,8 +3124,8 @@ static int dasd_alloc_queue(struct dasd_block *block) block->tag_set.ops = &dasd_mq_ops; block->tag_set.cmd_size = sizeof(struct dasd_ccw_req); - block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES; - block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV; + block->tag_set.nr_hw_queues = nr_hw_queues; + block->tag_set.queue_depth = queue_depth; block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; rc = blk_mq_alloc_tag_set(&block->tag_set); diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 976b6bd4fb05..de6b96036aa4 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -228,14 +228,6 @@ struct dasd_ccw_req { #define 
DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */ #define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */ -/* - * There is no reliable way to determine the number of available CPUs on - * LPAR but there is no big performance difference between 1 and the - * maximum CPU number. - * 64 is a good trade off performance wise. - */ -#define DASD_NR_HW_QUEUES 64 -#define DASD_MAX_LCU_DEV 256 #define DASD_REQ_PER_DEV 4 /* Signature for error recovery functions. */ From dfa758638fd2d1184760deb2693abf76e982c53a Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 29 Jun 2018 19:54:01 +0200 Subject: [PATCH 3/5] s390/mm: fix refcount usage for 4K pgste s390 no longer uses the _mapcount field in struct page to identify the page table format being used. While the code was diligent in handling the different mappings, it neglected to turn "off" the map bits when alloc_pgste was being used. This resulted in bits remaining "on" in the _refcount field, and thus an artificially huge "in use" count that prevents the pages from actually being released by __free_page. There's opportunity for improvement in the "1 vs 3" vs "1U vs 3U" vs "0x1 vs 0x11" etc. variations for all these calls, I am just keeping things simple compared to neighboring code. 
Fixes: 620b4e903179 ("s390: use _refcount for pgtables") Reported-by: Halil Pasic Bisected-by: Vasily Gorbik Signed-off-by: Eric Farman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgalloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 84bd6329a88d..e3bd5627afef 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -252,6 +252,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) spin_unlock_bh(&mm->context.lock); if (mask != 0) return; + } else { + atomic_xor_bits(&page->_refcount, 3U << 24); } pgtable_page_dtor(page); @@ -304,6 +306,8 @@ static void __tlb_remove_table(void *_table) break; /* fallthrough */ case 3: /* 4K page table with pgstes */ + if (mask & 3) + atomic_xor_bits(&page->_refcount, 3 << 24); pgtable_page_dtor(page); __free_page(page); break; From 8bf935501a7ef1b2ec3b51c804d657d5895f221a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 30 Jun 2018 10:53:57 +0200 Subject: [PATCH 4/5] s390: wire up io_pgetevents system call Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/syscalls/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 8b210ead7956..4baefed5fefb 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -389,3 +389,4 @@ 379 common statx sys_statx compat_sys_statx 380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi 381 common kexec_file_load sys_kexec_file_load compat_sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents From 9d6d99e3ac8ccfd0945edb3c83cd912838775056 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 30 Jun 2018 10:54:15 +0200 Subject: [PATCH 5/5] s390: wire up rseq system call Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + 
arch/s390/kernel/compat_wrapper.c | 1 + arch/s390/kernel/entry.S | 4 ++++ arch/s390/kernel/signal.c | 3 ++- arch/s390/kernel/syscalls/syscall.tbl | 1 + 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index baed39772c84..e44bb2b2873e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -160,6 +160,7 @@ config S390 select HAVE_OPROFILE select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select MODULES_USE_ELF_RELA diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index 607c5e9fba3d..2ce28bf0c5ec 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -183,3 +183,4 @@ COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags); COMPAT_SYSCALL_WRAP5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags) +COMPAT_SYSCALL_WRAP4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3891805bfcdd..150130c897c3 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -357,6 +357,10 @@ ENTRY(system_call) stg %r2,__PT_R2(%r11) # store return value .Lsysc_return: +#ifdef CONFIG_DEBUG_RSEQ + lgr %r2,%r11 + brasl %r14,rseq_syscall +#endif LOCKDEP_SYS_EXIT .Lsysc_tif: TSTMSK __PT_FLAGS(%r11),_PIF_WORK diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 2d2960ab3e10..22f08245aa5d 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -498,7 +498,7 @@ void do_signal(struct pt_regs *regs) } /* 
No longer in a system call */ clear_pt_regs_flag(regs, PIF_SYSCALL); - + rseq_signal_deliver(&ksig, regs); if (is_compat_task()) handle_signal32(&ksig, oldset, regs); else @@ -537,4 +537,5 @@ void do_notify_resume(struct pt_regs *regs) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 4baefed5fefb..022fc099b628 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -390,3 +390,4 @@ 380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi 381 common kexec_file_load sys_kexec_file_load compat_sys_kexec_file_load 382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents +383 common rseq sys_rseq compat_sys_rseq