From 734781a9165a1d2fe6117a6435dd3f7619dd9794 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 28 Aug 2018 07:51:30 +0800 Subject: [PATCH 01/83] s390/dasd: remove unnecessary condition check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kmem_cache_destroy() can handle NULL pointer correctly, so there is no need to check NULL pointer before calling kmem_cache_destroy(). Signed-off-by: Chengguang Xu Signed-off-by: Jan Höppner Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a23e7d394a0a..5e9ebdb0594c 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3309,10 +3309,8 @@ dasd_exit(void) dasd_proc_exit(); #endif dasd_eer_exit(); - if (dasd_page_cache != NULL) { - kmem_cache_destroy(dasd_page_cache); - dasd_page_cache = NULL; - } + kmem_cache_destroy(dasd_page_cache); + dasd_page_cache = NULL; dasd_gendisk_exit(); dasd_devmap_exit(); if (dasd_debug_area != NULL) { From 6779df406b27ce44d989e965169db39fb58a7efa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Thu, 6 Sep 2018 13:16:40 +0200 Subject: [PATCH 02/83] s390/sclp: Allow to request adapter reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SCLP event 24 "Adapter Error Notification" supports three different action qualifier of which 'adapter reset' is currently not enabled in the sysfs interface. However, userspace tools might want to be able to use the reset functionality as well. Enable the 'adapter reset' qualifier. Signed-off-by: Jan Höppner Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp_pci.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index e44a8d7959f5..5d9420bbe165 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -95,6 +95,7 @@ extern struct sclp_info sclp; struct zpci_report_error_header { u8 version; /* Interface version byte */ u8 action; /* Action qualifier byte + * 0: Adapter Reset Request * 1: Deconfigure and repair action requested * (OpenCrypto Problem Call Home) * 2: Informational Report diff --git a/drivers/s390/char/sclp_pci.c b/drivers/s390/char/sclp_pci.c index e7c84a4e5eb5..995e9196852e 100644 --- a/drivers/s390/char/sclp_pci.c +++ b/drivers/s390/char/sclp_pci.c @@ -24,6 +24,7 @@ #define SCLP_ATYPE_PCI 2 +#define SCLP_ERRNOTIFY_AQ_RESET 0 #define SCLP_ERRNOTIFY_AQ_REPAIR 1 #define SCLP_ERRNOTIFY_AQ_INFO_LOG 2 @@ -111,9 +112,14 @@ static int sclp_pci_check_report(struct zpci_report_error_header *report) if (report->version != 1) return -EINVAL; - if (report->action != SCLP_ERRNOTIFY_AQ_REPAIR && - report->action != SCLP_ERRNOTIFY_AQ_INFO_LOG) + switch (report->action) { + case SCLP_ERRNOTIFY_AQ_RESET: + case SCLP_ERRNOTIFY_AQ_REPAIR: + case SCLP_ERRNOTIFY_AQ_INFO_LOG: + break; + default: return -EINVAL; + } if (report->length > (PAGE_SIZE - sizeof(struct err_notify_sccb))) return -EINVAL; From 827fd299a16404e1b8a64e5eb1830244ccd395cd Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Sat, 8 Sep 2018 16:50:21 +0800 Subject: [PATCH 03/83] s390/zcrypt: Use kmemdup to replace kmalloc + memcpy kmemdup has implemented the function that kmalloc() + memcpy() will do. We prefer to use the kmemdup function rather than an open coded implementation. 
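For illustration only, a minimal sketch (not taken from the driver itself) of
the equivalence this cleanup relies on: kmemdup() performs the allocation and
the copy in a single call and, like the open-coded pair, returns NULL when the
allocation fails.

  #include <linux/slab.h>
  #include <linux/string.h>

  /* Open-coded variant replaced throughout the driver ... */
  static void *dup_open_coded(const void *src, size_t len, gfp_t gfp)
  {
          void *p = kmalloc(len, gfp);

          if (p)
                  memcpy(p, src, len);
          return p;
  }

  /* ... and its one-call equivalent. */
  static void *dup_with_kmemdup(const void *src, size_t len, gfp_t gfp)
  {
          return kmemdup(src, len, gfp);
  }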
Signed-off-by: zhong jiang Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_msgtype6.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 2101776a8148..9ac960d83c28 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1131,10 +1131,9 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB, ap_msg->receive = zcrypt_msgtype6_receive; ap_msg->psmid = (((unsigned long long) current->pid) << 32) + atomic_inc_return(&zcrypt_step); - ap_msg->private = kmalloc(sizeof(resp_type), GFP_KERNEL); + ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL); if (!ap_msg->private) return -ENOMEM; - memcpy(ap_msg->private, &resp_type, sizeof(resp_type)); return XCRB_msg_to_type6CPRB_msgX(ap_msg, xcRB, func_code, dom); } @@ -1187,10 +1186,9 @@ unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb, ap_msg->receive = zcrypt_msgtype6_receive_ep11; ap_msg->psmid = (((unsigned long long) current->pid) << 32) + atomic_inc_return(&zcrypt_step); - ap_msg->private = kmalloc(sizeof(resp_type), GFP_KERNEL); + ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL); if (!ap_msg->private) return -ENOMEM; - memcpy(ap_msg->private, &resp_type, sizeof(resp_type)); return xcrb_msg_to_type6_ep11cprb_msgx(ap_msg, xcrb, func_code); } @@ -1282,10 +1280,9 @@ unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code, ap_msg->receive = zcrypt_msgtype6_receive; ap_msg->psmid = (((unsigned long long) current->pid) << 32) + atomic_inc_return(&zcrypt_step); - ap_msg->private = kmalloc(sizeof(resp_type), GFP_KERNEL); + ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL); if (!ap_msg->private) return -ENOMEM; - memcpy(ap_msg->private, &resp_type, sizeof(resp_type)); rng_type6CPRB_msgX(ap_msg, ZCRYPT_RNG_BUFFER_SIZE, domain); From 54be9d12b5c1c07fcd9eb05a7725882a9d9e500f Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Sat, 8 Sep 2018 18:26:27 +0800 Subject: [PATCH 04/83] s390: vmlogrdr: Use ARRAY_SIZE instead of reimplementing its function Use the common code ARRAY_SIZE macro instead of a private implementation. 
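As a hedged illustration (the array name below is made up and merely stands in
for sys_ser[]): ARRAY_SIZE() yields the element count of a statically sized
array and, unlike the open-coded division, fails to compile when it is
accidentally applied to a pointer.

  #include <linux/kernel.h>       /* ARRAY_SIZE() */

  static int demo[16];            /* hypothetical array for the example */

  #define DEMO_MAX_OPEN_CODED     (sizeof(demo) / sizeof(demo[0]))
  #define DEMO_MAX                ARRAY_SIZE(demo)        /* same value: 16 */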
Reviewed-by: Jean Delvare Signed-off-by: zhong jiang Signed-off-by: Martin Schwidefsky --- drivers/s390/char/vmlogrdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 069b9ef08206..58333cb4503f 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -153,7 +153,7 @@ static struct vmlogrdr_priv_t sys_ser[] = { } }; -#define MAXMINOR (sizeof(sys_ser)/sizeof(struct vmlogrdr_priv_t)) +#define MAXMINOR ARRAY_SIZE(sys_ser) static char FENCE[] = {"EOR"}; static int vmlogrdr_major = 0; From 198ee66a0e970bc57872095cc3a79722bbf36306 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 11 Sep 2018 09:18:58 +0100 Subject: [PATCH 05/83] s390/tape: fix spelling mistake "partion" -> "partition" Trivial fix to spelling mistake in message text Signed-off-by: Colin Ian King Signed-off-by: Martin Schwidefsky --- drivers/s390/char/tape_3590.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index cdcde18e7220..4554cdf4d6bd 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -971,7 +971,7 @@ tape_3590_print_mim_msg_f0(struct tape_device *device, struct irb *irb) snprintf(exception, BUFSIZE, "Data degraded"); break; case 0x03: - snprintf(exception, BUFSIZE, "Data degraded in partion %i", + snprintf(exception, BUFSIZE, "Data degraded in partition %i", sense->fmt.f70.mp); break; case 0x04: From 8e5a7627b5881ee0551b7ee02d41c2a983358842 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 12 Sep 2018 08:54:59 +0200 Subject: [PATCH 06/83] s390: add initial 64-bit restart PSW To be able to start a kernel image loaded into memory with a PSW restart, place a 64-bit restart PSW at 0x1a0 in absolute lowcore. Suggested-by: Dominik Klein Tested-by: Dominik Klein Signed-off-by: Martin Schwidefsky --- arch/s390/boot/head.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index f721913b73f1..f1cdca8ad3cc 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -60,6 +60,9 @@ __HEAD .long 0x02000690,0x60000050 .long 0x020006e0,0x20000050 + .org 0x1a0 + .quad 0,iplstart + .org 0x200 # From 1c472d46283263497adccd7a0bec64ee2f9c09e5 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Mon, 17 Sep 2018 15:23:03 +0200 Subject: [PATCH 07/83] s390/zcrypt: enable AP bus scan without a valid default domain The AP bus scan is aborted before doing anything worth mentioning if ap_select_domain() fails, e.g. if the ap_rights.aqm mask is all zeros. As the result of this the ap bus fails to manage (e.g. create and register) devices like it is supposed to. Let us make ap_scan_bus() work even if ap_select_domain() can't select a default domain. Let's also make ap_select_domain() return void, as there are no more callers interested in its return value. 
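A minimal model of the selection step, with invented names and purely to
restate the idea: pick the domain that is usable on the most adapters, and
accept that no candidate may be found, in which case the caller now simply
continues the bus scan.

  #include <linux/types.h>

  /* Illustrative sketch only; not the ap_bus implementation. */
  static int pick_default_domain(int cards, int domains,
                                 bool (*usable)(int card, int domain))
  {
          int card, dom, count, best = -1, max = 0;

          for (dom = 0; dom < domains; dom++) {
                  count = 0;
                  for (card = 0; card < cards; card++)
                          if (usable(card, dom))
                                  count++;
                  if (count > max) {
                          max = count;
                          best = dom;
                  }
          }
          return best;    /* may stay -1: callers must tolerate that */
  }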
Signed-off-by: Halil Pasic Reported-by: Michael Mueller Fixes: 7e0bdbe5c21c "s390/zcrypt: AP bus support for alternate driver(s)" [freude@linux.ibm.com: title and patch header slightly modified] Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index f039266b275d..4b6a091c1225 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1218,11 +1218,10 @@ static struct bus_attribute *const ap_bus_attrs[] = { }; /** - * ap_select_domain(): Select an AP domain. - * - * Pick one of the 16 AP domains. + * ap_select_domain(): Select an AP domain if possible and we haven't + * already done so before. */ -static int ap_select_domain(void) +static void ap_select_domain(void) { int count, max_count, best_domain; struct ap_queue_status status; @@ -1237,7 +1236,7 @@ static int ap_select_domain(void) if (ap_domain_index >= 0) { /* Domain has already been selected. */ spin_unlock_bh(&ap_domain_lock); - return 0; + return; } best_domain = -1; max_count = 0; @@ -1264,11 +1263,8 @@ static int ap_select_domain(void) if (best_domain >= 0) { ap_domain_index = best_domain; AP_DBF(DBF_DEBUG, "new ap_domain_index=%d\n", ap_domain_index); - spin_unlock_bh(&ap_domain_lock); - return 0; } spin_unlock_bh(&ap_domain_lock); - return -ENODEV; } /* @@ -1346,8 +1342,7 @@ static void ap_scan_bus(struct work_struct *unused) AP_DBF(DBF_DEBUG, "%s running\n", __func__); ap_query_configuration(ap_configuration); - if (ap_select_domain() != 0) - goto out; + ap_select_domain(); for (id = 0; id < AP_DEVICES; id++) { /* check if device is registered */ @@ -1467,12 +1462,11 @@ static void ap_scan_bus(struct work_struct *unused) } } /* end device loop */ - if (defdomdevs < 1) + if (ap_domain_index >= 0 && defdomdevs < 1) AP_DBF(DBF_INFO, "no queue device with default domain %d available\n", ap_domain_index); -out: mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ); } From d1befa65823e9c6d013883b8a41d081ec338c489 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 14 Sep 2018 17:29:39 +0200 Subject: [PATCH 08/83] s390/vdso: avoid 64-bit vdso mapping for compat tasks vdso_fault used is_compat_task function (on s390 it tests "current" thread_info flags) to distinguish compat tasks and map 31-bit vdso pages. But "current" task might not correspond to mm context. When 31-bit compat inferior is executed under gdb, gdb does PTRACE_PEEKTEXT on vdso page, causing vdso_fault with "current" being 64-bit gdb process. So, 31-bit inferior ends up with 64-bit vdso mapped. To avoid this problem a new compat_mm flag has been introduced into mm context. This flag is used in vdso_fault and vdso_mremap instead of is_compat_task. Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 2 ++ arch/s390/include/asm/mmu_context.h | 1 + arch/s390/kernel/vdso.c | 8 +++++--- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index a8418e1379eb..bcfb6371086f 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -32,6 +32,8 @@ typedef struct { unsigned int uses_cmm:1; /* The gmaps associated with this context are allowed to use huge pages. 
*/ unsigned int allow_gmap_hpage_1m:1; + /* The mmu context is for compat task */ + unsigned int compat_mm:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 0717ee76885d..dbd689d556ce 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -25,6 +25,7 @@ static inline int init_new_context(struct task_struct *tsk, atomic_set(&mm->context.flush_count, 0); mm->context.gmap_asce = 0; mm->context.flush_mm = 0; + mm->context.compat_mm = 0; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste || test_thread_flag(TIF_PGSTE) || diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 3031cc6dd0ab..ec31b48a42a5 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -56,7 +56,7 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, vdso_pagelist = vdso64_pagelist; vdso_pages = vdso64_pages; #ifdef CONFIG_COMPAT - if (is_compat_task()) { + if (vma->vm_mm->context.compat_mm) { vdso_pagelist = vdso32_pagelist; vdso_pages = vdso32_pages; } @@ -77,7 +77,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm, vdso_pages = vdso64_pages; #ifdef CONFIG_COMPAT - if (is_compat_task()) + if (vma->vm_mm->context.compat_mm) vdso_pages = vdso32_pages; #endif @@ -224,8 +224,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vdso_pages = vdso64_pages; #ifdef CONFIG_COMPAT - if (is_compat_task()) + if (is_compat_task()) { vdso_pages = vdso32_pages; + mm->context.compat_mm = 1; + } #endif /* * vDSO has a problem and was disabled, just don't "enable" it for From 26f4414a45b808f83d42d6fd2fbf4a59ef25e84b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 14 Sep 2018 18:08:10 +0200 Subject: [PATCH 09/83] s390/vdso: correct CFI annotations of vDSO functions Correct stack frame overhead for 31-bit vdso, which should be 96 rather then 160. This is done by reusing STACK_FRAME_OVERHEAD definition which contains correct value based on build flags. This fixes stack unwinding within vdso code for 31-bit processes. While at it replace all hard coded stack frame overhead values with the same definition in vdso64 as well. 
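For context, a simplified sketch (not the actual kernel header) of why a single
definition can serve both vDSOs: the s390 ABIs reserve differently sized
register save areas at the bottom of each stack frame, and a build-flag
dependent constant selects the right one.

  /* Illustration only; the real value comes from the kernel headers. */
  #ifdef __s390x__
  #define STACK_FRAME_OVERHEAD    160     /* 64-bit ABI register save area */
  #else
  #define STACK_FRAME_OVERHEAD    96      /* 31-bit ABI register save area */
  #endif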
Reviewed-by: Hendrik Brueckner Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vdso32/clock_gettime.S | 19 ++++++++++--------- arch/s390/kernel/vdso32/gettimeofday.S | 3 ++- arch/s390/kernel/vdso64/clock_gettime.S | 25 +++++++++++++------------ arch/s390/kernel/vdso64/gettimeofday.S | 3 ++- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index a9418bf975db..ada5c11a16e5 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -10,6 +10,7 @@ #include #include #include +#include .text .align 4 @@ -18,8 +19,8 @@ __kernel_clock_gettime: CFI_STARTPROC ahi %r15,-16 - CFI_DEF_CFA_OFFSET 176 - CFI_VAL_OFFSET 15, -160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD basr %r5,0 0: al %r5,21f-0b(%r5) /* get &_vdso_data */ chi %r2,__CLOCK_REALTIME_COARSE @@ -72,13 +73,13 @@ __kernel_clock_gettime: st %r1,4(%r3) /* store tp->tv_nsec */ lhi %r2,0 ahi %r15,16 - CFI_DEF_CFA_OFFSET 160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD CFI_RESTORE 15 br %r14 /* CLOCK_MONOTONIC_COARSE */ - CFI_DEF_CFA_OFFSET 176 - CFI_VAL_OFFSET 15, -160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD 9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 9b @@ -158,17 +159,17 @@ __kernel_clock_gettime: st %r1,4(%r3) /* store tp->tv_nsec */ lhi %r2,0 ahi %r15,16 - CFI_DEF_CFA_OFFSET 160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD CFI_RESTORE 15 br %r14 /* Fallback to system call */ - CFI_DEF_CFA_OFFSET 176 - CFI_VAL_OFFSET 15, -160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD 19: lhi %r1,__NR_clock_gettime svc 0 ahi %r15,16 - CFI_DEF_CFA_OFFSET 160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD CFI_RESTORE 15 br %r14 CFI_ENDPROC diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index 3c0db0fa6ad9..b23063fbc892 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -10,6 +10,7 @@ #include #include #include +#include .text .align 4 @@ -19,7 +20,7 @@ __kernel_gettimeofday: CFI_STARTPROC ahi %r15,-16 CFI_ADJUST_CFA_OFFSET 16 - CFI_VAL_OFFSET 15, -160 + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD basr %r5,0 0: al %r5,13f-0b(%r5) /* get &_vdso_data */ 1: ltr %r3,%r3 /* check if tz is NULL */ diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index fac3ab5ec83a..9d2ee79b90f2 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -10,6 +10,7 @@ #include #include #include +#include .text .align 4 @@ -18,8 +19,8 @@ __kernel_clock_gettime: CFI_STARTPROC aghi %r15,-16 - CFI_DEF_CFA_OFFSET 176 - CFI_VAL_OFFSET 15, -160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD larl %r5,_vdso_data cghi %r2,__CLOCK_REALTIME_COARSE je 4f @@ -56,13 +57,13 @@ __kernel_clock_gettime: stg %r1,8(%r3) /* store tp->tv_nsec */ lghi %r2,0 aghi %r15,16 - CFI_DEF_CFA_OFFSET 160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD CFI_RESTORE 15 br %r14 /* CLOCK_MONOTONIC_COARSE */ - CFI_DEF_CFA_OFFSET 176 - CFI_VAL_OFFSET 15, -160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD 3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? 
loop */ jnz 3b @@ -115,13 +116,13 @@ __kernel_clock_gettime: stg %r1,8(%r3) /* store tp->tv_nsec */ lghi %r2,0 aghi %r15,16 - CFI_DEF_CFA_OFFSET 160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD CFI_RESTORE 15 br %r14 /* CPUCLOCK_VIRT for this thread */ - CFI_DEF_CFA_OFFSET 176 - CFI_VAL_OFFSET 15, -160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD 9: lghi %r4,0 icm %r0,15,__VDSO_ECTG_OK(%r5) jz 12f @@ -142,17 +143,17 @@ __kernel_clock_gettime: stg %r4,8(%r3) lghi %r2,0 aghi %r15,16 - CFI_DEF_CFA_OFFSET 160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD CFI_RESTORE 15 br %r14 /* Fallback to system call */ - CFI_DEF_CFA_OFFSET 176 - CFI_VAL_OFFSET 15, -160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD 12: lghi %r1,__NR_clock_gettime svc 0 aghi %r15,16 - CFI_DEF_CFA_OFFSET 160 + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD CFI_RESTORE 15 br %r14 CFI_ENDPROC diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index 6e1f0b421695..aebe10dc7c99 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -10,6 +10,7 @@ #include #include #include +#include .text .align 4 @@ -19,7 +20,7 @@ __kernel_gettimeofday: CFI_STARTPROC aghi %r15,-16 CFI_ADJUST_CFA_OFFSET 16 - CFI_VAL_OFFSET 15, -160 + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD larl %r5,_vdso_data 0: ltgr %r3,%r3 /* check if tz is NULL */ je 1f From 4e62d458850069c9f05e03f99be1a817967e201f Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 14 Sep 2018 17:35:46 +0200 Subject: [PATCH 10/83] s390: clean up stacks setup Replace hard coded stack frame overhead values with STACK_FRAME_OVERHEAD definition. Avoid unnecessary arithmetic instructions. Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/head.S | 4 ++-- arch/s390/boot/head.S | 3 +-- arch/s390/kernel/head64.S | 6 +++--- arch/s390/kernel/swsusp.S | 4 +--- arch/s390/purgatory/head.S | 4 ++-- 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S index df8dbbc17bcc..4041fcfd8980 100644 --- a/arch/s390/boot/compressed/head.S +++ b/arch/s390/boot/compressed/head.S @@ -12,6 +12,7 @@ #include #include #include +#include #include "sizes.h" __HEAD @@ -20,7 +21,6 @@ ENTRY(startup_decompressor) .LPG1: # setup stack lg %r15,.Lstack-.LPG1(%r13) - aghi %r15,-160 brasl %r14,decompress_kernel # Set up registers for memory mover. 
We move the decompressed image to # 0x100000, where startup_continue of the decompressed image is supposed @@ -45,7 +45,7 @@ mover_end: .align 8 .Lstack: - .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) + .quad 0x8000 + THREAD_SIZE - STACK_FRAME_OVERHEAD .Loffset: .quad 0x100000 .Lmvsize: diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index f1cdca8ad3cc..d0736a05bc7f 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -311,7 +311,6 @@ ENTRY(startup_kdump) spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) l %r15,.Lstack-.LPG0(%r13) - ahi %r15,-STACK_FRAME_OVERHEAD brasl %r14,verify_facilities #ifdef CONFIG_KERNEL_UNCOMPRESSED jg startup_continue @@ -320,7 +319,7 @@ ENTRY(startup_kdump) #endif .Lstack: - .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) + .long 0x8000 + THREAD_SIZE - STACK_FRAME_OVERHEAD .align 8 6: .long 0x7fffffff,0xffffffff diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 6d14ad42ba88..b31dfb102700 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -14,6 +14,7 @@ #include #include #include +#include __HEAD ENTRY(startup_continue) @@ -35,10 +36,9 @@ ENTRY(startup_continue) # larl %r14,init_task stg %r14,__LC_CURRENT - larl %r15,init_thread_union - aghi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) # init_task_union + THREAD_SIZE + larl %r15,init_thread_union+THREAD_SIZE stg %r15,__LC_KERNEL_STACK # set end of kernel stack - aghi %r15,-160 + aghi %r15,-STACK_FRAME_OVERHEAD # # Early setup functions that may not rely on an initialized bss section, # like moving the initrd. Returns with an initialized bss section. diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index c1a080b11ae9..34b014b5cf03 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -197,9 +197,7 @@ pgm_check_entry: brc 2,3b /* busy, try again */ /* Suspend CPU not available -> panic */ - larl %r15,init_thread_union - aghi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) - aghi %r15,-STACK_FRAME_OVERHEAD + larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD larl %r2,.Lpanic_string brasl %r14,sclp_early_printk_force larl %r3,.Ldisabled_wait_31 diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index 2e3707b12edd..5a10ce34b95d 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -11,6 +11,7 @@ #include #include #include +#include /* The purgatory is the code running between two kernels. It's main purpose * is to verify that the next kernel was not corrupted after load and to @@ -88,8 +89,7 @@ ENTRY(purgatory_start) .base_crash: /* Setup stack */ - larl %r15,purgatory_end - aghi %r15,-160 + larl %r15,purgatory_end-STACK_FRAME_OVERHEAD /* If the next kernel is KEXEC_TYPE_CRASH the purgatory is called * directly with a flag passed in %r2 whether the purgatory shall do From ccc413f621432fcb5dabb751d42148795f59a816 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 15 May 2018 21:17:38 +0200 Subject: [PATCH 11/83] s390/qdio: clean up AOB handling I've stumbled over this too many times now... AOBs are only ever used on Output Queues. So in qdio_kick_handler(), move the call to their handler into the Output-only path, and get rid of the convoluted contains_aobs() helper. No functional change. While at it, also remove 1. the unused sbal_state->aob field. For processing an async completion, upper-layer drivers get their AOB pointer from the CQ buffer. 2. an unused EXPORT for qdio_allocate_aob(). 
External users would have no way of passing an allocated AOB back into qdio.ko anyways... Signed-off-by: Julian Wiedmann Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/qdio.h | 2 -- drivers/s390/cio/qdio_main.c | 15 ++------------- drivers/s390/cio/qdio_setup.c | 1 - 3 files changed, 2 insertions(+), 16 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 9c9970a5dfb1..d46edde7e458 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -252,13 +252,11 @@ struct slsb { * (for communication with upper layer programs) * (only required for use with completion queues) * @flags: flags indicating state of buffer - * @aob: pointer to QAOB used for the particular SBAL * @user: pointer to upper layer program's state information related to SBAL * (stored in user1 data of QAOB) */ struct qdio_outbuf_state { u8 flags; - struct qaob *aob; void *user; }; diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 9c7d9da42ba0..9537e656e927 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -595,19 +595,11 @@ static inline int qdio_inbound_q_done(struct qdio_q *q) return 0; } -static inline int contains_aobs(struct qdio_q *q) -{ - return !q->is_input_q && q->u.out.use_cq; -} - static inline void qdio_handle_aobs(struct qdio_q *q, int start, int count) { unsigned char state = 0; int j, b = start; - if (!contains_aobs(q)) - return; - for (j = 0; j < count; ++j) { get_buf_state(q, b, &state, 0); if (state == SLSB_P_OUTPUT_PENDING) { @@ -618,8 +610,6 @@ static inline void qdio_handle_aobs(struct qdio_q *q, int start, int count) q->u.out.sbal_state[b].flags |= QDIO_OUTBUF_STATE_FLAG_PENDING; q->u.out.aobs[b] = NULL; - } else if (state == SLSB_P_OUTPUT_EMPTY) { - q->u.out.sbal_state[b].aob = NULL; } b = next_buf(b); } @@ -638,7 +628,6 @@ static inline unsigned long qdio_aob_for_buffer(struct qdio_output_q *q, q->aobs[bufnr] = aob; } if (q->aobs[bufnr]) { - q->sbal_state[bufnr].aob = q->aobs[bufnr]; q->aobs[bufnr]->user1 = (u64) q->sbal_state[bufnr].user; phys_aob = virt_to_phys(q->aobs[bufnr]); WARN_ON_ONCE(phys_aob & 0xFF); @@ -666,10 +655,10 @@ static void qdio_kick_handler(struct qdio_q *q) qperf_inc(q, outbound_handler); DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "koh: s:%02x c:%02x", start, count); + if (q->u.out.use_cq) + qdio_handle_aobs(q, start, count); } - qdio_handle_aobs(q, start, count); - q->handler(q->irq_ptr->cdev, q->qdio_error, q->nr, start, count, q->irq_ptr->int_parm); diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 78f1be41b05e..e324d890a4f6 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -27,7 +27,6 @@ struct qaob *qdio_allocate_aob(void) { return kmem_cache_zalloc(qdio_aob_cache, GFP_ATOMIC); } -EXPORT_SYMBOL_GPL(qdio_allocate_aob); void qdio_release_aob(struct qaob *aob) { From 00fab2350e6b91e57b3cdcd5d9f01056775a921d Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 17 Sep 2018 16:18:41 +0200 Subject: [PATCH 12/83] s390/zcrypt: multiple zcrypt device nodes support This patch is an extension to the zcrypt device driver to provide, support and maintain multiple zcrypt device nodes. The individual zcrypt device nodes can be restricted in terms of crypto cards, domains and available ioctls. Such a device node can be used as a base for container solutions like docker to control and restrict the access to crypto resources. The handling is done with a new sysfs subdir /sys/class/zcrypt. 
Echoing a name (or an empty sting) into the attribute "create" creates a new zcrypt device node. In /sys/class/zcrypt a new link will appear which points to the sysfs device tree of this new device. The attribute files "ioctlmask", "apmask" and "aqmask" in this directory are used to customize this new zcrypt device node instance. Finally the zcrypt device node can be destroyed by echoing the name into /sys/class/zcrypt/destroy. The internal structs holding the device info are reference counted - so a destroy will not hard remove a device but only marks it as removable when the reference counter drops to zero. The mask values are bitmaps in big endian order starting with bit 0. So adapter number 0 is the leftmost bit, mask is 0x8000... The sysfs attributes accept 2 different formats: * Absolute hex string starting with 0x like "0x12345678" does set the mask starting from left to right. If the given string is shorter than the mask it is padded with 0s on the right. If the string is longer than the mask an error comes back (EINVAL). * Relative format - a concatenation (done with ',') of the terms +[-] or -[-]. may be any valid number (hex, decimal or octal) in the range 0...255. Here are some examples: "+0-15,+32,-128,-0xFF" "-0-255,+1-16,+0x128" "+1,+2,+3,+4,-5,-7-10" A simple usage examples: # create new zcrypt device 'my_zcrypt': echo "my_zcrypt" >/sys/class/zcrypt/create # go into the device dir of this new device echo "my_zcrypt" >create cd my_zcrypt/ ls -l total 0 -rw-r--r-- 1 root root 4096 Jul 20 15:23 apmask -rw-r--r-- 1 root root 4096 Jul 20 15:23 aqmask -r--r--r-- 1 root root 4096 Jul 20 15:23 dev -rw-r--r-- 1 root root 4096 Jul 20 15:23 ioctlmask lrwxrwxrwx 1 root root 0 Jul 20 15:23 subsystem -> ../../../../class/zcrypt ... # customize this zcrypt node clone # enable only adapter 0 and 2 echo "0xa0" >apmask # enable only domain 6 echo "+6" >aqmask # enable all 256 ioctls echo "+0-255" >ioctls # now the /dev/my_zcrypt may be used # finally destroy it echo "my_zcrypt" >/sys/class/zcrypt/destroy Please note that a very similar 'filtering behavior' also applies to the parent z90crypt device. The two mask attributes apmask and aqmask in /sys/bus/ap act the very same for the z90crypt device node. However the implementation here is totally different as the ap bus acts on bind/unbind of queue devices and associated drivers but the effect is still the same. So there are two filters active for each additional zcrypt device node: The adapter/domain needs to be enabled on the ap bus level and it needs to be active on the zcrypt device node level. 
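To make the bit ordering concrete, here is a small userspace-style sketch (the
function name is invented; it is not the kernel's test_bit_inv()) of the
"bit 0 is the leftmost bit" convention: adapter or domain number n maps to the
n-th most significant bit of the mask words.

  #include <stdbool.h>
  #include <stdint.h>

  /* Illustration only: bit 0 is the most significant bit of the first word. */
  static bool msb0_test_bit(const uint64_t *mask, unsigned int nr)
  {
          unsigned int word = nr / 64;
          unsigned int bit  = 63 - (nr % 64);

          return (mask[word] >> bit) & 1;
  }

With this convention an apmask whose leading byte is 0xa0 enables only
adapters 0 and 2, matching the example further below.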
Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/defconfig | 1 + arch/s390/include/uapi/asm/zcrypt.h | 21 +- drivers/crypto/Kconfig | 11 + drivers/s390/crypto/ap_bus.c | 45 +- drivers/s390/crypto/ap_bus.h | 25 ++ drivers/s390/crypto/zcrypt_api.c | 623 ++++++++++++++++++++++++++-- 6 files changed, 663 insertions(+), 63 deletions(-) diff --git a/arch/s390/defconfig b/arch/s390/defconfig index f40600eb1762..20add000dd6d 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -232,6 +232,7 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_ZCRYPT=m +CONFIG_ZCRYPT_MULTIDEVNODES=y CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_SHA1_S390=m diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 2bb1f3bb98ac..196a3047fb0a 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -2,9 +2,9 @@ /* * include/asm-s390/zcrypt.h * - * zcrypt 2.1.0 (user-visible header) + * zcrypt 2.2.0 (user-visible header) * - * Copyright IBM Corp. 2001, 2006 + * Copyright IBM Corp. 2001, 2018 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) * @@ -15,12 +15,15 @@ #define __ASM_S390_ZCRYPT_H #define ZCRYPT_VERSION 2 -#define ZCRYPT_RELEASE 1 -#define ZCRYPT_VARIANT 1 +#define ZCRYPT_RELEASE 2 +#define ZCRYPT_VARIANT 0 #include #include +/* Name of the zcrypt device driver. */ +#define ZCRYPT_NAME "zcrypt" + /** * struct ica_rsa_modexpo * @@ -309,6 +312,16 @@ struct zcrypt_device_matrix_ext { #define ZCRYPT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x59, char[MAX_ZDEV_CARDIDS_EXT]) #define ZCRYPT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x5a, int[MAX_ZDEV_CARDIDS_EXT]) +/* + * Support for multiple zcrypt device nodes. + */ + +/* Nr of minor device node numbers to allocate. */ +#define ZCRYPT_MAX_MINOR_NODES 256 + +/* Max amount of possible ioctls */ +#define MAX_ZDEV_IOCTLS (1 << _IOC_NRBITS) + /* * Only deprecated defines, structs and ioctls below this line. */ diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index a8c4ce07fc9d..caa98a7fe392 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -73,6 +73,17 @@ config ZCRYPT + Crypto Express 2,3,4 or 5 Accelerator (CEXxA) + Crypto Express 4 or 5 EP11 Coprocessor (CEXxP) +config ZCRYPT_MULTIDEVNODES + bool "Support for multiple zcrypt device nodes" + default y + depends on S390 + depends on ZCRYPT + help + With this option enabled the zcrypt device driver can + provide multiple devices nodes in /dev. Each device + node can get customized to limit access and narrow + down the use of the available crypto hardware. 
+ config PKEY tristate "Kernel API for protected key handling" depends on S390 diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 4b6a091c1225..15bca7583bb9 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -65,12 +65,11 @@ static struct device *ap_root_device; DEFINE_SPINLOCK(ap_list_lock); LIST_HEAD(ap_card_list); -/* Default permissions (card and domain masking) */ -static struct ap_perms { - DECLARE_BITMAP(apm, AP_DEVICES); - DECLARE_BITMAP(aqm, AP_DOMAINS); -} ap_perms; -static DEFINE_MUTEX(ap_perms_mutex); +/* Default permissions (ioctl, card and domain masking) */ +struct ap_perms ap_perms; +EXPORT_SYMBOL(ap_perms); +DEFINE_MUTEX(ap_perms_mutex); +EXPORT_SYMBOL(ap_perms_mutex); static struct ap_config_info *ap_configuration; static bool initialised; @@ -944,21 +943,9 @@ static int modify_bitmap(const char *str, unsigned long *bitmap, int bits) return 0; } -/* - * process_mask_arg() - parse a bitmap string and clear/set the - * bits in the bitmap accordingly. The string may be given as - * absolute value, a hex string like 0x1F2E3D4C5B6A" simple over- - * writing the current content of the bitmap. Or as relative string - * like "+1-16,-32,-0x40,+128" where only single bits or ranges of - * bits are cleared or set. Distinction is done based on the very - * first character which may be '+' or '-' for the relative string - * and othewise assume to be an absolute value string. If parsing fails - * a negative errno value is returned. All arguments and bitmaps are - * big endian order. - */ -static int process_mask_arg(const char *str, - unsigned long *bitmap, int bits, - struct mutex *lock) +int ap_parse_mask_str(const char *str, + unsigned long *bitmap, int bits, + struct mutex *lock) { unsigned long *newmap, size; int rc; @@ -989,6 +976,7 @@ static int process_mask_arg(const char *str, kfree(newmap); return rc; } +EXPORT_SYMBOL(ap_parse_mask_str); /* * AP bus attributes. 
@@ -1161,7 +1149,7 @@ static ssize_t apmask_store(struct bus_type *bus, const char *buf, { int rc; - rc = process_mask_arg(buf, ap_perms.apm, AP_DEVICES, &ap_perms_mutex); + rc = ap_parse_mask_str(buf, ap_perms.apm, AP_DEVICES, &ap_perms_mutex); if (rc) return rc; @@ -1192,7 +1180,7 @@ static ssize_t aqmask_store(struct bus_type *bus, const char *buf, { int rc; - rc = process_mask_arg(buf, ap_perms.aqm, AP_DOMAINS, &ap_perms_mutex); + rc = ap_parse_mask_str(buf, ap_perms.aqm, AP_DOMAINS, &ap_perms_mutex); if (rc) return rc; @@ -1490,21 +1478,22 @@ static int __init ap_debug_init(void) static void __init ap_perms_init(void) { /* all resources useable if no kernel parameter string given */ + memset(&ap_perms.ioctlm, 0xFF, sizeof(ap_perms.ioctlm)); memset(&ap_perms.apm, 0xFF, sizeof(ap_perms.apm)); memset(&ap_perms.aqm, 0xFF, sizeof(ap_perms.aqm)); /* apm kernel parameter string */ if (apm_str) { memset(&ap_perms.apm, 0, sizeof(ap_perms.apm)); - process_mask_arg(apm_str, ap_perms.apm, AP_DEVICES, - &ap_perms_mutex); + ap_parse_mask_str(apm_str, ap_perms.apm, AP_DEVICES, + &ap_perms_mutex); } /* aqm kernel parameter string */ if (aqm_str) { memset(&ap_perms.aqm, 0, sizeof(ap_perms.aqm)); - process_mask_arg(aqm_str, ap_perms.aqm, AP_DOMAINS, - &ap_perms_mutex); + ap_parse_mask_str(aqm_str, ap_perms.aqm, AP_DOMAINS, + &ap_perms_mutex); } } @@ -1527,7 +1516,7 @@ static int __init ap_module_init(void) return -ENODEV; } - /* set up the AP permissions (ap and aq masks) */ + /* set up the AP permissions (ioctls, ap and aq masks) */ ap_perms_init(); /* Get AP configuration data if available */ diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 5246cd8c16a6..3eed1b36c876 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -20,6 +20,7 @@ #define AP_DEVICES 256 /* Number of AP devices. */ #define AP_DOMAINS 256 /* Number of AP domains. */ +#define AP_IOCTLS 256 /* Number of ioctls. */ #define AP_RESET_TIMEOUT (HZ*0.7) /* Time in ticks for reset timeouts. */ #define AP_CONFIG_TIME 30 /* Time in seconds between AP bus rescans. */ #define AP_POLL_TIME 1 /* Time in ticks between receive polls. */ @@ -257,6 +258,14 @@ void ap_queue_resume(struct ap_device *ap_dev); struct ap_card *ap_card_create(int id, int queue_depth, int raw_device_type, int comp_device_type, unsigned int functions); +struct ap_perms { + unsigned long ioctlm[BITS_TO_LONGS(AP_IOCTLS)]; + unsigned long apm[BITS_TO_LONGS(AP_DEVICES)]; + unsigned long aqm[BITS_TO_LONGS(AP_DOMAINS)]; +}; +extern struct ap_perms ap_perms; +extern struct mutex ap_perms_mutex; + /* * check APQN for owned/reserved by ap bus and default driver(s). * Checks if this APQN is or will be in use by the ap bus @@ -280,4 +289,20 @@ int ap_owned_by_def_drv(int card, int queue); int ap_apqn_in_matrix_owned_by_def_drv(unsigned long *apm, unsigned long *aqm); +/* + * ap_parse_mask_str() - helper function to parse a bitmap string + * and clear/set the bits in the bitmap accordingly. The string may be + * given as absolute value, a hex string like 0x1F2E3D4C5B6A" simple + * overwriting the current content of the bitmap. Or as relative string + * like "+1-16,-32,-0x40,+128" where only single bits or ranges of + * bits are cleared or set. Distinction is done based on the very + * first character which may be '+' or '-' for the relative string + * and othewise assume to be an absolute value string. If parsing fails + * a negative errno value is returned. All arguments and bitmaps are + * big endian order. 
+ */ +int ap_parse_mask_str(const char *str, + unsigned long *bitmap, int bits, + struct mutex *lock); + #endif /* _AP_BUS_H_ */ diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index e6854127b434..d7e1c7cd2c89 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * zcrypt 2.1.0 + * zcrypt 2.2.0 * - * Copyright IBM Corp. 2001, 2012 + * Copyright IBM Corp. 2001, 2018 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) * Cornelia Huck @@ -11,6 +11,7 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler + * Multiple device nodes: Harald Freudenberger */ #include @@ -24,6 +25,8 @@ #include #include #include +#include +#include #include #define CREATE_TRACE_POINTS @@ -108,6 +111,375 @@ struct zcrypt_ops *zcrypt_msgtype(unsigned char *name, int variant) } EXPORT_SYMBOL(zcrypt_msgtype); +/* + * Multi device nodes extension functions. + */ + +#ifdef CONFIG_ZCRYPT_MULTIDEVNODES + +struct zcdn_device; + +static struct class *zcrypt_class; +static dev_t zcrypt_devt; +static struct cdev zcrypt_cdev; + +struct zcdn_device { + struct device device; + struct ap_perms perms; +}; + +#define to_zcdn_dev(x) container_of((x), struct zcdn_device, device) + +#define ZCDN_MAX_NAME 32 + +static int zcdn_create(const char *name); +static int zcdn_destroy(const char *name); + +/* helper function, matches the name for find_zcdndev_by_name() */ +static int __match_zcdn_name(struct device *dev, const void *data) +{ + return strcmp(dev_name(dev), (const char *)data) == 0; +} + +/* helper function, matches the devt value for find_zcdndev_by_devt() */ +static int __match_zcdn_devt(struct device *dev, const void *data) +{ + return dev->devt == *((dev_t *) data); +} + +/* + * Find zcdn device by name. + * Returns reference to the zcdn device which needs to be released + * with put_device() after use. + */ +static inline struct zcdn_device *find_zcdndev_by_name(const char *name) +{ + struct device *dev = + class_find_device(zcrypt_class, NULL, + (void *) name, + __match_zcdn_name); + + return dev ? to_zcdn_dev(dev) : NULL; +} + +/* + * Find zcdn device by devt value. + * Returns reference to the zcdn device which needs to be released + * with put_device() after use. + */ +static inline struct zcdn_device *find_zcdndev_by_devt(dev_t devt) +{ + struct device *dev = + class_find_device(zcrypt_class, NULL, + (void *) &devt, + __match_zcdn_devt); + + return dev ? 
to_zcdn_dev(dev) : NULL; +} + +static ssize_t ioctlmask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int i, rc; + struct zcdn_device *zcdndev = to_zcdn_dev(dev); + + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + + buf[0] = '0'; + buf[1] = 'x'; + for (i = 0; i < sizeof(zcdndev->perms.ioctlm) / sizeof(long); i++) + snprintf(buf + 2 + 2 * i * sizeof(long), + PAGE_SIZE - 2 - 2 * i * sizeof(long), + "%016lx", zcdndev->perms.ioctlm[i]); + buf[2 + 2 * i * sizeof(long)] = '\n'; + buf[2 + 2 * i * sizeof(long) + 1] = '\0'; + rc = 2 + 2 * i * sizeof(long) + 1; + + mutex_unlock(&ap_perms_mutex); + + return rc; +} + +static ssize_t ioctlmask_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc; + struct zcdn_device *zcdndev = to_zcdn_dev(dev); + + rc = ap_parse_mask_str(buf, zcdndev->perms.ioctlm, + AP_IOCTLS, &ap_perms_mutex); + if (rc) + return rc; + + return count; +} + +static DEVICE_ATTR_RW(ioctlmask); + +static ssize_t apmask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int i, rc; + struct zcdn_device *zcdndev = to_zcdn_dev(dev); + + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + + buf[0] = '0'; + buf[1] = 'x'; + for (i = 0; i < sizeof(zcdndev->perms.apm) / sizeof(long); i++) + snprintf(buf + 2 + 2 * i * sizeof(long), + PAGE_SIZE - 2 - 2 * i * sizeof(long), + "%016lx", zcdndev->perms.apm[i]); + buf[2 + 2 * i * sizeof(long)] = '\n'; + buf[2 + 2 * i * sizeof(long) + 1] = '\0'; + rc = 2 + 2 * i * sizeof(long) + 1; + + mutex_unlock(&ap_perms_mutex); + + return rc; +} + +static ssize_t apmask_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc; + struct zcdn_device *zcdndev = to_zcdn_dev(dev); + + rc = ap_parse_mask_str(buf, zcdndev->perms.apm, + AP_DEVICES, &ap_perms_mutex); + if (rc) + return rc; + + return count; +} + +static DEVICE_ATTR_RW(apmask); + +static ssize_t aqmask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int i, rc; + struct zcdn_device *zcdndev = to_zcdn_dev(dev); + + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + + buf[0] = '0'; + buf[1] = 'x'; + for (i = 0; i < sizeof(zcdndev->perms.aqm) / sizeof(long); i++) + snprintf(buf + 2 + 2 * i * sizeof(long), + PAGE_SIZE - 2 - 2 * i * sizeof(long), + "%016lx", zcdndev->perms.aqm[i]); + buf[2 + 2 * i * sizeof(long)] = '\n'; + buf[2 + 2 * i * sizeof(long) + 1] = '\0'; + rc = 2 + 2 * i * sizeof(long) + 1; + + mutex_unlock(&ap_perms_mutex); + + return rc; +} + +static ssize_t aqmask_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc; + struct zcdn_device *zcdndev = to_zcdn_dev(dev); + + rc = ap_parse_mask_str(buf, zcdndev->perms.aqm, + AP_DOMAINS, &ap_perms_mutex); + if (rc) + return rc; + + return count; +} + +static DEVICE_ATTR_RW(aqmask); + +static struct attribute *zcdn_dev_attrs[] = { + &dev_attr_ioctlmask.attr, + &dev_attr_apmask.attr, + &dev_attr_aqmask.attr, + NULL +}; + +static struct attribute_group zcdn_dev_attr_group = { + .attrs = zcdn_dev_attrs +}; + +static const struct attribute_group *zcdn_dev_attr_groups[] = { + &zcdn_dev_attr_group, + NULL +}; + +static ssize_t zcdn_create_store(struct class *class, + struct class_attribute *attr, + const char *buf, size_t count) +{ + int rc; + char name[ZCDN_MAX_NAME]; + + strncpy(name, skip_spaces(buf), sizeof(name)); + name[sizeof(name) - 1] = '\0'; + + rc = 
zcdn_create(strim(name)); + + return rc ? rc : count; +} + +static const struct class_attribute class_attr_zcdn_create = + __ATTR(create, 0600, NULL, zcdn_create_store); + +static ssize_t zcdn_destroy_store(struct class *class, + struct class_attribute *attr, + const char *buf, size_t count) +{ + int rc; + char name[ZCDN_MAX_NAME]; + + strncpy(name, skip_spaces(buf), sizeof(name)); + name[sizeof(name) - 1] = '\0'; + + rc = zcdn_destroy(strim(name)); + + return rc ? rc : count; +} + +static const struct class_attribute class_attr_zcdn_destroy = + __ATTR(destroy, 0600, NULL, zcdn_destroy_store); + +static void zcdn_device_release(struct device *dev) +{ + struct zcdn_device *zcdndev = to_zcdn_dev(dev); + + ZCRYPT_DBF(DBF_INFO, "releasing zcdn device %d:%d\n", + MAJOR(dev->devt), MINOR(dev->devt)); + + kfree(zcdndev); +} + +static int zcdn_create(const char *name) +{ + dev_t devt; + int i, rc = 0; + char nodename[ZCDN_MAX_NAME]; + struct zcdn_device *zcdndev; + + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + + /* check if device node with this name already exists */ + if (name[0]) { + zcdndev = find_zcdndev_by_name(name); + if (zcdndev) { + put_device(&zcdndev->device); + rc = -EEXIST; + goto unlockout; + } + } + + /* find an unused minor number */ + for (i = 0; i < ZCRYPT_MAX_MINOR_NODES; i++) { + devt = MKDEV(MAJOR(zcrypt_devt), MINOR(zcrypt_devt) + i); + zcdndev = find_zcdndev_by_devt(devt); + if (zcdndev) + put_device(&zcdndev->device); + else + break; + } + if (i == ZCRYPT_MAX_MINOR_NODES) { + rc = -ENOSPC; + goto unlockout; + } + + /* alloc and prepare a new zcdn device */ + zcdndev = kzalloc(sizeof(*zcdndev), GFP_KERNEL); + if (!zcdndev) { + rc = -ENOMEM; + goto unlockout; + } + zcdndev->device.release = zcdn_device_release; + zcdndev->device.class = zcrypt_class; + zcdndev->device.devt = devt; + zcdndev->device.groups = zcdn_dev_attr_groups; + if (name[0]) + strncpy(nodename, name, sizeof(nodename)); + else + snprintf(nodename, sizeof(nodename), + ZCRYPT_NAME "_%d", (int) MINOR(devt)); + nodename[sizeof(nodename)-1] = '\0'; + if (dev_set_name(&zcdndev->device, nodename)) { + rc = -EINVAL; + goto unlockout; + } + rc = device_register(&zcdndev->device); + if (rc) { + put_device(&zcdndev->device); + goto unlockout; + } + + ZCRYPT_DBF(DBF_INFO, "created zcdn device %d:%d\n", + MAJOR(devt), MINOR(devt)); + +unlockout: + mutex_unlock(&ap_perms_mutex); + return rc; +} + +static int zcdn_destroy(const char *name) +{ + int rc = 0; + struct zcdn_device *zcdndev; + + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + + /* try to find this zcdn device */ + zcdndev = find_zcdndev_by_name(name); + if (!zcdndev) { + rc = -ENOENT; + goto unlockout; + } + + /* + * The zcdn device is not hard destroyed. It is subject to + * reference counting and thus just needs to be unregistered. + */ + put_device(&zcdndev->device); + device_unregister(&zcdndev->device); + +unlockout: + mutex_unlock(&ap_perms_mutex); + return rc; +} + +static void zcdn_destroy_all(void) +{ + int i; + dev_t devt; + struct zcdn_device *zcdndev; + + mutex_lock(&ap_perms_mutex); + for (i = 0; i < ZCRYPT_MAX_MINOR_NODES; i++) { + devt = MKDEV(MAJOR(zcrypt_devt), MINOR(zcrypt_devt) + i); + zcdndev = find_zcdndev_by_devt(devt); + if (zcdndev) { + put_device(&zcdndev->device); + device_unregister(&zcdndev->device); + } + } + mutex_unlock(&ap_perms_mutex); +} + +#endif + /** * zcrypt_read (): Not supported beyond zcrypt 1.3.1. 
* @@ -137,6 +509,23 @@ static ssize_t zcrypt_write(struct file *filp, const char __user *buf, */ static int zcrypt_open(struct inode *inode, struct file *filp) { + struct ap_perms *perms = &ap_perms; + +#ifdef CONFIG_ZCRYPT_MULTIDEVNODES + if (filp->f_inode->i_cdev == &zcrypt_cdev) { + struct zcdn_device *zcdndev; + + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + zcdndev = find_zcdndev_by_devt(filp->f_inode->i_rdev); + /* find returns a reference, no get_device() needed */ + mutex_unlock(&ap_perms_mutex); + if (zcdndev) + perms = &zcdndev->perms; + } +#endif + filp->private_data = (void *) perms; + atomic_inc(&zcrypt_open_count); return nonseekable_open(inode, filp); } @@ -148,10 +537,55 @@ static int zcrypt_open(struct inode *inode, struct file *filp) */ static int zcrypt_release(struct inode *inode, struct file *filp) { +#ifdef CONFIG_ZCRYPT_MULTIDEVNODES + if (filp->f_inode->i_cdev == &zcrypt_cdev) { + struct zcdn_device *zcdndev; + + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + zcdndev = find_zcdndev_by_devt(filp->f_inode->i_rdev); + mutex_unlock(&ap_perms_mutex); + if (zcdndev) { + /* 2 puts here: one for find, one for open */ + put_device(&zcdndev->device); + put_device(&zcdndev->device); + } + } +#endif + atomic_dec(&zcrypt_open_count); return 0; } +static inline int zcrypt_check_ioctl(struct ap_perms *perms, + unsigned int cmd) +{ + int rc = -EPERM; + int ioctlnr = (cmd & _IOC_NRMASK) >> _IOC_NRSHIFT; + + if (ioctlnr > 0 && ioctlnr < AP_IOCTLS) { + if (test_bit_inv(ioctlnr, perms->ioctlm)) + rc = 0; + } + + if (rc) + ZCRYPT_DBF(DBF_WARN, + "ioctl check failed: ioctlnr=0x%04x rc=%d\n", + ioctlnr, rc); + + return rc; +} + +static inline bool zcrypt_check_card(struct ap_perms *perms, int card) +{ + return test_bit_inv(card, perms->apm) ? true : false; +} + +static inline bool zcrypt_check_queue(struct ap_perms *perms, int queue) +{ + return test_bit_inv(queue, perms->aqm) ? true : false; +} + static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc, struct zcrypt_queue *zq, unsigned int weight) @@ -213,7 +647,8 @@ static inline bool zcrypt_queue_compare(struct zcrypt_queue *zq, /* * zcrypt ioctls. 
*/ -static long zcrypt_rsa_modexpo(struct ica_rsa_modexpo *mex) +static long zcrypt_rsa_modexpo(struct ap_perms *perms, + struct ica_rsa_modexpo *mex) { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; @@ -250,6 +685,9 @@ static long zcrypt_rsa_modexpo(struct ica_rsa_modexpo *mex) if (zc->min_mod_size > mex->inputdatalength || zc->max_mod_size < mex->inputdatalength) continue; + /* check if device node has admission for this card */ + if (!zcrypt_check_card(perms, zc->card->id)) + continue; /* get weight index of the card device */ weight = zc->speed_rating[func_code]; if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight)) @@ -258,6 +696,10 @@ static long zcrypt_rsa_modexpo(struct ica_rsa_modexpo *mex) /* check if device is online and eligible */ if (!zq->online || !zq->ops->rsa_modexpo) continue; + /* check if device node has admission for this queue */ + if (!zcrypt_check_queue(perms, + AP_QID_QUEUE(zq->queue->qid))) + continue; if (zcrypt_queue_compare(zq, pref_zq, weight, pref_weight)) continue; @@ -287,7 +729,8 @@ out: return rc; } -static long zcrypt_rsa_crt(struct ica_rsa_modexpo_crt *crt) +static long zcrypt_rsa_crt(struct ap_perms *perms, + struct ica_rsa_modexpo_crt *crt) { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; @@ -324,6 +767,9 @@ static long zcrypt_rsa_crt(struct ica_rsa_modexpo_crt *crt) if (zc->min_mod_size > crt->inputdatalength || zc->max_mod_size < crt->inputdatalength) continue; + /* check if device node has admission for this card */ + if (!zcrypt_check_card(perms, zc->card->id)) + continue; /* get weight index of the card device */ weight = zc->speed_rating[func_code]; if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight)) @@ -332,6 +778,10 @@ static long zcrypt_rsa_crt(struct ica_rsa_modexpo_crt *crt) /* check if device is online and eligible */ if (!zq->online || !zq->ops->rsa_modexpo_crt) continue; + /* check if device node has admission for this queue */ + if (!zcrypt_check_queue(perms, + AP_QID_QUEUE(zq->queue->qid))) + continue; if (zcrypt_queue_compare(zq, pref_zq, weight, pref_weight)) continue; @@ -361,7 +811,8 @@ out: return rc; } -long zcrypt_send_cprb(struct ica_xcRB *xcRB) +static long _zcrypt_send_cprb(struct ap_perms *perms, + struct ica_xcRB *xcRB) { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; @@ -389,6 +840,9 @@ long zcrypt_send_cprb(struct ica_xcRB *xcRB) if (xcRB->user_defined != AUTOSELECT && xcRB->user_defined != zc->card->id) continue; + /* check if device node has admission for this card */ + if (!zcrypt_check_card(perms, zc->card->id)) + continue; /* get weight index of the card device */ weight = speed_idx_cca(func_code) * zc->speed_rating[SECKEY]; if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight)) @@ -400,6 +854,10 @@ long zcrypt_send_cprb(struct ica_xcRB *xcRB) ((*domain != (unsigned short) AUTOSELECT) && (*domain != AP_QID_QUEUE(zq->queue->qid)))) continue; + /* check if device node has admission for this queue */ + if (!zcrypt_check_queue(perms, + AP_QID_QUEUE(zq->queue->qid))) + continue; if (zcrypt_queue_compare(zq, pref_zq, weight, pref_weight)) continue; @@ -433,6 +891,11 @@ out: AP_QID_CARD(qid), AP_QID_QUEUE(qid)); return rc; } + +long zcrypt_send_cprb(struct ica_xcRB *xcRB) +{ + return _zcrypt_send_cprb(NULL, xcRB); +} EXPORT_SYMBOL(zcrypt_send_cprb); static bool is_desired_ep11_card(unsigned int dev_id, @@ -459,7 +922,8 @@ static bool is_desired_ep11_queue(unsigned int dev_qid, return false; } -static long zcrypt_send_ep11_cprb(struct ep11_urb 
*xcrb) +static long zcrypt_send_ep11_cprb(struct ap_perms *perms, + struct ep11_urb *xcrb) { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; @@ -510,6 +974,9 @@ static long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb) if (targets && !is_desired_ep11_card(zc->card->id, target_num, targets)) continue; + /* check if device node has admission for this card */ + if (!zcrypt_check_card(perms, zc->card->id)) + continue; /* get weight index of the card device */ weight = speed_idx_ep11(func_code) * zc->speed_rating[SECKEY]; if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight)) @@ -522,6 +989,10 @@ static long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb) !is_desired_ep11_queue(zq->queue->qid, target_num, targets))) continue; + /* check if device node has admission for this queue */ + if (!zcrypt_check_queue(perms, + AP_QID_QUEUE(zq->queue->qid))) + continue; if (zcrypt_queue_compare(zq, pref_zq, weight, pref_weight)) continue; @@ -788,7 +1259,13 @@ static int zcrypt_requestq_count(void) static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - int rc = 0; + int rc; + struct ap_perms *perms = + (struct ap_perms *) filp->private_data; + + rc = zcrypt_check_ioctl(perms, cmd); + if (rc) + return rc; switch (cmd) { case ICARSAMODEXPO: { @@ -798,12 +1275,12 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, if (copy_from_user(&mex, umex, sizeof(mex))) return -EFAULT; do { - rc = zcrypt_rsa_modexpo(&mex); + rc = zcrypt_rsa_modexpo(perms, &mex); } while (rc == -EAGAIN); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = zcrypt_rsa_modexpo(&mex); + rc = zcrypt_rsa_modexpo(perms, &mex); } while (rc == -EAGAIN); if (rc) { ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSAMODEXPO rc=%d\n", rc); @@ -818,12 +1295,12 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, if (copy_from_user(&crt, ucrt, sizeof(crt))) return -EFAULT; do { - rc = zcrypt_rsa_crt(&crt); + rc = zcrypt_rsa_crt(perms, &crt); } while (rc == -EAGAIN); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = zcrypt_rsa_crt(&crt); + rc = zcrypt_rsa_crt(perms, &crt); } while (rc == -EAGAIN); if (rc) { ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSACRT rc=%d\n", rc); @@ -838,12 +1315,12 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB))) return -EFAULT; do { - rc = zcrypt_send_cprb(&xcRB); + rc = _zcrypt_send_cprb(perms, &xcRB); } while (rc == -EAGAIN); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = zcrypt_send_cprb(&xcRB); + rc = _zcrypt_send_cprb(perms, &xcRB); } while (rc == -EAGAIN); if (rc) ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d\n", rc); @@ -858,12 +1335,12 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb))) return -EFAULT; do { - rc = zcrypt_send_ep11_cprb(&xcrb); + rc = zcrypt_send_ep11_cprb(perms, &xcrb); } while (rc == -EAGAIN); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = zcrypt_send_ep11_cprb(&xcrb); + rc = zcrypt_send_ep11_cprb(perms, &xcrb); } while (rc == -EAGAIN); if (rc) ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDEP11CPRB rc=%d\n", rc); @@ -989,8 +1466,8 @@ struct compat_ica_rsa_modexpo { compat_uptr_t n_modulus; }; -static long 
trans_modexpo32(struct file *filp, unsigned int cmd, - unsigned long arg) +static long trans_modexpo32(struct ap_perms *perms, struct file *filp, + unsigned int cmd, unsigned long arg) { struct compat_ica_rsa_modexpo __user *umex32 = compat_ptr(arg); struct compat_ica_rsa_modexpo mex32; @@ -1006,12 +1483,12 @@ static long trans_modexpo32(struct file *filp, unsigned int cmd, mex64.b_key = compat_ptr(mex32.b_key); mex64.n_modulus = compat_ptr(mex32.n_modulus); do { - rc = zcrypt_rsa_modexpo(&mex64); + rc = zcrypt_rsa_modexpo(perms, &mex64); } while (rc == -EAGAIN); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = zcrypt_rsa_modexpo(&mex64); + rc = zcrypt_rsa_modexpo(perms, &mex64); } while (rc == -EAGAIN); if (rc) return rc; @@ -1031,8 +1508,8 @@ struct compat_ica_rsa_modexpo_crt { compat_uptr_t u_mult_inv; }; -static long trans_modexpo_crt32(struct file *filp, unsigned int cmd, - unsigned long arg) +static long trans_modexpo_crt32(struct ap_perms *perms, struct file *filp, + unsigned int cmd, unsigned long arg) { struct compat_ica_rsa_modexpo_crt __user *ucrt32 = compat_ptr(arg); struct compat_ica_rsa_modexpo_crt crt32; @@ -1051,12 +1528,12 @@ static long trans_modexpo_crt32(struct file *filp, unsigned int cmd, crt64.nq_prime = compat_ptr(crt32.nq_prime); crt64.u_mult_inv = compat_ptr(crt32.u_mult_inv); do { - rc = zcrypt_rsa_crt(&crt64); + rc = zcrypt_rsa_crt(perms, &crt64); } while (rc == -EAGAIN); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = zcrypt_rsa_crt(&crt64); + rc = zcrypt_rsa_crt(perms, &crt64); } while (rc == -EAGAIN); if (rc) return rc; @@ -1084,8 +1561,8 @@ struct compat_ica_xcRB { unsigned int status; } __packed; -static long trans_xcRB32(struct file *filp, unsigned int cmd, - unsigned long arg) +static long trans_xcRB32(struct ap_perms *perms, struct file *filp, + unsigned int cmd, unsigned long arg) { struct compat_ica_xcRB __user *uxcRB32 = compat_ptr(arg); struct compat_ica_xcRB xcRB32; @@ -1115,12 +1592,12 @@ static long trans_xcRB32(struct file *filp, unsigned int cmd, xcRB64.priority_window = xcRB32.priority_window; xcRB64.status = xcRB32.status; do { - rc = zcrypt_send_cprb(&xcRB64); + rc = _zcrypt_send_cprb(perms, &xcRB64); } while (rc == -EAGAIN); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = zcrypt_send_cprb(&xcRB64); + rc = _zcrypt_send_cprb(perms, &xcRB64); } while (rc == -EAGAIN); xcRB32.reply_control_blk_length = xcRB64.reply_control_blk_length; xcRB32.reply_data_length = xcRB64.reply_data_length; @@ -1133,12 +1610,20 @@ static long trans_xcRB32(struct file *filp, unsigned int cmd, static long zcrypt_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + int rc; + struct ap_perms *perms = + (struct ap_perms *) filp->private_data; + + rc = zcrypt_check_ioctl(perms, cmd); + if (rc) + return rc; + if (cmd == ICARSAMODEXPO) - return trans_modexpo32(filp, cmd, arg); + return trans_modexpo32(perms, filp, cmd, arg); if (cmd == ICARSACRT) - return trans_modexpo_crt32(filp, cmd, arg); + return trans_modexpo_crt32(perms, filp, cmd, arg); if (cmd == ZSECSENDCPRB) - return trans_xcRB32(filp, cmd, arg); + return trans_xcRB32(perms, filp, cmd, arg); return zcrypt_unlocked_ioctl(filp, cmd, arg); } #endif @@ -1256,6 +1741,67 @@ void zcrypt_debug_exit(void) debug_unregister(zcrypt_dbf_info); } +#ifdef CONFIG_ZCRYPT_MULTIDEVNODES + +static 
int __init zcdn_init(void) +{ + int rc; + + /* create a new class 'zcrypt' */ + zcrypt_class = class_create(THIS_MODULE, ZCRYPT_NAME); + if (IS_ERR(zcrypt_class)) { + rc = PTR_ERR(zcrypt_class); + goto out_class_create_failed; + } + zcrypt_class->dev_release = zcdn_device_release; + + /* alloc device minor range */ + rc = alloc_chrdev_region(&zcrypt_devt, + 0, ZCRYPT_MAX_MINOR_NODES, + ZCRYPT_NAME); + if (rc) + goto out_alloc_chrdev_failed; + + cdev_init(&zcrypt_cdev, &zcrypt_fops); + zcrypt_cdev.owner = THIS_MODULE; + rc = cdev_add(&zcrypt_cdev, zcrypt_devt, ZCRYPT_MAX_MINOR_NODES); + if (rc) + goto out_cdev_add_failed; + + /* need some class specific sysfs attributes */ + rc = class_create_file(zcrypt_class, &class_attr_zcdn_create); + if (rc) + goto out_class_create_file_1_failed; + rc = class_create_file(zcrypt_class, &class_attr_zcdn_destroy); + if (rc) + goto out_class_create_file_2_failed; + + return 0; + +out_class_create_file_2_failed: + class_remove_file(zcrypt_class, &class_attr_zcdn_create); +out_class_create_file_1_failed: + cdev_del(&zcrypt_cdev); +out_cdev_add_failed: + unregister_chrdev_region(zcrypt_devt, ZCRYPT_MAX_MINOR_NODES); +out_alloc_chrdev_failed: + class_destroy(zcrypt_class); +out_class_create_failed: + return rc; +} + +static void zcdn_exit(void) +{ + class_remove_file(zcrypt_class, &class_attr_zcdn_create); + class_remove_file(zcrypt_class, &class_attr_zcdn_destroy); + zcdn_destroy_all(); + cdev_del(&zcrypt_cdev); + unregister_chrdev_region(zcrypt_devt, ZCRYPT_MAX_MINOR_NODES); + class_destroy(zcrypt_class); +} + +#endif + /** * zcrypt_api_init(): Module initialization. * @@ -1269,15 +1815,27 @@ int __init zcrypt_api_init(void) if (rc) goto out; +#ifdef CONFIG_ZCRYPT_MULTIDEVNODES + rc = zcdn_init(); + if (rc) + goto out; +#endif + /* Register the request sprayer. */ rc = misc_register(&zcrypt_misc_device); if (rc < 0) - goto out; + goto out_misc_register_failed; zcrypt_msgtype6_init(); zcrypt_msgtype50_init(); + return 0; +out_misc_register_failed: +#ifdef CONFIG_ZCRYPT_MULTIDEVNODES + zcdn_exit(); +#endif + zcrypt_debug_exit(); out: return rc; } @@ -1289,6 +1847,9 @@ out: */ void __exit zcrypt_api_exit(void) { +#ifdef CONFIG_ZCRYPT_MULTIDEVNODES + zcdn_exit(); +#endif misc_deregister(&zcrypt_misc_device); zcrypt_msgtype6_exit(); zcrypt_msgtype50_exit(); From 346e485d42e2d8c9b5739e6f2acb5052be0fef9e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 14 Sep 2018 13:47:41 +0200 Subject: [PATCH 13/83] s390/ccwgroup: add get_ccwgroupdev_by_busid() Provide function to find a ccwgroup device by its busid. 
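A minimal caller sketch (illustrative only, not part of the patch; the surrounding function and the bus id are placeholders). The lookup returns the device with an elevated reference count, so the caller has to drop it again with put_device():

	#include <linux/device.h>
	#include <asm/ccwgroup.h>

	static int example_lookup(struct ccwgroup_driver *gdrv, char *bus_id)
	{
		struct ccwgroup_device *gdev;

		gdev = get_ccwgroupdev_by_busid(gdrv, bus_id);
		if (!gdev)
			return -ENODEV;
		/* ... inspect or configure the grouped device ... */
		put_device(&gdev->dev);	/* drop the reference the lookup took */
		return 0;
	}
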
Acked-by: Sebastian Ott Signed-off-by: Julian Wiedmann Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ccwgroup.h | 2 ++ drivers/s390/cio/ccwgroup.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index 860cab7479c3..7293c139dd79 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -64,6 +64,8 @@ extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver); extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver); int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv, int num_devices, const char *buf); +struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv, + char *bus_id); extern int ccwgroup_set_online(struct ccwgroup_device *gdev); extern int ccwgroup_set_offline(struct ccwgroup_device *gdev); diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 93b2862bd3fa..4ebf6d4fc66c 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -608,6 +608,36 @@ void ccwgroup_driver_unregister(struct ccwgroup_driver *cdriver) } EXPORT_SYMBOL(ccwgroup_driver_unregister); +static int __ccwgroupdev_check_busid(struct device *dev, void *id) +{ + char *bus_id = id; + + return (strcmp(bus_id, dev_name(dev)) == 0); +} + +/** + * get_ccwgroupdev_by_busid() - obtain device from a bus id + * @gdrv: driver the device is owned by + * @bus_id: bus id of the device to be searched + * + * This function searches all devices owned by @gdrv for a device with a bus + * id matching @bus_id. + * Returns: + * If a match is found, its reference count of the found device is increased + * and it is returned; else %NULL is returned. + */ +struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv, + char *bus_id) +{ + struct device *dev; + + dev = driver_find_device(&gdrv->driver, NULL, bus_id, + __ccwgroupdev_check_busid); + + return dev ? to_ccwgroupdev(dev) : NULL; +} +EXPORT_SYMBOL_GPL(get_ccwgroupdev_by_busid); + /** * ccwgroup_probe_ccwdev() - probe function for slave devices * @cdev: ccw device to be probed From f689789a288e297451869c0770b3351c80c85b15 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 6 Sep 2018 10:43:27 +0200 Subject: [PATCH 14/83] s390/appldata: pass parameter list pointer to appldata_asm In preparation for CONFIG_VMAP_STACK=y move the allocation of the struct appldata_parameter_list to the caller of appldata_asm(). 
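The constraint behind this preparation, as a hedged illustration (the helper below is invented and not part of the patch): the diagnose addresses its parameter list by real address, so the list must come from the kernel's identity mapping. A kmalloc'ed buffer satisfies that; a local variable no longer does once CONFIG_VMAP_STACK places kernel stacks in vmalloc space.

	#include <linux/mm.h>

	/* Necessary (not sufficient) sanity check, for illustration only:
	 * vmalloc addresses, which include VMAP stack frames, are not
	 * virtual == real and must not be handed to the diagnose. */
	static bool unsafe_for_diag(const void *buf)
	{
		return is_vmalloc_addr(buf);
	}
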
Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 4 +++- arch/s390/include/asm/appldata.h | 19 ++++++++++--------- drivers/s390/char/monwriter.c | 4 +++- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 9bf8489df6e6..50dd7117cbc1 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -145,6 +145,7 @@ static void appldata_work_fn(struct work_struct *work) int appldata_diag(char record_nr, u16 function, unsigned long buffer, u16 length, char *mod_lvl) { + struct appldata_parameter_list parm_list; struct appldata_product_id id = { .prod_nr = {0xD3, 0xC9, 0xD5, 0xE4, 0xE7, 0xD2, 0xD9}, /* "LINUXKR" */ @@ -155,7 +156,8 @@ int appldata_diag(char record_nr, u16 function, unsigned long buffer, id.record_nr = record_nr; id.mod_lvl = (mod_lvl[0]) << 8 | mod_lvl[1]; - return appldata_asm(&id, function, (void *) buffer, length); + return appldata_asm(&parm_list, &id, function, + (void *) buffer, length); } /************************ timer, work, DIAG ****************************/ diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h index 4afbb5938726..c5bd9f4437e5 100644 --- a/arch/s390/include/asm/appldata.h +++ b/arch/s390/include/asm/appldata.h @@ -40,26 +40,27 @@ struct appldata_product_id { u16 mod_lvl; /* modification level */ } __attribute__ ((packed)); -static inline int appldata_asm(struct appldata_product_id *id, + +static inline int appldata_asm(struct appldata_parameter_list *parm_list, + struct appldata_product_id *id, unsigned short fn, void *buffer, unsigned short length) { - struct appldata_parameter_list parm_list; int ry; if (!MACHINE_IS_VM) return -EOPNOTSUPP; - parm_list.diag = 0xdc; - parm_list.function = fn; - parm_list.parlist_length = sizeof(parm_list); - parm_list.buffer_length = length; - parm_list.product_id_addr = (unsigned long) id; - parm_list.buffer_addr = virt_to_phys(buffer); + parm_list->diag = 0xdc; + parm_list->function = fn; + parm_list->parlist_length = sizeof(*parm_list); + parm_list->buffer_length = length; + parm_list->product_id_addr = (unsigned long) id; + parm_list->buffer_addr = virt_to_phys(buffer); diag_stat_inc(DIAG_STAT_X0DC); asm volatile( " diag %1,%0,0xdc" : "=d" (ry) - : "d" (&parm_list), "m" (parm_list), "m" (*id) + : "d" (parm_list), "m" (*parm_list), "m" (*id) : "cc"); return ry; } diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index 4f1a69c9d81d..6388f614de4e 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -58,6 +58,7 @@ struct mon_private { static int monwrite_diag(struct monwrite_hdr *myhdr, char *buffer, int fcn) { + struct appldata_parameter_list parm_list; struct appldata_product_id id; int rc; @@ -67,7 +68,8 @@ static int monwrite_diag(struct monwrite_hdr *myhdr, char *buffer, int fcn) id.version_nr = myhdr->version; id.release_nr = myhdr->release; id.mod_lvl = myhdr->mod_level; - rc = appldata_asm(&id, fcn, (void *) buffer, myhdr->datalen); + rc = appldata_asm(&parm_list, &id, fcn, + (void *) buffer, myhdr->datalen); if (rc <= 0) return rc; pr_err("Writing monitor data failed with rc=%i\n", rc); From d36a9281399700f3b573f61b845367cfc3e12cb1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 6 Sep 2018 10:47:26 +0200 Subject: [PATCH 15/83] s390/appldata: do not use stack buffers for hardware data With CONFIG_VMAP_STACK=y the stack is allocated from the vmalloc 
space. Data structures passed to a hardware or a hypervisor interface that requires V=R can not be allocated on the stack anymore. Use kmalloc to get memory for the appldata_product_id and the appldata_parameter_list structures. Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 35 ++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 50dd7117cbc1..e4b58240ec53 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -137,6 +137,14 @@ static void appldata_work_fn(struct work_struct *work) mutex_unlock(&appldata_ops_mutex); } +static struct appldata_product_id appldata_id = { + .prod_nr = {0xD3, 0xC9, 0xD5, 0xE4, + 0xE7, 0xD2, 0xD9}, /* "LINUXKR" */ + .prod_fn = 0xD5D3, /* "NL" */ + .version_nr = 0xF2F6, /* "26" */ + .release_nr = 0xF0F1, /* "01" */ +}; + /* * appldata_diag() * @@ -145,19 +153,22 @@ static void appldata_work_fn(struct work_struct *work) int appldata_diag(char record_nr, u16 function, unsigned long buffer, u16 length, char *mod_lvl) { - struct appldata_parameter_list parm_list; - struct appldata_product_id id = { - .prod_nr = {0xD3, 0xC9, 0xD5, 0xE4, - 0xE7, 0xD2, 0xD9}, /* "LINUXKR" */ - .prod_fn = 0xD5D3, /* "NL" */ - .version_nr = 0xF2F6, /* "26" */ - .release_nr = 0xF0F1, /* "01" */ - }; + struct appldata_parameter_list *parm_list; + struct appldata_product_id *id; + int rc; - id.record_nr = record_nr; - id.mod_lvl = (mod_lvl[0]) << 8 | mod_lvl[1]; - return appldata_asm(&parm_list, &id, function, - (void *) buffer, length); + parm_list = kmalloc(sizeof(*parm_list), GFP_KERNEL); + id = kmemdup(&appldata_id, sizeof(appldata_id), GFP_KERNEL); + rc = -ENOMEM; + if (parm_list && id) { + id->record_nr = record_nr; + id->mod_lvl = (mod_lvl[0]) << 8 | mod_lvl[1]; + rc = appldata_asm(parm_list, id, function, + (void *) buffer, length); + } + kfree(id); + kfree(parm_list); + return rc; } /************************ timer, work, DIAG ****************************/ From 8ef9eda0188c2e904ef257f67cefcc3371a0c98e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 6 Sep 2018 10:06:57 +0200 Subject: [PATCH 16/83] s390/hypfs: do not use stack buffers for hardware data With CONFIG_VMAP_STACK=y the stack is allocated from the vmalloc space. Data structures passed to a hardware or a hypervisor interface that requires V=R can not be allocated on the stack anymore. Use kmalloc to get memory for the hypsfs_diag304 structure. 
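The conversion pattern used here, reduced to a hedged skeleton (the function name and the work in the middle are placeholders): allocate up front, pre-set rc before each step, and funnel every exit through a single cleanup label, relying on kfree(NULL) being a no-op.

	#include <linux/slab.h>
	#include <linux/uaccess.h>

	static int example_ioctl_body(void __user *uptr, size_t len)
	{
		void *buf;
		int rc;

		rc = -ENOMEM;
		buf = kzalloc(len, GFP_KERNEL);
		if (!buf)
			goto out;
		rc = -EFAULT;
		if (copy_from_user(buf, uptr, len))
			goto out;
		/* ... validate the request and issue the diagnose ... */
		rc = 0;
	out:
		kfree(buf);	/* kfree(NULL) is a no-op */
		return rc;
	}
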
Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs_sprp.c | 40 ++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c index 5d85a039391c..601b70786dc8 100644 --- a/arch/s390/hypfs/hypfs_sprp.c +++ b/arch/s390/hypfs/hypfs_sprp.c @@ -68,40 +68,44 @@ static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size) static int __hypfs_sprp_ioctl(void __user *user_area) { - struct hypfs_diag304 diag304; + struct hypfs_diag304 *diag304; unsigned long cmd; void __user *udata; void *data; int rc; - if (copy_from_user(&diag304, user_area, sizeof(diag304))) - return -EFAULT; - if ((diag304.args[0] >> 8) != 0 || diag304.args[1] > DIAG304_CMD_MAX) - return -EINVAL; - + rc = -ENOMEM; data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!data) - return -ENOMEM; + diag304 = kzalloc(sizeof(*diag304), GFP_KERNEL); + if (!data || !diag304) + goto out; - udata = (void __user *)(unsigned long) diag304.data; - if (diag304.args[1] == DIAG304_SET_WEIGHTS || - diag304.args[1] == DIAG304_SET_CAPPING) - if (copy_from_user(data, udata, PAGE_SIZE)) { - rc = -EFAULT; + rc = -EFAULT; + if (copy_from_user(diag304, user_area, sizeof(*diag304))) + goto out; + rc = -EINVAL; + if ((diag304->args[0] >> 8) != 0 || diag304->args[1] > DIAG304_CMD_MAX) + goto out; + + rc = -EFAULT; + udata = (void __user *)(unsigned long) diag304->data; + if (diag304->args[1] == DIAG304_SET_WEIGHTS || + diag304->args[1] == DIAG304_SET_CAPPING) + if (copy_from_user(data, udata, PAGE_SIZE)) goto out; - } - cmd = *(unsigned long *) &diag304.args[0]; - diag304.rc = hypfs_sprp_diag304(data, cmd); + cmd = *(unsigned long *) &diag304->args[0]; + diag304->rc = hypfs_sprp_diag304(data, cmd); - if (diag304.args[1] == DIAG304_QUERY_PRP) + if (diag304->args[1] == DIAG304_QUERY_PRP) if (copy_to_user(udata, data, PAGE_SIZE)) { rc = -EFAULT; goto out; } - rc = copy_to_user(user_area, &diag304, sizeof(diag304)) ? -EFAULT : 0; + rc = copy_to_user(user_area, diag304, sizeof(*diag304)) ? -EFAULT : 0; out: + kfree(diag304); free_page((unsigned long) data); return rc; } From c0f07ff93bffae8c4252e4945ad82bc98f82a60e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 6 Sep 2018 10:31:59 +0200 Subject: [PATCH 17/83] s390/monwriter: do not use stack buffers for hardware data With CONFIG_VMAP_STACK=y the stack is allocated from the vmalloc space. Data structures passed to a hardware or a hypervisor interface that requires V=R can not be allocated on the stack anymore. Use kmalloc to get memory for the appldata_parameter_list and appldata_product_id structures. 
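The return-code handling the rewrite keeps, shown as a stand-alone helper for clarity (the function name is invented; the mapping matches the code below): a non-positive result from appldata_asm() is passed through, result 5 becomes -EPERM, everything else -EINVAL.

	#include <linux/errno.h>

	static int monwrite_diag_rc_to_errno(int rc)
	{
		if (rc <= 0)
			return rc;	/* success, or already a negative errno */
		return (rc == 5) ? -EPERM : -EINVAL;
	}
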
Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- drivers/s390/char/monwriter.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index 6388f614de4e..fdc0c0b7a6f5 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -58,24 +58,31 @@ struct mon_private { static int monwrite_diag(struct monwrite_hdr *myhdr, char *buffer, int fcn) { - struct appldata_parameter_list parm_list; - struct appldata_product_id id; + struct appldata_parameter_list *parm_list; + struct appldata_product_id *id; int rc; - memcpy(id.prod_nr, "LNXAPPL", 7); - id.prod_fn = myhdr->applid; - id.record_nr = myhdr->record_num; - id.version_nr = myhdr->version; - id.release_nr = myhdr->release; - id.mod_lvl = myhdr->mod_level; - rc = appldata_asm(&parm_list, &id, fcn, + id = kmalloc(sizeof(*id), GFP_KERNEL); + parm_list = kmalloc(sizeof(*parm_list), GFP_KERNEL); + rc = -ENOMEM; + if (!id || !parm_list) + goto out; + memcpy(id->prod_nr, "LNXAPPL", 7); + id->prod_fn = myhdr->applid; + id->record_nr = myhdr->record_num; + id->version_nr = myhdr->version; + id->release_nr = myhdr->release; + id->mod_lvl = myhdr->mod_level; + rc = appldata_asm(parm_list, id, fcn, (void *) buffer, myhdr->datalen); if (rc <= 0) - return rc; + goto out; pr_err("Writing monitor data failed with rc=%i\n", rc); - if (rc == 5) - return -EPERM; - return -EINVAL; + rc = (rc == 5) ? -EPERM : -EINVAL; +out: + kfree(id); + kfree(parm_list); + return rc; } static struct mon_buf *monwrite_find_hdr(struct mon_private *monpriv, From 00e9e6645adc2c02c9ec5b42fd39d2a7f0880e6a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 7 Sep 2018 11:20:08 +0200 Subject: [PATCH 18/83] s390/pfault: do not use stack buffers for hardware data With CONFIG_VMAP_STACK=y the stack is allocated from the vmalloc space. Data structures passed to a hardware or a hypervisor interface that requires V=R can not be allocated on the stack anymore. Make the init and fini pfault parameter blocks static variables. 
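A hedged debugging aid one could add while auditing such call sites (purely illustrative, not part of the patch): with CONFIG_VMAP_STACK enabled, a parameter block handed to the diagnose must not live on the current task's stack, which static data trivially guarantees.

	#include <linux/bug.h>
	#include <linux/sched/task_stack.h>

	static void assert_parm_block_placement(const void *parm_block)
	{
		/* static or kmalloc'ed blocks pass; stack variables fail
		 * once kernel stacks are vmalloc'ed */
		WARN_ON_ONCE(IS_ENABLED(CONFIG_VMAP_STACK) &&
			     object_is_on_stack(parm_block));
	}
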
Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 72af23bacbb5..2b8f32f56e0c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -636,17 +636,19 @@ struct pfault_refbk { u64 reserved; } __attribute__ ((packed, aligned(8))); +static struct pfault_refbk pfault_init_refbk = { + .refdiagc = 0x258, + .reffcode = 0, + .refdwlen = 5, + .refversn = 2, + .refgaddr = __LC_LPP, + .refselmk = 1ULL << 48, + .refcmpmk = 1ULL << 48, + .reserved = __PF_RES_FIELD +}; + int pfault_init(void) { - struct pfault_refbk refbk = { - .refdiagc = 0x258, - .reffcode = 0, - .refdwlen = 5, - .refversn = 2, - .refgaddr = __LC_LPP, - .refselmk = 1ULL << 48, - .refcmpmk = 1ULL << 48, - .reserved = __PF_RES_FIELD }; int rc; if (pfault_disable) @@ -658,18 +660,20 @@ int pfault_init(void) "1: la %0,8\n" "2:\n" EX_TABLE(0b,1b) - : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc"); + : "=d" (rc) + : "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc"); return rc; } +static struct pfault_refbk pfault_fini_refbk = { + .refdiagc = 0x258, + .reffcode = 1, + .refdwlen = 5, + .refversn = 2, +}; + void pfault_fini(void) { - struct pfault_refbk refbk = { - .refdiagc = 0x258, - .reffcode = 1, - .refdwlen = 5, - .refversn = 2, - }; if (pfault_disable) return; @@ -678,7 +682,7 @@ void pfault_fini(void) " diag %0,0,0x258\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) - : : "a" (&refbk), "m" (refbk) : "cc"); + : : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc"); } static DEFINE_SPINLOCK(pfault_lock); From 53c99bd665a2649341ed6aed358ab56a3eedcd00 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 31 Aug 2018 10:42:24 +0200 Subject: [PATCH 19/83] init: add arch_call_rest_init to allow stack switching With CONFIG_VMAP_STACK=y the kernel stack of all tasks should be allocated in the vmalloc space. The initial stack used for all the early init code is in the init_thread_union. To be able to switch from this early stack to a properly allocated stack from vmalloc the architecture needs a switch-over point. Introduce the arch_call_rest_init() function with a weak definition in init/main.c with the only purpose to call rest_init() from the end of start_kernel(). The architecture override can then do the necessary magic to switch to the new vmalloc'ed stack. Signed-off-by: Martin Schwidefsky --- include/linux/start_kernel.h | 2 ++ init/main.c | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/linux/start_kernel.h b/include/linux/start_kernel.h index 4b268d86a784..8b369a41c03c 100644 --- a/include/linux/start_kernel.h +++ b/include/linux/start_kernel.h @@ -9,5 +9,7 @@ up something else. 
*/ extern asmlinkage void __init start_kernel(void); +extern void __init arch_call_rest_init(void); +extern void __ref rest_init(void); #endif /* _LINUX_START_KERNEL_H */ diff --git a/init/main.c b/init/main.c index 18f8f0140fa0..78b714a5fa94 100644 --- a/init/main.c +++ b/init/main.c @@ -394,7 +394,7 @@ static void __init setup_command_line(char *command_line) static __initdata DECLARE_COMPLETION(kthreadd_done); -static noinline void __ref rest_init(void) +noinline void __ref rest_init(void) { struct task_struct *tsk; int pid; @@ -528,6 +528,11 @@ static void __init mm_init(void) pti_init(); } +void __init __weak arch_call_rest_init(void) +{ + rest_init(); +} + asmlinkage __visible void __init start_kernel(void) { char *command_line; @@ -736,7 +741,7 @@ asmlinkage __visible void __init start_kernel(void) } /* Do the rest non-__init'ed, we're now alive */ - rest_init(); + arch_call_rest_init(); } /* Call all constructor functions linked into the kernel. */ From ff340d2472ec7618443913928af9fb85a7009270 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 7 Sep 2017 17:03:19 +0200 Subject: [PATCH 20/83] s390: add stack switch helper Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 49 +++++++++++++++++++++++++++++++ arch/s390/kernel/irq.c | 10 +------ 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 7f2953c15c37..43494a014d5b 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -250,6 +250,55 @@ static inline unsigned short stap(void) return cpu_address; } +#define CALL_ARGS_0() \ + register unsigned long r2 asm("2") +#define CALL_ARGS_1(arg1) \ + register unsigned long r2 asm("2") = (unsigned long)(arg1) +#define CALL_ARGS_2(arg1, arg2) \ + CALL_ARGS_1(arg1); \ + register unsigned long r3 asm("3") = (unsigned long)(arg2) +#define CALL_ARGS_3(arg1, arg2, arg3) \ + CALL_ARGS_2(arg1, arg2); \ + register unsigned long r4 asm("4") = (unsigned long)(arg3) +#define CALL_ARGS_4(arg1, arg2, arg3, arg4) \ + CALL_ARGS_3(arg1, arg2, arg3); \ + register unsigned long r4 asm("5") = (unsigned long)(arg4) +#define CALL_ARGS_5(arg1, arg2, arg3, arg4, arg5) \ + CALL_ARGS_4(arg1, arg2, arg3, arg4); \ + register unsigned long r4 asm("6") = (unsigned long)(arg5) + +#define CALL_FMT_0 +#define CALL_FMT_1 CALL_FMT_0, "0" (r2) +#define CALL_FMT_2 CALL_FMT_1, "d" (r3) +#define CALL_FMT_3 CALL_FMT_2, "d" (r4) +#define CALL_FMT_4 CALL_FMT_3, "d" (r5) +#define CALL_FMT_5 CALL_FMT_4, "d" (r6) + +#define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" +#define CALL_CLOBBER_4 CALL_CLOBBER_5 +#define CALL_CLOBBER_3 CALL_CLOBBER_4, "5" +#define CALL_CLOBBER_2 CALL_CLOBBER_3, "4" +#define CALL_CLOBBER_1 CALL_CLOBBER_2, "3" +#define CALL_CLOBBER_0 CALL_CLOBBER_1 + +#define CALL_ON_STACK(fn, stack, nr, args...) \ +({ \ + CALL_ARGS_##nr(args); \ + unsigned long prev; \ + \ + asm volatile( \ + " la %[_prev],0(15)\n" \ + " la 15,0(%[_stack])\n" \ + " stg %[_prev],%[_bc](15)\n" \ + " brasl 14,%[_fn]\n" \ + " la 15,0(%[_prev])\n" \ + : "+&d" (r2), [_prev] "=&a" (prev) \ + : [_stack] "a" (stack), \ + [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ + [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr); \ + r2; \ +}) + /* * Give up the time slice of the virtual PU. 
*/ diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 3d17c41074ca..0e8d68bac82c 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -172,15 +172,7 @@ void do_softirq_own_stack(void) /* Check against async. stack address range. */ new = S390_lowcore.async_stack; if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) { - /* Need to switch to the async. stack. */ - new -= STACK_FRAME_OVERHEAD; - ((struct stack_frame *) new)->back_chain = old; - asm volatile(" la 15,0(%0)\n" - " brasl 14,__do_softirq\n" - " la 15,0(%1)\n" - : : "a" (new), "a" (old) - : "0", "1", "2", "3", "4", "5", "14", - "cc", "memory" ); + CALL_ON_STACK(__do_softirq, new, 0); } else { /* We are already on the async stack. */ __do_softirq(); From ce3dc447493ff4186b192b38d723ab5e8c1eb52f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 12 Sep 2017 16:37:33 +0200 Subject: [PATCH 21/83] s390: add support for virtually mapped kernel stacks With virtually mapped kernel stacks the kernel stack overflow detection is now fault based, every stack has a guard page in the vmalloc space. The panic_stack is renamed to nodat_stack and is used for all function that need to run without DAT, e.g. memcpy_real or do_start_kdump. The main effect is a reduction in the kernel image size as with vmap stacks the old style overflow checking that adds two instructions per function is not needed anymore. Result from bloat-o-meter: add/remove: 20/1 grow/shrink: 13/26854 up/down: 2198/-216240 (-214042) In regard to performance the micro-benchmark for fork has a hit of a few microseconds, allocating 4 pages in vmalloc space is more expensive compare to an order-2 page allocation. But with real workload I could not find a noticeable difference. Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 2 + arch/s390/include/asm/lowcore.h | 4 +- arch/s390/include/asm/processor.h | 8 +++ arch/s390/include/asm/thread_info.h | 3 - arch/s390/kernel/asm-offsets.c | 2 +- arch/s390/kernel/base.S | 2 +- arch/s390/kernel/dumpstack.c | 6 +- arch/s390/kernel/entry.S | 53 +++++++++++------ arch/s390/kernel/entry.h | 3 + arch/s390/kernel/head64.S | 4 +- arch/s390/kernel/irq.c | 2 +- arch/s390/kernel/machine_kexec.c | 19 ++++-- arch/s390/kernel/setup.c | 89 +++++++++++++++++++++++++--- arch/s390/kernel/smp.c | 90 +++++++++++++++++------------ arch/s390/kernel/swsusp.S | 7 ++- arch/s390/mm/maccess.c | 25 ++++++-- 16 files changed, 228 insertions(+), 91 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9a9c7a6fe925..6061dd7578fe 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -125,6 +125,7 @@ config S390 select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_VMAP_STACK select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL @@ -649,6 +650,7 @@ config PACK_STACK config CHECK_STACK def_bool y + depends on !VMAP_STACK prompt "Detect kernel stack overflow" help This option enables the compiler option -mstack-guard and diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 406d940173ab..cc0947e08b6f 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -102,9 +102,9 @@ struct lowcore { __u64 current_task; /* 0x0338 */ __u64 kernel_stack; /* 0x0340 */ - /* Interrupt, panic and restart stack. */ + /* Interrupt, DAT-off and restartstack. 
*/ __u64 async_stack; /* 0x0348 */ - __u64 panic_stack; /* 0x0350 */ + __u64 nodat_stack; /* 0x0350 */ __u64 restart_stack; /* 0x0358 */ /* Restart function and parameter. */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 43494a014d5b..3c1e723a143a 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -162,6 +162,14 @@ struct thread_struct { typedef struct thread_struct thread_struct; +/* + * General size of a stack + */ +#define STACK_ORDER 2 +#define STACK_SIZE (PAGE_SIZE << STACK_ORDER) +#define STACK_INIT_OFFSET \ + (STACK_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs)) + /* * Stack layout of a C stack frame. */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 3c883c368eb0..3fa2fea0ba23 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -14,10 +14,7 @@ * Size of kernel stack for each process */ #define THREAD_SIZE_ORDER 2 -#define ASYNC_ORDER 2 - #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER) #ifndef __ASSEMBLY__ #include diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 66e830f1c7bf..164bec175628 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -159,7 +159,7 @@ int main(void) OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); - OFFSET(__LC_PANIC_STACK, lowcore, panic_stack); + OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack); OFFSET(__LC_RESTART_STACK, lowcore, restart_stack); OFFSET(__LC_RESTART_FN, lowcore, restart_fn); OFFSET(__LC_RESTART_DATA, lowcore, restart_data); diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index b65874b0b412..f268fca67e82 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -18,7 +18,7 @@ ENTRY(s390_base_mcck_handler) basr %r13,0 -0: lg %r15,__LC_PANIC_STACK # load panic stack +0: lg %r15,__LC_NODAT_STACK # load panic stack aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_mcck_handler_fn lg %r9,0(%r1) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 5b23c4f6e50c..301b945de77b 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -77,11 +77,11 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); #ifdef CONFIG_CHECK_STACK sp = __dump_trace(func, data, sp, - S390_lowcore.panic_stack + frame_size - PAGE_SIZE, - S390_lowcore.panic_stack + frame_size); + S390_lowcore.nodat_stack + frame_size - STACK_SIZE, + S390_lowcore.nodat_stack + frame_size); #endif sp = __dump_trace(func, data, sp, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + S390_lowcore.async_stack + frame_size - STACK_SIZE, S390_lowcore.async_stack + frame_size); task = task ?: current; __dump_trace(func, data, sp, diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 150130c897c3..724fba4d09d2 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -85,14 +85,34 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro CHECK_STACK stacksize,savearea + .macro CHECK_STACK savearea #ifdef CONFIG_CHECK_STACK - tml %r15,\stacksize - CONFIG_STACK_GUARD + tml %r15,STACK_SIZE - CONFIG_STACK_GUARD lghi %r14,\savearea jz stack_overflow #endif .endm + .macro CHECK_VMAP_STACK savearea,oklabel +#ifdef CONFIG_VMAP_STACK + lgr 
%r14,%r15 + nill %r14,0x10000 - STACK_SIZE + oill %r14,STACK_INIT + clg %r14,__LC_KERNEL_STACK + je \oklabel + clg %r14,__LC_ASYNC_STACK + je \oklabel + clg %r14,__LC_NODAT_STACK + je \oklabel + clg %r14,__LC_RESTART_STACK + je \oklabel + lghi %r14,\savearea + j stack_overflow +#else + j \oklabel +#endif + .endm + .macro SWITCH_ASYNC savearea,timer tmhh %r8,0x0001 # interrupting from user ? jnz 1f @@ -104,11 +124,11 @@ _LPP_OFFSET = __LC_LPP brasl %r14,cleanup_critical tmhh %r8,0x0001 # retest problem state after cleanup jnz 1f -0: lg %r14,__LC_ASYNC_STACK # are we already on the async stack? +0: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT jnz 2f - CHECK_STACK 1< enabled, can't be a double fault tm __LC_PGM_ILC+3,0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC +1: CHECK_STACK __LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 4f + # CHECK_VMAP_STACK branches to stack_overflow or 4f + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f 2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lg %r15,__LC_KERNEL_STACK @@ -1136,7 +1157,8 @@ ENTRY(mcck_int_handler) jnz 4f TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic -4: SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER +4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER .Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) @@ -1163,7 +1185,6 @@ ENTRY(mcck_int_handler) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING jno .Lmcck_return TRACE_IRQS_OFF @@ -1182,7 +1203,7 @@ ENTRY(mcck_int_handler) lpswe __LC_RETURN_MCCK_PSW .Lmcck_panic: - lg %r15,__LC_PANIC_STACK + lg %r15,__LC_NODAT_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) j .Lmcck_skip @@ -1193,12 +1214,10 @@ ENTRY(restart_int_handler) ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 stg %r15,__LC_SAVE_AREA_RESTART lg %r15,__LC_RESTART_STACK - aghi %r15,-__PT_SIZE # create pt_regs on stack - xc 0(__PT_SIZE,%r15),0(%r15) - stmg %r0,%r14,__PT_R0(%r15) - mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART - mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw - aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART + mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) lg %r1,__LC_RESTART_FN # load fn, parm & source cpu lg %r2,__LC_RESTART_DATA @@ -1216,14 +1235,14 @@ ENTRY(restart_int_handler) .section .kprobes.text, "ax" -#ifdef CONFIG_CHECK_STACK +#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK) /* * The synchronous or the asynchronous stack overflowed. We are dead. * No need to properly save the registers, we are going to panic anyway. * Setup a pt_regs so that show_trace can provide a good call trace. 
*/ stack_overflow: - lg %r15,__LC_PANIC_STACK # change to panic stack + lg %r15,__LC_NODAT_STACK # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 472fa2f1a4a5..c3816ae108b0 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -86,4 +86,7 @@ DECLARE_PER_CPU(u64, mt_cycles[8]); void gs_load_bc_cb(struct pt_regs *regs); void set_fs_fixup(void); +unsigned long stack_alloc(void); +void stack_free(unsigned long stack); + #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index b31dfb102700..57bba24b1c27 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -36,9 +36,7 @@ ENTRY(startup_continue) # larl %r14,init_task stg %r14,__LC_CURRENT - larl %r15,init_thread_union+THREAD_SIZE - stg %r15,__LC_KERNEL_STACK # set end of kernel stack - aghi %r15,-STACK_FRAME_OVERHEAD + larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD # # Early setup functions that may not rely on an initialized bss section, # like moving the initrd. Returns with an initialized bss section. diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 0e8d68bac82c..b2bc0eb1ca7a 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -171,7 +171,7 @@ void do_softirq_own_stack(void) old = current_stack_pointer(); /* Check against async. stack address range. */ new = S390_lowcore.async_stack; - if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) { + if (((new - old) >> (PAGE_SHIFT + STACK_ORDER)) != 0) { CALL_ON_STACK(__do_softirq, new, 0); } else { /* We are already on the async stack. */ diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index b7020e721ae3..cb582649aba6 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -142,18 +142,27 @@ static noinline void __machine_kdump(void *image) } #endif -/* - * Check if kdump checksums are valid: We call purgatory with parameter "0" - */ -static bool kdump_csum_valid(struct kimage *image) +static unsigned long do_start_kdump(unsigned long addr) { -#ifdef CONFIG_CRASH_DUMP + struct kimage *image = (struct kimage *) addr; int (*start_kdump)(int) = (void *)image->start; int rc; __arch_local_irq_stnsm(0xfb); /* disable DAT */ rc = start_kdump(0); __arch_local_irq_stosm(0x04); /* enable DAT */ + return rc; +} + +/* + * Check if kdump checksums are valid: We call purgatory with parameter "0" + */ +static bool kdump_csum_valid(struct kimage *image) +{ +#ifdef CONFIG_CRASH_DUMP + int rc; + + rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image); return rc == 0; #else return false; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c637c12f9e37..eca51c485d09 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -303,6 +304,78 @@ early_param("vmalloc", parse_vmalloc); void *restart_stack __section(.data); +unsigned long stack_alloc(void) +{ +#ifdef CONFIG_VMAP_STACK + return (unsigned long) + __vmalloc_node_range(STACK_SIZE, STACK_SIZE, + VMALLOC_START, VMALLOC_END, + THREADINFO_GFP, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); +#else + return __get_free_pages(GFP_KERNEL, STACK_ORDER); +#endif +} + +void stack_free(unsigned long stack) +{ +#ifdef CONFIG_VMAP_STACK + vfree((void *) stack); +#else + free_pages(stack, STACK_ORDER); +#endif +} + 
+int __init arch_early_irq_init(void) +{ + unsigned long stack; + + stack = __get_free_pages(GFP_KERNEL, STACK_ORDER); + if (!stack) + panic("Couldn't allocate async stack"); + S390_lowcore.async_stack = stack + STACK_INIT_OFFSET; + return 0; +} + +static int __init async_stack_realloc(void) +{ + unsigned long old, new; + + old = S390_lowcore.async_stack - STACK_INIT_OFFSET; + new = stack_alloc(); + if (!new) + panic("Couldn't allocate async stack"); + S390_lowcore.async_stack = new + STACK_INIT_OFFSET; + free_pages(old, STACK_ORDER); + return 0; +} +early_initcall(async_stack_realloc); + +void __init arch_call_rest_init(void) +{ + struct stack_frame *frame; + unsigned long stack; + + stack = stack_alloc(); + if (!stack) + panic("Couldn't allocate kernel stack"); + current->stack = (void *) stack; +#ifdef CONFIG_VMAP_STACK + current->stack_vm_area = (void *) stack; +#endif + set_task_stack_end_magic(current); + stack += STACK_INIT_OFFSET; + S390_lowcore.kernel_stack = stack; + frame = (struct stack_frame *) stack; + memset(frame, 0, sizeof(*frame)); + /* Branch to rest_init on the new stack, never returns */ + asm volatile( + " la 15,0(%[_frame])\n" + " jg rest_init\n" + : : [_frame] "a" (frame)); +} + static void __init setup_lowcore(void) { struct lowcore *lc; @@ -329,14 +402,8 @@ static void __init setup_lowcore(void) PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; lc->clock_comparator = clock_comparator_max; - lc->kernel_stack = ((unsigned long) &init_thread_union) + lc->nodat_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); - lc->async_stack = (unsigned long) - memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE) - + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); - lc->panic_stack = (unsigned long) - memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE) - + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long)&init_task; lc->lpp = LPP_MAGIC; lc->machine_flags = S390_lowcore.machine_flags; @@ -357,8 +424,12 @@ static void __init setup_lowcore(void) lc->last_update_timer = S390_lowcore.last_update_timer; lc->last_update_clock = S390_lowcore.last_update_clock; - restart_stack = memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE); - restart_stack += ASYNC_SIZE; + /* + * Allocate the global restart stack which is the same for + * all CPUs in cast *one* of them does a PSW restart. + */ + restart_stack = memblock_virt_alloc(STACK_SIZE, STACK_SIZE); + restart_stack += STACK_INIT_OFFSET; /* * Set up PSW restart to call ipl.c:do_restart(). 
Copy the relevant diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2f8f7d7dd9a8..fccdb96a04cb 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -186,36 +186,34 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) pcpu_sigp_retry(pcpu, order, 0); } -#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE) -#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE) - static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { - unsigned long async_stack, panic_stack; + unsigned long async_stack, nodat_stack; struct lowcore *lc; if (pcpu != &pcpu_devices[0]) { pcpu->lowcore = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); - panic_stack = __get_free_page(GFP_KERNEL); - if (!pcpu->lowcore || !panic_stack || !async_stack) + nodat_stack = __get_free_pages(GFP_KERNEL, STACK_ORDER); + if (!pcpu->lowcore || !nodat_stack) goto out; } else { - async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; - panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; + nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET; } + async_stack = stack_alloc(); + if (!async_stack) + goto out; lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); - lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; - lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; + lc->async_stack = async_stack + STACK_INIT_OFFSET; + lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; lc->br_r1_trampoline = 0x07f1; /* br %r1 */ if (nmi_alloc_per_cpu(lc)) - goto out; + goto out_async; if (vdso_alloc_per_cpu(lc)) goto out_mcesa; lowcore_ptr[cpu] = lc; @@ -224,10 +222,11 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) out_mcesa: nmi_free_per_cpu(lc); +out_async: + stack_free(async_stack); out: if (pcpu != &pcpu_devices[0]) { - free_page(panic_stack); - free_pages(async_stack, ASYNC_ORDER); + free_pages(nodat_stack, STACK_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); } return -ENOMEM; @@ -237,15 +236,21 @@ out: static void pcpu_free_lowcore(struct pcpu *pcpu) { + unsigned long async_stack, nodat_stack, lowcore; + + nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET; + async_stack = pcpu->lowcore->async_stack - STACK_INIT_OFFSET; + lowcore = (unsigned long) pcpu->lowcore; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; vdso_free_per_cpu(pcpu->lowcore); nmi_free_per_cpu(pcpu->lowcore); + stack_free(async_stack); if (pcpu == &pcpu_devices[0]) return; - free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); - free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); - free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + free_pages(nodat_stack, STACK_ORDER); + free_pages(lowcore, LC_ORDER); } #endif /* CONFIG_HOTPLUG_CPU */ @@ -293,7 +298,7 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) { struct lowcore *lc = pcpu->lowcore; - lc->restart_stack = lc->kernel_stack; + lc->restart_stack = lc->nodat_stack; lc->restart_fn = (unsigned long) func; lc->restart_data = (unsigned long) data; lc->restart_source = -1UL; @@ -303,15 +308,20 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) /* * Call function via PSW restart on pcpu and stop the current cpu. 
*/ +static void __pcpu_delegate(void (*func)(void*), void *data) +{ + func(data); /* should not return */ +} + static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), void *data, unsigned long stack) { struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; unsigned long source_cpu = stap(); - __load_psw_mask(PSW_KERNEL_BITS); + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); if (pcpu->address == source_cpu) - func(data); /* should not return */ + CALL_ON_STACK(__pcpu_delegate, stack, 2, func, data); /* Stop target cpu (if func returns this stops the current cpu). */ pcpu_sigp_retry(pcpu, SIGP_STOP, 0); /* Restart func on the target cpu and stop the current cpu. */ @@ -372,8 +382,7 @@ void smp_call_online_cpu(void (*func)(void *), void *data) void smp_call_ipl_cpu(void (*func)(void *), void *data) { pcpu_delegate(&pcpu_devices[0], func, data, - pcpu_devices->lowcore->panic_stack - - PANIC_FRAME_OFFSET + PAGE_SIZE); + pcpu_devices->lowcore->nodat_stack); } int smp_find_processor_id(u16 address) @@ -791,13 +800,31 @@ void __init smp_detect_cpus(void) memblock_free_early((unsigned long)info, sizeof(*info)); } +static void smp_init_secondary(void) +{ + int cpu = smp_processor_id(); + + cpu_init(); + preempt_disable(); + init_cpu_timer(); + vtime_init(); + pfault_init(); + notify_cpu_starting(smp_processor_id()); + if (topology_cpu_dedicated(cpu)) + set_cpu_flag(CIF_DEDICATED_CPU); + else + clear_cpu_flag(CIF_DEDICATED_CPU); + set_cpu_online(smp_processor_id(), true); + inc_irq_stat(CPU_RST); + local_irq_enable(); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); +} + /* * Activate a secondary processor. */ static void smp_start_secondary(void *cpuvoid) { - int cpu = smp_processor_id(); - S390_lowcore.last_update_clock = get_tod_clock(); S390_lowcore.restart_stack = (unsigned long) restart_stack; S390_lowcore.restart_fn = (unsigned long) do_restart; @@ -806,20 +833,7 @@ static void smp_start_secondary(void *cpuvoid) restore_access_regs(S390_lowcore.access_regs_save_area); __ctl_load(S390_lowcore.cregs_save_area, 0, 15); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); - cpu_init(); - preempt_disable(); - init_cpu_timer(); - vtime_init(); - pfault_init(); - notify_cpu_starting(cpu); - if (topology_cpu_dedicated(cpu)) - set_cpu_flag(CIF_DEDICATED_CPU); - else - clear_cpu_flag(CIF_DEDICATED_CPU); - set_cpu_online(cpu, true); - inc_irq_stat(CPU_RST); - local_irq_enable(); - cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + CALL_ON_STACK(smp_init_secondary, S390_lowcore.kernel_stack, 0); } /* Upping and downing of CPUs */ diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index 34b014b5cf03..537f97fde37f 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -29,10 +29,11 @@ .section .text ENTRY(swsusp_arch_suspend) - stmg %r6,%r15,__SF_GPRS(%r15) + lg %r1,__LC_NODAT_STACK + aghi %r1,-STACK_FRAME_OVERHEAD + stmg %r6,%r15,__SF_GPRS(%r1) + stg %r15,__SF_BACKCHAIN(%r1) lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD - stg %r1,__SF_BACKCHAIN(%r15) /* Store FPU registers */ brasl %r14,save_fpu_regs diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 7be06475809b..97b3ee53852b 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -89,10 +89,8 @@ static int __memcpy_real(void *dest, void *src, size_t count) return rc; } -/* - * Copy memory in real mode (kernel to kernel) - */ -int memcpy_real(void *dest, void *src, size_t count) +static unsigned long _memcpy_real(unsigned long dest, unsigned long src, + unsigned long count) { int irqs_disabled, 
rc; unsigned long flags; @@ -103,13 +101,30 @@ int memcpy_real(void *dest, void *src, size_t count) irqs_disabled = arch_irqs_disabled_flags(flags); if (!irqs_disabled) trace_hardirqs_off(); - rc = __memcpy_real(dest, src, count); + rc = __memcpy_real((void *) dest, (void *) src, (size_t) count); if (!irqs_disabled) trace_hardirqs_on(); __arch_local_irq_ssm(flags); return rc; } +/* + * Copy memory in real mode (kernel to kernel) + */ +int memcpy_real(void *dest, void *src, size_t count) +{ + if (S390_lowcore.nodat_stack != 0) + return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, + 3, dest, src, count); + /* + * This is a really early memcpy_real call, the stacks are + * not set up yet. Just call _memcpy_real on the early boot + * stack + */ + return _memcpy_real((unsigned long) dest,(unsigned long) src, + (unsigned long) count); +} + /* * Copy memory in absolute mode (kernel to kernel) */ From 32ce55a6592fc3e117e70953001a9ea1931f7941 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 18 Sep 2018 18:23:40 +0200 Subject: [PATCH 22/83] s390: unify stack size definitions Remove STACK_ORDER and STACK_SIZE in favour of identical THREAD_SIZE_ORDER and THREAD_SIZE definitions. THREAD_SIZE and THREAD_SIZE_ORDER naming is misleading since it is used as general kernel stack size information. But both those definitions are used in the common code and throughout architectures specific code, so changing the naming is problematic. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 8 -------- arch/s390/include/asm/thread_info.h | 5 ++++- arch/s390/kernel/dumpstack.c | 4 ++-- arch/s390/kernel/irq.c | 2 +- arch/s390/kernel/setup.c | 12 ++++++------ arch/s390/kernel/smp.c | 6 +++--- 6 files changed, 16 insertions(+), 21 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 3c1e723a143a..43494a014d5b 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -162,14 +162,6 @@ struct thread_struct { typedef struct thread_struct thread_struct; -/* - * General size of a stack - */ -#define STACK_ORDER 2 -#define STACK_SIZE (PAGE_SIZE << STACK_ORDER) -#define STACK_INIT_OFFSET \ - (STACK_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs)) - /* * Stack layout of a C stack frame. 
*/ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 3fa2fea0ba23..1bbbaf6ae511 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -11,7 +11,7 @@ #include /* - * Size of kernel stack for each process + * General size of kernel stacks */ #define THREAD_SIZE_ORDER 2 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) @@ -21,6 +21,9 @@ #include #include +#define STACK_INIT_OFFSET \ + (THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs)) + /* * low level task data that entry.S needs immediate access to * - this struct should fit entirely inside of one cache line diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 301b945de77b..ef85a00442cd 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -77,11 +77,11 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); #ifdef CONFIG_CHECK_STACK sp = __dump_trace(func, data, sp, - S390_lowcore.nodat_stack + frame_size - STACK_SIZE, + S390_lowcore.nodat_stack + frame_size - THREAD_SIZE, S390_lowcore.nodat_stack + frame_size); #endif sp = __dump_trace(func, data, sp, - S390_lowcore.async_stack + frame_size - STACK_SIZE, + S390_lowcore.async_stack + frame_size - THREAD_SIZE, S390_lowcore.async_stack + frame_size); task = task ?: current; __dump_trace(func, data, sp, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index b2bc0eb1ca7a..0e8d68bac82c 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -171,7 +171,7 @@ void do_softirq_own_stack(void) old = current_stack_pointer(); /* Check against async. stack address range. */ new = S390_lowcore.async_stack; - if (((new - old) >> (PAGE_SHIFT + STACK_ORDER)) != 0) { + if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) { CALL_ON_STACK(__do_softirq, new, 0); } else { /* We are already on the async stack. */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index eca51c485d09..67fa7cb8ae80 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -308,13 +308,13 @@ unsigned long stack_alloc(void) { #ifdef CONFIG_VMAP_STACK return (unsigned long) - __vmalloc_node_range(STACK_SIZE, STACK_SIZE, + __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, VMALLOC_START, VMALLOC_END, THREADINFO_GFP, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); #else - return __get_free_pages(GFP_KERNEL, STACK_ORDER); + return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); #endif } @@ -323,7 +323,7 @@ void stack_free(unsigned long stack) #ifdef CONFIG_VMAP_STACK vfree((void *) stack); #else - free_pages(stack, STACK_ORDER); + free_pages(stack, THREAD_SIZE_ORDER); #endif } @@ -331,7 +331,7 @@ int __init arch_early_irq_init(void) { unsigned long stack; - stack = __get_free_pages(GFP_KERNEL, STACK_ORDER); + stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); if (!stack) panic("Couldn't allocate async stack"); S390_lowcore.async_stack = stack + STACK_INIT_OFFSET; @@ -347,7 +347,7 @@ static int __init async_stack_realloc(void) if (!new) panic("Couldn't allocate async stack"); S390_lowcore.async_stack = new + STACK_INIT_OFFSET; - free_pages(old, STACK_ORDER); + free_pages(old, THREAD_SIZE_ORDER); return 0; } early_initcall(async_stack_realloc); @@ -428,7 +428,7 @@ static void __init setup_lowcore(void) * Allocate the global restart stack which is the same for * all CPUs in cast *one* of them does a PSW restart. 
*/ - restart_stack = memblock_virt_alloc(STACK_SIZE, STACK_SIZE); + restart_stack = memblock_virt_alloc(THREAD_SIZE, THREAD_SIZE); restart_stack += STACK_INIT_OFFSET; /* diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index fccdb96a04cb..032d98bfc60a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -194,7 +194,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) if (pcpu != &pcpu_devices[0]) { pcpu->lowcore = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - nodat_stack = __get_free_pages(GFP_KERNEL, STACK_ORDER); + nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); if (!pcpu->lowcore || !nodat_stack) goto out; } else { @@ -226,7 +226,7 @@ out_async: stack_free(async_stack); out: if (pcpu != &pcpu_devices[0]) { - free_pages(nodat_stack, STACK_ORDER); + free_pages(nodat_stack, THREAD_SIZE_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); } return -ENOMEM; @@ -249,7 +249,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) stack_free(async_stack); if (pcpu == &pcpu_devices[0]) return; - free_pages(nodat_stack, STACK_ORDER); + free_pages(nodat_stack, THREAD_SIZE_ORDER); free_pages(lowcore, LC_ORDER); } From 8f75582a2fb6e2c5afc5252b6d6932f61a79c939 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 19 Jul 2018 13:11:28 +0200 Subject: [PATCH 23/83] s390: remove decompressor's head.S Decompressor's head.S provided "data mover" sole purpose of which has been to safely move uncompressed kernel at 0x100000 and jump to it. With current bzImage layout entire decompressor's code guaranteed to be in a safe location under 0x100000, and hence could not be overwritten during kernel move. For that reason head.S could be replaced with simple memmove function. To do so introduce early boot code phase which is executed from arch/s390/boot/head.S after "verify_facilities" and takes care of optional kernel image decompression and transition to it. 
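In C the whole data mover collapses to a few lines; the sketch below mirrors the new arch/s390/boot/startup.c added by this patch and is shown here only to make the commit message concrete:

	void startup_kernel(void)
	{
		void (*startup_continue)(void) = (void *) 0x100000;
		unsigned long uncompressed_size;
		void *uncompressed_img;

		if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
			uncompressed_img = decompress_kernel(&uncompressed_size);
			memmove(startup_continue, uncompressed_img, uncompressed_size);
		}
		startup_continue();	/* jump to the image at its link address */
	}
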
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 2 +- arch/s390/boot/boot.h | 7 ++++ arch/s390/boot/compressed/Makefile | 5 +-- arch/s390/boot/compressed/decompressor.h | 11 +++++ arch/s390/boot/compressed/head.S | 52 ------------------------ arch/s390/boot/compressed/misc.c | 7 +++- arch/s390/boot/head.S | 6 +-- arch/s390/boot/startup.c | 17 ++++++++ 8 files changed, 43 insertions(+), 64 deletions(-) create mode 100644 arch/s390/boot/boot.h create mode 100644 arch/s390/boot/compressed/decompressor.h delete mode 100644 arch/s390/boot/compressed/head.S create mode 100644 arch/s390/boot/startup.c diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 9e6668ee93de..1b5a95b1ab09 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -27,7 +27,7 @@ endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char -obj-y := head.o als.o ebcdic.o sclp_early_core.o mem.o +obj-y := head.o als.o startup.o ebcdic.o sclp_early_core.o mem.o targets := bzImage startup.a $(obj-y) subdir- := compressed diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h new file mode 100644 index 000000000000..36c93e6cbc3f --- /dev/null +++ b/arch/s390/boot/boot.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_BOOT_H +#define BOOT_BOOT_H + +void startup_kernel(void); + +#endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 04609478d18b..c16ded8a35be 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -9,7 +9,7 @@ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n -obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,head.o misc.o) piggy.o +obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,misc.o) piggy.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 targets += vmlinux.scr.lds $(obj-y) $(if $(CONFIG_KERNEL_UNCOMPRESSED),,sizes.h) @@ -32,9 +32,6 @@ quiet_cmd_sizes = GEN $@ $(obj)/sizes.h: vmlinux $(call if_changed,sizes) -AFLAGS_head.o += -I$(objtree)/$(obj) -$(obj)/head.o: $(obj)/sizes.h - CFLAGS_misc.o += -I$(objtree)/$(obj) $(obj)/misc.o: $(obj)/sizes.h diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h new file mode 100644 index 000000000000..0dd0b84679c4 --- /dev/null +++ b/arch/s390/boot/compressed/decompressor.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H +#define BOOT_COMPRESSED_DECOMPRESSOR_H + +#ifdef CONFIG_KERNEL_UNCOMPRESSED +static inline void *decompress_kernel(unsigned long *uncompressed_size) {} +#else +void *decompress_kernel(unsigned long *uncompressed_size); +#endif + +#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */ diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S deleted file mode 100644 index 4041fcfd8980..000000000000 --- a/arch/s390/boot/compressed/head.S +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Startup glue code to uncompress the kernel - * - * Copyright IBM Corp. 2010 - * - * Author(s): Martin Schwidefsky - */ - -#include -#include -#include -#include -#include -#include -#include "sizes.h" - -__HEAD -ENTRY(startup_decompressor) - basr %r13,0 # get base -.LPG1: - # setup stack - lg %r15,.Lstack-.LPG1(%r13) - brasl %r14,decompress_kernel - # Set up registers for memory mover. 
We move the decompressed image to - # 0x100000, where startup_continue of the decompressed image is supposed - # to be. - lgr %r4,%r2 - lg %r2,.Loffset-.LPG1(%r13) - lg %r3,.Lmvsize-.LPG1(%r13) - lgr %r5,%r3 - # Move the memory mover someplace safe so it doesn't overwrite itself. - la %r1,0x200 - mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) - # When the memory mover is done we pass control to - # arch/s390/kernel/head64.S:startup_continue which lives at 0x100000 in - # the decompressed image. - lgr %r6,%r2 - br %r1 -mover: - mvcle %r2,%r4,0 - jo mover - br %r6 -mover_end: - - .align 8 -.Lstack: - .quad 0x8000 + THREAD_SIZE - STACK_FRAME_OVERHEAD -.Loffset: - .quad 0x100000 -.Lmvsize: - .quad SZ__bss_start diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index f66ad73c205b..321f6151ded9 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -12,6 +12,7 @@ #include #include #include "sizes.h" +#include "decompressor.h" /* * gzip declarations @@ -82,7 +83,7 @@ static void error(char *x) asm volatile("lpsw %0" : : "Q" (psw)); } -unsigned long decompress_kernel(void) +void *decompress_kernel(unsigned long *uncompressed_size) { void *output, *kernel_end; @@ -111,6 +112,8 @@ unsigned long decompress_kernel(void) free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); - return (unsigned long) output; + if (uncompressed_size) + *uncompressed_size = SZ__bss_start; + return output; } diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index d0736a05bc7f..e209cfe69bb9 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -312,11 +312,7 @@ ENTRY(startup_kdump) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) l %r15,.Lstack-.LPG0(%r13) brasl %r14,verify_facilities -#ifdef CONFIG_KERNEL_UNCOMPRESSED - jg startup_continue -#else - jg startup_decompressor -#endif + brasl %r14,startup_kernel .Lstack: .long 0x8000 + THREAD_SIZE - STACK_FRAME_OVERHEAD diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c new file mode 100644 index 000000000000..2a9ce355f8e6 --- /dev/null +++ b/arch/s390/boot/startup.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "compressed/decompressor.h" +#include "boot.h" + +void startup_kernel(void) +{ + void (*startup_continue)(void) = (void *)0x100000; + unsigned long uncompressed_size; + void *uncompressed_img; + + if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { + uncompressed_img = decompress_kernel(&uncompressed_size); + memmove(startup_continue, uncompressed_img, uncompressed_size); + } + startup_continue(); +} From 369f91c374514f9491d52fec12f7ee9ef6d44b23 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 19 Jul 2018 16:51:25 +0200 Subject: [PATCH 24/83] s390/decompressor: rework uncompressed image info collection The kernel decompressor has to know several bits of information about uncompressed image. Currently this info is collected by running "nm" on uncompressed vmlinux + "sed" and producing sizes.h file. This method worked well, but it has several disadvantages. Obscure symbols name pattern matching is fragile. Adding new values makes pattern even longer. Logic is spread across code and make file. Limited ability to adjust symbols values (currently magic lma value of 0x100000 is always subtracted). Apart from that same pieces of information (and more) would be needed for early memory detection and features like KASLR outside of boot/compressed/ folder where sizes.h is generated. 
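The replacement, described in the paragraph that follows, boils down to letting the linker emit the required values as a binary blob and letting the decompressor overlay a C struct on that blob. A compact user-space model of the mechanism (the struct layout follows the patch; the numeric values and fake_entry() are invented for the demonstration):

#include <stdio.h>
#include <string.h>

/* Field layout taken from the patch; more fields come in later patches. */
struct vmlinux_info {
	unsigned long default_lma;
	void (*entry)(void);
	unsigned long image_size;	/* does not include .bss */
};

/* Stand-in for the .vmlinux.info blob that the linker writes into the
 * uncompressed image and that objcopy transplants into the decompressor. */
static unsigned char _vmlinux_info[sizeof(struct vmlinux_info)]
	__attribute__((aligned(8)));

#define vmlinux (*(struct vmlinux_info *)_vmlinux_info)

static void fake_entry(void)	/* stands in for startup_continue */
{
	puts("jumped to the decompressed kernel");
}

int main(void)
{
	/* In the real build the linker emits these values as QUADs; the
	 * numbers here are invented except for the 0x100000 load address. */
	struct vmlinux_info at_link_time = {
		.default_lma = 0x100000,
		.entry = fake_entry,
		.image_size = 0xd00000,
	};
	memcpy(_vmlinux_info, &at_link_time, sizeof(at_link_time));

	/* what startup_kernel() does with it, in miniature */
	printf("move 0x%lx bytes to 0x%lx, then jump\n",
	       vmlinux.image_size, vmlinux.default_lma);
	vmlinux.entry();
	return 0;
}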
To overcome limitations new "struct vmlinux_info" has been introduced to include values needed for the decompressor and the rest of the boot code. The only static instance of vmlinux_info is produced during vmlinux link step by filling in struct fields by the linker (like it is done with input_data in boot/compressed/vmlinux.scr.lds.S). This way individual values could be adjusted with all the knowledge linker has and arithmetic it supports. Later .vmlinux.info section (which contains struct vmlinux_info) is transplanted into the decompressor image and dropped from uncompressed image altogether. While doing that replace "compressed/vmlinux.scr.lds.S" linker script (whose purpose is to rename .data section in piggy.o to .rodata.compressed) with plain objcopy command. And simplify decompressor's linker script. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/Makefile | 33 ++++++++++----------- arch/s390/boot/compressed/decompressor.h | 13 ++++++-- arch/s390/boot/compressed/misc.c | 15 ++++------ arch/s390/boot/compressed/vmlinux.lds.S | 21 +++++++++---- arch/s390/boot/compressed/vmlinux.scr.lds.S | 15 ---------- arch/s390/boot/startup.c | 10 +++---- arch/s390/kernel/vmlinux.lds.S | 10 +++++++ 7 files changed, 61 insertions(+), 56 deletions(-) delete mode 100644 arch/s390/boot/compressed/vmlinux.scr.lds.S diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index c16ded8a35be..8262984aa405 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -9,13 +9,14 @@ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n -obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,misc.o) piggy.o +obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,misc.o) piggy.o info.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 -targets += vmlinux.scr.lds $(obj-y) $(if $(CONFIG_KERNEL_UNCOMPRESSED),,sizes.h) +targets += info.bin $(obj-y) KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) +OBJCOPYFLAGS := OBJECTS := $(addprefix $(obj)/,$(obj-y)) @@ -23,20 +24,16 @@ LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) $(call if_changed,ld) -# extract required uncompressed vmlinux symbols and adjust them to reflect offsets inside vmlinux.bin -sed-sizes := -e 's/^\([0-9a-fA-F]*\) . 
\(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - 0x100000)/p' +OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info +$(obj)/info.bin: vmlinux FORCE + $(call if_changed,objcopy) -quiet_cmd_sizes = GEN $@ - cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@ +OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info +$(obj)/info.o: $(obj)/info.bin FORCE + $(call if_changed,objcopy) -$(obj)/sizes.h: vmlinux - $(call if_changed,sizes) - -CFLAGS_misc.o += -I$(objtree)/$(obj) -$(obj)/misc.o: $(obj)/sizes.h - -OBJCOPYFLAGS_vmlinux.bin := -R .comment -S -$(obj)/vmlinux.bin: vmlinux +OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S +$(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) vmlinux.bin.all-y := $(obj)/vmlinux.bin @@ -61,10 +58,10 @@ $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) $(call if_changed,xzkern) -LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T -$(obj)/piggy.o: $(obj)/vmlinux.scr.lds $(obj)/vmlinux.bin$(suffix-y) - $(call if_changed,ld) +OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed +$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE + $(call if_changed,objcopy) -chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o,$(OBJECTS)) +chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o $(obj)/info.o,$(OBJECTS)) chkbss-target := $(obj)/vmlinux.bin include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index 0dd0b84679c4..011cbb6e0e08 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -3,9 +3,18 @@ #define BOOT_COMPRESSED_DECOMPRESSOR_H #ifdef CONFIG_KERNEL_UNCOMPRESSED -static inline void *decompress_kernel(unsigned long *uncompressed_size) {} +static inline void *decompress_kernel(void) {} #else -void *decompress_kernel(unsigned long *uncompressed_size); +void *decompress_kernel(void); #endif +struct vmlinux_info { + unsigned long default_lma; + void (*entry)(void); + unsigned long image_size; /* does not include .bss */ +}; + +extern char _vmlinux_info[]; +#define vmlinux (*(struct vmlinux_info *)_vmlinux_info) + #endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */ diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 321f6151ded9..8b35af625aff 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -11,7 +11,6 @@ #include #include #include -#include "sizes.h" #include "decompressor.h" /* @@ -26,10 +25,10 @@ #define memzero(s, n) memset((s), 0, (n)) /* Symbols defined by linker scripts */ -extern char input_data[]; -extern int input_len; extern char _end[]; extern char _bss[], _ebss[]; +extern unsigned char _compressed_start[]; +extern unsigned char _compressed_end[]; static void error(char *m); @@ -83,12 +82,12 @@ static void error(char *x) asm volatile("lpsw %0" : : "Q" (psw)); } -void *decompress_kernel(unsigned long *uncompressed_size) +void *decompress_kernel(void) { void *output, *kernel_end; output = (void *) ALIGN((unsigned long) _end + HEAP_SIZE, PAGE_SIZE); - kernel_end = output + SZ__bss_start; + kernel_end = output + vmlinux.image_size; #ifdef CONFIG_BLK_DEV_INITRD /* @@ -111,9 +110,7 @@ void *decompress_kernel(unsigned long *uncompressed_size) free_mem_ptr = (unsigned long) _end; free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; - __decompress(input_data, 
input_len, NULL, NULL, output, 0, NULL, error); - if (uncompressed_size) - *uncompressed_size = SZ__bss_start; + __decompress(_compressed_start, _compressed_end - _compressed_start, + NULL, NULL, output, 0, NULL, error); return output; } - diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index b16ac8b3c439..3814810718ef 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -8,9 +8,6 @@ ENTRY(startup) SECTIONS { - /* Be careful parts of head_64.S assume startup_32 is at - * address 0. - */ . = 0; .head.text : { _head = . ; @@ -26,7 +23,7 @@ SECTIONS .rodata : { _rodata = . ; *(.rodata) /* read-only data */ - *(EXCLUDE_FILE (*piggy.o) .rodata.compressed) + *(.rodata.*) _erodata = . ; } .data : { @@ -35,14 +32,26 @@ SECTIONS *(.data.*) _edata = . ; } - startup_continue = 0x100000; + /* + * uncompressed image info used by the decompressor it should match + * struct vmlinux_info. It comes from .vmlinux.info section of + * uncompressed vmlinux in a form of info.o + */ + . = ALIGN(8); + .vmlinux.info : { + _vmlinux_info = .; + *(.vmlinux.info) + } + #ifdef CONFIG_KERNEL_UNCOMPRESSED . = 0x100000; #else . = ALIGN(8); #endif .rodata.compressed : { - *(.rodata.compressed) + _compressed_start = .; + *(.vmlinux.bin.compressed) + _compressed_end = .; } . = ALIGN(256); .bss : { diff --git a/arch/s390/boot/compressed/vmlinux.scr.lds.S b/arch/s390/boot/compressed/vmlinux.scr.lds.S deleted file mode 100644 index ff01d18c9222..000000000000 --- a/arch/s390/boot/compressed/vmlinux.scr.lds.S +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -SECTIONS -{ - .rodata.compressed : { -#ifndef CONFIG_KERNEL_UNCOMPRESSED - input_len = .; - LONG(input_data_end - input_data) input_data = .; -#endif - *(.data) -#ifndef CONFIG_KERNEL_UNCOMPRESSED - output_len = . - 4; - input_data_end = .; -#endif - } -} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 2a9ce355f8e6..474dee84d8a8 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -5,13 +5,11 @@ void startup_kernel(void) { - void (*startup_continue)(void) = (void *)0x100000; - unsigned long uncompressed_size; - void *uncompressed_img; + void *img; if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { - uncompressed_img = decompress_kernel(&uncompressed_size); - memmove(startup_continue, uncompressed_img, uncompressed_size); + img = decompress_kernel(); + memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); } - startup_continue(); + vmlinux.entry(); } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index b43f8d33a369..4b59d1ce7124 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -146,6 +146,16 @@ SECTIONS _end = . ; + /* + * uncompressed image info used by the decompressor + * it should match struct vmlinux_info + */ + .vmlinux.info 0 : { + QUAD(_stext) /* default_lma */ + QUAD(startup_continue) /* entry */ + QUAD(__bss_start - _stext) /* image_size */ + } + /* Debugging sections. */ STABS_DEBUG DWARF_DEBUG From a2ac1bb1f3ddbad8388b0ba4edf28ff501009cea Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 25 Jul 2018 13:27:27 +0200 Subject: [PATCH 25/83] s390/decompressor: get rid of .bss usage Using .bss in early code should be avoided. It might overlay initrd image or not yet be initialized. Clean up the last couple of places in the decompressor's code where .bss is used and enfore no .bss usage check on boot/compressed/misc.c. 
In particular: - initializing free_mem_ptr and free_mem_end_ptr with values guarantee that these variables won't end up in the .bss section. - define STATIC_RW_DATA to go into .data section. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/Makefile | 2 +- arch/s390/boot/compressed/misc.c | 17 +++++------------ 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 8262984aa405..a69746cd83be 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -62,6 +62,6 @@ OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section $(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE $(call if_changed,objcopy) -chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o $(obj)/info.o,$(OBJECTS)) +chkbss := $(filter-out $(obj)/piggy.o $(obj)/info.o,$(OBJECTS)) chkbss-target := $(obj)/vmlinux.bin include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 8b35af625aff..5dcf34e31f8d 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -11,12 +11,14 @@ #include #include #include +#include #include "decompressor.h" /* * gzip declarations */ #define STATIC static +#define STATIC_RW_DATA static __section(.data) #undef memset #undef memcpy @@ -26,21 +28,20 @@ /* Symbols defined by linker scripts */ extern char _end[]; -extern char _bss[], _ebss[]; extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; static void error(char *m); -static unsigned long free_mem_ptr; -static unsigned long free_mem_end_ptr; - #ifdef CONFIG_HAVE_KERNEL_BZIP2 #define HEAP_SIZE 0x400000 #else #define HEAP_SIZE 0x10000 #endif +static unsigned long free_mem_ptr = (unsigned long) _end; +static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE; + #ifdef CONFIG_KERNEL_GZIP #include "../../../../lib/decompress_inflate.c" #endif @@ -102,14 +103,6 @@ void *decompress_kernel(void) } #endif - /* - * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be - * initialized afterwards since they reside in bss. - */ - memset(_bss, 0, _ebss - _bss); - free_mem_ptr = (unsigned long) _end; - free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; - __decompress(_compressed_start, _compressed_end - _compressed_start, NULL, NULL, output, 0, NULL, error); return output; From 3b076dca14c50f61c89a3dc51b3150656272b0f8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 9 Apr 2018 10:40:39 +0200 Subject: [PATCH 26/83] s390/sclp: simplify early hsa_size detection Architecture documentation suggests that hsa_size has been available in the read info since the list-directed ipl dump has been introduced. By using this value few early sclp calls could be avoided. 
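The saving comes down to one conversion on the already-fetched read-info SCCB: a zero field means no HSA, otherwise the field is scaled to bytes with the same (blocks - 1) * PAGE_SIZE formula the SDIAS-based detection removed below used. A tiny sketch of that conversion (the helper name is made up; PAGE_SIZE is 4 KiB on s390):

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* 4 KiB pages on s390 */

/* Hypothetical helper mirroring the conversion in the patch: zero means
 * "no HSA", otherwise (blocks - 1) * PAGE_SIZE, the same formula the
 * removed SDIAS path applied to its block count. */
static unsigned long hsa_size_bytes(unsigned int hsa_size_field)
{
	if (!hsa_size_field)
		return 0;
	return (hsa_size_field - 1) * PAGE_SIZE;
}

int main(void)
{
	/* e.g. a field value of 8193 blocks corresponds to 32 MiB of HSA */
	printf("%lu MiB\n", hsa_size_bytes(8193) >> 20);
	return 0;
}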
Reviewed-by: Heiko Carstens Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp_early.c | 60 ++-------------------------------- 1 file changed, 3 insertions(+), 57 deletions(-) diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 9a74abb9224d..c06b44b7d8fd 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -46,7 +46,7 @@ struct read_info_sccb { u8 hamaxpow; /* 99 */ u32 rnsize2; /* 100-103 */ u64 rnmax2; /* 104-111 */ - u8 _pad_112[116 - 112]; /* 112-115 */ + u32 hsa_size; /* 112-115 */ u8 fac116; /* 116 */ u8 fac117; /* 117 */ u8 fac118; /* 118 */ @@ -147,6 +147,8 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) sclp_ipl_info.has_dump = 1; memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN); + if (sccb->hsa_size) + sclp.hsa_size = (sccb->hsa_size - 1) * PAGE_SIZE; sclp.mtid = (sccb->fac42 & 0x80) ? (sccb->fac42 & 31) : 0; sclp.mtid_cp = (sccb->fac42 & 0x80) ? (sccb->fac43 & 31) : 0; sclp.mtid_prev = (sccb->fac42 & 0x80) ? (sccb->fac66 & 31) : 0; @@ -189,61 +191,6 @@ int __init sclp_early_get_core_info(struct sclp_core_info *info) return 0; } -static long __init sclp_early_hsa_size_init(struct sdias_sccb *sccb) -{ - memset(sccb, 0, sizeof(*sccb)); - sccb->hdr.length = sizeof(*sccb); - sccb->evbuf.hdr.length = sizeof(struct sdias_evbuf); - sccb->evbuf.hdr.type = EVTYP_SDIAS; - sccb->evbuf.event_qual = SDIAS_EQ_SIZE; - sccb->evbuf.data_id = SDIAS_DI_FCP_DUMP; - sccb->evbuf.event_id = 4712; - sccb->evbuf.dbs = 1; - if (sclp_early_cmd(SCLP_CMDW_WRITE_EVENT_DATA, sccb)) - return -EIO; - if (sccb->hdr.response_code != 0x20) - return -EIO; - if (sccb->evbuf.blk_cnt == 0) - return 0; - return (sccb->evbuf.blk_cnt - 1) * PAGE_SIZE; -} - -static long __init sclp_early_hsa_copy_wait(struct sdias_sccb *sccb) -{ - memset(sccb, 0, PAGE_SIZE); - sccb->hdr.length = PAGE_SIZE; - if (sclp_early_cmd(SCLP_CMDW_READ_EVENT_DATA, sccb)) - return -EIO; - if ((sccb->hdr.response_code != 0x20) && (sccb->hdr.response_code != 0x220)) - return -EIO; - if (sccb->evbuf.blk_cnt == 0) - return 0; - return (sccb->evbuf.blk_cnt - 1) * PAGE_SIZE; -} - -static void __init sclp_early_hsa_size_detect(void *sccb) -{ - unsigned long flags; - long size = -EIO; - - raw_local_irq_save(flags); - if (sclp_early_set_event_mask(sccb, EVTYP_SDIAS_MASK, EVTYP_SDIAS_MASK)) - goto out; - size = sclp_early_hsa_size_init(sccb); - /* First check for synchronous response (LPAR) */ - if (size) - goto out_mask; - if (!(S390_lowcore.ext_params & 1)) - sclp_early_wait_irq(); - size = sclp_early_hsa_copy_wait(sccb); -out_mask: - sclp_early_set_event_mask(sccb, 0, 0); -out: - raw_local_irq_restore(flags); - if (size > 0) - sclp.hsa_size = size; -} - static void __init sclp_early_console_detect(struct init_sccb *sccb) { if (sccb->header.response_code != 0x20) @@ -262,7 +209,6 @@ void __init sclp_early_detect(void) sclp_early_facilities_detect(sccb); sclp_early_init_core_info(sccb); - sclp_early_hsa_size_detect(sccb); /* * Turn off SCLP event notifications. Also save remote masks in the From 15426ca43d888e79f2dc4012bce0cbd6be96baf1 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Apr 2018 11:56:55 +0200 Subject: [PATCH 27/83] s390: rescue initrd as early as possible To avoid multi-stage initrd rescue operation and to simplify assumptions during early memory allocations move initrd at some final safe destination as early as possible. 
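"As early as possible" here means right at boot startup, before any allocation: the initrd only has to end up above everything the remaining boot steps will still write, i.e. above the decompression work area plus the decompressed image, or above the relocated kernel plus its .bss, whichever is higher. A sketch of that bound with invented sizes (the real computation is mem_safe_offset() in the hunks below):

#include <stdio.h>

/* Illustrative numbers only; the real values come from vmlinux_info and
 * the decompressor's linker script. */
#define DECOMPRESS_OFFSET	0x0a00000UL	/* _end + HEAP_SIZE, page aligned   */
#define DEFAULT_LMA		0x0100000UL	/* final kernel load address        */
#define IMAGE_SIZE		0x0d00000UL	/* decompressed image, without .bss */
#define BSS_SIZE		0x0200000UL

static unsigned long max_ul(unsigned long a, unsigned long b)
{
	return a > b ? a : b;
}

/* Lowest address guaranteed not to be overwritten by the remaining boot
 * steps; anything (like the initrd) moved here once is safe for good. */
static unsigned long mem_safe_offset(void)
{
	return max_ul(DECOMPRESS_OFFSET + IMAGE_SIZE,
		      DEFAULT_LMA + IMAGE_SIZE + BSS_SIZE);
}

int main(void)
{
	unsigned long initrd_start = 0x0800000UL;	/* made-up initrd address */

	if (initrd_start < mem_safe_offset())
		printf("initrd must be moved up to 0x%lx\n", mem_safe_offset());
	else
		printf("initrd already above 0x%lx, nothing to do\n",
		       mem_safe_offset());
	return 0;
}

Rescuing the initrd past this bound once, up front, is what removes the need for the old multi-stage handling.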
This would also allow us to drop .bss usage restrictions for some files. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/decompressor.h | 2 ++ arch/s390/boot/compressed/misc.c | 31 +++++++++++------------- arch/s390/boot/startup.c | 24 ++++++++++++++++++ arch/s390/kernel/early_nobss.c | 22 ----------------- arch/s390/kernel/vmlinux.lds.S | 1 + 5 files changed, 41 insertions(+), 39 deletions(-) diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index 011cbb6e0e08..90d382d501d7 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -7,11 +7,13 @@ static inline void *decompress_kernel(void) {} #else void *decompress_kernel(void); #endif +unsigned long mem_safe_offset(void); struct vmlinux_info { unsigned long default_lma; void (*entry)(void); unsigned long image_size; /* does not include .bss */ + unsigned long bss_size; /* uncompressed image .bss size */ }; extern char _vmlinux_info[]; diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 5dcf34e31f8d..b773f81f5bff 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -83,25 +83,22 @@ static void error(char *x) asm volatile("lpsw %0" : : "Q" (psw)); } +#define decompress_offset ALIGN((unsigned long)_end + HEAP_SIZE, PAGE_SIZE) + +unsigned long mem_safe_offset(void) +{ + /* + * due to 4MB HEAD_SIZE for bzip2 + * 'decompress_offset + vmlinux.image_size' could be larger than + * kernel at final position + its .bss, so take the larger of two + */ + return max(decompress_offset + vmlinux.image_size, + vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size); +} + void *decompress_kernel(void) { - void *output, *kernel_end; - - output = (void *) ALIGN((unsigned long) _end + HEAP_SIZE, PAGE_SIZE); - kernel_end = output + vmlinux.image_size; - -#ifdef CONFIG_BLK_DEV_INITRD - /* - * Move the initrd right behind the end of the decompressed - * kernel image. This also prevents initrd corruption caused by - * bss clearing since kernel_end will always be located behind the - * current bss section.. 
- */ - if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) { - memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE); - INITRD_START = (unsigned long) kernel_end; - } -#endif + void *output = (void *)decompress_offset; __decompress(_compressed_start, _compressed_end - _compressed_start, NULL, NULL, output, 0, NULL, error); diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 474dee84d8a8..5aeac7564e67 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,12 +1,36 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include "compressed/decompressor.h" #include "boot.h" +#ifdef CONFIG_KERNEL_UNCOMPRESSED +unsigned long mem_safe_offset(void) +{ + return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size; +} +#endif + +static void rescue_initrd(void) +{ + unsigned long min_initrd_addr; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) + return; + if (!INITRD_START || !INITRD_SIZE) + return; + min_initrd_addr = mem_safe_offset(); + if (min_initrd_addr <= INITRD_START) + return; + memmove((void *)min_initrd_addr, (void *)INITRD_START, INITRD_SIZE); + INITRD_START = min_initrd_addr; +} + void startup_kernel(void) { void *img; + rescue_initrd(); if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { img = decompress_kernel(); memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); diff --git a/arch/s390/kernel/early_nobss.c b/arch/s390/kernel/early_nobss.c index 2d84fc48df3a..8e96590b3a68 100644 --- a/arch/s390/kernel/early_nobss.c +++ b/arch/s390/kernel/early_nobss.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include "entry.h" @@ -32,26 +31,6 @@ static void __init reset_tod_clock(void) S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; } -static void __init rescue_initrd(void) -{ - unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); - - /* - * Just like in case of IPL from VM reader we make sure there is a - * gap of 4MB between end of kernel and start of initrd. - * That way we can also be sure that saving an NSS will succeed, - * which however only requires different segments. - */ - if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) - return; - if (!INITRD_START || !INITRD_SIZE) - return; - if (INITRD_START >= min_initrd_addr) - return; - memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); - INITRD_START = min_initrd_addr; -} - static void __init clear_bss_section(void) { memset(__bss_start, 0, __bss_stop - __bss_start); @@ -60,6 +39,5 @@ static void __init clear_bss_section(void) void __init startup_init_nobss(void) { reset_tod_clock(); - rescue_initrd(); clear_bss_section(); } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 4b59d1ce7124..4c5358ff9e05 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -154,6 +154,7 @@ SECTIONS QUAD(_stext) /* default_lma */ QUAD(startup_continue) /* entry */ QUAD(__bss_start - _stext) /* image_size */ + QUAD(__bss_stop - __bss_start) /* bss_size */ } /* Debugging sections. */ From 7516fc11e44e73f1fcf8a3808dd88f82142e6585 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 25 Jul 2018 15:01:11 +0200 Subject: [PATCH 28/83] s390/decompressor: clean up and rename compressed/misc.c Since compressed/misc.c is conditionally compiled move error reporting code to boot/main.c. With that being done compressed/misc.c has no "miscellaneous" functions left and is all about plain decompression now. Rename it accordingly. 
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/Makefile | 2 +- .../compressed/{misc.c => decompressor.c} | 25 ++----------------- arch/s390/boot/compressed/decompressor.h | 1 + arch/s390/boot/startup.c | 10 ++++++++ 4 files changed, 14 insertions(+), 24 deletions(-) rename arch/s390/boot/compressed/{misc.c => decompressor.c} (83%) diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index a69746cd83be..fd7cfc7b93a5 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -9,7 +9,7 @@ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n -obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,misc.o) piggy.o info.o +obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) piggy.o info.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 targets += info.bin $(obj-y) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/decompressor.c similarity index 83% rename from arch/s390/boot/compressed/misc.c rename to arch/s390/boot/compressed/decompressor.c index b773f81f5bff..45046630c56a 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/decompressor.c @@ -7,11 +7,9 @@ * Author(s): Martin Schwidefsky */ -#include +#include +#include #include -#include -#include -#include #include "decompressor.h" /* @@ -31,8 +29,6 @@ extern char _end[]; extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; -static void error(char *m); - #ifdef CONFIG_HAVE_KERNEL_BZIP2 #define HEAP_SIZE 0x400000 #else @@ -66,23 +62,6 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE; #include "../../../../lib/decompress_unxz.c" #endif -static int puts(const char *s) -{ - sclp_early_printk(s); - return 0; -} - -static void error(char *x) -{ - unsigned long long psw = 0x000a0000deadbeefULL; - - puts("\n\n"); - puts(x); - puts("\n\n -- System halted"); - - asm volatile("lpsw %0" : : "Q" (psw)); -} - #define decompress_offset ALIGN((unsigned long)_end + HEAP_SIZE, PAGE_SIZE) unsigned long mem_safe_offset(void) diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index 90d382d501d7..b774425dcb5f 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -8,6 +8,7 @@ static inline void *decompress_kernel(void) {} void *decompress_kernel(void); #endif unsigned long mem_safe_offset(void); +void error(char *m); struct vmlinux_info { unsigned long default_lma; diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 5aeac7564e67..81199ca4a513 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,9 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include "compressed/decompressor.h" #include "boot.h" +void error(char *x) +{ + sclp_early_printk("\n\n"); + sclp_early_printk(x); + sclp_early_printk("\n\n -- System halted"); + + disabled_wait(0xdeadbeef); +} + #ifdef CONFIG_KERNEL_UNCOMPRESSED unsigned long mem_safe_offset(void) { From d1b52a4388ffdcff47fb53de7fffe052fe766a9f Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Apr 2018 14:14:02 +0200 Subject: [PATCH 29/83] s390: introduce .boot.data section Introduce .boot.data section which is "shared" between the decompressor code and the decompressed kernel. 
The decompressor will store values in it, and copy over to the decompressed image before starting it. This method allows to avoid using pre-defined addresses and other hacks to pass values between those boot phases. .boot.data section is a part of init data, and will be freed after kernel initialization is complete. For uncompressed kernel image, .boot.data section is basically the same as .init.data Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/decompressor.h | 2 ++ arch/s390/boot/compressed/vmlinux.lds.S | 3 +++ arch/s390/boot/startup.c | 10 ++++++++++ arch/s390/include/asm/sections.h | 12 ++++++++++++ arch/s390/include/asm/vmlinux.lds.h | 20 ++++++++++++++++++++ arch/s390/kernel/vmlinux.lds.S | 13 +++++++++---- 6 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 arch/s390/include/asm/vmlinux.lds.h diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index b774425dcb5f..e1c1f2ec60f4 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -15,6 +15,8 @@ struct vmlinux_info { void (*entry)(void); unsigned long image_size; /* does not include .bss */ unsigned long bss_size; /* uncompressed image .bss size */ + unsigned long bootdata_off; + unsigned long bootdata_size; }; extern char _vmlinux_info[]; diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 3814810718ef..7efc3938f595 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) @@ -32,6 +33,8 @@ SECTIONS *(.data.*) _edata = . ; } + BOOT_DATA + /* * uncompressed image info used by the decompressor it should match * struct vmlinux_info. It comes from .vmlinux.info section of diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 81199ca4a513..e9eea37894b3 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -5,6 +5,8 @@ #include "compressed/decompressor.h" #include "boot.h" +extern char __boot_data_start[], __boot_data_end[]; + void error(char *x) { sclp_early_printk("\n\n"); @@ -36,6 +38,13 @@ static void rescue_initrd(void) INITRD_START = min_initrd_addr; } +static void copy_bootdata(void) +{ + if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size) + error(".boot.data section size mismatch"); + memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size); +} + void startup_kernel(void) { void *img; @@ -45,5 +54,6 @@ void startup_kernel(void) img = decompress_kernel(); memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); } + copy_bootdata(); vmlinux.entry(); } diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 724faede8ac5..7afe4620685c 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,4 +4,16 @@ #include +/* + * .boot.data section contains variables "shared" between the decompressor and + * the decompressed kernel. The decompressor will store values in them, and + * copy over to the decompressed image before starting it. 
+ * + * Each variable end up in its own intermediate section .boot.data., + * those sections are later sorted by alignment + name and merged together into + * final .boot.data section, which should be identical in the decompressor and + * the decompressed kernel (that is checked during the build). + */ +#define __bootdata(var) __section(.boot.data.var) var + #endif diff --git a/arch/s390/include/asm/vmlinux.lds.h b/arch/s390/include/asm/vmlinux.lds.h new file mode 100644 index 000000000000..2d127f900352 --- /dev/null +++ b/arch/s390/include/asm/vmlinux.lds.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include + +/* + * .boot.data section is shared between the decompressor code and the + * decompressed kernel. The decompressor will store values in it, and copy + * over to the decompressed image before starting it. + * + * .boot.data variables are kept in separate .boot.data. sections, + * which are sorted by alignment first, then by name before being merged + * into single .boot.data section. This way big holes cased by page aligned + * structs are avoided and linker produces consistent result. + */ +#define BOOT_DATA \ + . = ALIGN(PAGE_SIZE); \ + .boot.data : { \ + __boot_data_start = .; \ + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.boot.data*))) \ + __boot_data_end = .; \ + } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 4c5358ff9e05..cc3cbdc93d35 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -16,6 +16,7 @@ #define RO_AFTER_INIT_DATA #include +#include OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) @@ -134,6 +135,8 @@ SECTIONS __nospec_return_end = . ; } + BOOT_DATA + /* early.c uses stsi, which requires page aligned data. */ . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) @@ -151,10 +154,12 @@ SECTIONS * it should match struct vmlinux_info */ .vmlinux.info 0 : { - QUAD(_stext) /* default_lma */ - QUAD(startup_continue) /* entry */ - QUAD(__bss_start - _stext) /* image_size */ - QUAD(__bss_stop - __bss_start) /* bss_size */ + QUAD(_stext) /* default_lma */ + QUAD(startup_continue) /* entry */ + QUAD(__bss_start - _stext) /* image_size */ + QUAD(__bss_stop - __bss_start) /* bss_size */ + QUAD(__boot_data_start) /* bootdata_off */ + QUAD(__boot_data_end - __boot_data_start) /* bootdata_size */ } /* Debugging sections. */ From 17aacfbfa1ae386d3e54d12a13b88b7981e04896 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 23 May 2018 11:07:13 +0200 Subject: [PATCH 30/83] s390/sclp: move sclp_early_read_info to sclp_early_core.c To enable early online memory detection sclp_early_read_info has been moved to sclp_early_core.c. sclp_info_sccb has been made a part of .boot.data, which allows to reuse it later during early kernel startup and make sclp_early_read_info call just once. 
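In effect the SCCB is filled exactly once, in the decompressor, and handed to the kernel proper through the .boot.data copy introduced earlier in the series. A user-space model of that handover, with the SCCB reduced to a single field (all names and values here are stand-ins for the real section copy done by copy_bootdata()):

#include <stdio.h>

/* The real SCCB has dozens of fields; one is enough for the model. */
struct read_info_sccb {
	unsigned int hsa_size;
};

/* decompressor's copies of the __bootdata variables */
static struct read_info_sccb boot_sclp_info_sccb;
static int boot_sclp_info_sccb_valid;

/* kernel's copies, placed at identical offsets in .boot.data by the
 * linker (a later patch adds a build-time check for exactly that) */
static struct read_info_sccb kernel_sclp_info_sccb;
static int kernel_sclp_info_sccb_valid;

/* stands in for the single copy of the whole .boot.data section */
static void copy_bootdata(void)
{
	kernel_sclp_info_sccb = boot_sclp_info_sccb;
	kernel_sclp_info_sccb_valid = boot_sclp_info_sccb_valid;
}

int main(void)
{
	/* sclp_early_read_info(), called once from the decompressor */
	boot_sclp_info_sccb.hsa_size = 8193;
	boot_sclp_info_sccb_valid = 1;

	copy_bootdata();

	/* later sclp_early_get_info() calls in the kernel simply reuse
	 * the already-filled SCCB */
	if (kernel_sclp_info_sccb_valid)
		printf("hsa_size field: %u blocks\n",
		       kernel_sclp_info_sccb.hsa_size);
	return 0;
}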
Reviewed-by: Heiko Carstens Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/startup.c | 1 + arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp.h | 42 +++++++++++++++++++ drivers/s390/char/sclp_early.c | 65 +---------------------------- drivers/s390/char/sclp_early_core.c | 36 ++++++++++++++++ 5 files changed, 81 insertions(+), 64 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index e9eea37894b3..78651a2c26b0 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -50,6 +50,7 @@ void startup_kernel(void) void *img; rescue_initrd(); + sclp_early_read_info(); if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { img = decompress_kernel(); memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 5d9420bbe165..7df57bd09aa1 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -105,6 +105,7 @@ struct zpci_report_error_header { u8 data[0]; /* Subsequent Data passed verbatim to SCLP ET 24 */ } __packed; +int sclp_early_read_info(void); int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 1fe4918088e7..ffe72f03b5c5 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -63,6 +63,8 @@ typedef unsigned int sclp_cmdw_t; #define SCLP_CMDW_READ_CPU_INFO 0x00010001 +#define SCLP_CMDW_READ_SCP_INFO 0x00020001 +#define SCLP_CMDW_READ_SCP_INFO_FORCED 0x00120001 #define SCLP_CMDW_READ_EVENT_DATA 0x00770005 #define SCLP_CMDW_WRITE_EVENT_DATA 0x00760005 #define SCLP_CMDW_WRITE_EVENT_MASK 0x00780005 @@ -156,6 +158,45 @@ struct read_cpu_info_sccb { u8 reserved[4096 - 16]; } __attribute__((packed, aligned(PAGE_SIZE))); +struct read_info_sccb { + struct sccb_header header; /* 0-7 */ + u16 rnmax; /* 8-9 */ + u8 rnsize; /* 10 */ + u8 _pad_11[16 - 11]; /* 11-15 */ + u16 ncpurl; /* 16-17 */ + u16 cpuoff; /* 18-19 */ + u8 _pad_20[24 - 20]; /* 20-23 */ + u8 loadparm[8]; /* 24-31 */ + u8 _pad_32[42 - 32]; /* 32-41 */ + u8 fac42; /* 42 */ + u8 fac43; /* 43 */ + u8 _pad_44[48 - 44]; /* 44-47 */ + u64 facilities; /* 48-55 */ + u8 _pad_56[66 - 56]; /* 56-65 */ + u8 fac66; /* 66 */ + u8 _pad_67[76 - 67]; /* 67-83 */ + u32 ibc; /* 76-79 */ + u8 _pad80[84 - 80]; /* 80-83 */ + u8 fac84; /* 84 */ + u8 fac85; /* 85 */ + u8 _pad_86[91 - 86]; /* 86-90 */ + u8 fac91; /* 91 */ + u8 _pad_92[98 - 92]; /* 92-97 */ + u8 fac98; /* 98 */ + u8 hamaxpow; /* 99 */ + u32 rnsize2; /* 100-103 */ + u64 rnmax2; /* 104-111 */ + u32 hsa_size; /* 112-115 */ + u8 fac116; /* 116 */ + u8 fac117; /* 117 */ + u8 fac118; /* 118 */ + u8 fac119; /* 119 */ + u16 hcpua; /* 120-121 */ + u8 _pad_122[124 - 122]; /* 122-123 */ + u32 hmfai; /* 124-127 */ + u8 _pad_128[4096 - 128]; /* 128-4095 */ +} __packed __aligned(PAGE_SIZE); + static inline void sclp_fill_core_info(struct sclp_core_info *info, struct read_cpu_info_sccb *sccb) { @@ -275,6 +316,7 @@ unsigned int sclp_early_con_check_vt220(struct init_sccb *sccb); int sclp_early_set_event_mask(struct init_sccb *sccb, sccb_mask_t receive_mask, sccb_mask_t send_mask); +int sclp_early_get_info(struct read_info_sccb *info); /* useful inlines */ diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index c06b44b7d8fd..e792cee3b51c 100644 --- a/drivers/s390/char/sclp_early.c +++ 
b/drivers/s390/char/sclp_early.c @@ -15,80 +15,17 @@ #include "sclp_sdias.h" #include "sclp.h" -#define SCLP_CMDW_READ_SCP_INFO 0x00020001 -#define SCLP_CMDW_READ_SCP_INFO_FORCED 0x00120001 - -struct read_info_sccb { - struct sccb_header header; /* 0-7 */ - u16 rnmax; /* 8-9 */ - u8 rnsize; /* 10 */ - u8 _pad_11[16 - 11]; /* 11-15 */ - u16 ncpurl; /* 16-17 */ - u16 cpuoff; /* 18-19 */ - u8 _pad_20[24 - 20]; /* 20-23 */ - u8 loadparm[8]; /* 24-31 */ - u8 _pad_32[42 - 32]; /* 32-41 */ - u8 fac42; /* 42 */ - u8 fac43; /* 43 */ - u8 _pad_44[48 - 44]; /* 44-47 */ - u64 facilities; /* 48-55 */ - u8 _pad_56[66 - 56]; /* 56-65 */ - u8 fac66; /* 66 */ - u8 _pad_67[76 - 67]; /* 67-83 */ - u32 ibc; /* 76-79 */ - u8 _pad80[84 - 80]; /* 80-83 */ - u8 fac84; /* 84 */ - u8 fac85; /* 85 */ - u8 _pad_86[91 - 86]; /* 86-90 */ - u8 fac91; /* 91 */ - u8 _pad_92[98 - 92]; /* 92-97 */ - u8 fac98; /* 98 */ - u8 hamaxpow; /* 99 */ - u32 rnsize2; /* 100-103 */ - u64 rnmax2; /* 104-111 */ - u32 hsa_size; /* 112-115 */ - u8 fac116; /* 116 */ - u8 fac117; /* 117 */ - u8 fac118; /* 118 */ - u8 fac119; /* 119 */ - u16 hcpua; /* 120-121 */ - u8 _pad_122[124 - 122]; /* 122-123 */ - u32 hmfai; /* 124-127 */ - u8 _pad_128[4096 - 128]; /* 128-4095 */ -} __packed __aligned(PAGE_SIZE); - static struct sclp_ipl_info sclp_ipl_info; struct sclp_info sclp; EXPORT_SYMBOL(sclp); -static int __init sclp_early_read_info(struct read_info_sccb *sccb) -{ - int i; - sclp_cmdw_t commands[] = {SCLP_CMDW_READ_SCP_INFO_FORCED, - SCLP_CMDW_READ_SCP_INFO}; - - for (i = 0; i < ARRAY_SIZE(commands); i++) { - memset(sccb, 0, sizeof(*sccb)); - sccb->header.length = sizeof(*sccb); - sccb->header.function_code = 0x80; - sccb->header.control_mask[2] = 0x80; - if (sclp_early_cmd(commands[i], sccb)) - break; - if (sccb->header.response_code == 0x10) - return 0; - if (sccb->header.response_code != 0x1f0) - break; - } - return -EIO; -} - static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) { struct sclp_core_entry *cpue; u16 boot_cpu_address, cpu; - if (sclp_early_read_info(sccb)) + if (sclp_early_get_info(sccb)) return; sclp.facilities = sccb->facilities; diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index 2f61f5579aa5..bbea2154a807 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -9,9 +9,12 @@ #include #include #include +#include #include "sclp.h" #include "sclp_rw.h" +static struct read_info_sccb __bootdata(sclp_info_sccb); +static int __bootdata(sclp_info_sccb_valid); char sclp_early_sccb[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data); int sclp_init_state __section(.data) = sclp_init_state_uninitialized; /* @@ -234,3 +237,36 @@ void sclp_early_printk_force(const char *str) { __sclp_early_printk(str, strlen(str), 1); } + +int __init sclp_early_read_info(void) +{ + int i; + struct read_info_sccb *sccb = &sclp_info_sccb; + sclp_cmdw_t commands[] = {SCLP_CMDW_READ_SCP_INFO_FORCED, + SCLP_CMDW_READ_SCP_INFO}; + + for (i = 0; i < ARRAY_SIZE(commands); i++) { + memset(sccb, 0, sizeof(*sccb)); + sccb->header.length = sizeof(*sccb); + sccb->header.function_code = 0x80; + sccb->header.control_mask[2] = 0x80; + if (sclp_early_cmd(commands[i], sccb)) + break; + if (sccb->header.response_code == 0x10) { + sclp_info_sccb_valid = 1; + return 0; + } + if (sccb->header.response_code != 0x1f0) + break; + } + return -EIO; +} + +int __init sclp_early_get_info(struct read_info_sccb *info) +{ + if (!sclp_info_sccb_valid) + return -EIO; + + *info = 
sclp_info_sccb; + return 0; +} From 6966d604e2ec4ecf5691aea953538f63597a250d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Apr 2018 11:56:55 +0200 Subject: [PATCH 31/83] s390/mem_detect: move tprot loop to early boot phase Move memory detection to early boot phase. To store online memory regions "struct mem_detect_info" has been introduced together with for_each_mem_detect_block iterator. mem_detect_info is later converted to memblock. Also introduces sclp_early_get_meminfo function to get maximum physical memory and maximum increment number. Reviewed-by: Heiko Carstens Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 2 +- arch/s390/boot/boot.h | 1 + arch/s390/boot/mem_detect.c | 133 ++++++++++++++++++++++++++++ arch/s390/boot/startup.c | 1 + arch/s390/include/asm/mem_detect.h | 77 ++++++++++++++++ arch/s390/include/asm/sclp.h | 1 + arch/s390/include/asm/setup.h | 2 - arch/s390/kernel/setup.c | 47 +++++++++- arch/s390/mm/Makefile | 3 +- arch/s390/mm/mem_detect.c | 62 ------------- drivers/s390/char/sclp_early_core.c | 17 ++++ 11 files changed, 277 insertions(+), 69 deletions(-) create mode 100644 arch/s390/boot/mem_detect.c create mode 100644 arch/s390/include/asm/mem_detect.h delete mode 100644 arch/s390/mm/mem_detect.c diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 1b5a95b1ab09..5e2cec6e4b3e 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -27,7 +27,7 @@ endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char -obj-y := head.o als.o startup.o ebcdic.o sclp_early_core.o mem.o +obj-y := head.o als.o startup.o mem_detect.o ebcdic.o sclp_early_core.o mem.o targets := bzImage startup.a $(obj-y) subdir- := compressed diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 36c93e6cbc3f..808154b99a5d 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -3,5 +3,6 @@ #define BOOT_BOOT_H void startup_kernel(void); +void detect_memory(void); #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c new file mode 100644 index 000000000000..920e6fee75de --- /dev/null +++ b/arch/s390/boot/mem_detect.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "compressed/decompressor.h" +#include "boot.h" + +#define CHUNK_READ_WRITE 0 +#define CHUNK_READ_ONLY 1 + +unsigned long __bootdata(max_physmem_end); +struct mem_detect_info __bootdata(mem_detect); + +/* up to 256 storage elements, 1020 subincrements each */ +#define ENTRIES_EXTENDED_MAX \ + (256 * (1020 / 2) * sizeof(struct mem_detect_block)) + +/* + * To avoid corrupting old kernel memory during dump, find lowest memory + * chunk possible either right after the kernel end (decompressed kernel) or + * after initrd (if it is present and there is no hole between the kernel end + * and initrd) + */ +static void *mem_detect_alloc_extended(void) +{ + unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64)); + + if (IS_ENABLED(BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && + INITRD_START < offset + ENTRIES_EXTENDED_MAX) + offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64)); + + return (void *)offset; +} + +static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n) +{ + if (n < MEM_INLINED_ENTRIES) + return &mem_detect.entries[n]; + if (unlikely(!mem_detect.entries_extended)) + mem_detect.entries_extended = mem_detect_alloc_extended(); + return &mem_detect.entries_extended[n - 
MEM_INLINED_ENTRIES]; +} + +/* + * sequential calls to add_mem_detect_block with adjacent memory areas + * are merged together into single memory block. + */ +void add_mem_detect_block(u64 start, u64 end) +{ + struct mem_detect_block *block; + + if (mem_detect.count) { + block = __get_mem_detect_block_ptr(mem_detect.count - 1); + if (block->end == start) { + block->end = end; + return; + } + } + + block = __get_mem_detect_block_ptr(mem_detect.count); + block->start = start; + block->end = end; + mem_detect.count++; +} + +static unsigned long get_mem_detect_end(void) +{ + if (mem_detect.count) + return __get_mem_detect_block_ptr(mem_detect.count - 1)->end; + return 0; +} + +static int tprot(unsigned long addr) +{ + unsigned long pgm_addr; + int rc = -EFAULT; + psw_t old = S390_lowcore.program_new_psw; + + S390_lowcore.program_new_psw.mask = __extract_psw(); + asm volatile( + " larl %[pgm_addr],1f\n" + " stg %[pgm_addr],%[psw_pgm_addr]\n" + " tprot 0(%[addr]),0\n" + " ipm %[rc]\n" + " srl %[rc],28\n" + "1:\n" + : [pgm_addr] "=&d"(pgm_addr), + [psw_pgm_addr] "=Q"(S390_lowcore.program_new_psw.addr), + [rc] "+&d"(rc) + : [addr] "a"(addr) + : "cc", "memory"); + S390_lowcore.program_new_psw = old; + return rc; +} + +static void scan_memory(unsigned long rzm) +{ + unsigned long addr, size; + int type; + + if (!rzm) + rzm = 1UL << 20; + + addr = 0; + do { + size = 0; + /* assume lowcore is writable */ + type = addr ? tprot(addr) : CHUNK_READ_WRITE; + do { + size += rzm; + if (max_physmem_end && addr + size >= max_physmem_end) + break; + } while (type == tprot(addr + size)); + if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { + if (max_physmem_end && (addr + size > max_physmem_end)) + size = max_physmem_end - addr; + add_mem_detect_block(addr, addr + size); + } + addr += size; + } while (addr < max_physmem_end); +} + +void detect_memory(void) +{ + unsigned long rzm; + + sclp_early_get_meminfo(&max_physmem_end, &rzm); + scan_memory(rzm); + mem_detect.info_source = MEM_DETECT_TPROT_LOOP; + if (!max_physmem_end) + max_physmem_end = get_mem_detect_end(); +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 78651a2c26b0..b0e9f4619203 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -51,6 +51,7 @@ void startup_kernel(void) rescue_initrd(); sclp_early_read_info(); + detect_memory(); if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { img = decompress_kernel(); memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h new file mode 100644 index 000000000000..8586adef1c65 --- /dev/null +++ b/arch/s390/include/asm/mem_detect.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_MEM_DETECT_H +#define _ASM_S390_MEM_DETECT_H + +#include + +enum mem_info_source { + MEM_DETECT_NONE = 0, + MEM_DETECT_TPROT_LOOP +}; + +struct mem_detect_block { + u64 start; + u64 end; +}; + +/* + * Storage element id is defined as 1 byte (up to 256 storage elements). + * In practise only storage element id 0 and 1 are used). + * According to architecture one storage element could have as much as + * 1020 subincrements. 255 mem_detect_blocks are embedded in mem_detect_info. + * If more mem_detect_blocks are required, a block of memory from already + * known mem_detect_block is taken (entries_extended points to it). 
+ */ +#define MEM_INLINED_ENTRIES 255 /* (PAGE_SIZE - 16) / 16 */ + +struct mem_detect_info { + u32 count; + u8 info_source; + struct mem_detect_block entries[MEM_INLINED_ENTRIES]; + struct mem_detect_block *entries_extended; +}; +extern struct mem_detect_info mem_detect; + +static inline int __get_mem_detect_block(u32 n, unsigned long *start, + unsigned long *end) +{ + if (n >= mem_detect.count) { + *start = 0; + *end = 0; + return -1; + } + + if (n < MEM_INLINED_ENTRIES) { + *start = (unsigned long)mem_detect.entries[n].start; + *end = (unsigned long)mem_detect.entries[n].end; + } else { + *start = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].start; + *end = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].end; + } + return 0; +} + +/** + * for_each_mem_detect_block - early online memory range iterator + * @i: an integer used as loop variable + * @p_start: ptr to unsigned long for start address of the range + * @p_end: ptr to unsigned long for end address of the range + * + * Walks over detected online memory ranges. + */ +#define for_each_mem_detect_block(i, p_start, p_end) \ + for (i = 0, __get_mem_detect_block(i, p_start, p_end); \ + i < mem_detect.count; \ + i++, __get_mem_detect_block(i, p_start, p_end)) + +static inline void get_mem_detect_reserved(unsigned long *start, + unsigned long *size) +{ + *start = (unsigned long)mem_detect.entries_extended; + if (mem_detect.count > MEM_INLINED_ENTRIES) + *size = (mem_detect.count - MEM_INLINED_ENTRIES) * sizeof(struct mem_detect_block); + else + *size = 0; +} + +#endif diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 7df57bd09aa1..c21a8b637a11 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -113,6 +113,7 @@ void sclp_early_printk(const char *s); void sclp_early_printk_force(const char *s); void __sclp_early_printk(const char *s, unsigned int len, unsigned int force); +int sclp_early_get_meminfo(unsigned long *mem, unsigned long *rzm); int _sclp_get_core_info(struct sclp_core_info *info); int sclp_core_configure(u8 core); int sclp_core_deconfigure(u8 core); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 1d66016f4170..522e4553373a 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -69,8 +69,6 @@ extern int memory_end_set; extern unsigned long memory_end; extern unsigned long max_physmem_end; -extern void detect_memory_memblock(void); - #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) #define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 67fa7cb8ae80..fdf9bd964dee 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -70,6 +70,7 @@ #include #include #include +#include #include "entry.h" /* @@ -91,7 +92,8 @@ unsigned long int_hwcap = 0; int __initdata memory_end_set; unsigned long __initdata memory_end; -unsigned long __initdata max_physmem_end; +unsigned long __bootdata(max_physmem_end); +struct mem_detect_info __bootdata(mem_detect); unsigned long VMALLOC_START; EXPORT_SYMBOL(VMALLOC_START); @@ -720,6 +722,45 @@ static void __init reserve_initrd(void) #endif } +static void __init reserve_mem_detect_info(void) +{ + unsigned long start, size; + + get_mem_detect_reserved(&start, &size); + if (size) + memblock_reserve(start, size); +} + +static void __init free_mem_detect_info(void) 
+{ + unsigned long start, size; + + get_mem_detect_reserved(&start, &size); + if (size) + memblock_free(start, size); +} + +static void __init memblock_physmem_add(phys_addr_t start, phys_addr_t size) +{ + memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n", + start, start + size - 1); + memblock_add_range(&memblock.memory, start, size, 0, 0); + memblock_add_range(&memblock.physmem, start, size, 0, 0); +} + +static void __init memblock_add_mem_detect_info(void) +{ + unsigned long start, end; + int i; + + /* keep memblock lists close to the kernel */ + memblock_set_bottom_up(true); + for_each_mem_detect_block(i, &start, &end) + memblock_physmem_add(start, end - start); + memblock_set_bottom_up(false); + memblock_dump_all(); +} + /* * Check for initrd being in usable memory */ @@ -984,11 +1025,13 @@ void __init setup_arch(char **cmdline_p) reserve_oldmem(); reserve_kernel(); reserve_initrd(); + reserve_mem_detect_info(); memblock_allow_resize(); /* Get information about *all* installed memory */ - detect_memory_memblock(); + memblock_add_mem_detect_info(); + free_mem_detect_info(); remove_oldmem(); /* diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 33fe418506bc..83c83c69cab2 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -4,8 +4,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o -obj-y += page-states.o gup.o pageattr.o mem_detect.o -obj-y += pgtable.o pgalloc.o +obj-y += page-states.o gup.o pageattr.o pgtable.o pgalloc.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c deleted file mode 100644 index 21f6c82c8296..000000000000 --- a/arch/s390/mm/mem_detect.c +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright IBM Corp. 2008, 2009 - * - * Author(s): Heiko Carstens - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define CHUNK_READ_WRITE 0 -#define CHUNK_READ_ONLY 1 - -static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size) -{ - memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n", - start, start + size - 1); - memblock_add_range(&memblock.memory, start, size, 0, 0); - memblock_add_range(&memblock.physmem, start, size, 0, 0); -} - -void __init detect_memory_memblock(void) -{ - unsigned long memsize, rnmax, rzm, addr, size; - int type; - - rzm = sclp.rzm; - rnmax = sclp.rnmax; - memsize = rzm * rnmax; - if (!rzm) - rzm = 1UL << 17; - max_physmem_end = memsize; - addr = 0; - /* keep memblock lists close to the kernel */ - memblock_set_bottom_up(true); - do { - size = 0; - /* assume lowcore is writable */ - type = addr ? 
tprot(addr) : CHUNK_READ_WRITE; - do { - size += rzm; - if (max_physmem_end && addr + size >= max_physmem_end) - break; - } while (type == tprot(addr + size)); - if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { - if (max_physmem_end && (addr + size > max_physmem_end)) - size = max_physmem_end - addr; - memblock_physmem_add(addr, size); - } - addr += size; - } while (addr < max_physmem_end); - memblock_set_bottom_up(false); - if (!max_physmem_end) - max_physmem_end = memblock_end_of_DRAM(); - memblock_dump_all(); -} diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index bbea2154a807..4f04ba689771 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -270,3 +270,20 @@ int __init sclp_early_get_info(struct read_info_sccb *info) *info = sclp_info_sccb; return 0; } + +int __init sclp_early_get_meminfo(unsigned long *mem, unsigned long *rzm) +{ + unsigned long rnmax; + unsigned long rnsize; + struct read_info_sccb *sccb = &sclp_info_sccb; + + if (!sclp_info_sccb_valid) + return -EIO; + + rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; + rnsize = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; + rnsize <<= 20; + *mem = rnsize * rnmax; + *rzm = rnsize; + return 0; +} From 251b72a440fa8c550d64d9a9f35e6e1b5b9637df Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Apr 2018 14:24:40 +0200 Subject: [PATCH 32/83] s390: introduce .boot.data section compile time validation Make sure that .boot.data sections of vmlinux and arch/s390/compressed/vmlinux match before producing the compressed kernel image. Symbols presence, order and sizes are cross-checked. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/.gitignore | 1 + arch/s390/boot/Makefile | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore index 017d5912ad2d..16ff906e4610 100644 --- a/arch/s390/boot/.gitignore +++ b/arch/s390/boot/.gitignore @@ -1,2 +1,3 @@ image bzImage +section_cmp.* diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 5e2cec6e4b3e..b6903c47e0ac 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -28,14 +28,30 @@ endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ebcdic.o sclp_early_core.o mem.o -targets := bzImage startup.a $(obj-y) +targets := bzImage startup.a section_cmp.boot.data $(obj-y) subdir- := compressed OBJECTS := $(addprefix $(obj)/,$(obj-y)) -$(obj)/bzImage: $(obj)/compressed/vmlinux FORCE +quiet_cmd_section_cmp = SECTCMP $* +define cmd_section_cmp + s1=`$(OBJDUMP) -t -j "$*" "$<" | sort | \ + sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \ + s2=`$(OBJDUMP) -t -j "$*" "$(word 2,$^)" | sort | \ + sed -n "/0000000000000000/! 
s/.*\s$*\s\+//p" | sha256sum`; \ + if [ "$$s1" != "$$s2" ]; then \ + echo "error: section $* differs between $< and $(word 2,$^)" >&2; \ + exit 1; \ + fi; \ + touch $@ +endef + +$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data FORCE $(call if_changed,objcopy) +$(obj)/section_cmp%: vmlinux $(obj)/compressed/vmlinux FORCE + $(call if_changed,section_cmp) + $(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ From fddbaa5c423f7ca0a187f88e0b1d98a5c8b4edcf Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Apr 2018 18:42:37 +0200 Subject: [PATCH 33/83] s390/mem_detect: introduce SCLP storage info SCLP storage info allows to detect continuous and non-continuous online memory under LPAR, z/VM and KVM, when standby memory is defined. Reviewed-by: Heiko Carstens Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/mem_detect.c | 6 ++++ arch/s390/include/asm/mem_detect.h | 3 ++ arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp.h | 10 ++++++ drivers/s390/char/sclp_cmd.c | 11 +----- drivers/s390/char/sclp_early_core.c | 53 +++++++++++++++++++++++++++++ 6 files changed, 74 insertions(+), 10 deletions(-) diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 920e6fee75de..8974e3dde1e4 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -126,6 +126,12 @@ void detect_memory(void) unsigned long rzm; sclp_early_get_meminfo(&max_physmem_end, &rzm); + + if (!sclp_early_read_storage_info()) { + mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO; + return; + } + scan_memory(rzm); mem_detect.info_source = MEM_DETECT_TPROT_LOOP; if (!max_physmem_end) diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h index 8586adef1c65..00426c07f6df 100644 --- a/arch/s390/include/asm/mem_detect.h +++ b/arch/s390/include/asm/mem_detect.h @@ -6,6 +6,7 @@ enum mem_info_source { MEM_DETECT_NONE = 0, + MEM_DETECT_SCLP_STOR_INFO, MEM_DETECT_TPROT_LOOP }; @@ -32,6 +33,8 @@ struct mem_detect_info { }; extern struct mem_detect_info mem_detect; +void add_mem_detect_block(u64 start, u64 end); + static inline int __get_mem_detect_block(u32 n, unsigned long *start, unsigned long *end) { diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index c21a8b637a11..e0da13c0ef79 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -106,6 +106,7 @@ struct zpci_report_error_header { } __packed; int sclp_early_read_info(void); +int sclp_early_read_storage_info(void); int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index ffe72f03b5c5..b3fcc24b1182 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -64,6 +64,7 @@ typedef unsigned int sclp_cmdw_t; #define SCLP_CMDW_READ_CPU_INFO 0x00010001 #define SCLP_CMDW_READ_SCP_INFO 0x00020001 +#define SCLP_CMDW_READ_STORAGE_INFO 0x00040001 #define SCLP_CMDW_READ_SCP_INFO_FORCED 0x00120001 #define SCLP_CMDW_READ_EVENT_DATA 0x00770005 #define SCLP_CMDW_WRITE_EVENT_DATA 0x00760005 @@ -197,6 +198,15 @@ struct read_info_sccb { u8 _pad_128[4096 - 128]; /* 128-4095 */ } __packed __aligned(PAGE_SIZE); +struct read_storage_sccb { + struct sccb_header header; + u16 max_id; + u16 assigned; + u16 standby; + u16 :16; + u32 entries[0]; +} __packed; + static inline void 
sclp_fill_core_info(struct sclp_core_info *info, struct read_cpu_info_sccb *sccb) { diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index d7686a68c093..37d42de06079 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -460,15 +460,6 @@ static int sclp_mem_freeze(struct device *dev) return -EPERM; } -struct read_storage_sccb { - struct sccb_header header; - u16 max_id; - u16 assigned; - u16 standby; - u16 :16; - u32 entries[0]; -} __packed; - static const struct dev_pm_ops sclp_mem_pm_ops = { .freeze = sclp_mem_freeze, }; @@ -498,7 +489,7 @@ static int __init sclp_detect_standby_memory(void) for (id = 0; id <= sclp_max_storage_id; id++) { memset(sccb, 0, PAGE_SIZE); sccb->header.length = PAGE_SIZE; - rc = sclp_sync_request(0x00040001 | id << 8, sccb); + rc = sclp_sync_request(SCLP_CMDW_READ_STORAGE_INFO | id << 8, sccb); if (rc) goto out; switch (sccb->header.response_code) { diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index 4f04ba689771..0df08dcb9fe8 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "sclp.h" #include "sclp_rw.h" @@ -287,3 +288,55 @@ int __init sclp_early_get_meminfo(unsigned long *mem, unsigned long *rzm) *rzm = rnsize; return 0; } + +#define SCLP_STORAGE_INFO_FACILITY 0x0000400000000000UL + +void __weak __init add_mem_detect_block(u64 start, u64 end) {} +int __init sclp_early_read_storage_info(void) +{ + struct read_storage_sccb *sccb = (struct read_storage_sccb *)&sclp_early_sccb; + int rc, id, max_id = 0; + unsigned long rn, rzm; + sclp_cmdw_t command; + u16 sn; + + if (!sclp_info_sccb_valid) + return -EIO; + + if (!(sclp_info_sccb.facilities & SCLP_STORAGE_INFO_FACILITY)) + return -EOPNOTSUPP; + + rzm = sclp_info_sccb.rnsize ?: sclp_info_sccb.rnsize2; + rzm <<= 20; + + for (id = 0; id <= max_id; id++) { + memset(sclp_early_sccb, 0, sizeof(sclp_early_sccb)); + sccb->header.length = sizeof(sclp_early_sccb); + command = SCLP_CMDW_READ_STORAGE_INFO | (id << 8); + rc = sclp_early_cmd(command, sccb); + if (rc) + goto fail; + + max_id = sccb->max_id; + switch (sccb->header.response_code) { + case 0x0010: + for (sn = 0; sn < sccb->assigned; sn++) { + if (!sccb->entries[sn]) + continue; + rn = sccb->entries[sn] >> 16; + add_mem_detect_block((rn - 1) * rzm, rn * rzm); + } + break; + case 0x0310: + case 0x0410: + break; + default: + goto fail; + } + } + + return 0; +fail: + mem_detect.count = 0; + return -EIO; +} From 6e98e6432995a3094a88bf6024187c3c235be976 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Apr 2018 18:48:20 +0200 Subject: [PATCH 34/83] s390/mem_detect: introduce z/VM specific diag260 call In the case when z/VM memory is defined with "define storage config" command, SCLP storage info is not available. Utilize diag260 "storage configuration" call, to get information about z/VM specific guest memory definitions with potential memory holes. 
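To illustrate the interface, here is a simplified restatement of the diag260()/__diag260() helpers added below (field names are illustrative and the program-check safety net is omitted): subcode 0x10 "storage configuration" takes the buffer address and size in a register pair and appears to return the number of reported extents, and the "+ 1" applied to each end address suggests the reported extent end is inclusive:

	/* Sketch only, assuming the semantics used by this patch. */
	struct storage_extent {
		unsigned long start;	/* first byte of the extent */
		unsigned long end;	/* last byte of the extent (inclusive) */
	};

	static void add_diag260_extents(void)
	{
		struct storage_extent ext[8] __aligned(16) = {};	/* z/VM reports up to 8 extents */
		int count, i;

		count = __diag260((unsigned long)ext, sizeof(ext));
		if (count < 0)
			return;	/* diag260 unavailable, fall back to other methods */
		for (i = 0; i < count && i < 8; i++)
			add_mem_detect_block(ext[i].start, ext[i].end + 1);
	}
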
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/mem_detect.c | 52 ++++++++++++++++++++++++++++++ arch/s390/include/asm/mem_detect.h | 1 + 2 files changed, 53 insertions(+) diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 8974e3dde1e4..42b0cd23f04a 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -71,6 +71,53 @@ static unsigned long get_mem_detect_end(void) return 0; } +static int __diag260(unsigned long rx1, unsigned long rx2) +{ + register unsigned long _rx1 asm("2") = rx1; + register unsigned long _rx2 asm("3") = rx2; + register unsigned long _ry asm("4") = 0x10; /* storage configuration */ + int rc = -1; /* fail */ + unsigned long reg1, reg2; + psw_t old = S390_lowcore.program_new_psw; + + asm volatile( + " epsw %0,%1\n" + " st %0,%[psw_pgm]\n" + " st %1,%[psw_pgm]+4\n" + " larl %0,1f\n" + " stg %0,%[psw_pgm]+8\n" + " diag %[rx],%[ry],0x260\n" + " ipm %[rc]\n" + " srl %[rc],28\n" + "1:\n" + : "=&d" (reg1), "=&a" (reg2), + [psw_pgm] "=Q" (S390_lowcore.program_new_psw), + [rc] "+&d" (rc), [ry] "+d" (_ry) + : [rx] "d" (_rx1), "d" (_rx2) + : "cc", "memory"); + S390_lowcore.program_new_psw = old; + return rc == 0 ? _ry : -1; +} + +static int diag260(void) +{ + int rc, i; + + struct { + unsigned long start; + unsigned long end; + } storage_extents[8] __aligned(16); /* VM supports up to 8 extends */ + + memset(storage_extents, 0, sizeof(storage_extents)); + rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents)); + if (rc == -1) + return -1; + + for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++) + add_mem_detect_block(storage_extents[i].start, storage_extents[i].end + 1); + return 0; +} + static int tprot(unsigned long addr) { unsigned long pgm_addr; @@ -132,6 +179,11 @@ void detect_memory(void) return; } + if (!diag260()) { + mem_detect.info_source = MEM_DETECT_DIAG260; + return; + } + scan_memory(rzm); mem_detect.info_source = MEM_DETECT_TPROT_LOOP; if (!max_physmem_end) diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h index 00426c07f6df..6047a28656f5 100644 --- a/arch/s390/include/asm/mem_detect.h +++ b/arch/s390/include/asm/mem_detect.h @@ -7,6 +7,7 @@ enum mem_info_source { MEM_DETECT_NONE = 0, MEM_DETECT_SCLP_STOR_INFO, + MEM_DETECT_DIAG260, MEM_DETECT_TPROT_LOOP }; From cd45c995610420755c5fe0d09afee3106c586e26 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Apr 2018 18:54:40 +0200 Subject: [PATCH 35/83] s390/mem_detect: use SCLP info for continuous memory detection When neither SCLP storage info, nor z/VM diag260 "storage configuration" are available assume a continuous online memory of size specified by SCLP info. 
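With this change the early memory detection effectively tries its sources in a fixed order. Roughly (an informal summary of detect_memory() as of this patch):

	1. SCLP storage info                     -> MEM_DETECT_SCLP_STOR_INFO
	2. z/VM diag260 storage configuration    -> MEM_DETECT_DIAG260
	3. continuous memory of max_physmem_end
	   bytes as reported by SCLP read info   -> MEM_DETECT_SCLP_READ_INFO
	4. tprot scan                            -> MEM_DETECT_TPROT_LOOP
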
Reviewed-by: Heiko Carstens Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/mem_detect.c | 9 +++++++-- arch/s390/include/asm/mem_detect.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 42b0cd23f04a..3becf6bbe4c7 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -184,8 +184,13 @@ void detect_memory(void) return; } + if (max_physmem_end) { + add_mem_detect_block(0, max_physmem_end); + mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO; + return; + } + scan_memory(rzm); mem_detect.info_source = MEM_DETECT_TPROT_LOOP; - if (!max_physmem_end) - max_physmem_end = get_mem_detect_end(); + max_physmem_end = get_mem_detect_end(); } diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h index 6047a28656f5..153c3542fa8a 100644 --- a/arch/s390/include/asm/mem_detect.h +++ b/arch/s390/include/asm/mem_detect.h @@ -8,6 +8,7 @@ enum mem_info_source { MEM_DETECT_NONE = 0, MEM_DETECT_SCLP_STOR_INFO, MEM_DETECT_DIAG260, + MEM_DETECT_SCLP_READ_INFO, MEM_DETECT_TPROT_LOOP }; From 54c57795e848100a2502b7a39b12b784292f4576 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Apr 2018 19:15:24 +0200 Subject: [PATCH 36/83] s390/mem_detect: replace tprot loop with binary search In a situation when other memory detection methods are not available (no SCLP and no z/VM diag260), continuous online memory is assumed. Replacing tprot loop with faster binary search, as only online memory end has to be found. Reviewed-by: Heiko Carstens Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/mem_detect.c | 45 ++++++++++------------------- arch/s390/include/asm/mem_detect.h | 2 +- arch/s390/include/asm/sclp.h | 2 +- drivers/s390/char/sclp_early_core.c | 3 +- 4 files changed, 18 insertions(+), 34 deletions(-) diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 3becf6bbe4c7..65ae3c926042 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -3,12 +3,10 @@ #include #include #include +#include #include "compressed/decompressor.h" #include "boot.h" -#define CHUNK_READ_WRITE 0 -#define CHUNK_READ_ONLY 1 - unsigned long __bootdata(max_physmem_end); struct mem_detect_info __bootdata(mem_detect); @@ -141,38 +139,25 @@ static int tprot(unsigned long addr) return rc; } -static void scan_memory(unsigned long rzm) +static void search_mem_end(void) { - unsigned long addr, size; - int type; + unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */ + unsigned long offset = 0; + unsigned long pivot; - if (!rzm) - rzm = 1UL << 20; + while (range > 1) { + range >>= 1; + pivot = offset + range; + if (!tprot(pivot << 20)) + offset = pivot; + } - addr = 0; - do { - size = 0; - /* assume lowcore is writable */ - type = addr ? 
tprot(addr) : CHUNK_READ_WRITE; - do { - size += rzm; - if (max_physmem_end && addr + size >= max_physmem_end) - break; - } while (type == tprot(addr + size)); - if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { - if (max_physmem_end && (addr + size > max_physmem_end)) - size = max_physmem_end - addr; - add_mem_detect_block(addr, addr + size); - } - addr += size; - } while (addr < max_physmem_end); + add_mem_detect_block(0, (offset + 1) << 20); } void detect_memory(void) { - unsigned long rzm; - - sclp_early_get_meminfo(&max_physmem_end, &rzm); + sclp_early_get_memsize(&max_physmem_end); if (!sclp_early_read_storage_info()) { mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO; @@ -190,7 +175,7 @@ void detect_memory(void) return; } - scan_memory(rzm); - mem_detect.info_source = MEM_DETECT_TPROT_LOOP; + search_mem_end(); + mem_detect.info_source = MEM_DETECT_BIN_SEARCH; max_physmem_end = get_mem_detect_end(); } diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h index 153c3542fa8a..6114b92ab667 100644 --- a/arch/s390/include/asm/mem_detect.h +++ b/arch/s390/include/asm/mem_detect.h @@ -9,7 +9,7 @@ enum mem_info_source { MEM_DETECT_SCLP_STOR_INFO, MEM_DETECT_DIAG260, MEM_DETECT_SCLP_READ_INFO, - MEM_DETECT_TPROT_LOOP + MEM_DETECT_BIN_SEARCH }; struct mem_detect_block { diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index e0da13c0ef79..32b683f6992f 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -114,7 +114,7 @@ void sclp_early_printk(const char *s); void sclp_early_printk_force(const char *s); void __sclp_early_printk(const char *s, unsigned int len, unsigned int force); -int sclp_early_get_meminfo(unsigned long *mem, unsigned long *rzm); +int sclp_early_get_memsize(unsigned long *mem); int _sclp_get_core_info(struct sclp_core_info *info); int sclp_core_configure(u8 core); int sclp_core_deconfigure(u8 core); diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index 0df08dcb9fe8..acfe09313852 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -272,7 +272,7 @@ int __init sclp_early_get_info(struct read_info_sccb *info) return 0; } -int __init sclp_early_get_meminfo(unsigned long *mem, unsigned long *rzm) +int __init sclp_early_get_memsize(unsigned long *mem) { unsigned long rnmax; unsigned long rnsize; @@ -285,7 +285,6 @@ int __init sclp_early_get_meminfo(unsigned long *mem, unsigned long *rzm) rnsize = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; rnsize <<= 20; *mem = rnsize * rnmax; - *rzm = rnsize; return 0; } From f01b8bca088a6fae82fe55cfc95ca9c2096126e8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 24 Sep 2018 15:27:30 +0200 Subject: [PATCH 37/83] s390/mem_detect: add info source debug print Print mem_detect info source when memblock=debug is specified. 
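For example, booting with "memblock=debug" on the kernel command line on a machine where SCLP storage info is available would print a line along the lines of (illustrative, following the format string added below):

	physmem info source: sclp storage info (1)

where the number in parentheses is the raw mem_detect.info_source value, here MEM_DETECT_SCLP_STOR_INFO.
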
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index fdf9bd964dee..36fb37d7a36c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -748,11 +748,28 @@ static void __init memblock_physmem_add(phys_addr_t start, phys_addr_t size) memblock_add_range(&memblock.physmem, start, size, 0, 0); } +static const char * __init get_mem_info_source(void) +{ + switch (mem_detect.info_source) { + case MEM_DETECT_SCLP_STOR_INFO: + return "sclp storage info"; + case MEM_DETECT_DIAG260: + return "diag260"; + case MEM_DETECT_SCLP_READ_INFO: + return "sclp read info"; + case MEM_DETECT_BIN_SEARCH: + return "binary search"; + } + return "none"; +} + static void __init memblock_add_mem_detect_info(void) { unsigned long start, end; int i; + memblock_dbg("physmem info source: %s (%hhd)\n", + get_mem_info_source(), mem_detect.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); for_each_mem_detect_block(i, &start, &end) From b09decfd99f8258408decfaa07c5cce6c06fe2cf Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 13 Apr 2018 17:37:28 +0200 Subject: [PATCH 38/83] s390/sclp: introduce sclp_early_get_hsa_size Introduce sclp_early_get_hsa_size function to be used during early memory detection. This function allows to find a memory limit imposed during zfcpdump. Reviewed-by: Heiko Carstens Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp_early_core.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 32b683f6992f..0cd4bda85eb1 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -115,6 +115,7 @@ void sclp_early_printk_force(const char *s); void __sclp_early_printk(const char *s, unsigned int len, unsigned int force); int sclp_early_get_memsize(unsigned long *mem); +int sclp_early_get_hsa_size(unsigned long *hsa_size); int _sclp_get_core_info(struct sclp_core_info *info); int sclp_core_configure(u8 core); int sclp_core_deconfigure(u8 core); diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index acfe09313852..387c114ded3f 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -288,6 +288,17 @@ int __init sclp_early_get_memsize(unsigned long *mem) return 0; } +int __init sclp_early_get_hsa_size(unsigned long *hsa_size) +{ + if (!sclp_info_sccb_valid) + return -EIO; + + *hsa_size = 0; + if (sclp_info_sccb.hsa_size) + *hsa_size = (sclp_info_sccb.hsa_size - 1) * PAGE_SIZE; + return 0; +} + #define SCLP_STORAGE_INFO_FACILITY 0x0000400000000000UL void __weak __init add_mem_detect_block(u64 start, u64 end) {} From 49698745e53c417370ac5cfe8b849bb65d62f129 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 15 May 2018 13:28:53 +0200 Subject: [PATCH 39/83] s390: move ipl block and cmd line handling to early boot phase To distinguish zfcpdump case and to be able to parse some of the kernel command line arguments early (e.g. mem=) ipl block retrieval and command line construction code is moved to the early boot phase. "memory_end" is set up correctly respecting "mem=" and hsa_size in case of the zfcpdump. 
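For instance (illustrative values), a command line containing "mem=1G" is now handled by the early parse_mem_opt() added below, so memory_end is already set to 0x40000000 and memory_end_set to 1 before the decompressed kernel and memblock come into play; in the zfcpdump case the limit is taken from sclp_early_get_hsa_size() instead.
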
arch/s390/boot/string.c is introduced to provide string handling and command line parsing functions to early boot phase code for the compressed kernel image case. Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 3 +- arch/s390/boot/boot.h | 3 + arch/s390/boot/cmdline.c | 2 + arch/s390/boot/ctype.c | 2 + arch/s390/boot/ipl_parm.c | 173 ++++++++++++++++++++++++++++++ arch/s390/boot/ipl_vmparm.c | 2 + arch/s390/boot/startup.c | 3 + arch/s390/boot/string.c | 100 +++++++++++++++++ arch/s390/include/asm/boot_data.h | 11 ++ arch/s390/include/asm/ipl.h | 4 +- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/early.c | 47 +------- arch/s390/kernel/ipl.c | 117 ++------------------ arch/s390/kernel/ipl_vmparm.c | 36 +++++++ arch/s390/kernel/setup.c | 26 +---- 15 files changed, 355 insertions(+), 176 deletions(-) create mode 100644 arch/s390/boot/cmdline.c create mode 100644 arch/s390/boot/ctype.c create mode 100644 arch/s390/boot/ipl_parm.c create mode 100644 arch/s390/boot/ipl_vmparm.c create mode 100644 arch/s390/boot/string.c create mode 100644 arch/s390/include/asm/boot_data.h create mode 100644 arch/s390/kernel/ipl_vmparm.c diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index b6903c47e0ac..f58edd8b2e34 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -27,7 +27,8 @@ endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char -obj-y := head.o als.o startup.o mem_detect.o ebcdic.o sclp_early_core.o mem.o +obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o string.o ebcdic.o +obj-y += sclp_early_core.o mem.o ipl_vmparm.o cmdline.o ctype.o targets := bzImage startup.a section_cmp.boot.data $(obj-y) subdir- := compressed diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 808154b99a5d..fc41e2277ea8 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -4,5 +4,8 @@ void startup_kernel(void); void detect_memory(void); +void store_ipl_parmblock(void); +void setup_boot_command_line(void); +void setup_memory_end(void); #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/cmdline.c b/arch/s390/boot/cmdline.c new file mode 100644 index 000000000000..73d826cdbdeb --- /dev/null +++ b/arch/s390/boot/cmdline.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../lib/cmdline.c" diff --git a/arch/s390/boot/ctype.c b/arch/s390/boot/ctype.c new file mode 100644 index 000000000000..2495810b47e3 --- /dev/null +++ b/arch/s390/boot/ctype.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../lib/ctype.c" diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c new file mode 100644 index 000000000000..7f8e546400a1 --- /dev/null +++ b/arch/s390/boot/ipl_parm.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include "boot.h" + +char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; +struct ipl_parameter_block __bootdata(early_ipl_block); +int __bootdata(early_ipl_block_valid); + +unsigned long __bootdata(memory_end); +int __bootdata(memory_end_set); + +static inline int __diag308(unsigned long subcode, void *addr) +{ + register unsigned long _addr asm("0") = (unsigned long)addr; + register unsigned long _rc asm("1") = 0; + unsigned long reg1, reg2; + psw_t old = S390_lowcore.program_new_psw; + + asm volatile( + " epsw %0,%1\n" + " st %0,%[psw_pgm]\n" + " st %1,%[psw_pgm]+4\n" + " larl %0,1f\n" + " stg %0,%[psw_pgm]+8\n" + " diag %[addr],%[subcode],0x308\n" + "1: nopr %%r7\n" + : "=&d" (reg1), 
"=&a" (reg2), + [psw_pgm] "=Q" (S390_lowcore.program_new_psw), + [addr] "+d" (_addr), "+d" (_rc) + : [subcode] "d" (subcode) + : "cc", "memory"); + S390_lowcore.program_new_psw = old; + return _rc; +} + +void store_ipl_parmblock(void) +{ + int rc; + + rc = __diag308(DIAG308_STORE, &early_ipl_block); + if (rc == DIAG308_RC_OK && + early_ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION) + early_ipl_block_valid = 1; +} + +static size_t scpdata_length(const char *buf, size_t count) +{ + while (count) { + if (buf[count - 1] != '\0' && buf[count - 1] != ' ') + break; + count--; + } + return count; +} + +static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + size_t count; + size_t i; + int has_lowercase; + + count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data, + ipb->ipl_info.fcp.scp_data_len)); + if (!count) + goto out; + + has_lowercase = 0; + for (i = 0; i < count; i++) { + if (!isascii(ipb->ipl_info.fcp.scp_data[i])) { + count = 0; + goto out; + } + if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i])) + has_lowercase = 1; + } + + if (has_lowercase) + memcpy(dest, ipb->ipl_info.fcp.scp_data, count); + else + for (i = 0; i < count; i++) + dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]); +out: + dest[count] = '\0'; + return count; +} + +static void append_ipl_block_parm(void) +{ + char *parm, *delim; + size_t len, rc = 0; + + len = strlen(early_command_line); + + delim = early_command_line + len; /* '\0' character position */ + parm = early_command_line + len + 1; /* append right after '\0' */ + + switch (early_ipl_block.hdr.pbt) { + case DIAG308_IPL_TYPE_CCW: + rc = ipl_block_get_ascii_vmparm( + parm, COMMAND_LINE_SIZE - len - 1, &early_ipl_block); + break; + case DIAG308_IPL_TYPE_FCP: + rc = ipl_block_get_ascii_scpdata( + parm, COMMAND_LINE_SIZE - len - 1, &early_ipl_block); + break; + } + if (rc) { + if (*parm == '=') + memmove(early_command_line, parm + 1, rc); + else + *delim = ' '; /* replace '\0' with space */ + } +} + +static inline int has_ebcdic_char(const char *str) +{ + int i; + + for (i = 0; str[i]; i++) + if (str[i] & 0x80) + return 1; + return 0; +} + +void setup_boot_command_line(void) +{ + COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; + /* convert arch command line to ascii if necessary */ + if (has_ebcdic_char(COMMAND_LINE)) + EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); + /* copy arch command line */ + strcpy(early_command_line, strim(COMMAND_LINE)); + + /* append IPL PARM data to the boot command line */ + if (early_ipl_block_valid) + append_ipl_block_parm(); +} + +static char command_line_buf[COMMAND_LINE_SIZE] __section(.data); +static void parse_mem_opt(void) +{ + char *args; + char *param, *val; + + args = strcpy(command_line_buf, early_command_line); + while (*args) { + args = next_arg(args, ¶m, &val); + + if (!strcmp(param, "mem")) { + memory_end = memparse(val, NULL); + memory_end_set = 1; + } + } +} + +void setup_memory_end(void) +{ + parse_mem_opt(); +#ifdef CONFIG_CRASH_DUMP + if (!OLDMEM_BASE && early_ipl_block_valid && + early_ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP && + early_ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP) { + if (!sclp_early_get_hsa_size(&memory_end) && memory_end) + memory_end_set = 1; + } +#endif +} diff --git a/arch/s390/boot/ipl_vmparm.c b/arch/s390/boot/ipl_vmparm.c new file mode 100644 index 000000000000..8dacd5fadfd7 --- /dev/null +++ b/arch/s390/boot/ipl_vmparm.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../kernel/ipl_vmparm.c" 
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index b0e9f4619203..4d441317cdeb 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -51,6 +51,9 @@ void startup_kernel(void) rescue_initrd(); sclp_early_read_info(); + store_ipl_parmblock(); + setup_boot_command_line(); + setup_memory_end(); detect_memory(); if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { img = decompress_kernel(); diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c new file mode 100644 index 000000000000..09ca9130e73a --- /dev/null +++ b/arch/s390/boot/string.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "../lib/string.c" + +int strncmp(const char *cs, const char *ct, size_t count) +{ + unsigned char c1, c2; + + while (count) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + count--; + } + return 0; +} + +char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} + +char *strim(char *s) +{ + size_t size; + char *end; + + size = strlen(s); + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return skip_spaces(s); +} + +/* Works only for digits and letters, but small and fast */ +#define TOLOWER(x) ((x) | 0x20) + +static unsigned int simple_guess_base(const char *cp) +{ + if (cp[0] == '0') { + if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2])) + return 16; + else + return 8; + } else { + return 10; + } +} + +/** + * simple_strtoull - convert a string to an unsigned long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ + +unsigned long long simple_strtoull(const char *cp, char **endp, + unsigned int base) +{ + unsigned long long result = 0; + + if (!base) + base = simple_guess_base(cp); + + if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x') + cp += 2; + + while (isxdigit(*cp)) { + unsigned int value; + + value = isdigit(*cp) ? 
*cp - '0' : TOLOWER(*cp) - 'a' + 10; + if (value >= base) + break; + result = result * base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + + return result; +} + +long simple_strtol(const char *cp, char **endp, unsigned int base) +{ + if (*cp == '-') + return -simple_strtoull(cp + 1, endp, base); + + return simple_strtoull(cp, endp, base); +} diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h new file mode 100644 index 000000000000..2d999ccb977a --- /dev/null +++ b/arch/s390/include/asm/boot_data.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_BOOT_DATA_H + +#include +#include + +extern char early_command_line[COMMAND_LINE_SIZE]; +extern struct ipl_parameter_block early_ipl_block; +extern int early_ipl_block_valid; + +#endif /* _ASM_S390_BOOT_DATA_H */ diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index ae5135704616..a8389e2d2f03 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -89,8 +89,8 @@ void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs); extern void s390_reset_system(void); extern void ipl_store_parameters(void); -extern size_t append_ipl_vmparm(char *, size_t); -extern size_t append_ipl_scpdata(char *, size_t); +extern size_t ipl_block_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb); enum ipl_type { IPL_TYPE_UNKNOWN = 1, diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index dbfd1730e631..7ad6fa60a1f2 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -47,7 +47,7 @@ obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o -obj-y += nospec-branch.o +obj-y += nospec-branch.o ipl_vmparm.o extra-y += head64.o vmlinux.lds diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 5b28b434f8a1..af5c2b3f7065 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -29,10 +29,9 @@ #include #include #include +#include #include "entry.h" -static void __init setup_boot_command_line(void); - /* * Initialize storage key for kernel pages */ @@ -284,51 +283,11 @@ static int __init cad_setup(char *str) } early_param("cad", cad_setup); -/* Set up boot command line */ -static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) -{ - char *parm, *delim; - size_t rc, len; - - len = strlen(boot_command_line); - - delim = boot_command_line + len; /* '\0' character position */ - parm = boot_command_line + len + 1; /* append right after '\0' */ - - rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1); - if (rc) { - if (*parm == '=') - memmove(boot_command_line, parm + 1, rc); - else - *delim = ' '; /* replace '\0' with space */ - } -} - -static inline int has_ebcdic_char(const char *str) -{ - int i; - - for (i = 0; str[i]; i++) - if (str[i] & 0x80) - return 1; - return 0; -} - +char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; static void __init setup_boot_command_line(void) { - COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; - /* convert arch command line to ascii if necessary */ - if (has_ebcdic_char(COMMAND_LINE)) - EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); /* copy arch command line */ - strlcpy(boot_command_line, strstrip(COMMAND_LINE), - ARCH_COMMAND_LINE_SIZE); - - /* append IPL PARM data to the boot command line */ - 
if (MACHINE_IS_VM) - append_to_cmdline(append_ipl_vmparm); - - append_to_cmdline(append_ipl_scpdata); + strlcpy(boot_command_line, early_command_line, ARCH_COMMAND_LINE_SIZE); } static void __init check_image_bootable(void) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 4296d7e61fb6..f1d69f78bb1d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 @@ -117,6 +119,9 @@ static char *dump_type_str(enum dump_type type) } } +struct ipl_parameter_block __bootdata(early_ipl_block); +int __bootdata(early_ipl_block_valid); + static int ipl_block_valid; static struct ipl_parameter_block ipl_block; @@ -262,115 +267,16 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); -/* VM IPL PARM routines */ -static size_t reipl_get_ascii_vmparm(char *dest, size_t size, - const struct ipl_parameter_block *ipb) -{ - int i; - size_t len; - char has_lowercase = 0; - - len = 0; - if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && - (ipb->ipl_info.ccw.vm_parm_len > 0)) { - - len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len); - memcpy(dest, ipb->ipl_info.ccw.vm_parm, len); - /* If at least one character is lowercase, we assume mixed - * case; otherwise we convert everything to lowercase. - */ - for (i = 0; i < len; i++) - if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */ - (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */ - (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */ - has_lowercase = 1; - break; - } - if (!has_lowercase) - EBC_TOLOWER(dest, len); - EBCASC(dest, len); - } - dest[len] = 0; - - return len; -} - -size_t append_ipl_vmparm(char *dest, size_t size) -{ - size_t rc; - - rc = 0; - if (ipl_block_valid && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW) - rc = reipl_get_ascii_vmparm(dest, size, &ipl_block); - else - dest[0] = 0; - return rc; -} - static ssize_t ipl_vm_parm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { char parm[DIAG308_VMPARM_SIZE + 1] = {}; - append_ipl_vmparm(parm, sizeof(parm)); + if (ipl_block_valid && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)) + ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block); return sprintf(page, "%s\n", parm); } -static size_t scpdata_length(const char* buf, size_t count) -{ - while (count) { - if (buf[count - 1] != '\0' && buf[count - 1] != ' ') - break; - count--; - } - return count; -} - -static size_t reipl_append_ascii_scpdata(char *dest, size_t size, - const struct ipl_parameter_block *ipb) -{ - size_t count; - size_t i; - int has_lowercase; - - count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data, - ipb->ipl_info.fcp.scp_data_len)); - if (!count) - goto out; - - has_lowercase = 0; - for (i = 0; i < count; i++) { - if (!isascii(ipb->ipl_info.fcp.scp_data[i])) { - count = 0; - goto out; - } - if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i])) - has_lowercase = 1; - } - - if (has_lowercase) - memcpy(dest, ipb->ipl_info.fcp.scp_data, count); - else - for (i = 0; i < count; i++) - dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]); -out: - dest[count] = '\0'; - return count; -} - -size_t append_ipl_scpdata(char *dest, size_t len) -{ - size_t rc; - - rc = 0; - if (ipl_block_valid && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP) - rc = reipl_append_ascii_scpdata(dest, len, &ipl_block); - else - dest[0] = 0; - return rc; -} - - static struct 
kobj_attribute sys_ipl_vm_parm_attr = __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL); @@ -564,7 +470,7 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb, { char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; - reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); + ipl_block_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); return sprintf(page, "%s\n", vmparm); } @@ -1769,11 +1675,10 @@ void __init setup_ipl(void) void __init ipl_store_parameters(void) { - int rc; - - rc = diag308(DIAG308_STORE, &ipl_block); - if (rc == DIAG308_RC_OK && ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION) + if (early_ipl_block_valid) { + memcpy(&ipl_block, &early_ipl_block, sizeof(ipl_block)); ipl_block_valid = 1; + } } void s390_reset_system(void) diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c new file mode 100644 index 000000000000..411838c0a0af --- /dev/null +++ b/arch/s390/kernel/ipl_vmparm.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +/* VM IPL PARM routines */ +size_t ipl_block_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + int i; + size_t len; + char has_lowercase = 0; + + len = 0; + if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && + (ipb->ipl_info.ccw.vm_parm_len > 0)) { + + len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len); + memcpy(dest, ipb->ipl_info.ccw.vm_parm, len); + /* If at least one character is lowercase, we assume mixed + * case; otherwise we convert everything to lowercase. + */ + for (i = 0; i < len; i++) + if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */ + (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */ + (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */ + has_lowercase = 1; + break; + } + if (!has_lowercase) + EBC_TOLOWER(dest, len); + EBCASC(dest, len); + } + dest[len] = 0; + + return len; +} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 36fb37d7a36c..ae3810c04872 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -90,8 +90,8 @@ char elf_platform[ELF_PLATFORM_SIZE]; unsigned long int_hwcap = 0; -int __initdata memory_end_set; -unsigned long __initdata memory_end; +int __bootdata(memory_end_set); +unsigned long __bootdata(memory_end); unsigned long __bootdata(max_physmem_end); struct mem_detect_info __bootdata(mem_detect); @@ -286,15 +286,6 @@ void machine_power_off(void) void (*pm_power_off)(void) = machine_power_off; EXPORT_SYMBOL_GPL(pm_power_off); -static int __init early_parse_mem(char *p) -{ - memory_end = memparse(p, &p); - memory_end &= PAGE_MASK; - memory_end_set = 1; - return 0; -} -early_param("mem", early_parse_mem); - static int __init parse_vmalloc(char *arg) { if (!arg) @@ -605,17 +596,8 @@ static struct notifier_block kdump_mem_nb = { */ static void reserve_memory_end(void) { -#ifdef CONFIG_CRASH_DUMP - if (ipl_info.type == IPL_TYPE_FCP_DUMP && - !OLDMEM_BASE && sclp.hsa_size) { - memory_end = sclp.hsa_size; - memory_end &= PAGE_MASK; - memory_end_set = 1; - } -#endif - if (!memory_end_set) - return; - memblock_reserve(memory_end, ULONG_MAX); + if (memory_end_set) + memblock_reserve(memory_end, ULONG_MAX); } /* From 75f195420a987490d3ce79db2d06c70fbfce4ac4 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 11:21:49 +0100 Subject: [PATCH 40/83] s390/mm: add missing pfn_to_kaddr helper kasan common code uses pfn_to_kaddr, which is defined by many other architectures. Adding it as well to avoid a build error. 
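With the helpers already present in asm/page.h the new define simply expands as

	pfn_to_kaddr(pfn) == pfn_to_virt(pfn) == __va((pfn) << PAGE_SHIFT)

i.e. it returns the kernel virtual address of the given page frame, matching the common definition mentioned above.
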
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/page.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 41e3908b397f..a4d38092530a 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -161,6 +161,7 @@ static inline int devmem_is_allowed(unsigned long pfn) #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +#define pfn_to_kaddr(pfn) pfn_to_virt(pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define page_to_virt(page) pfn_to_virt(page_to_pfn(page)) From 348498458505e202df41b6b9a78da448d39298b7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 13:02:36 +0100 Subject: [PATCH 41/83] s390/kasan: avoid vdso instrumentation vdso is mapped into user space processes, which won't have kasan shodow mapped. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vdso32/Makefile | 3 ++- arch/s390/kernel/vdso64/Makefile | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index c5c856f320bc..eb8aebea3ea7 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -28,9 +28,10 @@ obj-y += vdso32_wrapper.o extra-y += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) -# Disable gcov profiling and ubsan for VDSO code +# Disable gcov profiling, ubsan and kasan for VDSO code GCOV_PROFILE := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 15b1ceafc4c1..a22b2cf86eec 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -28,9 +28,10 @@ obj-y += vdso64_wrapper.o extra-y += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) -# Disable gcov profiling and ubsan for VDSO code +# Disable gcov profiling, ubsan and kasan for VDSO code GCOV_PROFILE := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so From 0a9b40911baffac6fc9cc2d88e893585870a97f7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 13:10:33 +0100 Subject: [PATCH 42/83] s390/kasan: avoid instrumentation of early C code Instrumented C code cannot run without the kasan shadow area. Exempt source code files from kasan which are running before / used during kasan initialization. 
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 1 + arch/s390/boot/compressed/Makefile | 1 + arch/s390/kernel/Makefile | 2 ++ drivers/s390/char/Makefile | 1 + 4 files changed, 5 insertions(+) diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index f58edd8b2e34..d5ad724f5c96 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -6,6 +6,7 @@ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index fd7cfc7b93a5..593039620487 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -8,6 +8,7 @@ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) piggy.o info.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7ad6fa60a1f2..1f9c98f7d9e2 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -23,6 +23,8 @@ KCOV_INSTRUMENT_early_nobss.o := n UBSAN_SANITIZE_early.o := n UBSAN_SANITIZE_early_nobss.o := n +KASAN_SANITIZE_early_nobss.o := n + # # Passing null pointers is ok for smp code, since we access the lowcore here. # diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index c6ab34f94b1b..3072b89785dd 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -11,6 +11,7 @@ endif GCOV_PROFILE_sclp_early_core.o := n KCOV_INSTRUMENT_sclp_early_core.o := n UBSAN_SANITIZE_sclp_early_core.o := n +KASAN_SANITIZE_sclp_early_core.o := n CFLAGS_sclp_early_core.o += -D__NO_FORTIFY From fb594ec13ea89151e7a79933119ccd7b40d5d313 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 13:17:24 +0100 Subject: [PATCH 43/83] s390/kasan: replace some memory functions Follow the common kasan approach: "KASan replaces memory functions with manually instrumented variants. Original functions declared as weak symbols so strong definitions in mm/kasan/kasan.c could replace them. Original functions have aliases with '__' prefix in name, so we could call non-instrumented variant if needed." Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/string.h | 21 +++++++++++++++++++++ arch/s390/lib/mem.S | 12 +++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index 50f26fc9acb2..116cc15a4b8a 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -53,6 +53,27 @@ char *strstr(const char *s1, const char *s2); #undef __HAVE_ARCH_STRSEP #undef __HAVE_ARCH_STRSPN +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +extern void *__memcpy(void *dest, const void *src, size_t n); +extern void *__memset(void *s, int c, size_t n); +extern void *__memmove(void *dest, const void *src, size_t n); + +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. 
*/ +#endif + +#endif /* defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) */ + void *__memset16(uint16_t *s, uint16_t v, size_t count); void *__memset32(uint32_t *s, uint32_t v, size_t count); void *__memset64(uint64_t *s, uint64_t v, size_t count); diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 40c4d59c926e..53008da05190 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -14,7 +14,8 @@ /* * void *memmove(void *dest, const void *src, size_t n) */ -ENTRY(memmove) +WEAK(memmove) +ENTRY(__memmove) ltgr %r4,%r4 lgr %r1,%r2 jz .Lmemmove_exit @@ -47,6 +48,7 @@ ENTRY(memmove) BR_EX %r14 .Lmemmove_mvc: mvc 0(1,%r1),0(%r3) +ENDPROC(__memmove) EXPORT_SYMBOL(memmove) /* @@ -64,7 +66,8 @@ EXPORT_SYMBOL(memmove) * return __builtin_memset(s, c, n); * } */ -ENTRY(memset) +WEAK(memset) +ENTRY(__memset) ltgr %r4,%r4 jz .Lmemset_exit ltgr %r3,%r3 @@ -108,6 +111,7 @@ ENTRY(memset) xc 0(1,%r1),0(%r1) .Lmemset_mvc: mvc 1(1,%r1),0(%r1) +ENDPROC(__memset) EXPORT_SYMBOL(memset) /* @@ -115,7 +119,8 @@ EXPORT_SYMBOL(memset) * * void *memcpy(void *dest, const void *src, size_t n) */ -ENTRY(memcpy) +WEAK(memcpy) +ENTRY(__memcpy) ltgr %r4,%r4 jz .Lmemcpy_exit aghi %r4,-1 @@ -136,6 +141,7 @@ ENTRY(memcpy) j .Lmemcpy_remainder .Lmemcpy_mvc: mvc 0(1,%r1),0(%r3) +ENDPROC(__memcpy) EXPORT_SYMBOL(memcpy) /* From 34377d3cfba71be187422b70f72fd72d3a14d088 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 12 Sep 2018 13:23:58 +0200 Subject: [PATCH 44/83] s390: introduce MAX_PTRS_PER_P4D Kasan common code requires MAX_PTRS_PER_P4D definition, which in case of s390 is always PTRS_PER_P4D. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0e7cb0dc9c33..ffae5dc8b3ce 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -341,6 +341,8 @@ static inline int is_module_addr(void *addr) #define PTRS_PER_P4D _CRST_ENTRIES #define PTRS_PER_PGD _CRST_ENTRIES +#define MAX_PTRS_PER_P4D PTRS_PER_P4D + /* * Segment table and region3 table entry encoding * (R = read-only, I = invalid, y = young bit): From d0e2eb0a36ac0a3b27a968abb66eae17448458fb Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 13 Sep 2018 10:59:43 +0200 Subject: [PATCH 45/83] s390: add pgd_page primitive Add pgd_page primitive which is required by kasan common code. Also fixes typo in p4d_page definition. 
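The typo mentioned above can be seen in the hunk below: the old p4d_page() named its parameter "pud" but referenced "p4d" in the body,

	old: #define p4d_page(pud)	pfn_to_page(p4d_pfn(p4d))
	new: #define p4d_page(p4d)	pfn_to_page(p4d_pfn(p4d))

so it only compiled when the caller happened to have a suitable p4d variable in scope.
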
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index ffae5dc8b3ce..5f0fb9320dd0 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -601,6 +601,14 @@ static inline int pgd_bad(pgd_t pgd) return (pgd_val(pgd) & mask) != 0; } +static inline unsigned long pgd_pfn(pgd_t pgd) +{ + unsigned long origin_mask; + + origin_mask = _REGION_ENTRY_ORIGIN; + return (pgd_val(pgd) & origin_mask) >> PAGE_SHIFT; +} + static inline int p4d_folded(p4d_t p4d) { return (p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2; @@ -1212,7 +1220,8 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) #define pud_page(pud) pfn_to_page(pud_pfn(pud)) -#define p4d_page(pud) pfn_to_page(p4d_pfn(p4d)) +#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) +#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) /* Find an entry in the lowest level page table.. */ #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) From 42db5ed86090d8e57ca08bfd162a10be6320cc49 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 14:29:13 +0100 Subject: [PATCH 46/83] s390/kasan: add initialization code and enable it Kasan needs 1/8 of kernel virtual address space to be reserved as the shadow area. And eventually it requires the shadow memory offset to be known at compile time (passed to the compiler when full instrumentation is enabled). Any value picked as the shadow area offset for 3-level paging would eat up identity mapping on 4-level paging (with 1PB shadow area size). So, the kernel sticks to 3-level paging when kasan is enabled. 3TB border is picked as the shadow offset. The memory layout is adjusted so, that physical memory border does not exceed KASAN_SHADOW_START and vmemmap does not go below KASAN_SHADOW_END. Due to the fact that on s390 paging is set up very late and to cover more code with kasan instrumentation, temporary identity mapping and final shadow memory are set up early. The shadow memory mapping is later carried over to init_mm.pgd during paging_init. For the needs of paging structures allocation and shadow memory population a primitive allocator is used, which simply chops off memory blocks from the end of the physical memory. Kasan currenty doesn't track vmemmap and vmalloc areas. Current memory layout (for 3-level paging, 2GB physical memory). 
---[ Identity Mapping ]--- 0x0000000000000000-0x0000000000100000 ---[ Kernel Image Start ]--- 0x0000000000100000-0x0000000002b00000 ---[ Kernel Image End ]--- 0x0000000002b00000-0x0000000080000000 2G <- physical memory border 0x0000000080000000-0x0000030000000000 3070G PUD I ---[ Kasan Shadow Start ]--- 0x0000030000000000-0x0000030010000000 256M PMD RW X <- shadow for 2G memory 0x0000030010000000-0x0000037ff0000000 523776M PTE RO NX <- kasan zero ro page 0x0000037ff0000000-0x0000038000000000 256M PMD RW X <- shadow for 2G modules ---[ Kasan Shadow End ]--- 0x0000038000000000-0x000003d100000000 324G PUD I ---[ vmemmap Area ]--- 0x000003d100000000-0x000003e080000000 ---[ vmalloc Area ]--- 0x000003e080000000-0x000003ff80000000 ---[ Modules Area ]--- 0x000003ff80000000-0x0000040000000000 2G Acked-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/include/asm/kasan.h | 23 +++ arch/s390/include/asm/pgtable.h | 1 + arch/s390/kernel/early_nobss.c | 2 + arch/s390/kernel/setup.c | 22 ++- arch/s390/mm/Makefile | 3 + arch/s390/mm/init.c | 4 +- arch/s390/mm/kasan_init.c | 294 ++++++++++++++++++++++++++++++++ 8 files changed, 343 insertions(+), 7 deletions(-) create mode 100644 arch/s390/include/asm/kasan.h create mode 100644 arch/s390/mm/kasan_init.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6061dd7578fe..95fff775ac7c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -120,6 +120,7 @@ config S390 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KASAN select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h new file mode 100644 index 000000000000..7de6eceb562e --- /dev/null +++ b/arch/s390/include/asm/kasan.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#include + +#ifdef CONFIG_KASAN + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_SIZE \ + (_AC(1, UL) << (_REGION2_SHIFT - KASAN_SHADOW_SCALE_SHIFT)) +#define KASAN_SHADOW_OFFSET _AC(0x30000000000, UL) +#define KASAN_SHADOW_START KASAN_SHADOW_OFFSET +#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) + +extern void kasan_early_init(void); +extern void kasan_copy_shadow(pgd_t *dst); +#else +static inline void kasan_early_init(void) { } +static inline void kasan_copy_shadow(pgd_t *dst) { } +#endif + +#endif diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5f0fb9320dd0..3baf8f1fb8f2 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1181,6 +1181,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) #define pgd_offset_k(address) pgd_offset(&init_mm, address) +#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr)) #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) diff --git a/arch/s390/kernel/early_nobss.c b/arch/s390/kernel/early_nobss.c index 8e96590b3a68..8d73f7fae16e 100644 --- a/arch/s390/kernel/early_nobss.c +++ b/arch/s390/kernel/early_nobss.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "entry.h" static void __init reset_tod_clock(void) @@ -40,4 +41,5 @@ void __init startup_init_nobss(void) { reset_tod_clock(); 
clear_bss_section(); + kasan_early_init(); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ae3810c04872..93cf9bce98a1 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -531,14 +531,19 @@ static void __init setup_memory_end(void) { unsigned long vmax, vmalloc_size, tmp; - /* Choose kernel address space layout: 2, 3, or 4 levels. */ + /* Choose kernel address space layout: 3 or 4 levels. */ vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; - tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; - tmp = tmp * (sizeof(struct page) + PAGE_SIZE); - if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) + if (IS_ENABLED(CONFIG_KASAN)) { vmax = _REGION2_SIZE; /* 3-level kernel page table */ - else - vmax = _REGION1_SIZE; /* 4-level kernel page table */ + } else { + tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; + tmp = tmp * (sizeof(struct page) + PAGE_SIZE); + if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) + vmax = _REGION2_SIZE; /* 3-level kernel page table */ + else + vmax = _REGION1_SIZE; /* 4-level kernel page table */ + } + /* module area is at the end of the kernel address space. */ MODULES_END = vmax; MODULES_VADDR = MODULES_END - MODULES_LEN; @@ -556,6 +561,11 @@ static void __init setup_memory_end(void) /* Take care that memory_end is set and <= vmemmap */ memory_end = min(memory_end ?: max_physmem_end, tmp); +#ifdef CONFIG_KASAN + /* fit in kasan shadow memory region between 1:1 and vmemmap */ + memory_end = min(memory_end, KASAN_SHADOW_START); + vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); +#endif max_pfn = max_low_pfn = PFN_DOWN(memory_end); memblock_remove(memory_end, ULONG_MAX); diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 83c83c69cab2..f5880bfd1b0c 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -10,3 +10,6 @@ obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o + +KASAN_SANITIZE_kasan_init.o := n +obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 3fa3e5323612..50ebda9b3d0c 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -42,6 +42,7 @@ #include #include #include +#include pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); @@ -98,8 +99,9 @@ void __init paging_init(void) S390_lowcore.user_asce = S390_lowcore.kernel_asce; crst_table_init((unsigned long *) init_mm.pgd, pgd_type); vmem_map_init(); + kasan_copy_shadow(init_mm.pgd); - /* enable virtual mapping in kernel mode */ + /* enable virtual mapping in kernel mode */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); __ctl_load(S390_lowcore.kernel_asce, 13, 13); diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c new file mode 100644 index 000000000000..b888cbbbcf0d --- /dev/null +++ b/arch/s390/mm/kasan_init.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned long pgalloc_pos __initdata; +static unsigned long pgalloc_low __initdata; + +#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) + +static pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); + +static void __init kasan_early_panic(const char *reason) +{ + sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n"); + sclp_early_printk(reason); + disabled_wait(0); +} 
+ +static void * __init kasan_early_alloc_pages(unsigned int order) +{ + pgalloc_pos -= (PAGE_SIZE << order); + + if (pgalloc_pos < pgalloc_low) + kasan_early_panic("out of memory during initialisation\n"); + + return (void *)pgalloc_pos; +} + +static void * __init kasan_early_crst_alloc(unsigned long val) +{ + unsigned long *table; + + table = kasan_early_alloc_pages(CRST_ALLOC_ORDER); + if (table) + crst_table_init(table, val); + return table; +} + +static pte_t * __init kasan_early_pte_alloc(void) +{ + static void *pte_leftover; + pte_t *pte; + + BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE); + + if (!pte_leftover) { + pte_leftover = kasan_early_alloc_pages(0); + pte = pte_leftover + _PAGE_TABLE_SIZE; + } else { + pte = pte_leftover; + pte_leftover = NULL; + } + memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); + return pte; +} + +enum populate_mode { + POPULATE_ONE2ONE, + POPULATE_MAP, + POPULATE_ZERO_SHADOW +}; +static void __init kasan_early_vmemmap_populate(unsigned long address, + unsigned long end, + enum populate_mode mode) +{ + unsigned long pgt_prot_zero, pgt_prot; + pgd_t *pg_dir; + p4d_t *p4_dir; + pud_t *pu_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + + pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO); + pgt_prot_zero &= ~_PAGE_NOEXEC; + pgt_prot = pgprot_val(PAGE_KERNEL_EXEC); + + while (address < end) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + if (mode == POPULATE_ZERO_SHADOW && + IS_ALIGNED(address, PGDIR_SIZE) && + end - address >= PGDIR_SIZE) { + pgd_populate(&init_mm, pg_dir, kasan_zero_p4d); + address = (address + PGDIR_SIZE) & PGDIR_MASK; + continue; + } + p4_dir = kasan_early_crst_alloc(_REGION2_ENTRY_EMPTY); + pgd_populate(&init_mm, pg_dir, p4_dir); + } + + p4_dir = p4d_offset(pg_dir, address); + if (p4d_none(*p4_dir)) { + if (mode == POPULATE_ZERO_SHADOW && + IS_ALIGNED(address, P4D_SIZE) && + end - address >= P4D_SIZE) { + p4d_populate(&init_mm, p4_dir, kasan_zero_pud); + address = (address + P4D_SIZE) & P4D_MASK; + continue; + } + pu_dir = kasan_early_crst_alloc(_REGION3_ENTRY_EMPTY); + p4d_populate(&init_mm, p4_dir, pu_dir); + } + + pu_dir = pud_offset(p4_dir, address); + if (pud_none(*pu_dir)) { + if (mode == POPULATE_ZERO_SHADOW && + IS_ALIGNED(address, PUD_SIZE) && + end - address >= PUD_SIZE) { + pud_populate(&init_mm, pu_dir, kasan_zero_pmd); + address = (address + PUD_SIZE) & PUD_MASK; + continue; + } + pm_dir = kasan_early_crst_alloc(_SEGMENT_ENTRY_EMPTY); + pud_populate(&init_mm, pu_dir, pm_dir); + } + + pm_dir = pmd_offset(pu_dir, address); + if (pmd_none(*pm_dir)) { + if (mode == POPULATE_ZERO_SHADOW && + IS_ALIGNED(address, PMD_SIZE) && + end - address >= PMD_SIZE) { + pmd_populate(&init_mm, pm_dir, kasan_zero_pte); + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + pt_dir = kasan_early_pte_alloc(); + pmd_populate(&init_mm, pm_dir, pt_dir); + } + + pt_dir = pte_offset_kernel(pm_dir, address); + if (pte_none(*pt_dir)) { + void *page; + + switch (mode) { + case POPULATE_ONE2ONE: + page = (void *)address; + pte_val(*pt_dir) = __pa(page) | pgt_prot; + break; + case POPULATE_MAP: + page = kasan_early_alloc_pages(0); + memset(page, 0, PAGE_SIZE); + pte_val(*pt_dir) = __pa(page) | pgt_prot; + break; + case POPULATE_ZERO_SHADOW: + page = kasan_zero_page; + pte_val(*pt_dir) = __pa(page) | pgt_prot_zero; + break; + } + } + address += PAGE_SIZE; + } +} + +static void __init kasan_set_pgd(pgd_t *pgd, unsigned long asce_type) +{ + unsigned long asce_bits; + + asce_bits = asce_type | _ASCE_TABLE_LENGTH; + S390_lowcore.kernel_asce = 
(__pa(pgd) & PAGE_MASK) | asce_bits; + S390_lowcore.user_asce = S390_lowcore.kernel_asce; + + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + __ctl_load(S390_lowcore.kernel_asce, 7, 7); + __ctl_load(S390_lowcore.kernel_asce, 13, 13); +} + +static void __init kasan_enable_dat(void) +{ + psw_t psw; + + psw.mask = __extract_psw(); + psw_bits(psw).dat = 1; + psw_bits(psw).as = PSW_BITS_AS_HOME; + __load_psw_mask(psw.mask); +} + +void __init kasan_early_init(void) +{ + unsigned long untracked_mem_end; + unsigned long shadow_alloc_size; + unsigned long initrd_end; + unsigned long asce_type; + unsigned long memsize; + unsigned long vmax; + unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO); + pte_t pte_z; + pmd_t pmd_z = __pmd(__pa(kasan_zero_pte) | _SEGMENT_ENTRY); + pud_t pud_z = __pud(__pa(kasan_zero_pmd) | _REGION3_ENTRY); + p4d_t p4d_z = __p4d(__pa(kasan_zero_pud) | _REGION2_ENTRY); + + pgt_prot &= ~_PAGE_NOEXEC; + pte_z = __pte(__pa(kasan_zero_page) | pgt_prot); + + /* 3 level paging */ + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PUD_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)); + crst_table_init((unsigned long *)early_pg_dir, _REGION3_ENTRY_EMPTY); + untracked_mem_end = vmax = _REGION2_SIZE; + asce_type = _ASCE_TYPE_REGION3; + + /* init kasan zero shadow */ + crst_table_init((unsigned long *)kasan_zero_p4d, p4d_val(p4d_z)); + crst_table_init((unsigned long *)kasan_zero_pud, pud_val(pud_z)); + crst_table_init((unsigned long *)kasan_zero_pmd, pmd_val(pmd_z)); + memset64((u64 *)kasan_zero_pte, pte_val(pte_z), PTRS_PER_PTE); + + memsize = min(max_physmem_end, KASAN_SHADOW_START); + shadow_alloc_size = memsize >> KASAN_SHADOW_SCALE_SHIFT; + if (IS_ENABLED(CONFIG_MODULES)) + shadow_alloc_size += MODULES_LEN >> KASAN_SHADOW_SCALE_SHIFT; + pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE); + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) { + initrd_end = + round_up(INITRD_START + INITRD_SIZE, _SEGMENT_SIZE); + pgalloc_low = max(pgalloc_low, initrd_end); + } + + if (pgalloc_low + shadow_alloc_size > memsize) + kasan_early_panic("out of memory during initialisation\n"); + + pgalloc_pos = memsize; + init_mm.pgd = early_pg_dir; + /* + * Current memory layout: + * +- 0 -------------+ +- shadow start -+ + * | 1:1 ram mapping | /| 1/8 ram | + * +- end of ram ----+ / +----------------+ + * | ... gap ... |/ | kasan | + * +- shadow start --+ | zero | + * | 1/8 addr space | | page | + * +- shadow end -+ | mapping | + * | ... gap ... 
|\ | (untracked) | + * +- modules vaddr -+ \ +----------------+ + * | 2Gb | \| 256Mb | + * +-----------------+ +- shadow end ---+ + */ + /* populate identity mapping */ + kasan_early_vmemmap_populate(0, memsize, POPULATE_ONE2ONE); + /* populate kasan shadow (for identity mapping / modules / zero page) */ + kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); + if (IS_ENABLED(CONFIG_MODULES)) { + untracked_mem_end = vmax - MODULES_LEN; + kasan_early_vmemmap_populate(__sha(untracked_mem_end), + __sha(vmax), POPULATE_MAP); + } + kasan_early_vmemmap_populate(__sha(memsize), __sha(untracked_mem_end), + POPULATE_ZERO_SHADOW); + kasan_set_pgd(early_pg_dir, asce_type); + kasan_enable_dat(); + /* enable kasan */ + init_task.kasan_depth = 0; + memblock_reserve(pgalloc_pos, memsize - pgalloc_pos); + sclp_early_printk("KernelAddressSanitizer initialized\n"); +} + +void __init kasan_copy_shadow(pgd_t *pg_dir) +{ + /* + * At this point we are still running on early pages setup early_pg_dir, + * while swapper_pg_dir has just been initialized with identity mapping. + * Carry over shadow memory region from early_pg_dir to swapper_pg_dir. + */ + + pgd_t *pg_dir_src; + pgd_t *pg_dir_dst; + p4d_t *p4_dir_src; + p4d_t *p4_dir_dst; + pud_t *pu_dir_src; + pud_t *pu_dir_dst; + + pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START); + pg_dir_dst = pgd_offset_raw(pg_dir, KASAN_SHADOW_START); + p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START); + p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START); + if (!p4d_folded(*p4_dir_src)) { + /* 4 level paging */ + memcpy(p4_dir_dst, p4_dir_src, + (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t)); + return; + } + /* 3 level paging */ + pu_dir_src = pud_offset(p4_dir_src, KASAN_SHADOW_START); + pu_dir_dst = pud_offset(p4_dir_dst, KASAN_SHADOW_START); + memcpy(pu_dir_dst, pu_dir_src, + (KASAN_SHADOW_SIZE >> PUD_SHIFT) * sizeof(pud_t)); +} From 7fef92ccadd744492526d7749eebfe24dd8dcc48 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 16:52:22 +0100 Subject: [PATCH 47/83] s390/kasan: double the stack size Kasan stack instrumentation pads stack variables with redzones, which increases stack frames size significantly. Stack sizes are increased from 16k to 32k in the code, as well as for the kernel stack overflow detection option (CHECK_STACK). 
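For illustration only, a hypothetical function (not taken from the kernel) shows why instrumented frames grow: the compiler places redzones around every addressable local variable and emits a shadow check before each access, so even a small buffer costs several times its nominal size on the stack.

	static int checksum16(const unsigned char *p)
	{
		unsigned char buf[16];	/* with KASAN: redzones surround buf, inflating
					 * the frame well beyond the 16 payload bytes */
		int i, sum = 0;

		__builtin_memcpy(buf, p, sizeof(buf));
		for (i = 0; i < 16; i++)
			sum += buf[i];	/* each access is preceded by a shadow check */
		return sum;
	}

With many such frames on a deep call chain 16k is no longer a safe budget, hence the larger THREAD_SIZE_ORDER below.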
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 2 +- arch/s390/include/asm/thread_info.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ee65185bbc80..0b33577932c3 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -27,7 +27,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) UTS_MACHINE := s390x -STACK_SIZE := 16384 +STACK_SIZE := $(if $(CONFIG_KASAN),32768,16384) CHECKFLAGS += -D__s390__ -D__s390x__ export LD_BFD diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 1bbbaf6ae511..79b40600f523 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -13,7 +13,11 @@ /* * General size of kernel stacks */ +#ifdef CONFIG_KASAN +#define THREAD_SIZE_ORDER 3 +#else #define THREAD_SIZE_ORDER 2 +#endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ From b6cbe3e8bdff6f21f1b58b08a55f479cdcf98282 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 17:20:28 +0100 Subject: [PATCH 48/83] s390/kasan: avoid user access code instrumentation Kasan instrumentation adds "store" check for variables marked as modified by inline assembly. With user pointers containing addresses from another address space this produces false positives. static inline unsigned long clear_user_xc(void __user *to, ...) { asm volatile( ... : "+a" (to) ... User space access functions are wrapped by manually instrumented functions in kasan common code, which should be sufficient to catch errors. So, we just disable uaccess.o instrumentation altogether. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/lib/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 57ab40188d4b..5418d10dc2a8 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -9,5 +9,9 @@ lib-$(CONFIG_SMP) += spinlock.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o +# Instrumenting memory accesses to __user data (in different address space) +# produce false positives +KASAN_SANITIZE_uaccess.o := n + chkbss := mem.o include $(srctree)/arch/s390/scripts/Makefile.chkbss From 0dac8f6bc3699f28d807ad3a5ec575e18da8ba62 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 17:40:16 +0100 Subject: [PATCH 49/83] s390/mm: add kasan shadow to the debugfs pgtable dump This change adds address space markers for kasan shadow memory. 
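With these markers in place the debugfs kernel page table dump brackets the shadow region roughly as follows (illustrative only; exact addresses, sizes and attributes depend on the configuration and on later patches in this series):

	---[ Kasan Shadow Start ]---
	0x0018000000000000-0x0018000010000000 256M PMD RW X
	...
	---[ Kasan Shadow End ]---
	---[ vmemmap Area ]---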
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/mm/dump_pagetables.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 7cdea2ec51e9..5139c24111bc 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -17,18 +18,26 @@ enum address_markers_idx { IDENTITY_NR = 0, KERNEL_START_NR, KERNEL_END_NR, +#ifdef CONFIG_KASAN + KASAN_SHADOW_START_NR, + KASAN_SHADOW_END_NR, +#endif VMEMMAP_NR, VMALLOC_NR, MODULES_NR, }; static struct addr_marker address_markers[] = { - [IDENTITY_NR] = {0, "Identity Mapping"}, - [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, - [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, - [VMEMMAP_NR] = {0, "vmemmap Area"}, - [VMALLOC_NR] = {0, "vmalloc Area"}, - [MODULES_NR] = {0, "Modules Area"}, + [IDENTITY_NR] = {0, "Identity Mapping"}, + [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, + [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, +#ifdef CONFIG_KASAN + [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"}, + [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"}, +#endif + [VMEMMAP_NR] = {0, "vmemmap Area"}, + [VMALLOC_NR] = {0, "vmalloc Area"}, + [MODULES_NR] = {0, "Modules Area"}, { -1, NULL } }; From 793213a82de4ccc96f394ea5deaaf57c0bb01f0b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 18:22:24 +0100 Subject: [PATCH 50/83] s390/kasan: dynamic shadow mem allocation for modules Move the entire shadow memory preallocation for the modules area to dynamic allocation per module load. This behavior has been introduced for x86 with bebf56a1b: "This patch also forces module_alloc() to return 8*PAGE_SIZE aligned address making shadow memory handling ( kasan_module_alloc()/kasan_module_free() ) more simple.
Such alignment guarantees that each shadow page backing modules address space correspond to only one module_alloc() allocation" Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/module.c | 15 +++++++++++---- arch/s390/mm/kasan_init.c | 11 +++-------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index d298d3cb46d0..31889db609e9 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -32,12 +33,18 @@ void *module_alloc(unsigned long size) { + void *p; + if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, - 0, NUMA_NO_NODE, - __builtin_return_address(0)); + p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + if (p && (kasan_module_alloc(p, size) < 0)) { + vfree(p); + return NULL; + } + return p; } void module_arch_freeing_init(struct module *mod) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index b888cbbbcf0d..714ac41e3ee5 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -214,8 +214,6 @@ void __init kasan_early_init(void) memsize = min(max_physmem_end, KASAN_SHADOW_START); shadow_alloc_size = memsize >> KASAN_SHADOW_SCALE_SHIFT; - if (IS_ENABLED(CONFIG_MODULES)) - shadow_alloc_size += MODULES_LEN >> KASAN_SHADOW_SCALE_SHIFT; pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE); if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) { initrd_end = @@ -239,18 +237,15 @@ void __init kasan_early_init(void) * +- shadow end -+ | mapping | * | ... gap ... |\ | (untracked) | * +- modules vaddr -+ \ +----------------+ - * | 2Gb | \| 256Mb | + * | 2Gb | \| unmapped | allocated per module * +-----------------+ +- shadow end ---+ */ /* populate identity mapping */ kasan_early_vmemmap_populate(0, memsize, POPULATE_ONE2ONE); - /* populate kasan shadow (for identity mapping / modules / zero page) */ + /* populate kasan shadow (for identity mapping and zero page mapping) */ kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); - if (IS_ENABLED(CONFIG_MODULES)) { + if (IS_ENABLED(CONFIG_MODULES)) untracked_mem_end = vmax - MODULES_LEN; - kasan_early_vmemmap_populate(__sha(untracked_mem_end), - __sha(vmax), POPULATE_MAP); - } kasan_early_vmemmap_populate(__sha(memsize), __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); kasan_set_pgd(early_pg_dir, asce_type); From d58106c3ec9abcf2f9882171d6230eccfd6dc52e Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 18:44:28 +0100 Subject: [PATCH 51/83] s390/kasan: use noexec and large pages To lower memory footprint and speed up kasan initialisation detect EDAT availability and use large pages if possible. As we know how much memory is needed for initialisation, another simplistic large page allocator is introduced to avoid memory fragmentation. Since facilities list is retrieved anyhow, detect noexec support and adjust pages attributes. Handle noexec kernel option to avoid inconsistent kasan shadow memory pages flags. 
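Rough arithmetic behind the footprint claim (illustrative sketch, not code from the patch; it assumes the usual s390 geometry of 2 KB page tables mapping 1 MB each, which matches the BUILD_BUG_ON in kasan_early_pte_alloc()):

	static void shadow_mapping_cost(void)
	{
		unsigned long shadow   = 1UL << 30;		/* map 1 GB of shadow memory */
		unsigned long pt_count = shadow >> 20;		/* one page table per 1 MB   */
		unsigned long pt_bytes = pt_count * 2048;	/* 1024 * 2 KB = 2 MB of page tables */

		/* With EDAT the same gigabyte is covered by 1024 large-page segment
		 * entries in segment tables that exist anyway, so the 2 MB above is
		 * never allocated and far fewer entries have to be initialized. */
		(void)pt_bytes;
	}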
Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/ipl_parm.c | 11 +++++- arch/s390/boot/string.c | 38 +++++++++++++++++++ arch/s390/include/asm/pgtable.h | 6 +++ arch/s390/include/asm/setup.h | 1 + arch/s390/kernel/setup.c | 1 + arch/s390/mm/kasan_init.c | 66 +++++++++++++++++++++++++++++++-- 6 files changed, 118 insertions(+), 5 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 7f8e546400a1..9dab596be98e 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -13,6 +13,7 @@ int __bootdata(early_ipl_block_valid); unsigned long __bootdata(memory_end); int __bootdata(memory_end_set); +int __bootdata(noexec_disabled); static inline int __diag308(unsigned long subcode, void *addr) { @@ -145,8 +146,10 @@ void setup_boot_command_line(void) static char command_line_buf[COMMAND_LINE_SIZE] __section(.data); static void parse_mem_opt(void) { - char *args; char *param, *val; + bool enabled; + char *args; + int rc; args = strcpy(command_line_buf, early_command_line); while (*args) { @@ -156,6 +159,12 @@ static void parse_mem_opt(void) memory_end = memparse(val, NULL); memory_end_set = 1; } + + if (!strcmp(param, "noexec")) { + rc = kstrtobool(val, &enabled); + if (!rc && !enabled) + noexec_disabled = 1; + } } } diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c index 09ca9130e73a..25aca07898ba 100644 --- a/arch/s390/boot/string.c +++ b/arch/s390/boot/string.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include "../lib/string.c" int strncmp(const char *cs, const char *ct, size_t count) @@ -98,3 +99,40 @@ long simple_strtol(const char *cp, char **endp, unsigned int base) return simple_strtoull(cp, endp, base); } + +int kstrtobool(const char *s, bool *res) +{ + if (!s) + return -EINVAL; + + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + return 0; + case 'n': + case 'N': + case '0': + *res = false; + return 0; + case 'o': + case 'O': + switch (s[1]) { + case 'n': + case 'N': + *res = true; + return 0; + case 'f': + case 'F': + *res = false; + return 0; + default: + break; + } + default: + break; + } + + return -EINVAL; +} diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 3baf8f1fb8f2..411d435e7a7d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -468,6 +468,12 @@ static inline int is_module_addr(void *addr) _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_NOEXEC) +#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY | \ + _SEGMENT_ENTRY_LARGE | \ + _SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_WRITE | \ + _SEGMENT_ENTRY_YOUNG | \ + _SEGMENT_ENTRY_DIRTY) /* * Region3 entry (large page) protection definitions. 
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 522e4553373a..efda97804aa4 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -65,6 +65,7 @@ #define OLDMEM_SIZE (*(unsigned long *) (OLDMEM_SIZE_OFFSET)) #define COMMAND_LINE ((char *) (COMMAND_LINE_OFFSET)) +extern int noexec_disabled; extern int memory_end_set; extern unsigned long memory_end; extern unsigned long max_physmem_end; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 93cf9bce98a1..d7548806d887 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -90,6 +90,7 @@ char elf_platform[ELF_PLATFORM_SIZE]; unsigned long int_hwcap = 0; +int __bootdata(noexec_disabled); int __bootdata(memory_end_set); unsigned long __bootdata(memory_end); unsigned long __bootdata(max_physmem_end); diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 714ac41e3ee5..e4697900e884 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -7,11 +7,16 @@ #include #include #include +#include #include #include +static unsigned long segment_pos __initdata; +static unsigned long segment_low __initdata; static unsigned long pgalloc_pos __initdata; static unsigned long pgalloc_low __initdata; +static bool has_edat __initdata; +static bool has_nx __initdata; #define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) @@ -24,6 +29,16 @@ static void __init kasan_early_panic(const char *reason) disabled_wait(0); } +static void * __init kasan_early_alloc_segment(void) +{ + segment_pos -= _SEGMENT_SIZE; + + if (segment_pos < segment_low) + kasan_early_panic("out of memory during initialisation\n"); + + return (void *)segment_pos; +} + static void * __init kasan_early_alloc_pages(unsigned int order) { pgalloc_pos -= (PAGE_SIZE << order); @@ -71,7 +86,7 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, unsigned long end, enum populate_mode mode) { - unsigned long pgt_prot_zero, pgt_prot; + unsigned long pgt_prot_zero, pgt_prot, sgt_prot; pgd_t *pg_dir; p4d_t *p4_dir; pud_t *pu_dir; @@ -79,8 +94,10 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pte_t *pt_dir; pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO); - pgt_prot_zero &= ~_PAGE_NOEXEC; + if (!has_nx) + pgt_prot_zero &= ~_PAGE_NOEXEC; pgt_prot = pgprot_val(PAGE_KERNEL_EXEC); + sgt_prot = pgprot_val(SEGMENT_KERNEL_EXEC); while (address < end) { pg_dir = pgd_offset_k(address); @@ -131,8 +148,27 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, address = (address + PMD_SIZE) & PMD_MASK; continue; } + /* the first megabyte of 1:1 is mapped with 4k pages */ + if (has_edat && address && end - address >= PMD_SIZE && + mode != POPULATE_ZERO_SHADOW) { + void *page; + + if (mode == POPULATE_ONE2ONE) { + page = (void *)address; + } else { + page = kasan_early_alloc_segment(); + memset(page, 0, _SEGMENT_SIZE); + } + pmd_val(*pm_dir) = __pa(page) | sgt_prot; + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + pt_dir = kasan_early_pte_alloc(); pmd_populate(&init_mm, pm_dir, pt_dir); + } else if (pmd_large(*pm_dir)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; } pt_dir = pte_offset_kernel(pm_dir, address); @@ -182,6 +218,20 @@ static void __init kasan_enable_dat(void) __load_psw_mask(psw.mask); } +static void __init kasan_early_detect_facilities(void) +{ + stfle(S390_lowcore.stfle_fac_list, + ARRAY_SIZE(S390_lowcore.stfle_fac_list)); + if (test_facility(8)) { + has_edat = true; + __ctl_set_bit(0, 23); 
+ } + if (!noexec_disabled && test_facility(130)) { + has_nx = true; + __ctl_set_bit(0, 20); + } +} + void __init kasan_early_init(void) { unsigned long untracked_mem_end; @@ -196,7 +246,9 @@ void __init kasan_early_init(void) pud_t pud_z = __pud(__pa(kasan_zero_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_zero_pud) | _REGION2_ENTRY); - pgt_prot &= ~_PAGE_NOEXEC; + kasan_early_detect_facilities(); + if (!has_nx) + pgt_prot &= ~_PAGE_NOEXEC; pte_z = __pte(__pa(kasan_zero_page) | pgt_prot); /* 3 level paging */ @@ -224,7 +276,13 @@ void __init kasan_early_init(void) if (pgalloc_low + shadow_alloc_size > memsize) kasan_early_panic("out of memory during initialisation\n"); - pgalloc_pos = memsize; + if (has_edat) { + segment_pos = round_down(memsize, _SEGMENT_SIZE); + segment_low = segment_pos - shadow_alloc_size; + pgalloc_pos = segment_low; + } else { + pgalloc_pos = memsize; + } init_mm.pgd = early_pg_dir; /* * Current memory layout: From dde709d1361ab50d3b9f2824f72b4374f5582e84 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 20 Nov 2017 11:16:14 +0100 Subject: [PATCH 52/83] compiler: introduce __no_sanitize_address_or_inline Due to a conflict between kasan instrumentation and inlining https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 functions which are defined as inline cannot be called from functions defined with __no_sanitize_address. Introduce __no_sanitize_address_or_inline, which expands to __no_sanitize_address when the kernel is built with kasan support and to inline otherwise. This helps to avoid disabling kasan instrumentation for entire files. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- include/linux/compiler-gcc.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 763bbad1e258..997ade08a79d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -208,6 +208,12 @@ * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 */ #define __no_sanitize_address __attribute__((no_sanitize_address)) +#ifdef CONFIG_KASAN +#define __no_sanitize_address_or_inline \ + __no_sanitize_address __maybe_unused notrace +#else +#define __no_sanitize_address_or_inline inline +#endif #endif #if GCC_VERSION >= 50100 @@ -225,6 +231,7 @@ #if !defined(__no_sanitize_address) #define __no_sanitize_address +#define __no_sanitize_address_or_inline inline #endif /* From 9e8df6daed9e59153624e52aa4832ddaf39f1ae8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 20 Nov 2017 12:13:52 +0100 Subject: [PATCH 53/83] s390/smp: kasan stack instrumentation support The smp_start_secondary function is called without DAT enabled. To avoid disabling kasan instrumentation for the entire arch/s390/kernel/smp.c, smp_start_secondary has been split into two parts. smp_start_secondary has instrumentation disabled; it does minimal setup and enables DAT. Then the instrumented __smp_start_secondary is called to do the rest. __load_psw_mask function instrumentation has been disabled as well so that it can be called from smp_start_secondary.
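The shape of the split, reduced to a minimal sketch (illustrative names, not the actual smp.c code):

	/* Runs with DAT on: ordinary, fully instrumented code. */
	static void secondary_body(void)
	{
		/* the rest of the CPU bring-up */
	}

	/* Runs with DAT off: must not be instrumented, otherwise the compiler
	 * would emit shadow memory accesses before the address space is usable. */
	static void __no_sanitize_address secondary_entry(void)
	{
		/* minimal lowcore and control register setup, then switch DAT on */
		secondary_body();
	}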
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 2 +- arch/s390/kernel/smp.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 43494a014d5b..55956c96558c 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -336,7 +336,7 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ -static inline void __load_psw_mask(unsigned long mask) +static __no_sanitize_address_or_inline void __load_psw_mask(unsigned long mask) { unsigned long addr; psw_t psw; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 032d98bfc60a..c98059faee46 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -804,6 +804,8 @@ static void smp_init_secondary(void) { int cpu = smp_processor_id(); + S390_lowcore.last_update_clock = get_tod_clock(); + restore_access_regs(S390_lowcore.access_regs_save_area); cpu_init(); preempt_disable(); init_cpu_timer(); @@ -823,14 +825,12 @@ static void smp_init_secondary(void) /* * Activate a secondary processor. */ -static void smp_start_secondary(void *cpuvoid) +static void __no_sanitize_address smp_start_secondary(void *cpuvoid) { - S390_lowcore.last_update_clock = get_tod_clock(); S390_lowcore.restart_stack = (unsigned long) restart_stack; S390_lowcore.restart_fn = (unsigned long) do_restart; S390_lowcore.restart_data = 0; S390_lowcore.restart_source = -1UL; - restore_access_regs(S390_lowcore.access_regs_save_area); __ctl_load(S390_lowcore.cregs_save_area, 0, 15); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); CALL_ON_STACK(smp_init_secondary, S390_lowcore.kernel_stack, 0); From ac1256f82619724357242eb514f162c40d5b64d8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 20 Nov 2017 12:15:10 +0100 Subject: [PATCH 54/83] s390/kasan: reipl and kexec support Some functions from both arch/s390/kernel/ipl.c and arch/s390/kernel/machine_kexec.c are called without DAT enabled (or with and without DAT enabled code paths). There is no easy way to partially disable kasan for those files without a substantial rework. Disable kasan for both files for now. To avoid disabling kasan for arch/s390/kernel/diag.c DAT flag is enabled in diag308 call. pcpu_delegate which disables DAT is marked with __no_sanitize_address to disable instrumentation for that one function. 
Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 2 +- arch/s390/kernel/Makefile | 2 ++ arch/s390/kernel/ipl.c | 2 ++ arch/s390/kernel/smp.c | 5 +++-- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 55956c96558c..34768e6ef4fb 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -242,7 +242,7 @@ static inline unsigned long current_stack_pointer(void) return sp; } -static inline unsigned short stap(void) +static __no_sanitize_address_or_inline unsigned short stap(void) { unsigned short cpu_address; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 1f9c98f7d9e2..386b1abb217b 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -24,6 +24,8 @@ UBSAN_SANITIZE_early.o := n UBSAN_SANITIZE_early_nobss.o := n KASAN_SANITIZE_early_nobss.o := n +KASAN_SANITIZE_ipl.o := n +KASAN_SANITIZE_machine_kexec.o := n # # Passing null pointers is ok for smp code, since we access the lowcore here. diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f1d69f78bb1d..18a5d6317acc 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -156,6 +156,8 @@ static inline int __diag308(unsigned long subcode, void *addr) int diag308(unsigned long subcode, void *addr) { + if (IS_ENABLED(CONFIG_KASAN)) + __arch_local_irq_stosm(0x04); /* enable DAT */ diag_stat_inc(DIAG_STAT_X308); return __diag308(subcode, addr); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index c98059faee46..1b3188f57b58 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -313,8 +313,9 @@ static void __pcpu_delegate(void (*func)(void*), void *data) func(data); /* should not return */ } -static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), - void *data, unsigned long stack) +static void __no_sanitize_address pcpu_delegate(struct pcpu *pcpu, + void (*func)(void *), + void *data, unsigned long stack) { struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; unsigned long source_cpu = stap(); From f4f0d32bfb273537fcea9febc7081dac0252dcc0 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 20 Nov 2017 12:17:19 +0100 Subject: [PATCH 55/83] s390/dumpstack: disable __dump_trace kasan instrumentation Walking async_stack produces false positives. Disable __dump_trace function instrumentation for now. Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dumpstack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index ef85a00442cd..d4c88e119e1f 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -30,7 +30,7 @@ * The stack trace can start at any of the three stacks and can potentially * touch all of them. The order is: panic stack, async stack, sync stack. */ -static unsigned long +static unsigned long __no_sanitize_address __dump_trace(dump_trace_func_t func, void *data, unsigned long sp, unsigned long low, unsigned long high) { From 5e785963298b7923e28817d20868882fbefc863c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 19 Nov 2017 11:39:39 +0100 Subject: [PATCH 56/83] s390/kasan: enable stack and global variables access checks By defining KASAN_SHADOW_OFFSET in Kconfig stack and global variables memory access check instrumentation is enabled. gcc version 4.9.2 or newer is also required. 
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 5 +++++ arch/s390/include/asm/kasan.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 95fff775ac7c..24586257c5cd 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -56,6 +56,11 @@ config PCI_QUIRKS config ARCH_SUPPORTS_UPROBES def_bool y +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0x30000000000 + config S390 def_bool y select ARCH_BINFMT_ELF_STATE diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index 7de6eceb562e..892f4585c663 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -9,7 +9,7 @@ #define KASAN_SHADOW_SCALE_SHIFT 3 #define KASAN_SHADOW_SIZE \ (_AC(1, UL) << (_REGION2_SHIFT - KASAN_SHADOW_SCALE_SHIFT)) -#define KASAN_SHADOW_OFFSET _AC(0x30000000000, UL) +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) #define KASAN_SHADOW_START KASAN_SHADOW_OFFSET #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) From 135ff163939294f5573927ca890699ed619c0031 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 20 Nov 2017 12:56:10 +0100 Subject: [PATCH 57/83] s390/kasan: free early identity mapping structures Kasan initialization code is changed to populate persistent shadow first, save allocator position into pgalloc_freeable and proceed with early identity mapping creation. This way early identity mapping paging structures could be freed at once after switching to swapper_pg_dir when early identity mapping is not needed anymore. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kasan.h | 2 ++ arch/s390/mm/init.c | 1 + arch/s390/mm/kasan_init.c | 12 ++++++++++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index 892f4585c663..8b9ae18430ad 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -15,9 +15,11 @@ extern void kasan_early_init(void); extern void kasan_copy_shadow(pgd_t *dst); +extern void kasan_free_early_identity(void); #else static inline void kasan_early_init(void) { } static inline void kasan_copy_shadow(pgd_t *dst) { } +static inline void kasan_free_early_identity(void) { } #endif #endif diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 50ebda9b3d0c..92d7a153e72a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -109,6 +109,7 @@ void __init paging_init(void) psw_bits(psw).dat = 1; psw_bits(psw).as = PSW_BITS_AS_HOME; __load_psw_mask(psw.mask); + kasan_free_early_identity(); sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index e4697900e884..40748afc43fa 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -15,6 +15,7 @@ static unsigned long segment_pos __initdata; static unsigned long segment_low __initdata; static unsigned long pgalloc_pos __initdata; static unsigned long pgalloc_low __initdata; +static unsigned long pgalloc_freeable __initdata; static bool has_edat __initdata; static bool has_nx __initdata; @@ -298,14 +299,16 @@ void __init kasan_early_init(void) * | 2Gb | \| unmapped | allocated per module * +-----------------+ +- shadow end ---+ */ - /* populate identity mapping */ - kasan_early_vmemmap_populate(0, memsize, POPULATE_ONE2ONE); /* populate kasan shadow (for 
identity mapping and zero page mapping) */ kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); if (IS_ENABLED(CONFIG_MODULES)) untracked_mem_end = vmax - MODULES_LEN; kasan_early_vmemmap_populate(__sha(memsize), __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); + /* memory allocated for identity mapping structs will be freed later */ + pgalloc_freeable = pgalloc_pos; + /* populate identity mapping */ + kasan_early_vmemmap_populate(0, memsize, POPULATE_ONE2ONE); kasan_set_pgd(early_pg_dir, asce_type); kasan_enable_dat(); /* enable kasan */ @@ -345,3 +348,8 @@ void __init kasan_copy_shadow(pgd_t *pg_dir) memcpy(pu_dir_dst, pu_dir_src, (KASAN_SHADOW_SIZE >> PUD_SHIFT) * sizeof(pud_t)); } + +void __init kasan_free_early_identity(void) +{ + memblock_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos); +} From 5dff03813f46f267bc1ecb334901e916346692ff Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 19 Nov 2017 11:54:14 +0100 Subject: [PATCH 58/83] s390/kasan: add option for 4-level paging support By default 3-level paging is used when the kernel is compiled with kasan support. Add 4-level paging option to support systems with more then 3TB of physical memory and to cover 4-level paging specific code with kasan as well. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/include/asm/kasan.h | 5 +++++ arch/s390/kernel/setup.c | 4 +++- arch/s390/mm/kasan_init.c | 23 +++++++++++++++++------ lib/Kconfig.kasan | 9 +++++++++ 5 files changed, 35 insertions(+), 7 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 24586257c5cd..cc8313550493 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -59,6 +59,7 @@ config ARCH_SUPPORTS_UPROBES config KASAN_SHADOW_OFFSET hex depends on KASAN + default 0x18000000000000 if KASAN_S390_4_LEVEL_PAGING default 0x30000000000 config S390 diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index 8b9ae18430ad..70930fe5c496 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -7,8 +7,13 @@ #ifdef CONFIG_KASAN #define KASAN_SHADOW_SCALE_SHIFT 3 +#ifdef CONFIG_KASAN_S390_4_LEVEL_PAGING +#define KASAN_SHADOW_SIZE \ + (_AC(1, UL) << (_REGION1_SHIFT - KASAN_SHADOW_SCALE_SHIFT)) +#else #define KASAN_SHADOW_SIZE \ (_AC(1, UL) << (_REGION2_SHIFT - KASAN_SHADOW_SCALE_SHIFT)) +#endif #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) #define KASAN_SHADOW_START KASAN_SHADOW_OFFSET #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d7548806d887..4b2039f3e2f4 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -535,7 +535,9 @@ static void __init setup_memory_end(void) /* Choose kernel address space layout: 3 or 4 levels. */ vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; if (IS_ENABLED(CONFIG_KASAN)) { - vmax = _REGION2_SIZE; /* 3-level kernel page table */ + vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) + ? 
_REGION1_SIZE + : _REGION2_SIZE; } else { tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; tmp = tmp * (sizeof(struct page) + PAGE_SIZE); diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 40748afc43fa..5129847018ba 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -252,12 +252,23 @@ void __init kasan_early_init(void) pgt_prot &= ~_PAGE_NOEXEC; pte_z = __pte(__pa(kasan_zero_page) | pgt_prot); - /* 3 level paging */ - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PUD_SIZE)); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)); - crst_table_init((unsigned long *)early_pg_dir, _REGION3_ENTRY_EMPTY); - untracked_mem_end = vmax = _REGION2_SIZE; - asce_type = _ASCE_TYPE_REGION3; + if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)) { + /* 4 level paging */ + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); + crst_table_init((unsigned long *)early_pg_dir, + _REGION2_ENTRY_EMPTY); + untracked_mem_end = vmax = _REGION1_SIZE; + asce_type = _ASCE_TYPE_REGION2; + } else { + /* 3 level paging */ + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PUD_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)); + crst_table_init((unsigned long *)early_pg_dir, + _REGION3_ENTRY_EMPTY); + untracked_mem_end = vmax = _REGION2_SIZE; + asce_type = _ASCE_TYPE_REGION3; + } /* init kasan zero shadow */ crst_table_init((unsigned long *)kasan_zero_p4d, p4d_val(p4d_z)); diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index befb127507c0..d0bad1bd9a2b 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -57,6 +57,15 @@ config KASAN_INLINE endchoice +config KASAN_S390_4_LEVEL_PAGING + bool "KASan: use 4-level paging" + depends on KASAN && S390 + help + Compiling the kernel with KASan disables automatic 3-level vs + 4-level paging selection. 3-level paging is used by default (up + to 3TB of RAM with KASan enabled). This options allows to force + 4-level paging instead. + config TEST_KASAN tristate "Module for testing kasan for bug detection" depends on m && KASAN From e006222b57508d58b55d36c6ae6663f5729dad2b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 17:59:53 +0100 Subject: [PATCH 59/83] s390/mm: optimize debugfs ptdump kasan zero page walking Kasan zero p4d/pud/pmd/pte are always filled in with corresponding kasan zero entries. Walking kasan zero page backed area is time consuming and unnecessary. When kasan zero p4d/pud/pmd is encountered, it eventually points to the kasan zero page always with the same attributes and nothing but it, therefore zero p4d/pud/pmd could be jumped over. Also adds a space between address range and pages number to separate them from each other when pages number is huge. 
0x0018000000000000-0x0018000010000000 256M PMD RW X 0x0018000010000000-0x001bfffff0000000 1073741312M PTE RO X 0x001bfffff0000000-0x001bfffff0001000 4K PTE RW X Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/mm/dump_pagetables.c | 35 +++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 5139c24111bc..15ee7dc48215 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -89,7 +90,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, } else if (prot != cur || level != st->level || st->current_address >= st->marker[1].start_address) { /* Print the actual finished series */ - seq_printf(m, "0x%0*lx-0x%0*lx", + seq_printf(m, "0x%0*lx-0x%0*lx ", width, st->start_address, width, st->current_address); delta = (st->current_address - st->start_address) >> 10; @@ -109,6 +110,17 @@ static void note_page(struct seq_file *m, struct pg_state *st, } } +#ifdef CONFIG_KASAN +static void note_kasan_zero_page(struct seq_file *m, struct pg_state *st) +{ + unsigned int prot; + + prot = pte_val(*kasan_zero_pte) & + (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC); + note_page(m, st, prot, 4); +} +#endif + /* * The actual page table walker functions. In order to keep the * implementation of print_prot() short, we only check and pass @@ -141,6 +153,13 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pmd_t *pmd; int i; +#ifdef CONFIG_KASAN + if ((pud_val(*pud) & PAGE_MASK) == __pa(kasan_zero_pmd)) { + note_kasan_zero_page(m, st); + return; + } +#endif + for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) { st->current_address = addr; pmd = pmd_offset(pud, addr); @@ -165,6 +184,13 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pud_t *pud; int i; +#ifdef CONFIG_KASAN + if ((p4d_val(*p4d) & PAGE_MASK) == __pa(kasan_zero_pud)) { + note_kasan_zero_page(m, st); + return; + } +#endif + for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) { st->current_address = addr; pud = pud_offset(p4d, addr); @@ -188,6 +214,13 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, p4d_t *p4d; int i; +#ifdef CONFIG_KASAN + if ((pgd_val(*pgd) & PAGE_MASK) == __pa(kasan_zero_p4d)) { + note_kasan_zero_page(m, st); + return; + } +#endif + for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) { st->current_address = addr; p4d = p4d_offset(pgd, addr); From 6cad0eb561357dfde382b8d8c03c6ee30c0bff48 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 17:55:07 +0100 Subject: [PATCH 60/83] s390/mm: improve debugfs ptdump markers walking This allows to print multiple markers when they happened to have the same value. ... 0x001bfffff0100000-0x001c000000000000 255M PMD I ---[ Kasan Shadow End ]--- ---[ vmemmap Area ]--- 0x001c000000000000-0x001c000002000000 32M PMD RW X ... 
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/mm/dump_pagetables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 15ee7dc48215..363f6470d742 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -100,7 +100,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, } seq_printf(m, "%9lu%c ", delta, *unit); print_prot(m, st->current_prot, st->level); - if (st->current_address >= st->marker[1].start_address) { + while (st->current_address >= st->marker[1].start_address) { st->marker++; seq_printf(m, "---[ %s ]---\n", st->marker->name); } From 19733fe8721b8d91b799c91082ebb9c139ca6710 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 12 Jan 2018 12:46:00 +0100 Subject: [PATCH 61/83] s390/head: avoid doubling early boot stack size under KASAN The early boot stack uses 4 predefined pages of memory at 0x8000-0xC000. This stack is used to run the non-instrumented decompressor and facilities verification C code. It doesn't make sense to double its size when the kernel is built with KASAN support. BOOT_STACK_ORDER is introduced to avoid that. Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/head.S | 2 +- arch/s390/include/asm/thread_info.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index e209cfe69bb9..ce2cbbc41742 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -315,7 +315,7 @@ ENTRY(startup_kdump) brasl %r14,startup_kernel .Lstack: - .long 0x8000 + THREAD_SIZE - STACK_FRAME_OVERHEAD + .long 0x8000 + (1<<(PAGE_SHIFT+BOOT_STACK_ORDER)) - STACK_FRAME_OVERHEAD .align 8 6: .long 0x7fffffff,0xffffffff diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 79b40600f523..27248f42a03c 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -18,6 +18,7 @@ #else #define THREAD_SIZE_ORDER 2 #endif +#define BOOT_STACK_ORDER 2 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ From 296352397db68313a189e65a3513960a2c844632 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 13 Sep 2018 10:59:25 +0200 Subject: [PATCH 62/83] s390/kasan: avoid kasan crash with standby memory defined The kasan early memory allocator simply chops off memory blocks from the end of physical memory. Reuse mem_detect info to identify the actual online memory end rather than using max_physmem_end. This allows the kernel to run with kasan enabled and standby memory defined.
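A concrete picture with made-up numbers (illustrative only, not from the patch):

	/* 4 GB online + 4 GB standby memory: */
	unsigned long max_physmem_end = 0x200000000UL;	/* 8 GB, includes standby memory    */
	unsigned long online_end      = 0x100000000UL;	/* end of the last mem_detect block */

	/* Chopping allocations off max_physmem_end would place the early shadow
	 * structures in standby (offline) storage and crash on first access;
	 * chopping them off online_end keeps them in accessible memory. */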
Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/mm/kasan_init.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 5129847018ba..6b0574340f7f 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -233,6 +234,18 @@ static void __init kasan_early_detect_facilities(void) } } +static unsigned long __init get_mem_detect_end(void) +{ + unsigned long start; + unsigned long end; + + if (mem_detect.count) { + __get_mem_detect_block(mem_detect.count - 1, &start, &end); + return end; + } + return 0; +} + void __init kasan_early_init(void) { unsigned long untracked_mem_end; @@ -252,6 +265,11 @@ void __init kasan_early_init(void) pgt_prot &= ~_PAGE_NOEXEC; pte_z = __pte(__pa(kasan_zero_page) | pgt_prot); + memsize = get_mem_detect_end(); + if (!memsize) + kasan_early_panic("cannot detect physical memory size\n"); + memsize = min(memsize, KASAN_SHADOW_START); + if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)) { /* 4 level paging */ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); @@ -276,7 +294,6 @@ void __init kasan_early_init(void) crst_table_init((unsigned long *)kasan_zero_pmd, pmd_val(pmd_z)); memset64((u64 *)kasan_zero_pte, pte_val(pte_z), PTRS_PER_PTE); - memsize = min(max_physmem_end, KASAN_SHADOW_START); shadow_alloc_size = memsize >> KASAN_SHADOW_SCALE_SHIFT; pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE); if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) { From 12e55fa1944d2f2f15c580a94eda80cb7623f89d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 13 Sep 2018 16:09:52 +0200 Subject: [PATCH 63/83] s390/kasan: optimize kasan vmemmap allocation Kasan implementation now supports memory hotplug operations. For that reason regions of initially standby memory are now skipped from shadow mapping and are mapped/unmapped dynamically upon bringing memory online/offline. Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/mm/kasan_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 6b0574340f7f..5b253247bc97 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -331,7 +331,8 @@ void __init kasan_early_init(void) kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); if (IS_ENABLED(CONFIG_MODULES)) untracked_mem_end = vmax - MODULES_LEN; - kasan_early_vmemmap_populate(__sha(memsize), __sha(untracked_mem_end), + kasan_early_vmemmap_populate(__sha(max_physmem_end), + __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); /* memory allocated for identity mapping structs will be freed later */ pgalloc_freeable = pgalloc_pos; From 78333d1f908a25c9565d7518966cef717279fe32 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 26 Sep 2018 13:46:26 +0200 Subject: [PATCH 64/83] s390/kasan: add support for mem= kernel parameter Handle mem= kernel parameter in kasan to limit physical memory. 
Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/mm/kasan_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 5b253247bc97..55982142b2b5 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -268,6 +268,9 @@ void __init kasan_early_init(void) memsize = get_mem_detect_end(); if (!memsize) kasan_early_panic("cannot detect physical memory size\n"); + /* respect mem= cmdline parameter */ + if (memory_end_set && memsize > memory_end) + memsize = memory_end; memsize = min(memsize, KASAN_SHADOW_START); if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)) { From ee410de890cdf8fc94f6235dd9ef323a101511ab Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 4 Oct 2018 15:30:24 +0200 Subject: [PATCH 65/83] s390/zcrypt: zcrypt device driver cleanup Some cleanup in the s390 zcrypt device driver: - Removed fragments of pcixx crypto card code. This code can't be reached anymore because the hardware detection function does not recognize crypto cards < CEX2 since commit f56545430736 ("s390/zcrypt: Introduce QACT support for AP bus devices.") - Rename of some files and driver names which where still reflecting pcixx support to cex2a/cex2c. - Removed all the zcrypt version strings in the file headers. There is only one place left - the zcrypt.h header file is now the only place for zcrypt device driver version info. - Zcrypt version pump up from 2.2.0 to 2.2.1. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/zcrypt.h | 4 +- drivers/s390/crypto/Makefile | 2 +- drivers/s390/crypto/zcrypt_api.c | 2 - drivers/s390/crypto/zcrypt_api.h | 15 +-- drivers/s390/crypto/zcrypt_card.c | 2 - drivers/s390/crypto/zcrypt_cca_key.h | 2 - drivers/s390/crypto/zcrypt_cex2a.c | 6 +- drivers/s390/crypto/zcrypt_cex2a.h | 6 +- .../{zcrypt_pcixcc.c => zcrypt_cex2c.c} | 125 +++++++----------- .../{zcrypt_pcixcc.h => zcrypt_cex2c.h} | 14 +- drivers/s390/crypto/zcrypt_cex4.c | 20 +-- drivers/s390/crypto/zcrypt_error.h | 2 - drivers/s390/crypto/zcrypt_msgtype50.c | 24 ++-- drivers/s390/crypto/zcrypt_msgtype50.h | 2 - drivers/s390/crypto/zcrypt_msgtype6.c | 65 ++++----- drivers/s390/crypto/zcrypt_msgtype6.h | 15 +-- drivers/s390/crypto/zcrypt_queue.c | 2 - 17 files changed, 117 insertions(+), 191 deletions(-) rename drivers/s390/crypto/{zcrypt_pcixcc.c => zcrypt_cex2c.c} (62%) rename drivers/s390/crypto/{zcrypt_pcixcc.h => zcrypt_cex2c.h} (63%) diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 196a3047fb0a..42c81a95e97b 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -2,7 +2,7 @@ /* * include/asm-s390/zcrypt.h * - * zcrypt 2.2.0 (user-visible header) + * zcrypt 2.2.1 (user-visible header) * * Copyright IBM Corp. 
2001, 2018 * Author(s): Robert Burroughs @@ -16,7 +16,7 @@ #define ZCRYPT_VERSION 2 #define ZCRYPT_RELEASE 2 -#define ZCRYPT_VARIANT 0 +#define ZCRYPT_VARIANT 1 #include #include diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile index b59af548ed1c..fd5e215c66b7 100644 --- a/drivers/s390/crypto/Makefile +++ b/drivers/s390/crypto/Makefile @@ -10,7 +10,7 @@ zcrypt-objs := zcrypt_api.o zcrypt_card.o zcrypt_queue.o zcrypt-objs += zcrypt_msgtype6.o zcrypt_msgtype50.o obj-$(CONFIG_ZCRYPT) += zcrypt.o # adapter drivers depend on ap.o and zcrypt.o -obj-$(CONFIG_ZCRYPT) += zcrypt_pcixcc.o zcrypt_cex2a.o zcrypt_cex4.o +obj-$(CONFIG_ZCRYPT) += zcrypt_cex2c.o zcrypt_cex2a.o zcrypt_cex4.o # pkey kernel module pkey-objs := pkey_api.o diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index d7e1c7cd2c89..26f1cd669e90 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * zcrypt 2.2.0 - * * Copyright IBM Corp. 2001, 2018 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index a848625c1a5a..af67a768a3fc 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -1,8 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * zcrypt 2.1.0 - * - * Copyright IBM Corp. 2001, 2012 + * Copyright IBM Corp. 2001, 2018 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) * Cornelia Huck @@ -22,17 +20,8 @@ #include "ap_bus.h" /** - * device type for an actual device is either PCICA, PCICC, PCIXCC_MCL2, - * PCIXCC_MCL3, CEX2C, or CEX2A - * - * NOTE: PCIXCC_MCL3 refers to a PCIXCC with May 2004 version of Licensed - * Internal Code (LIC) (EC J12220 level 29). - * PCIXCC_MCL2 refers to any LIC before this level. + * Supported device types */ -#define ZCRYPT_PCICA 1 -#define ZCRYPT_PCICC 2 -#define ZCRYPT_PCIXCC_MCL2 3 -#define ZCRYPT_PCIXCC_MCL3 4 #define ZCRYPT_CEX2C 5 #define ZCRYPT_CEX2A 6 #define ZCRYPT_CEX3C 7 diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index 40cd4c1c2de8..d4f35a183c15 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * zcrypt 2.1.0 - * * Copyright IBM Corp. 2001, 2012 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h index e5b5c02c9d67..f09bb850763b 100644 --- a/drivers/s390/crypto/zcrypt_cca_key.h +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * zcrypt 2.1.0 - * * Copyright IBM Corp. 2001, 2006 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index f4ae5fa30ec9..146f54f5cbb8 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * zcrypt 2.1.0 - * * Copyright IBM Corp. 2001, 2012 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) @@ -43,8 +41,8 @@ #define CEX3A_CLEANUP_TIME CEX2A_CLEANUP_TIME MODULE_AUTHOR("IBM Corporation"); -MODULE_DESCRIPTION("CEX2A Cryptographic Coprocessor device driver, " \ - "Copyright IBM Corp. 
2001, 2012"); +MODULE_DESCRIPTION("CEX2A/CEX3A Cryptographic Coprocessor device driver, " \ + "Copyright IBM Corp. 2001, 2018"); MODULE_LICENSE("GPL"); static struct ap_device_id zcrypt_cex2a_card_ids[] = { diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h index 66d58bc87c66..7842214d9d09 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.h +++ b/drivers/s390/crypto/zcrypt_cex2a.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * zcrypt 2.1.0 - * * Copyright IBM Corp. 2001, 2006 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) @@ -14,7 +12,7 @@ #define _ZCRYPT_CEX2A_H_ /** - * The type 50 message family is associated with a CEX2A card. + * The type 50 message family is associated with CEXxA cards. * * The four members of the family are described below. * @@ -111,7 +109,7 @@ struct type50_crb3_msg { } __packed; /** - * The type 80 response family is associated with a CEX2A card. + * The type 80 response family is associated with a CEXxA cards. * * Note that all unsigned char arrays are right-justified and left-padded * with zeroes. diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_cex2c.c similarity index 62% rename from drivers/s390/crypto/zcrypt_pcixcc.c rename to drivers/s390/crypto/zcrypt_cex2c.c index 94d9f7224aea..546f67676734 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_cex2c.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * zcrypt 2.1.0 - * - * Copyright IBM Corp. 2001, 2012 + * Copyright IBM Corp. 2001, 2018 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) * @@ -25,39 +23,22 @@ #include "zcrypt_api.h" #include "zcrypt_error.h" #include "zcrypt_msgtype6.h" -#include "zcrypt_pcixcc.h" +#include "zcrypt_cex2c.h" #include "zcrypt_cca_key.h" -#define PCIXCC_MIN_MOD_SIZE 16 /* 128 bits */ -#define PCIXCC_MIN_MOD_SIZE_OLD 64 /* 512 bits */ -#define PCIXCC_MAX_MOD_SIZE 256 /* 2048 bits */ -#define CEX3C_MIN_MOD_SIZE PCIXCC_MIN_MOD_SIZE +#define CEX2C_MIN_MOD_SIZE 16 /* 128 bits */ +#define CEX2C_MAX_MOD_SIZE 256 /* 2048 bits */ +#define CEX3C_MIN_MOD_SIZE 16 /* 128 bits */ #define CEX3C_MAX_MOD_SIZE 512 /* 4096 bits */ - -#define PCIXCC_MAX_ICA_MESSAGE_SIZE 0x77c /* max size type6 v2 crt message */ -#define PCIXCC_MAX_ICA_RESPONSE_SIZE 0x77c /* max size type86 v2 reply */ - -#define PCIXCC_MAX_XCRB_MESSAGE_SIZE (12*1024) - -#define PCIXCC_CLEANUP_TIME (15*HZ) - -#define CEIL4(x) ((((x)+3)/4)*4) - -struct response_type { - struct completion work; - int type; -}; -#define PCIXCC_RESPONSE_TYPE_ICA 0 -#define PCIXCC_RESPONSE_TYPE_XCRB 1 +#define CEX2C_MAX_XCRB_MESSAGE_SIZE (12*1024) +#define CEX2C_CLEANUP_TIME (15*HZ) MODULE_AUTHOR("IBM Corporation"); -MODULE_DESCRIPTION("PCIXCC Cryptographic Coprocessor device driver, " \ - "Copyright IBM Corp. 2001, 2012"); +MODULE_DESCRIPTION("CEX2C/CEX3C Cryptographic Coprocessor device driver, " \ + "Copyright IBM Corp. 
2001, 2018"); MODULE_LICENSE("GPL"); -static struct ap_device_id zcrypt_pcixcc_card_ids[] = { - { .dev_type = AP_DEVICE_TYPE_PCIXCC, - .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, +static struct ap_device_id zcrypt_cex2c_card_ids[] = { { .dev_type = AP_DEVICE_TYPE_CEX2C, .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, { .dev_type = AP_DEVICE_TYPE_CEX3C, @@ -65,11 +46,9 @@ static struct ap_device_id zcrypt_pcixcc_card_ids[] = { { /* end of list */ }, }; -MODULE_DEVICE_TABLE(ap, zcrypt_pcixcc_card_ids); +MODULE_DEVICE_TABLE(ap, zcrypt_cex2c_card_ids); -static struct ap_device_id zcrypt_pcixcc_queue_ids[] = { - { .dev_type = AP_DEVICE_TYPE_PCIXCC, - .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, +static struct ap_device_id zcrypt_cex2c_queue_ids[] = { { .dev_type = AP_DEVICE_TYPE_CEX2C, .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { .dev_type = AP_DEVICE_TYPE_CEX3C, @@ -77,16 +56,16 @@ static struct ap_device_id zcrypt_pcixcc_queue_ids[] = { { /* end of list */ }, }; -MODULE_DEVICE_TABLE(ap, zcrypt_pcixcc_queue_ids); +MODULE_DEVICE_TABLE(ap, zcrypt_cex2c_queue_ids); /** - * Large random number detection function. Its sends a message to a pcixcc + * Large random number detection function. Its sends a message to a CEX2C/CEX3C * card to find out if large random numbers are supported. * @ap_dev: pointer to the AP device. * * Returns 1 if large random numbers are supported, 0 if not and < 0 on error. */ -static int zcrypt_pcixcc_rng_supported(struct ap_queue *aq) +static int zcrypt_cex2c_rng_supported(struct ap_queue *aq) { struct ap_message ap_msg; unsigned long long psmid; @@ -147,13 +126,11 @@ out_free: } /** - * Probe function for PCIXCC/CEX2C card devices. It always accepts the - * AP device since the bus_match already checked the hardware type. The - * PCIXCC cards come in two flavours: micro code level 2 and micro code - * level 3. This is checked by sending a test message to the device. + * Probe function for CEX2C/CEX3C card devices. It always accepts the + * AP device since the bus_match already checked the hardware type. * @ap_dev: pointer to the AP card device. */ -static int zcrypt_pcixcc_card_probe(struct ap_device *ap_dev) +static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev) { /* * Normalized speed ratings per crypto adapter @@ -179,9 +156,9 @@ static int zcrypt_pcixcc_card_probe(struct ap_device *ap_dev) zc->type_string = "CEX2C"; memcpy(zc->speed_rating, CEX2C_SPEED_IDX, sizeof(CEX2C_SPEED_IDX)); - zc->min_mod_size = PCIXCC_MIN_MOD_SIZE; - zc->max_mod_size = PCIXCC_MAX_MOD_SIZE; - zc->max_exp_bit_length = PCIXCC_MAX_MOD_SIZE; + zc->min_mod_size = CEX2C_MIN_MOD_SIZE; + zc->max_mod_size = CEX2C_MAX_MOD_SIZE; + zc->max_exp_bit_length = CEX2C_MAX_MOD_SIZE; break; case AP_DEVICE_TYPE_CEX3C: zc->user_space_type = ZCRYPT_CEX3C; @@ -208,10 +185,10 @@ static int zcrypt_pcixcc_card_probe(struct ap_device *ap_dev) } /** - * This is called to remove the PCIXCC/CEX2C card driver information + * This is called to remove the CEX2C/CEX3C card driver information * if an AP card device is removed. 
*/ -static void zcrypt_pcixcc_card_remove(struct ap_device *ap_dev) +static void zcrypt_cex2c_card_remove(struct ap_device *ap_dev) { struct zcrypt_card *zc = to_ap_card(&ap_dev->device)->private; @@ -219,33 +196,31 @@ static void zcrypt_pcixcc_card_remove(struct ap_device *ap_dev) zcrypt_card_unregister(zc); } -static struct ap_driver zcrypt_pcixcc_card_driver = { - .probe = zcrypt_pcixcc_card_probe, - .remove = zcrypt_pcixcc_card_remove, - .ids = zcrypt_pcixcc_card_ids, +static struct ap_driver zcrypt_cex2c_card_driver = { + .probe = zcrypt_cex2c_card_probe, + .remove = zcrypt_cex2c_card_remove, + .ids = zcrypt_cex2c_card_ids, .flags = AP_DRIVER_FLAG_DEFAULT, }; /** - * Probe function for PCIXCC/CEX2C queue devices. It always accepts the - * AP device since the bus_match already checked the hardware type. The - * PCIXCC cards come in two flavours: micro code level 2 and micro code - * level 3. This is checked by sending a test message to the device. + * Probe function for CEX2C/CEX3C queue devices. It always accepts the + * AP device since the bus_match already checked the hardware type. * @ap_dev: pointer to the AP card device. */ -static int zcrypt_pcixcc_queue_probe(struct ap_device *ap_dev) +static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev) { struct ap_queue *aq = to_ap_queue(&ap_dev->device); struct zcrypt_queue *zq; int rc; - zq = zcrypt_queue_alloc(PCIXCC_MAX_XCRB_MESSAGE_SIZE); + zq = zcrypt_queue_alloc(CEX2C_MAX_XCRB_MESSAGE_SIZE); if (!zq) return -ENOMEM; zq->queue = aq; zq->online = 1; atomic_set(&zq->load, 0); - rc = zcrypt_pcixcc_rng_supported(aq); + rc = zcrypt_cex2c_rng_supported(aq); if (rc < 0) { zcrypt_queue_free(zq); return rc; @@ -257,7 +232,7 @@ static int zcrypt_pcixcc_queue_probe(struct ap_device *ap_dev) zq->ops = zcrypt_msgtype(MSGTYPE06_NAME, MSGTYPE06_VARIANT_NORNG); ap_queue_init_reply(aq, &zq->reply); - aq->request_timeout = PCIXCC_CLEANUP_TIME, + aq->request_timeout = CEX2C_CLEANUP_TIME; aq->private = zq; rc = zcrypt_queue_register(zq); if (rc) { @@ -268,10 +243,10 @@ static int zcrypt_pcixcc_queue_probe(struct ap_device *ap_dev) } /** - * This is called to remove the PCIXCC/CEX2C queue driver information + * This is called to remove the CEX2C/CEX3C queue driver information * if an AP queue device is removed. 
*/ -static void zcrypt_pcixcc_queue_remove(struct ap_device *ap_dev) +static void zcrypt_cex2c_queue_remove(struct ap_device *ap_dev) { struct ap_queue *aq = to_ap_queue(&ap_dev->device); struct zcrypt_queue *zq = aq->private; @@ -281,37 +256,37 @@ static void zcrypt_pcixcc_queue_remove(struct ap_device *ap_dev) zcrypt_queue_unregister(zq); } -static struct ap_driver zcrypt_pcixcc_queue_driver = { - .probe = zcrypt_pcixcc_queue_probe, - .remove = zcrypt_pcixcc_queue_remove, +static struct ap_driver zcrypt_cex2c_queue_driver = { + .probe = zcrypt_cex2c_queue_probe, + .remove = zcrypt_cex2c_queue_remove, .suspend = ap_queue_suspend, .resume = ap_queue_resume, - .ids = zcrypt_pcixcc_queue_ids, + .ids = zcrypt_cex2c_queue_ids, .flags = AP_DRIVER_FLAG_DEFAULT, }; -int __init zcrypt_pcixcc_init(void) +int __init zcrypt_cex2c_init(void) { int rc; - rc = ap_driver_register(&zcrypt_pcixcc_card_driver, - THIS_MODULE, "pcixcccard"); + rc = ap_driver_register(&zcrypt_cex2c_card_driver, + THIS_MODULE, "cex2card"); if (rc) return rc; - rc = ap_driver_register(&zcrypt_pcixcc_queue_driver, - THIS_MODULE, "pcixccqueue"); + rc = ap_driver_register(&zcrypt_cex2c_queue_driver, + THIS_MODULE, "cex2cqueue"); if (rc) - ap_driver_unregister(&zcrypt_pcixcc_card_driver); + ap_driver_unregister(&zcrypt_cex2c_card_driver); return rc; } -void zcrypt_pcixcc_exit(void) +void zcrypt_cex2c_exit(void) { - ap_driver_unregister(&zcrypt_pcixcc_queue_driver); - ap_driver_unregister(&zcrypt_pcixcc_card_driver); + ap_driver_unregister(&zcrypt_cex2c_queue_driver); + ap_driver_unregister(&zcrypt_cex2c_card_driver); } -module_init(zcrypt_pcixcc_init); -module_exit(zcrypt_pcixcc_exit); +module_init(zcrypt_cex2c_init); +module_exit(zcrypt_cex2c_exit); diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_cex2c.h similarity index 63% rename from drivers/s390/crypto/zcrypt_pcixcc.h rename to drivers/s390/crypto/zcrypt_cex2c.h index cf73a0f91e9c..6ec405c2bec2 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.h +++ b/drivers/s390/crypto/zcrypt_cex2c.h @@ -1,8 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * zcrypt 2.1.0 - * - * Copyright IBM Corp. 2001, 2012 + * Copyright IBM Corp. 2001, 2018 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) * @@ -11,10 +9,10 @@ * MSGTYPE restruct: Holger Dengler */ -#ifndef _ZCRYPT_PCIXCC_H_ -#define _ZCRYPT_PCIXCC_H_ +#ifndef _ZCRYPT_CEX2C_H_ +#define _ZCRYPT_CEX2C_H_ -int zcrypt_pcixcc_init(void); -void zcrypt_pcixcc_exit(void); +int zcrypt_cex2c_init(void); +void zcrypt_cex2c_exit(void); -#endif /* _ZCRYPT_PCIXCC_H_ */ +#endif /* _ZCRYPT_CEX2C_H_ */ diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index 35d58dbbc4da..f9d4c6c7521d 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -37,8 +37,8 @@ #define CEX4_CLEANUP_TIME (900*HZ) MODULE_AUTHOR("IBM Corporation"); -MODULE_DESCRIPTION("CEX4 Cryptographic Card device driver, " \ - "Copyright IBM Corp. 2012"); +MODULE_DESCRIPTION("CEX4/CEX5/CEX6 Cryptographic Card device driver, " \ + "Copyright IBM Corp. 2018"); MODULE_LICENSE("GPL"); static struct ap_device_id zcrypt_cex4_card_ids[] = { @@ -66,8 +66,9 @@ static struct ap_device_id zcrypt_cex4_queue_ids[] = { MODULE_DEVICE_TABLE(ap, zcrypt_cex4_queue_ids); /** - * Probe function for CEX4 card device. It always accepts the AP device - * since the bus_match already checked the hardware type. + * Probe function for CEX4/CEX5/CEX6 card device. 
It always + * accepts the AP device since the bus_match already checked + * the hardware type. * @ap_dev: pointer to the AP device. */ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) @@ -199,7 +200,7 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) } /** - * This is called to remove the CEX4 card driver information + * This is called to remove the CEX4/CEX5/CEX6 card driver information * if an AP card device is removed. */ static void zcrypt_cex4_card_remove(struct ap_device *ap_dev) @@ -218,8 +219,9 @@ static struct ap_driver zcrypt_cex4_card_driver = { }; /** - * Probe function for CEX4 queue device. It always accepts the AP device - * since the bus_match already checked the hardware type. + * Probe function for CEX4/CEX5/CEX6 queue device. It always + * accepts the AP device since the bus_match already checked + * the hardware type. * @ap_dev: pointer to the AP device. */ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev) @@ -265,8 +267,8 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev) } /** - * This is called to remove the CEX4 queue driver information - * if an AP queue device is removed. + * This is called to remove the CEX4/CEX5/CEX6 queue driver + * information if an AP queue device is removed. */ static void zcrypt_cex4_queue_remove(struct ap_device *ap_dev) { diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 6f7ebc1dbe10..663dbdaa85b3 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * zcrypt 2.1.0 - * * Copyright IBM Corp. 2001, 2006 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index f159662c907b..fc4295b3d801 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * zcrypt 2.1.0 - * * Copyright IBM Corp. 2001, 2012 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) @@ -27,13 +25,13 @@ #include "zcrypt_error.h" #include "zcrypt_msgtype50.h" -/* 4096 bits */ +/* >= CEX3A: 4096 bits */ #define CEX3A_MAX_MOD_SIZE 512 -/* max outputdatalength + type80_hdr */ +/* CEX2A: max outputdatalength + type80_hdr */ #define CEX2A_MAX_RESPONSE_SIZE 0x110 -/* 512 bit modulus, (max outputdatalength) + type80_hdr */ +/* >= CEX3A: 512 bit modulus, (max outputdatalength) + type80_hdr */ #define CEX3A_MAX_RESPONSE_SIZE 0x210 MODULE_AUTHOR("IBM Corporation"); @@ -42,7 +40,7 @@ MODULE_DESCRIPTION("Cryptographic Accelerator (message type 50), " \ MODULE_LICENSE("GPL"); /** - * The type 50 message family is associated with a CEX2A card. + * The type 50 message family is associated with a CEXxA cards. * * The four members of the family are described below. * @@ -139,7 +137,7 @@ struct type50_crb3_msg { } __packed; /** - * The type 80 response family is associated with a CEX2A card. + * The type 80 response family is associated with a CEXxA cards. * * Note that all unsigned char arrays are right-justified and left-padded * with zeroes. @@ -273,7 +271,7 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq, /* * CEX2A and CEX3A w/o FW update can handle requests up to * 256 byte modulus (2k keys). - * CEX3A with FW update and CEX4A cards are able to handle + * CEX3A with FW update and newer CEXxA cards are able to handle * 512 byte modulus (4k keys). 
*/ if (mod_len <= 128) { /* up to 1024 bit key size */ @@ -356,7 +354,7 @@ static int convert_type80(struct zcrypt_queue *zq, unsigned char *data; if (t80h->len < sizeof(*t80h) + outputdatalength) { - /* The result is too short, the CEX2A card may not do that.. */ + /* The result is too short, the CEXxA card may not do that.. */ zq->online = 0; pr_err("Cryptographic device %02x.%04x failed and was set offline\n", AP_QID_CARD(zq->queue->qid), @@ -447,10 +445,10 @@ out: static atomic_t zcrypt_step = ATOMIC_INIT(0); /** - * The request distributor calls this function if it picked the CEX2A + * The request distributor calls this function if it picked the CEXxA * device to handle a modexpo request. * @zq: pointer to zcrypt_queue structure that identifies the - * CEX2A device to the request distributor + * CEXxA device to the request distributor * @mex: pointer to the modexpo request buffer */ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, @@ -493,10 +491,10 @@ out_free: } /** - * The request distributor calls this function if it picked the CEX2A + * The request distributor calls this function if it picked the CEXxA * device to handle a modexpo_crt request. * @zq: pointer to zcrypt_queue structure that identifies the - * CEX2A device to the request distributor + * CEXxA device to the request distributor * @crt: pointer to the modexpoc_crt request buffer */ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h index 8530f652ea4f..66bec4f45c56 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.h +++ b/drivers/s390/crypto/zcrypt_msgtype50.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * zcrypt 2.1.0 - * * Copyright IBM Corp. 2001, 2012 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 9ac960d83c28..0cbcc238ef98 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * zcrypt 2.1.0 - * * Copyright IBM Corp. 2001, 2012 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) @@ -29,8 +27,7 @@ #include "zcrypt_msgtype6.h" #include "zcrypt_cca_key.h" -#define PCIXCC_MIN_MOD_SIZE_OLD 64 /* 512 bits */ -#define PCIXCC_MAX_ICA_RESPONSE_SIZE 0x77c /* max size type86 v2 reply */ +#define CEXXC_MAX_ICA_RESPONSE_SIZE 0x77c /* max size type86 v2 reply */ #define CEIL4(x) ((((x)+3)/4)*4) @@ -38,9 +35,9 @@ struct response_type { struct completion work; int type; }; -#define PCIXCC_RESPONSE_TYPE_ICA 0 -#define PCIXCC_RESPONSE_TYPE_XCRB 1 -#define PCIXCC_RESPONSE_TYPE_EP11 2 +#define CEXXC_RESPONSE_TYPE_ICA 0 +#define CEXXC_RESPONSE_TYPE_XCRB 1 +#define CEXXC_RESPONSE_TYPE_EP11 2 MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("Cryptographic Coprocessor (message type 6), " \ @@ -111,7 +108,7 @@ struct function_and_rules_block { } __packed; /** - * The following is used to initialize the CPRBX passed to the PCIXCC/CEX2C + * The following is used to initialize the CPRBX passed to the CEXxC/CEXxP * card in a type6 message. The 3 fields that must be filled in at execution * time are req_parml, rpl_parml and usage_domain. 
* Everything about this interface is ascii/big-endian, since the @@ -294,7 +291,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, /* message header, cprbx and f&r */ msg->hdr = static_type6_hdrX; msg->hdr.ToCardLen1 = size - sizeof(msg->hdr); - msg->hdr.FromCardLen1 = PCIXCC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr); + msg->hdr.FromCardLen1 = CEXXC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr); msg->cprbx = static_cprbx; msg->cprbx.domain = AP_QID_QUEUE(zq->queue->qid); @@ -364,7 +361,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq, /* message header, cprbx and f&r */ msg->hdr = static_type6_hdrX; msg->hdr.ToCardLen1 = size - sizeof(msg->hdr); - msg->hdr.FromCardLen1 = PCIXCC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr); + msg->hdr.FromCardLen1 = CEXXC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr); msg->cprbx = static_cprbx; msg->cprbx.domain = AP_QID_QUEUE(zq->queue->qid); @@ -658,16 +655,6 @@ static int convert_type86_ica(struct zcrypt_queue *zq, (int) service_rc, (int) service_rs); return -EINVAL; } - if (service_rc == 8 && service_rs == 783) { - zq->zcard->min_mod_size = - PCIXCC_MIN_MOD_SIZE_OLD; - ZCRYPT_DBF(DBF_DEBUG, - "device=%02x.%04x rc/rs=%d/%d => rc=EAGAIN\n", - AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid), - (int) service_rc, (int) service_rs); - return -EAGAIN; - } zq->online = 0; pr_err("Cryptographic device %02x.%04x failed and was set offline\n", AP_QID_CARD(zq->queue->qid), @@ -697,7 +684,7 @@ static int convert_type86_ica(struct zcrypt_queue *zq, if (pad_len > 0) { if (pad_len < 10) return -EINVAL; - /* 'restore' padding left in the PCICC/PCIXCC card. */ + /* 'restore' padding left in the CEXXC card. */ if (copy_to_user(outputdata, static_pad, pad_len - 1)) return -EFAULT; if (put_user(0, outputdata + pad_len - 1)) @@ -955,13 +942,13 @@ static void zcrypt_msgtype6_receive(struct ap_queue *aq, if (t86r->hdr.type == TYPE86_RSP_CODE && t86r->cprbx.cprb_ver_id == 0x02) { switch (resp_type->type) { - case PCIXCC_RESPONSE_TYPE_ICA: + case CEXXC_RESPONSE_TYPE_ICA: length = sizeof(struct type86x_reply) + t86r->length - 2; - length = min(PCIXCC_MAX_ICA_RESPONSE_SIZE, length); + length = min(CEXXC_MAX_ICA_RESPONSE_SIZE, length); memcpy(msg->message, reply->message, length); break; - case PCIXCC_RESPONSE_TYPE_XCRB: + case CEXXC_RESPONSE_TYPE_XCRB: length = t86r->fmt2.offset2 + t86r->fmt2.count2; length = min(MSGTYPE06_MAX_MSG_SIZE, length); memcpy(msg->message, reply->message, length); @@ -1004,7 +991,7 @@ static void zcrypt_msgtype6_receive_ep11(struct ap_queue *aq, if (t86r->hdr.type == TYPE86_RSP_CODE && t86r->cprbx.cprb_ver_id == 0x04) { switch (resp_type->type) { - case PCIXCC_RESPONSE_TYPE_EP11: + case CEXXC_RESPONSE_TYPE_EP11: length = t86r->fmt2.offset1 + t86r->fmt2.count1; length = min(MSGTYPE06_MAX_MSG_SIZE, length); memcpy(msg->message, reply->message, length); @@ -1022,10 +1009,10 @@ out: static atomic_t zcrypt_step = ATOMIC_INIT(0); /** - * The request distributor calls this function if it picked the PCIXCC/CEX2C + * The request distributor calls this function if it picked the CEXxC * device to handle a modexpo request. 
* @zq: pointer to zcrypt_queue structure that identifies the - * PCIXCC/CEX2C device to the request distributor + * CEXxC device to the request distributor * @mex: pointer to the modexpo request buffer */ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, @@ -1033,7 +1020,7 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, { struct ap_message ap_msg; struct response_type resp_type = { - .type = PCIXCC_RESPONSE_TYPE_ICA, + .type = CEXXC_RESPONSE_TYPE_ICA, }; int rc; @@ -1066,10 +1053,10 @@ out_free: } /** - * The request distributor calls this function if it picked the PCIXCC/CEX2C + * The request distributor calls this function if it picked the CEXxC * device to handle a modexpo_crt request. * @zq: pointer to zcrypt_queue structure that identifies the - * PCIXCC/CEX2C device to the request distributor + * CEXxC device to the request distributor * @crt: pointer to the modexpoc_crt request buffer */ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, @@ -1077,7 +1064,7 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, { struct ap_message ap_msg; struct response_type resp_type = { - .type = PCIXCC_RESPONSE_TYPE_ICA, + .type = CEXXC_RESPONSE_TYPE_ICA, }; int rc; @@ -1122,7 +1109,7 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB, unsigned int *func_code, unsigned short **dom) { struct response_type resp_type = { - .type = PCIXCC_RESPONSE_TYPE_XCRB, + .type = CEXXC_RESPONSE_TYPE_XCRB, }; ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); @@ -1138,10 +1125,10 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB, } /** - * The request distributor calls this function if it picked the PCIXCC/CEX2C + * The request distributor calls this function if it picked the CEXxC * device to handle a send_cprb request. * @zq: pointer to zcrypt_queue structure that identifies the - * PCIXCC/CEX2C device to the request distributor + * CEXxC device to the request distributor * @xcRB: pointer to the send_cprb request buffer */ static long zcrypt_msgtype6_send_cprb(struct zcrypt_queue *zq, @@ -1177,7 +1164,7 @@ unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb, unsigned int *func_code) { struct response_type resp_type = { - .type = PCIXCC_RESPONSE_TYPE_EP11, + .type = CEXXC_RESPONSE_TYPE_EP11, }; ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); @@ -1271,7 +1258,7 @@ unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code, unsigned int *domain) { struct response_type resp_type = { - .type = PCIXCC_RESPONSE_TYPE_XCRB, + .type = CEXXC_RESPONSE_TYPE_XCRB, }; ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); @@ -1291,10 +1278,10 @@ unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code, } /** - * The request distributor calls this function if it picked the PCIXCC/CEX2C + * The request distributor calls this function if it picked the CEXxC * device to generate random data. * @zq: pointer to zcrypt_queue structure that identifies the - * PCIXCC/CEX2C device to the request distributor + * CEXxC device to the request distributor * @buffer: pointer to a memory page to return random data */ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq, @@ -1329,7 +1316,7 @@ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq, } /** - * The crypto operations for a PCIXCC/CEX2C card. + * The crypto operations for a CEXxC card. 
*/ static struct zcrypt_ops zcrypt_msgtype6_norng_ops = { .owner = THIS_MODULE, diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index e4c2f37d7ad9..41a0df5f070f 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * zcrypt 2.1.0 - * * Copyright IBM Corp. 2001, 2012 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) @@ -24,7 +22,7 @@ #define MSGTYPE06_MAX_MSG_SIZE (12*1024) /** - * The type 6 message family is associated with PCICC or PCIXCC cards. + * The type 6 message family is associated with CEXxC/CEXxP cards. * * It contains a message header followed by a CPRB, both of which * are described below. @@ -43,13 +41,8 @@ struct type6_hdr { unsigned int offset2; /* 0x00000000 */ unsigned int offset3; /* 0x00000000 */ unsigned int offset4; /* 0x00000000 */ - unsigned char agent_id[16]; /* PCICC: */ - /* 0x0100 */ - /* 0x4343412d4150504c202020 */ - /* 0x010101 */ - /* PCIXCC: */ - /* 0x4341000000000000 */ - /* 0x0000000000000000 */ + unsigned char agent_id[16]; /* 0x4341000000000000 */ + /* 0x0000000000000000 */ unsigned char rqid[2]; /* rqid. internal to 603 */ unsigned char reserved5[2]; /* 0x0000 */ unsigned char function_code[2]; /* for PKD, 0x5044 (ascii 'PD') */ @@ -65,7 +58,7 @@ struct type6_hdr { } __packed; /** - * The type 86 message family is associated with PCICC and PCIXCC cards. + * The type 86 message family is associated with CEXxC/CEXxP cards. * * It contains a message header followed by a CPRB. The CPRB is * the same as the request CPRB, which is described above. diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 8df82c6ef66e..522c4bc69a08 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * zcrypt 2.1.0 - * * Copyright IBM Corp. 2001, 2012 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) From a17b92e0487f0bfbb1c5d0bdf26c64dddfc31ac4 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 4 Oct 2018 15:37:49 +0200 Subject: [PATCH 66/83] s390/zcrypt: provide apfs failure code on type 86 error reply The apfs field (AP final status) is set on transport protocol failures (reply code 0x90) for type 86 replies. For CCA cprbs this value is copied into the xcrb status field which gives userspace a hint for the failure reason. However, for EP11 cprbs there is no such status field in the xcrb struct. So now regardless of the request type, if a reply type 86 with transport protocol failure is seen, the apfs value is printed as part of the debug message. So the user has a chance to see the apfs value without using a special build kernel. 
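For illustration, a user-space caller can pick up that hint from the xcrb status field, which this patch now clears before the request is sent and logs together with the return code. The sketch below is not part of the patch; it assumes the usual /dev/z90crypt node is already open in "fd" and that <asm/zcrypt.h> provides ZSECSENDCPRB and struct ica_xcRB, and the helper name is made up.

/*
 * Minimal sketch: report the AP final status hint after a failed
 * CPRB send. For CCA CPRBs the driver copies apfs into the status
 * field; it stays 0 unless the firmware reported a failure.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <asm/zcrypt.h>

static int send_cprb(int fd, struct ica_xcRB *xcrb)
{
        int rc = ioctl(fd, ZSECSENDCPRB, xcrb);

        if (rc != 0)
                fprintf(stderr, "CPRB failed, rc=%d status=0x%x\n",
                        rc, xcrb->status);
        return rc;
}

For EP11 CPRBs, which have no such status field, the new debug message remains the only place where the apfs value shows up.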
Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_api.c | 4 +++- drivers/s390/crypto/zcrypt_error.h | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 26f1cd669e90..bb7ed341baaf 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -822,6 +822,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms, trace_s390_zcrypt_req(xcRB, TB_ZSECSENDCPRB); + xcRB->status = 0; ap_init_message(&ap_msg); rc = get_cprb_fc(xcRB, &ap_msg, &func_code, &domain); if (rc) @@ -1321,7 +1322,8 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, rc = _zcrypt_send_cprb(perms, &xcRB); } while (rc == -EAGAIN); if (rc) - ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d\n", rc); + ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d status=0x%x\n", + rc, xcRB.status); if (copy_to_user(uxcRB, &xcRB, sizeof(xcRB))) return -EFAULT; return rc; diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 663dbdaa85b3..240b27f3f5f6 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -14,6 +14,7 @@ #include #include "zcrypt_debug.h" #include "zcrypt_api.h" +#include "zcrypt_msgtype6.h" /** * Reply Messages @@ -112,6 +113,27 @@ static inline int convert_error(struct zcrypt_queue *zq, card, queue, ehdr->reply_code); return -EAGAIN; case REP82_ERROR_TRANSPORT_FAIL: + /* Card or infrastructure failure, disable card */ + atomic_set(&zcrypt_rescan_req, 1); + zq->online = 0; + pr_err("Cryptographic device %02x.%04x failed and was set offline\n", + card, queue); + /* For type 86 response show the apfs value (failure reason) */ + if (ehdr->type == TYPE86_RSP_CODE) { + struct { + struct type86_hdr hdr; + struct type86_fmt2_ext fmt2; + } __packed * head = reply->message; + unsigned int apfs = *((u32 *)head->fmt2.apfs); + + ZCRYPT_DBF(DBF_ERR, + "device=%02x.%04x reply=0x%02x apfs=0x%x => online=0 rc=EAGAIN\n", + card, queue, apfs, ehdr->reply_code); + } else + ZCRYPT_DBF(DBF_ERR, + "device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n", + card, queue, ehdr->reply_code); + return -EAGAIN; case REP82_ERROR_MACHINE_FAILURE: // REP88_ERROR_MODULE_FAILURE // '10' CEX2A /* If a card fails disable it and repeat the request. */ From aa55bf5f022271903fe8a71e08be3e217e1b6363 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 1 Oct 2018 15:29:01 +0200 Subject: [PATCH 67/83] s390/zcrypt: add ap_adapter_mask sysfs attribute This patch provides a new sysfs attribute file /sys/bus/ap/ap_adapter_mask. This read-only attribute refrects the apm field as it is found in the PQAP(QCI) crypto info. 
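For illustration, the new attribute can be consumed from user space like the other AP bus attributes. The sketch below is an assumption-laden helper (name made up); it presumes the conventional AP bit ordering, i.e. the leftmost bit of the 256-bit mask corresponds to adapter 0, and that the kernel prints the mask in lowercase hex as shown in the patch.

/*
 * Minimal sketch: check whether adapter "card" is set in
 * /sys/bus/ap/ap_adapter_mask.
 */
#include <stdio.h>
#include <string.h>

static int ap_adapter_configured(unsigned int card)
{
        unsigned int nibble;
        char buf[80], *line, c;
        FILE *f;

        if (card > 255)
                return -1;
        f = fopen("/sys/bus/ap/ap_adapter_mask", "r");
        if (!f)
                return -1;
        line = fgets(buf, sizeof(buf), f);
        fclose(f);
        if (!line || strncmp(line, "0x", 2) != 0)
                return -1;      /* read failed or "not supported" */
        c = line[2 + card / 4]; /* hex digit that contains this bit */
        nibble = (c >= 'a') ? c - 'a' + 10 : c - '0';
        return (nibble >> (3 - card % 4)) & 1;
}

With that helper, ap_adapter_configured(0x0a) would return 1 when card 0x0a shows up in the QCI information.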
Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 15bca7583bb9..048665e4f13d 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1037,6 +1037,21 @@ static ssize_t ap_usage_domain_mask_show(struct bus_type *bus, char *buf) static BUS_ATTR_RO(ap_usage_domain_mask); +static ssize_t ap_adapter_mask_show(struct bus_type *bus, char *buf) +{ + if (!ap_configuration) /* QCI not supported */ + return snprintf(buf, PAGE_SIZE, "not supported\n"); + + return snprintf(buf, PAGE_SIZE, + "0x%08x%08x%08x%08x%08x%08x%08x%08x\n", + ap_configuration->apm[0], ap_configuration->apm[1], + ap_configuration->apm[2], ap_configuration->apm[3], + ap_configuration->apm[4], ap_configuration->apm[5], + ap_configuration->apm[6], ap_configuration->apm[7]); +} + +static BUS_ATTR_RO(ap_adapter_mask); + static ssize_t ap_interrupts_show(struct bus_type *bus, char *buf) { return snprintf(buf, PAGE_SIZE, "%d\n", @@ -1195,6 +1210,7 @@ static struct bus_attribute *const ap_bus_attrs[] = { &bus_attr_ap_domain, &bus_attr_ap_control_domain_mask, &bus_attr_ap_usage_domain_mask, + &bus_attr_ap_adapter_mask, &bus_attr_config_time, &bus_attr_poll_thread, &bus_attr_ap_interrupts, From a45a5c7d36a53646094c2ba4970777a20ec0ec42 Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Thu, 23 Aug 2018 10:06:26 +0200 Subject: [PATCH 68/83] s390/pkey: Introduce new API for random protected key generation This patch introduces a new ioctl API and in-kernel API to generate a random protected key. The protected key is generated in a way that the effective clear key is never exposed in clear. Both APIs are described in detail in the header files arch/s390/include/asm/pkey.h and arch/s390/include/uapi/asm/pkey.h. Signed-off-by: Ingo Franzki Reviewed-by: Harald Freudenberger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pkey.h | 8 +++++ arch/s390/include/uapi/asm/pkey.h | 10 ++++++ drivers/s390/crypto/pkey_api.c | 55 +++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h index 053117ba7328..c931818b9921 100644 --- a/arch/s390/include/asm/pkey.h +++ b/arch/s390/include/asm/pkey.h @@ -109,4 +109,12 @@ int pkey_verifykey(const struct pkey_seckey *seckey, u16 *pcardnr, u16 *pdomain, u16 *pkeysize, u32 *pattributes); +/* + * In-kernel API: Generate (AES) random protected key. + * @param keytype one of the PKEY_KEYTYPE values + * @param protkey pointer to buffer receiving the protected key + * @return 0 on success, negative errno value on failure + */ +int pkey_genprotkey(__u32 keytype, struct pkey_protkey *protkey); + #endif /* _KAPI_PKEY_H */ diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index 6f84a53c3270..10a7bc7c5fa9 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -129,4 +129,14 @@ struct pkey_verifykey { #define PKEY_VERIFY_ATTR_AES 0x00000001 /* key is an AES key */ #define PKEY_VERIFY_ATTR_OLD_MKVP 0x00000100 /* key has old MKVP value */ +/* + * Generate (AES) random protected key. 
+ */ +struct pkey_genprotk { + __u32 keytype; /* in: key type to generate */ + struct pkey_protkey protkey; /* out: the protected key */ +}; + +#define PKEY_GENPROTK _IOWR(PKEY_IOCTL_MAGIC, 0x08, struct pkey_genprotk) + #endif /* _UAPI_PKEY_H */ diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 1b4001e0285f..29028ccdce5b 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -1051,6 +1052,46 @@ out: } EXPORT_SYMBOL(pkey_verifykey); +/* + * Generate a random protected key + */ +int pkey_genprotkey(__u32 keytype, struct pkey_protkey *protkey) +{ + struct pkey_clrkey clrkey; + int keysize; + int rc; + + switch (keytype) { + case PKEY_KEYTYPE_AES_128: + keysize = 16; + break; + case PKEY_KEYTYPE_AES_192: + keysize = 24; + break; + case PKEY_KEYTYPE_AES_256: + keysize = 32; + break; + default: + DEBUG_ERR("%s unknown/unsupported keytype %d\n", __func__, + keytype); + return -EINVAL; + } + + /* generate a dummy random clear key */ + get_random_bytes(clrkey.clrkey, keysize); + + /* convert it to a dummy protected key */ + rc = pkey_clr2protkey(keytype, &clrkey, protkey); + if (rc) + return rc; + + /* replace the key part of the protected key with random bytes */ + get_random_bytes(protkey->protkey, keysize); + + return 0; +} +EXPORT_SYMBOL(pkey_genprotkey); + /* * File io functions */ @@ -1167,6 +1208,20 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd, return -EFAULT; break; } + case PKEY_GENPROTK: { + struct pkey_genprotk __user *ugp = (void __user *) arg; + struct pkey_genprotk kgp; + + if (copy_from_user(&kgp, ugp, sizeof(kgp))) + return -EFAULT; + rc = pkey_genprotkey(kgp.keytype, &kgp.protkey); + DEBUG_DBG("%s pkey_genprotkey()=%d\n", __func__, rc); + if (rc) + break; + if (copy_to_user(ugp, &kgp, sizeof(kgp))) + return -EFAULT; + break; + } default: /* unknown/unsupported ioctl cmd */ return -ENOTTY; From 0534bde7de19a2e66c2b2bf05fcfd00a7cc849fa Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Thu, 23 Aug 2018 16:28:16 +0200 Subject: [PATCH 69/83] s390/pkey: Define protected key blob format Define a new protected key blob format. Protected key blobs use a type of 0x00, to be distinguished from other CCA key blobs. CCA defines type 0x00 as NULL key blob, but pkey will never use NULL keys anyway, so it is save to reuse this type. Using another so far undefined type value would introduce the risk that sometimes in the future CCA defines this so far unassigned type for a future key blob. Also add defines for the key token types and versions, and use them instead of hard coded hex values. 
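For illustration, the (type, version) header makes it cheap to tell the two supported blob flavours apart. The constants in the sketch below are mirrored from this patch; the enum and the helper name are made up.

/*
 * Minimal sketch: classify a key blob by its header bytes.
 */
#include <stddef.h>
#include <stdint.h>

#define TOKTYPE_NON_CCA         0x00    /* non-CCA key token */
#define TOKTYPE_CCA_INTERNAL    0x01    /* CCA internal key token */
#define TOKVER_PROTECTED_KEY    0x01    /* for TOKTYPE_NON_CCA */
#define TOKVER_CCA_AES          0x04    /* for TOKTYPE_CCA_INTERNAL */

enum blobkind { BLOB_UNKNOWN, BLOB_PROTECTED_KEY, BLOB_CCA_SECURE_KEY };

static enum blobkind classify_keyblob(const uint8_t *blob, size_t len)
{
        if (len < 8)    /* type byte, 3 pad bytes, version byte, 3 pad */
                return BLOB_UNKNOWN;
        if (blob[0] == TOKTYPE_NON_CCA && blob[4] == TOKVER_PROTECTED_KEY)
                return BLOB_PROTECTED_KEY;
        if (blob[0] == TOKTYPE_CCA_INTERNAL && blob[4] == TOKVER_CCA_AES)
                return BLOB_CCA_SECURE_KEY;
        return BLOB_UNKNOWN;
}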
Signed-off-by: Ingo Franzki Reviewed-by: Harald Freudenberger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/pkey_api.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 29028ccdce5b..fa1044f93f0e 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -56,6 +56,16 @@ static void __exit pkey_debug_exit(void) debug_unregister(debug_info); } +/* Key token types */ +#define TOKTYPE_NON_CCA 0x00 /* Non-CCA key token */ +#define TOKTYPE_CCA_INTERNAL 0x01 /* CCA internal key token */ + +/* For TOKTYPE_NON_CCA: */ +#define TOKVER_PROTECTED_KEY 0x01 /* Protected key token */ + +/* For TOKTYPE_CCA_INTERNAL: */ +#define TOKVER_CCA_AES 0x04 /* CCA AES key token */ + /* inside view of a secure key token (only type 0x01 version 0x04) */ struct secaeskeytoken { u8 type; /* 0x01 for internal key token */ @@ -72,6 +82,17 @@ struct secaeskeytoken { u8 tvv[4]; /* token validation value */ } __packed; +/* inside view of a protected key token (only type 0x00 version 0x01) */ +struct protaeskeytoken { + u8 type; /* 0x00 for PAES specific key tokens */ + u8 res0[3]; + u8 version; /* should be 0x01 for protected AES key token */ + u8 res1[3]; + u32 keytype; /* key type, one of the PKEY_KEYTYPE values */ + u32 len; /* bytes actually stored in protkey[] */ + u8 protkey[MAXPROTKEYSIZE]; /* the protected key blob */ +} __packed; + /* * Simple check if the token is a valid CCA secure AES key * token. If keybitsize is given, the bitsize of the key is @@ -81,16 +102,16 @@ static int check_secaeskeytoken(const u8 *token, int keybitsize) { struct secaeskeytoken *t = (struct secaeskeytoken *) token; - if (t->type != 0x01) { + if (t->type != TOKTYPE_CCA_INTERNAL) { DEBUG_ERR( - "%s secure token check failed, type mismatch 0x%02x != 0x01\n", - __func__, (int) t->type); + "%s secure token check failed, type mismatch 0x%02x != 0x%02x\n", + __func__, (int) t->type, TOKTYPE_CCA_INTERNAL); return -EINVAL; } - if (t->version != 0x04) { + if (t->version != TOKVER_CCA_AES) { DEBUG_ERR( - "%s secure token check failed, version mismatch 0x%02x != 0x04\n", - __func__, (int) t->version); + "%s secure token check failed, version mismatch 0x%02x != 0x%02x\n", + __func__, (int) t->version, TOKVER_CCA_AES); return -EINVAL; } if (keybitsize > 0 && t->bitsize != keybitsize) { From d632c0478d64427cfbca999955e02b26986ae09e Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Thu, 23 Aug 2018 16:59:30 +0200 Subject: [PATCH 70/83] s390/pkey: Add sysfs attributes to emit protected key blobs Add binary read-only sysfs attributes for the pkey module that can be used to read random protected keys from. Keys are read from these attributes using a cat-like interface. A typical use case for those keys is to encrypt a swap device using the paes cipher. During processing of /etc/crypttab, the random protected key to encrypt the swap device is read from one of the attributes. The following attributes are added: protkey/aes_128 protkey/aes_192 protkey/aes_256 protkey/aes_128_xts protkey/aes_256_xts Each attribute emits a protected key blob for the corresponding key size and cipher mode. 
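For illustration, a helper invoked during /etc/crypttab processing could fetch such a blob with a single read. The sketch below assumes the usual sysfs location of the pkey misc device; since the attribute rejects partial reads, the full blob (two 80-byte protected key tokens for the XTS case) is requested at once, and the helper name is made up.

/*
 * Minimal sketch: pull one random XTS-AES-256 protected key blob,
 * e.g. while setting up an encrypted swap device.
 */
#include <fcntl.h>
#include <unistd.h>

#define PROTKEYTOKEN_SIZE       80      /* sizeof(struct protaeskeytoken) */

static ssize_t read_xts_protkey(unsigned char *buf /* >= 2 * 80 bytes */)
{
        int fd;
        ssize_t n;

        fd = open("/sys/devices/virtual/misc/pkey/protkey/aes_256_xts",
                  O_RDONLY);
        if (fd < 0)
                return -1;
        n = read(fd, buf, 2 * PROTKEYTOKEN_SIZE);
        close(fd);
        return n;       /* 160 on success, negative on error */
}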
Signed-off-by: Ingo Franzki Reviewed-by: Harald Freudenberger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/pkey_api.c | 127 +++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index fa1044f93f0e..b4d88411b1bd 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -1254,6 +1254,132 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd, /* * Sysfs and file io operations */ + +/* + * Sysfs attribute read function for all protected key binary attributes. + * The implementation can not deal with partial reads, because a new random + * protected key blob is generated with each read. In case of partial reads + * (i.e. off != 0 or count < key blob size) -EINVAL is returned. + */ +static ssize_t pkey_protkey_aes_attr_read(u32 keytype, bool is_xts, char *buf, + loff_t off, size_t count) +{ + struct protaeskeytoken protkeytoken; + struct pkey_protkey protkey; + int rc; + + if (off != 0 || count < sizeof(protkeytoken)) + return -EINVAL; + if (is_xts) + if (count < 2 * sizeof(protkeytoken)) + return -EINVAL; + + memset(&protkeytoken, 0, sizeof(protkeytoken)); + protkeytoken.type = TOKTYPE_NON_CCA; + protkeytoken.version = TOKVER_PROTECTED_KEY; + protkeytoken.keytype = keytype; + + rc = pkey_genprotkey(protkeytoken.keytype, &protkey); + if (rc) + return rc; + + protkeytoken.len = protkey.len; + memcpy(&protkeytoken.protkey, &protkey.protkey, protkey.len); + + memcpy(buf, &protkeytoken, sizeof(protkeytoken)); + + if (is_xts) { + rc = pkey_genprotkey(protkeytoken.keytype, &protkey); + if (rc) + return rc; + + protkeytoken.len = protkey.len; + memcpy(&protkeytoken.protkey, &protkey.protkey, protkey.len); + + memcpy(buf + sizeof(protkeytoken), &protkeytoken, + sizeof(protkeytoken)); + + return 2 * sizeof(protkeytoken); + } + + return sizeof(protkeytoken); +} + +static ssize_t protkey_aes_128_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return pkey_protkey_aes_attr_read(PKEY_KEYTYPE_AES_128, false, buf, + off, count); +} + +static ssize_t protkey_aes_192_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return pkey_protkey_aes_attr_read(PKEY_KEYTYPE_AES_192, false, buf, + off, count); +} + +static ssize_t protkey_aes_256_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return pkey_protkey_aes_attr_read(PKEY_KEYTYPE_AES_256, false, buf, + off, count); +} + +static ssize_t protkey_aes_128_xts_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return pkey_protkey_aes_attr_read(PKEY_KEYTYPE_AES_128, true, buf, + off, count); +} + +static ssize_t protkey_aes_256_xts_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return pkey_protkey_aes_attr_read(PKEY_KEYTYPE_AES_256, true, buf, + off, count); +} + +static BIN_ATTR_RO(protkey_aes_128, sizeof(struct protaeskeytoken)); +static BIN_ATTR_RO(protkey_aes_192, sizeof(struct protaeskeytoken)); +static BIN_ATTR_RO(protkey_aes_256, sizeof(struct protaeskeytoken)); +static BIN_ATTR_RO(protkey_aes_128_xts, 2 * sizeof(struct protaeskeytoken)); +static BIN_ATTR_RO(protkey_aes_256_xts, 2 * sizeof(struct protaeskeytoken)); + 
+static struct bin_attribute *protkey_attrs[] = { + &bin_attr_protkey_aes_128, + &bin_attr_protkey_aes_192, + &bin_attr_protkey_aes_256, + &bin_attr_protkey_aes_128_xts, + &bin_attr_protkey_aes_256_xts, + NULL +}; + +static struct attribute_group protkey_attr_group = { + .name = "protkey", + .bin_attrs = protkey_attrs, +}; + +static const struct attribute_group *pkey_attr_groups[] = { + &protkey_attr_group, + NULL, +}; + static const struct file_operations pkey_fops = { .owner = THIS_MODULE, .open = nonseekable_open, @@ -1266,6 +1392,7 @@ static struct miscdevice pkey_dev = { .minor = MISC_DYNAMIC_MINOR, .mode = 0666, .fops = &pkey_fops, + .groups = pkey_attr_groups, }; /* From af504452d10ece7c6d68bc9f90f478ebecd7ce76 Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Fri, 24 Aug 2018 10:29:43 +0200 Subject: [PATCH 71/83] s390/pkey: Add sysfs attributes to emit secure key blobs Add binary read-only sysfs attributes for the pkey module that can be used to read random ccadata secure keys from. Keys are read from these attributes using a cat-like interface. A typical use case for those keys is to encrypt a swap device using the paes cipher. During processing of /etc/crypttab, the random random ccadata secure key to encrypt the swap device is read from one of the attributes. The following attributes are added: ccadata/aes_128 ccadata/aes_192 ccadata/aes_256 ccadata/aes_128_xts ccadata/aes_256_xts Each attribute emits a secure key blob for the corresponding key size and cipher mode. Signed-off-by: Ingo Franzki Reviewed-by: Harald Freudenberger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/pkey_api.c | 104 +++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index b4d88411b1bd..d0160a18081a 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -1375,8 +1375,112 @@ static struct attribute_group protkey_attr_group = { .bin_attrs = protkey_attrs, }; +/* + * Sysfs attribute read function for all secure key ccadata binary attributes. + * The implementation can not deal with partial reads, because a new random + * protected key blob is generated with each read. In case of partial reads + * (i.e. off != 0 or count < key blob size) -EINVAL is returned. 
+ */ +static ssize_t pkey_ccadata_aes_attr_read(u32 keytype, bool is_xts, char *buf, + loff_t off, size_t count) +{ + int rc; + + if (off != 0 || count < sizeof(struct secaeskeytoken)) + return -EINVAL; + if (is_xts) + if (count < 2 * sizeof(struct secaeskeytoken)) + return -EINVAL; + + rc = pkey_genseckey(-1, -1, keytype, (struct pkey_seckey *)buf); + if (rc) + return rc; + + if (is_xts) { + buf += sizeof(struct pkey_seckey); + rc = pkey_genseckey(-1, -1, keytype, (struct pkey_seckey *)buf); + if (rc) + return rc; + + return 2 * sizeof(struct secaeskeytoken); + } + + return sizeof(struct secaeskeytoken); +} + +static ssize_t ccadata_aes_128_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return pkey_ccadata_aes_attr_read(PKEY_KEYTYPE_AES_128, false, buf, + off, count); +} + +static ssize_t ccadata_aes_192_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return pkey_ccadata_aes_attr_read(PKEY_KEYTYPE_AES_192, false, buf, + off, count); +} + +static ssize_t ccadata_aes_256_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return pkey_ccadata_aes_attr_read(PKEY_KEYTYPE_AES_256, false, buf, + off, count); +} + +static ssize_t ccadata_aes_128_xts_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return pkey_ccadata_aes_attr_read(PKEY_KEYTYPE_AES_128, true, buf, + off, count); +} + +static ssize_t ccadata_aes_256_xts_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return pkey_ccadata_aes_attr_read(PKEY_KEYTYPE_AES_256, true, buf, + off, count); +} + +static BIN_ATTR_RO(ccadata_aes_128, sizeof(struct secaeskeytoken)); +static BIN_ATTR_RO(ccadata_aes_192, sizeof(struct secaeskeytoken)); +static BIN_ATTR_RO(ccadata_aes_256, sizeof(struct secaeskeytoken)); +static BIN_ATTR_RO(ccadata_aes_128_xts, 2 * sizeof(struct secaeskeytoken)); +static BIN_ATTR_RO(ccadata_aes_256_xts, 2 * sizeof(struct secaeskeytoken)); + +static struct bin_attribute *ccadata_attrs[] = { + &bin_attr_ccadata_aes_128, + &bin_attr_ccadata_aes_192, + &bin_attr_ccadata_aes_256, + &bin_attr_ccadata_aes_128_xts, + &bin_attr_ccadata_aes_256_xts, + NULL +}; + +static struct attribute_group ccadata_attr_group = { + .name = "ccadata", + .bin_attrs = ccadata_attrs, +}; + static const struct attribute_group *pkey_attr_groups[] = { &protkey_attr_group, + &ccadata_attr_group, NULL, }; From cb26b9ff7187ea79698f5e872d713f30affcc0a3 Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Thu, 23 Aug 2018 17:49:38 +0200 Subject: [PATCH 72/83] s390/pkey: Introduce new API for random protected key verification Introduce a new ioctl API and in-kernel API to verify if a random protected key is still valid. A protected key is invalid when its wrapping key verification pattern does not match the verification pattern of the LPAR. Each time an LPAR is activated, a new LPAR wrapping key is generated and the wrapping key verification pattern is updated. Both APIs are described in detail in the header files arch/s390/include/asm/pkey.h and arch/s390/include/uapi/asm/pkey.h. 
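For illustration, a user-space key manager could use the new ioctl to decide whether a cached protected key must be regenerated after the LPAR was reactivated. The sketch below assumes the usual /dev/pkey node is already open in "pkeyfd" and relies on the uapi definitions from <asm/pkey.h>; the helper name is made up.

/*
 * Minimal sketch: verify that a protected key is still wrapped with
 * the current LPAR wrapping key.
 */
#include <errno.h>
#include <sys/ioctl.h>
#include <asm/pkey.h>

static int protkey_still_valid(int pkeyfd, const struct pkey_protkey *pk)
{
        struct pkey_verifyprotk vp = { .protkey = *pk };

        if (ioctl(pkeyfd, PKEY_VERIFYPROTK, &vp) == 0)
                return 1;               /* still usable */
        if (errno == EKEYREJECTED)
                return 0;               /* regenerate or re-derive the key */
        return -1;                      /* some other failure */
}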
Signed-off-by: Ingo Franzki Reviewed-by: Harald Freudenberger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pkey.h | 8 ++++ arch/s390/include/uapi/asm/pkey.h | 9 +++++ drivers/s390/crypto/pkey_api.c | 67 ++++++++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h index c931818b9921..2833d6324979 100644 --- a/arch/s390/include/asm/pkey.h +++ b/arch/s390/include/asm/pkey.h @@ -117,4 +117,12 @@ int pkey_verifykey(const struct pkey_seckey *seckey, */ int pkey_genprotkey(__u32 keytype, struct pkey_protkey *protkey); +/* + * In-kernel API: Verify an (AES) protected key. + * @param protkey pointer to buffer containing the protected key to verify + * @return 0 on success, negative errno value on failure. In case the protected + * key is not valid -EKEYREJECTED is returned + */ +int pkey_verifyprotkey(const struct pkey_protkey *protkey); + #endif /* _KAPI_PKEY_H */ diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index 10a7bc7c5fa9..fef08dbd2e8d 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -139,4 +139,13 @@ struct pkey_genprotk { #define PKEY_GENPROTK _IOWR(PKEY_IOCTL_MAGIC, 0x08, struct pkey_genprotk) +/* + * Verify an (AES) protected key. + */ +struct pkey_verifyprotk { + struct pkey_protkey protkey; /* in: the protected key to verify */ +}; + +#define PKEY_VERIFYPROTK _IOW(PKEY_IOCTL_MAGIC, 0x09, struct pkey_verifyprotk) + #endif /* _UAPI_PKEY_H */ diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index d0160a18081a..c592270b906a 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "zcrypt_api.h" @@ -1113,6 +1114,52 @@ int pkey_genprotkey(__u32 keytype, struct pkey_protkey *protkey) } EXPORT_SYMBOL(pkey_genprotkey); +/* + * Verify if a protected key is still valid + */ +int pkey_verifyprotkey(const struct pkey_protkey *protkey) +{ + unsigned long fc; + struct { + u8 iv[AES_BLOCK_SIZE]; + u8 key[MAXPROTKEYSIZE]; + } param; + u8 null_msg[AES_BLOCK_SIZE]; + u8 dest_buf[AES_BLOCK_SIZE]; + unsigned int k; + + switch (protkey->type) { + case PKEY_KEYTYPE_AES_128: + fc = CPACF_KMC_PAES_128; + break; + case PKEY_KEYTYPE_AES_192: + fc = CPACF_KMC_PAES_192; + break; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KMC_PAES_256; + break; + default: + DEBUG_ERR("%s unknown/unsupported keytype %d\n", __func__, + protkey->type); + return -EINVAL; + } + + memset(null_msg, 0, sizeof(null_msg)); + + memset(param.iv, 0, sizeof(param.iv)); + memcpy(param.key, protkey->protkey, sizeof(param.key)); + + k = cpacf_kmc(fc | CPACF_ENCRYPT, ¶m, null_msg, dest_buf, + sizeof(null_msg)); + if (k != sizeof(null_msg)) { + DEBUG_ERR("%s protected key is not valid\n", __func__); + return -EKEYREJECTED; + } + + return 0; +} +EXPORT_SYMBOL(pkey_verifyprotkey); + /* * File io functions */ @@ -1243,6 +1290,16 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd, return -EFAULT; break; } + case PKEY_VERIFYPROTK: { + struct pkey_verifyprotk __user *uvp = (void __user *) arg; + struct pkey_verifyprotk kvp; + + if (copy_from_user(&kvp, uvp, sizeof(kvp))) + return -EFAULT; + rc = pkey_verifyprotkey(&kvp.protkey); + DEBUG_DBG("%s pkey_verifyprotkey()=%d\n", __func__, rc); + break; + } default: /* unknown/unsupported ioctl cmd */ return -ENOTTY; @@ -1504,7 +1561,7 @@ static struct miscdevice 
pkey_dev = { */ static int __init pkey_init(void) { - cpacf_mask_t pckmo_functions; + cpacf_mask_t pckmo_functions, kmc_functions; /* check for pckmo instructions available */ if (!cpacf_query(CPACF_PCKMO, &pckmo_functions)) @@ -1514,6 +1571,14 @@ static int __init pkey_init(void) !cpacf_test_func(&pckmo_functions, CPACF_PCKMO_ENC_AES_256_KEY)) return -EOPNOTSUPP; + /* check for kmc instructions available */ + if (!cpacf_query(CPACF_KMC, &kmc_functions)) + return -EOPNOTSUPP; + if (!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) || + !cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) || + !cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) + return -EOPNOTSUPP; + pkey_debug_init(); return misc_register(&pkey_dev); From fb1136d6580c93af3ec33bf7a5621d980a711f24 Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Fri, 24 Aug 2018 11:29:15 +0200 Subject: [PATCH 73/83] s390/pkey: Introduce new API for transforming key blobs Introduce a new ioctl API and in-kernel API to transform a variable length key blob of any supported type into a protected key. Transforming a secure key blob uses the already existing function pkey_sec2protk(). Transforming a protected key blob also verifies if the protected key is still valid. If not, -ENODEV is returned. Both APIs are described in detail in the header files arch/s390/include/asm/pkey.h and arch/s390/include/uapi/asm/pkey.h. Signed-off-by: Ingo Franzki Reviewed-by: Harald Freudenberger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pkey.h | 10 +++ arch/s390/include/uapi/asm/pkey.h | 15 ++++ drivers/s390/crypto/pkey_api.c | 110 ++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+) diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h index 2833d6324979..9b6e79077866 100644 --- a/arch/s390/include/asm/pkey.h +++ b/arch/s390/include/asm/pkey.h @@ -125,4 +125,14 @@ int pkey_genprotkey(__u32 keytype, struct pkey_protkey *protkey); */ int pkey_verifyprotkey(const struct pkey_protkey *protkey); +/* + * In-kernel API: Transform an key blob (of any type) into a protected key. 
+ * @param key pointer to a buffer containing the key blob + * @param keylen size of the key blob in bytes + * @param protkey pointer to buffer receiving the protected key + * @return 0 on success, negative errno value on failure + */ +int pkey_keyblob2pkey(const __u8 *key, __u32 keylen, + struct pkey_protkey *protkey); + #endif /* _KAPI_PKEY_H */ diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index fef08dbd2e8d..c0e86ce4a00b 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -21,9 +21,13 @@ #define PKEY_IOCTL_MAGIC 'p' #define SECKEYBLOBSIZE 64 /* secure key blob size is always 64 bytes */ +#define PROTKEYBLOBSIZE 80 /* protected key blob size is always 80 bytes */ #define MAXPROTKEYSIZE 64 /* a protected key blob may be up to 64 bytes */ #define MAXCLRKEYSIZE 32 /* a clear key value may be up to 32 bytes */ +#define MINKEYBLOBSIZE SECKEYBLOBSIZE /* Minimum size of a key blob */ +#define MAXKEYBLOBSIZE PROTKEYBLOBSIZE /* Maximum size of a key blob */ + /* defines for the type field within the pkey_protkey struct */ #define PKEY_KEYTYPE_AES_128 1 #define PKEY_KEYTYPE_AES_192 2 @@ -148,4 +152,15 @@ struct pkey_verifyprotk { #define PKEY_VERIFYPROTK _IOW(PKEY_IOCTL_MAGIC, 0x09, struct pkey_verifyprotk) +/* + * Transform an key blob (of any type) into a protected key + */ +struct pkey_kblob2pkey { + __u8 __user *key; /* in: the key blob */ + __u32 keylen; /* in: the key blob length */ + struct pkey_protkey protkey; /* out: the protected key */ +}; + +#define PKEY_KBLOB2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x0A, struct pkey_kblob2pkey) + #endif /* _UAPI_PKEY_H */ diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index c592270b906a..683ff72ae977 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -67,6 +67,14 @@ static void __exit pkey_debug_exit(void) /* For TOKTYPE_CCA_INTERNAL: */ #define TOKVER_CCA_AES 0x04 /* CCA AES key token */ +/* header part of a key token */ +struct keytoken_header { + u8 type; /* one of the TOKTYPE values */ + u8 res0[3]; + u8 version; /* one of the TOKVER values */ + u8 res1[3]; +} __packed; + /* inside view of a secure key token (only type 0x01 version 0x04) */ struct secaeskeytoken { u8 type; /* 0x01 for internal key token */ @@ -1160,6 +1168,80 @@ int pkey_verifyprotkey(const struct pkey_protkey *protkey) } EXPORT_SYMBOL(pkey_verifyprotkey); +/* + * Transform a non-CCA key token into a protected key + */ +static int pkey_nonccatok2pkey(const __u8 *key, __u32 keylen, + struct pkey_protkey *protkey) +{ + struct keytoken_header *hdr = (struct keytoken_header *)key; + struct protaeskeytoken *t; + + switch (hdr->version) { + case TOKVER_PROTECTED_KEY: + if (keylen != sizeof(struct protaeskeytoken)) + return -EINVAL; + + t = (struct protaeskeytoken *)key; + protkey->len = t->len; + protkey->type = t->keytype; + memcpy(protkey->protkey, t->protkey, + sizeof(protkey->protkey)); + + return pkey_verifyprotkey(protkey); + default: + DEBUG_ERR("%s unknown/unsupported non-CCA token version %d\n", + __func__, hdr->version); + return -EINVAL; + } +} + +/* + * Transform a CCA internal key token into a protected key + */ +static int pkey_ccainttok2pkey(const __u8 *key, __u32 keylen, + struct pkey_protkey *protkey) +{ + struct keytoken_header *hdr = (struct keytoken_header *)key; + + switch (hdr->version) { + case TOKVER_CCA_AES: + if (keylen != sizeof(struct secaeskeytoken)) + return -EINVAL; + + return pkey_skey2pkey((struct pkey_seckey *)key, + 
protkey); + default: + DEBUG_ERR("%s unknown/unsupported CCA internal token version %d\n", + __func__, hdr->version); + return -EINVAL; + } +} + +/* + * Transform a key blob (of any type) into a protected key + */ +int pkey_keyblob2pkey(const __u8 *key, __u32 keylen, + struct pkey_protkey *protkey) +{ + struct keytoken_header *hdr = (struct keytoken_header *)key; + + if (keylen < sizeof(struct keytoken_header)) + return -EINVAL; + + switch (hdr->type) { + case TOKTYPE_NON_CCA: + return pkey_nonccatok2pkey(key, keylen, protkey); + case TOKTYPE_CCA_INTERNAL: + return pkey_ccainttok2pkey(key, keylen, protkey); + default: + DEBUG_ERR("%s unknown/unsupported blob type %d\n", __func__, + hdr->type); + return -EINVAL; + } +} +EXPORT_SYMBOL(pkey_keyblob2pkey); + /* * File io functions */ @@ -1300,6 +1382,34 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd, DEBUG_DBG("%s pkey_verifyprotkey()=%d\n", __func__, rc); break; } + case PKEY_KBLOB2PROTK: { + struct pkey_kblob2pkey __user *utp = (void __user *) arg; + struct pkey_kblob2pkey ktp; + __u8 __user *ukey; + __u8 *kkey; + + if (copy_from_user(&ktp, utp, sizeof(ktp))) + return -EFAULT; + if (ktp.keylen < MINKEYBLOBSIZE || + ktp.keylen > MAXKEYBLOBSIZE) + return -EINVAL; + ukey = ktp.key; + kkey = kmalloc(ktp.keylen, GFP_KERNEL); + if (kkey == NULL) + return -ENOMEM; + if (copy_from_user(kkey, ukey, ktp.keylen)) { + kfree(kkey); + return -EFAULT; + } + rc = pkey_keyblob2pkey(kkey, ktp.keylen, &ktp.protkey); + DEBUG_DBG("%s pkey_keyblob2pkey()=%d\n", __func__, rc); + kfree(kkey); + if (rc) + break; + if (copy_to_user(utp, &ktp, sizeof(ktp))) + return -EFAULT; + break; + } default: /* unknown/unsupported ioctl cmd */ return -ENOTTY; From 52a34b34d4ff9a61bc6da9740541d8f08a40438c Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Mon, 27 Aug 2018 10:40:10 +0200 Subject: [PATCH 74/83] s390/crypto: Enhance paes cipher to accept variable length key material Enhance the paes_s390 kernel module to allow the paes cipher to accept variable length key material. The key material accepted by the paes cipher is a key blob of various types. As of today, two key blob types are supported: CCA secure key blobs and protected key blobs. 
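For illustration, the same setkey path now takes either blob flavour: a 64-byte CCA secure key or an 80-byte protected key token is simply passed through with its length. The sketch below hands a blob to cbc(paes) via AF_ALG; the helper name is made up, error handling is reduced to the bare minimum, and the SOL_ALG fallback define is only needed on older header sets.

/*
 * Minimal sketch: bind a paes transform and set a key blob of either
 * supported type; returns the operation fd on success.
 */
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef SOL_ALG
#define SOL_ALG 279
#endif

static int paes_cbc_handle(const unsigned char *blob, size_t bloblen)
{
        struct sockaddr_alg sa = {
                .salg_family = AF_ALG,
                .salg_type   = "skcipher",
                .salg_name   = "cbc(paes)",
        };
        int tfm, op;

        tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
        if (tfm < 0)
                return -1;
        if (bind(tfm, (struct sockaddr *)&sa, sizeof(sa)) < 0 ||
            setsockopt(tfm, SOL_ALG, ALG_SET_KEY, blob, bloblen) < 0) {
                close(tfm);
                return -1;
        }
        op = accept(tfm, NULL, NULL);   /* operation fd, or -1 on error */
        close(tfm);
        return op;
}

A blob read from the pkey sysfs attributes above could be passed straight into this helper, which is exactly the swap-encryption use case the pkey patches describe.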
Signed-off-by: Ingo Franzki Reviewed-by: Harald Freudenberger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/paes_s390.c | 63 +++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index ab9a0ebecc19..e8d9fa54569c 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -30,26 +30,31 @@ static DEFINE_SPINLOCK(ctrblk_lock); static cpacf_mask_t km_functions, kmc_functions, kmctr_functions; +struct key_blob { + __u8 key[MAXKEYBLOBSIZE]; + unsigned int keylen; +}; + struct s390_paes_ctx { - struct pkey_seckey sk; + struct key_blob kb; struct pkey_protkey pk; unsigned long fc; }; struct s390_pxts_ctx { - struct pkey_seckey sk[2]; + struct key_blob kb[2]; struct pkey_protkey pk[2]; unsigned long fc; }; -static inline int __paes_convert_key(struct pkey_seckey *sk, +static inline int __paes_convert_key(struct key_blob *kb, struct pkey_protkey *pk) { int i, ret; /* try three times in case of failure */ for (i = 0; i < 3; i++) { - ret = pkey_skey2pkey(sk, pk); + ret = pkey_keyblob2pkey(kb->key, kb->keylen, pk); if (ret == 0) break; } @@ -61,7 +66,7 @@ static int __paes_set_key(struct s390_paes_ctx *ctx) { unsigned long fc; - if (__paes_convert_key(&ctx->sk, &ctx->pk)) + if (__paes_convert_key(&ctx->kb, &ctx->pk)) return -EINVAL; /* Pick the correct function code based on the protected key type */ @@ -80,10 +85,8 @@ static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, { struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); - if (key_len != SECKEYBLOBSIZE) - return -EINVAL; - - memcpy(ctx->sk.seckey, in_key, SECKEYBLOBSIZE); + memcpy(ctx->kb.key, in_key, key_len); + ctx->kb.keylen = key_len; if (__paes_set_key(ctx)) { tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; @@ -147,8 +150,8 @@ static struct crypto_alg ecb_paes_alg = { .cra_list = LIST_HEAD_INIT(ecb_paes_alg.cra_list), .cra_u = { .blkcipher = { - .min_keysize = SECKEYBLOBSIZE, - .max_keysize = SECKEYBLOBSIZE, + .min_keysize = MINKEYBLOBSIZE, + .max_keysize = MAXKEYBLOBSIZE, .setkey = ecb_paes_set_key, .encrypt = ecb_paes_encrypt, .decrypt = ecb_paes_decrypt, @@ -160,7 +163,7 @@ static int __cbc_paes_set_key(struct s390_paes_ctx *ctx) { unsigned long fc; - if (__paes_convert_key(&ctx->sk, &ctx->pk)) + if (__paes_convert_key(&ctx->kb, &ctx->pk)) return -EINVAL; /* Pick the correct function code based on the protected key type */ @@ -179,7 +182,8 @@ static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, { struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); - memcpy(ctx->sk.seckey, in_key, SECKEYBLOBSIZE); + memcpy(ctx->kb.key, in_key, key_len); + ctx->kb.keylen = key_len; if (__cbc_paes_set_key(ctx)) { tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; @@ -250,8 +254,8 @@ static struct crypto_alg cbc_paes_alg = { .cra_list = LIST_HEAD_INIT(cbc_paes_alg.cra_list), .cra_u = { .blkcipher = { - .min_keysize = SECKEYBLOBSIZE, - .max_keysize = SECKEYBLOBSIZE, + .min_keysize = MINKEYBLOBSIZE, + .max_keysize = MAXKEYBLOBSIZE, .ivsize = AES_BLOCK_SIZE, .setkey = cbc_paes_set_key, .encrypt = cbc_paes_encrypt, @@ -264,8 +268,8 @@ static int __xts_paes_set_key(struct s390_pxts_ctx *ctx) { unsigned long fc; - if (__paes_convert_key(&ctx->sk[0], &ctx->pk[0]) || - __paes_convert_key(&ctx->sk[1], &ctx->pk[1])) + if (__paes_convert_key(&ctx->kb[0], &ctx->pk[0]) || + __paes_convert_key(&ctx->kb[1], &ctx->pk[1])) return -EINVAL; if (ctx->pk[0].type != 
ctx->pk[1].type) @@ -287,10 +291,16 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, { struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm); u8 ckey[2 * AES_MAX_KEY_SIZE]; - unsigned int ckey_len; + unsigned int ckey_len, keytok_len; - memcpy(ctx->sk[0].seckey, in_key, SECKEYBLOBSIZE); - memcpy(ctx->sk[1].seckey, in_key + SECKEYBLOBSIZE, SECKEYBLOBSIZE); + if (key_len % 2) + return -EINVAL; + + keytok_len = key_len / 2; + memcpy(ctx->kb[0].key, in_key, keytok_len); + ctx->kb[0].keylen = keytok_len; + memcpy(ctx->kb[1].key, in_key + keytok_len, keytok_len); + ctx->kb[1].keylen = keytok_len; if (__xts_paes_set_key(ctx)) { tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; @@ -386,8 +396,8 @@ static struct crypto_alg xts_paes_alg = { .cra_list = LIST_HEAD_INIT(xts_paes_alg.cra_list), .cra_u = { .blkcipher = { - .min_keysize = 2 * SECKEYBLOBSIZE, - .max_keysize = 2 * SECKEYBLOBSIZE, + .min_keysize = 2 * MINKEYBLOBSIZE, + .max_keysize = 2 * MAXKEYBLOBSIZE, .ivsize = AES_BLOCK_SIZE, .setkey = xts_paes_set_key, .encrypt = xts_paes_encrypt, @@ -400,7 +410,7 @@ static int __ctr_paes_set_key(struct s390_paes_ctx *ctx) { unsigned long fc; - if (__paes_convert_key(&ctx->sk, &ctx->pk)) + if (__paes_convert_key(&ctx->kb, &ctx->pk)) return -EINVAL; /* Pick the correct function code based on the protected key type */ @@ -420,7 +430,8 @@ static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, { struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); - memcpy(ctx->sk.seckey, in_key, key_len); + memcpy(ctx->kb.key, in_key, key_len); + ctx->kb.keylen = key_len; if (__ctr_paes_set_key(ctx)) { tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; @@ -532,8 +543,8 @@ static struct crypto_alg ctr_paes_alg = { .cra_list = LIST_HEAD_INIT(ctr_paes_alg.cra_list), .cra_u = { .blkcipher = { - .min_keysize = SECKEYBLOBSIZE, - .max_keysize = SECKEYBLOBSIZE, + .min_keysize = MINKEYBLOBSIZE, + .max_keysize = MAXKEYBLOBSIZE, .ivsize = AES_BLOCK_SIZE, .setkey = ctr_paes_set_key, .encrypt = ctr_paes_encrypt, From e494990e7b74c55862b8b19c28ce38628a282cef Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 9 Oct 2018 12:32:56 +0200 Subject: [PATCH 75/83] s390/dumpstack: print psw mask and address again With pointer obfuscation the output of show_registers() became quite useless: Krnl PSW : (____ptrval____) (____ptrval____) (__list_add_valid+0x98/0xa8) In order to print the psw mask and address use %px instead of %p. And the output looks again like this: Krnl PSW : 0404d00180000000 00000000007c0dd0 (__list_add_valid+0x98/0xa8) Reviewed-by: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dumpstack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index d4c88e119e1f..cb7f55bbe06e 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -124,7 +124,7 @@ void show_registers(struct pt_regs *regs) char *mode; mode = user_mode(regs) ? 
"User" : "Krnl"; - printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); + printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); if (!user_mode(regs)) pr_cont(" (%pSR)", (void *)regs->psw.addr); pr_cont("\n"); From c72251ad879056d096d39db21c08cb52e481eb2d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 9 Oct 2018 12:23:43 +0200 Subject: [PATCH 76/83] s390/mem_detect: add missing include Fix this allnoconfig build breakage: arch/s390/boot/mem_detect.c: In function 'tprot': arch/s390/boot/mem_detect.c:122:12: error: 'EFAULT' undeclared (first use in this function) Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/boot/mem_detect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 65ae3c926042..4cb771ba13fa 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include From 5eaf436e0e5b7ce05ef9371e82f87319bc9a5173 Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Wed, 10 Oct 2018 13:44:45 +0200 Subject: [PATCH 77/83] s390/vmalloc: fix VMALLOC_START calculation With the introduction of the module area on top of the vmalloc area, the calculation of VMALLOC_START in setup_memory_end() function hasn't been adjusted. As a result we got vmalloc area 2 Gb (MODULES_LEN) smaller than it should be and the preceding vmemmap area got extra memory instead. The patch fixes this calculation error although there were no visible negative effects. Apart from that, change 'tmp' variable to 'vmemmap' in memory_end calculation for better readability. Reviewed-by: Heiko Carstens Signed-off-by: Mikhail Zaslonko Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4b2039f3e2f4..a2e952b66248 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -551,7 +551,7 @@ static void __init setup_memory_end(void) MODULES_END = vmax; MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; - VMALLOC_START = vmax - vmalloc_size; + VMALLOC_START = VMALLOC_END - vmalloc_size; /* Split remaining virtual space between 1:1 mapping & vmemmap array */ tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); @@ -563,7 +563,7 @@ static void __init setup_memory_end(void) vmemmap = (struct page *) tmp; /* Take care that memory_end is set and <= vmemmap */ - memory_end = min(memory_end ?: max_physmem_end, tmp); + memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap); #ifdef CONFIG_KASAN /* fit in kasan shadow memory region between 1:1 and vmemmap */ memory_end = min(memory_end, KASAN_SHADOW_START); From 9434f5d3bef97c7212ef6d96d7783e3a3a6c1cb5 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 12 Oct 2018 08:35:24 +0200 Subject: [PATCH 78/83] s390/zcrypt: fix broken zcrypt_send_cprb in-kernel api function With the new multi zcrypt device node support there came in a code rework which broke the in-kernel api function zcrypt_send_cprb(). This function is used by the pkey kernel module and as an effect, transforming a secure key into a protected key did not work any more. 
Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index bb7ed341baaf..eb93c2d27d0a 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -893,7 +893,7 @@ out: long zcrypt_send_cprb(struct ica_xcRB *xcRB) { - return _zcrypt_send_cprb(NULL, xcRB); + return _zcrypt_send_cprb(&ap_perms, xcRB); } EXPORT_SYMBOL(zcrypt_send_cprb); From b5130dc2224d1881f24224c0590c6d97f2168d6a Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 2 Oct 2018 10:57:52 +0200 Subject: [PATCH 79/83] s390/sthyi: Fix machine name validity indication When running as a level 3 guest with no host provided sthyi support sclp_ocf_cpc_name_copy() will only return zeroes. Zeroes are not a valid group name, so let's not indicate that the group name field is valid. Also the group name is not dependent on stsi, let's not return based on stsi before setting it. Fixes: 95ca2cb57985 ("KVM: s390: Add sthyi emulation") Signed-off-by: Janosch Frank Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sthyi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 0859cde36f75..888cc2f166db 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -183,17 +183,19 @@ static void fill_hdr(struct sthyi_sctns *sctns) static void fill_stsi_mac(struct sthyi_sctns *sctns, struct sysinfo_1_1_1 *sysinfo) { + sclp_ocf_cpc_name_copy(sctns->mac.infmname); + if (*(u64 *)sctns->mac.infmname != 0) + sctns->mac.infmval1 |= MAC_NAME_VLD; + if (stsi(sysinfo, 1, 1, 1)) return; - sclp_ocf_cpc_name_copy(sctns->mac.infmname); - memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); - sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD; + sctns->mac.infmval1 |= MAC_ID_VLD; } static void fill_stsi_par(struct sthyi_sctns *sctns, From ec0c0bb489727de0d4dca6a00be6970ab8a3b30a Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 15 Oct 2018 14:39:29 +0100 Subject: [PATCH 80/83] s390/perf: Return error when debug_register fails Return an error when the function debug_register() fails allocating the debug handle. Also remove the registered debug handle when the initialization fails later on. 
Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 5c53e977be62..7bf604ff50a1 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -2045,14 +2045,17 @@ static int __init init_cpum_sampling_pmu(void) } sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); - if (!sfdbg) + if (!sfdbg) { pr_err("Registering for s390dbf failed\n"); + return -ENOMEM; + } debug_register_view(sfdbg, &debug_sprintf_view); err = register_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); if (err) { pr_cpumsf_err(RS_INIT_FAILURE_ALRT); + debug_unregister(sfdbg); goto out; } @@ -2061,6 +2064,7 @@ static int __init init_cpum_sampling_pmu(void) pr_cpumsf_err(RS_INIT_FAILURE_PERF); unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); + debug_unregister(sfdbg); goto out; } From 63c19be095d0f8eb8992674991e44b4228bd4179 Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Wed, 17 Oct 2018 17:24:43 +0200 Subject: [PATCH 81/83] s390/pkey: Load pkey kernel module automatically With the recent enhancements of the pkey kernel module, the pkey kernel module should be loaded automatically during system startup, if MSA is available. When used for swap device encryption with random protected keys, pkey must be loaded before /etc/crypttab is processed, otherwise the sysfs attributes to read the key from are not available. Signed-off-by: Ingo Franzki Reviewed-by: Hendrik Brueckner Reviewed-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/pkey_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 683ff72ae977..86a8799475e9 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1704,5 +1705,5 @@ static void __exit pkey_exit(void) pkey_debug_exit(); } -module_init(pkey_init); +module_cpu_feature_match(MSA, pkey_init); module_exit(pkey_exit); From cf3dbe5dacb3a95d497ace9c714306d17cb05b11 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 19 Oct 2018 12:13:58 +0200 Subject: [PATCH 82/83] s390/kasan: support preemptible kernel build When the kernel is built with: CONFIG_PREEMPT=y CONFIG_PREEMPT_COUNT=y "stfle" function used by kasan initialization code makes additional call to preempt_count_add/preempt_count_sub. To avoid removing kasan instrumentation from sched code where those functions leave split stfle function and provide __stfle variant without preemption handling to be used by Kasan. 
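For reference, regular callers keep using the preemption safe wrapper; a minimal, purely hypothetical user looks like the sketch below, while the kasan early code, which runs long before scheduling is possible, calls __stfle() directly as shown in the hunks that follow:

#include <linux/kernel.h>
#include <asm/facility.h>

static u64 fac_list[4];

static void example_query_facilities(void)
{
	/* stfle() itself disables preemption around the lowcore access */
	stfle(fac_list, ARRAY_SIZE(fac_list));
	if (test_facility(8))
		pr_info("facility 8 (EDAT) available\n");
}
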
Reported-by: Benjamin Block Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/facility.h | 9 +++++++-- arch/s390/mm/kasan_init.c | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 99c8ce30b3cd..e78cda94456b 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -64,11 +64,10 @@ static inline int test_facility(unsigned long nr) * @stfle_fac_list: array where facility list can be stored * @size: size of passed in array in double words */ -static inline void stfle(u64 *stfle_fac_list, int size) +static inline void __stfle(u64 *stfle_fac_list, int size) { unsigned long nr; - preempt_disable(); asm volatile( " stfl 0(0)\n" : "=m" (S390_lowcore.stfl_fac_list)); @@ -85,6 +84,12 @@ static inline void stfle(u64 *stfle_fac_list, int size) nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ } memset((char *) stfle_fac_list + nr, 0, size * 8 - nr); +} + +static inline void stfle(u64 *stfle_fac_list, int size) +{ + preempt_disable(); + __stfle(stfle_fac_list, size); preempt_enable(); } diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 55982142b2b5..acb9645b762b 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -222,8 +222,8 @@ static void __init kasan_enable_dat(void) static void __init kasan_early_detect_facilities(void) { - stfle(S390_lowcore.stfle_fac_list, - ARRAY_SIZE(S390_lowcore.stfle_fac_list)); + __stfle(S390_lowcore.stfle_fac_list, + ARRAY_SIZE(S390_lowcore.stfle_fac_list)); if (test_facility(8)) { has_edat = true; __ctl_set_bit(0, 23); From f822ad2c2c03af85a531c5174136b6d5b1abc566 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 19 Oct 2018 10:36:28 +0200 Subject: [PATCH 83/83] s390/pkey: move pckmo subfunction available checks away from module init The init of the pkey module currently fails if the pckmo instruction or the subfunctions are not available. However, customers may restrict their LPAR to switch off exactly these functions and work with secure key only. So it is a valid case to have the pkey module active and use it for secure key to protected key transfer only. This patch moves the pckmo subfunction check from the pkey module init function into the internal function where the pckmo instruction is called. So now only on invocation of the pckmo instruction the check for the required subfunction is done. If not available EOPNOTSUPP is returned to the caller. The check for having the pckmo instruction available is still done during module init. This instruction came in with MSA 3 together with the basic set of kmc instructions needed to work with protected keys. 
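A hypothetical in-kernel caller (not part of the patch; only the two pkey functions are taken from the kapi header, everything else is assumed) now sees the missing subfunction as -EOPNOTSUPP at call time and can keep working with secure keys only:

#include <linux/types.h>
#include <linux/errno.h>
#include <asm/pkey.h>

/*
 * Hypothetical caller that can start from either a clear key or a
 * CCA secure key: fall back to the secure key path when the pckmo
 * subfunction is fenced off in the LPAR profile.
 */
static int get_protected_key(u32 keytype,
			     struct pkey_clrkey *clrkey,
			     struct pkey_seckey *seckey,
			     struct pkey_protkey *protkey)
{
	int rc;

	rc = pkey_clr2protkey(keytype, clrkey, protkey);
	if (rc != -EOPNOTSUPP)
		return rc;
	/* clear key conversion unavailable, use the crypto card path */
	return pkey_skey2pkey(seckey, protkey);
}
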
Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/pkey_api.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 86a8799475e9..2f92bbed4bf6 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -35,6 +35,9 @@ MODULE_DESCRIPTION("s390 protected key interface"); /* Size of vardata block used for some of the cca requests/replies */ #define VARDATASIZE 4096 +/* mask of available pckmo subfunctions, fetched once at module init */ +static cpacf_mask_t pckmo_functions; + /* * debug feature data and functions */ @@ -679,6 +682,16 @@ int pkey_clr2protkey(u32 keytype, return -EINVAL; } + /* + * Check if the needed pckmo subfunction is available. + * These subfunctions can be enabled/disabled by customers + * in the LPAR profile or may even change on the fly. + */ + if (!cpacf_test_func(&pckmo_functions, fc)) { + DEBUG_ERR("%s pckmo functions not available\n", __func__); + return -EOPNOTSUPP; + } + /* prepare param block */ memset(paramblock, 0, sizeof(paramblock)); memcpy(paramblock, clrkey->clrkey, keysize); @@ -1672,15 +1685,16 @@ static struct miscdevice pkey_dev = { */ static int __init pkey_init(void) { - cpacf_mask_t pckmo_functions, kmc_functions; + cpacf_mask_t kmc_functions; - /* check for pckmo instructions available */ + /* + * The pckmo instruction should be available - even if we don't + * actually invoke it. This instruction comes with MSA 3 which + * is also the minimum level for the kmc instructions which + * are able to work with protected keys. + */ if (!cpacf_query(CPACF_PCKMO, &pckmo_functions)) return -EOPNOTSUPP; - if (!cpacf_test_func(&pckmo_functions, CPACF_PCKMO_ENC_AES_128_KEY) || - !cpacf_test_func(&pckmo_functions, CPACF_PCKMO_ENC_AES_192_KEY) || - !cpacf_test_func(&pckmo_functions, CPACF_PCKMO_ENC_AES_256_KEY)) - return -EOPNOTSUPP; /* check for kmc instructions available */ if (!cpacf_query(CPACF_KMC, &kmc_functions))