From d5dcafee5f183e9aedddb147a89cb46ab038f26b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 10 Jun 2016 10:56:44 +0200 Subject: [PATCH 01/48] s390/mm: no local TLB flush for clearing-by-ASCE IDTE The local-clearing control of the IDTE instruction does not have any effect for the clearing-by-ASCE operation. Only the invalidation-and-clearing operation respects the local-clearing bit. Remove __tlb_flush_idte_local and simplify the batched TLB flushing code. Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/tlbflush.h | 44 +++++++------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 1a691ef740cf..490014c48b13 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -26,17 +26,6 @@ static inline void __tlb_flush_idte(unsigned long asce) : : "a" (2048), "a" (asce) : "cc"); } -/* - * Flush TLB entries for a specific ASCE on the local CPU - */ -static inline void __tlb_flush_idte_local(unsigned long asce) -{ - /* Local TLB flush for the mm */ - asm volatile( - " .insn rrf,0xb98e0000,0,%0,%1,1" - : : "a" (2048), "a" (asce) : "cc"); -} - #ifdef CONFIG_SMP void smp_ptlb_all(void); @@ -65,9 +54,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) /* Global TLB flush */ __tlb_flush_global(); /* Reset TLB flush mask */ - if (MACHINE_HAS_TLB_LC) - cpumask_copy(mm_cpumask(mm), - &mm->context.cpu_attach_mask); + cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); } atomic_dec(&mm->context.flush_count); preempt_enable(); @@ -81,19 +68,12 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) { preempt_disable(); atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - __tlb_flush_idte_local(asce); - } else { - if (MACHINE_HAS_IDTE) - __tlb_flush_idte(asce); - else - __tlb_flush_global(); - /* Reset TLB flush mask */ - if (MACHINE_HAS_TLB_LC) - cpumask_copy(mm_cpumask(mm), - &mm->context.cpu_attach_mask); - } + if (MACHINE_HAS_IDTE) + __tlb_flush_idte(asce); + else + __tlb_flush_global(); + /* Reset TLB flush mask */ + cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); atomic_dec(&mm->context.flush_count); preempt_enable(); } @@ -114,18 +94,12 @@ static inline void __tlb_flush_kernel(void) */ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) { - if (MACHINE_HAS_TLB_LC) - __tlb_flush_idte_local(asce); - else - __tlb_flush_local(); + __tlb_flush_local(); } static inline void __tlb_flush_kernel(void) { - if (MACHINE_HAS_TLB_LC) - __tlb_flush_idte_local(init_mm.context.asce); - else - __tlb_flush_local(); + __tlb_flush_local(); } #endif From 44b6cc8130e80e673ba8b3baf8e41891fe484786 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 13 Jun 2016 10:36:00 +0200 Subject: [PATCH 02/48] s390/mm,kvm: flush gmap address space with IDTE The __tlb_flush_mm() helper uses a global flush if the mm struct has a gmap structure attached to it. Replace the global flush with two individual flushes by means of the IDTE instruction if only a single gmap is attached to the mm.
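The new mm->context.gmap_asce field makes this decision cheap: it is kept up to date under mm->context.gmap_lock and encodes three states. A condensed sketch of the reader side (the complete version is __tlb_flush_mm() in the diff below):

	/*
	 * gmap_asce encoding:
	 *   0    - no gmap attached, flushing mm->context.asce suffices
	 *   asce - exactly one gmap attached, flush both ASCEs with IDTE
	 *   -1UL - more than one gmap attached, fall back to a full flush
	 */
	gmap_asce = READ_ONCE(mm->context.gmap_asce);
	if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
		if (gmap_asce)
			__tlb_flush_idte(gmap_asce);
		__tlb_flush_idte(mm->context.asce);
	} else {
		__tlb_flush_full(mm);
	}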
Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 1 + arch/s390/include/asm/mmu_context.h | 1 + arch/s390/include/asm/tlbflush.h | 40 ++++++++++++----------------- arch/s390/mm/gmap.c | 15 +++++++++++ 4 files changed, 34 insertions(+), 23 deletions(-) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 6d39329c894b..bea785d7f853 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -12,6 +12,7 @@ typedef struct { struct list_head pgtable_list; spinlock_t gmap_lock; struct list_head gmap_list; + unsigned long gmap_asce; unsigned long asce; unsigned long asce_limit; unsigned long vdso_base; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index c6a088c91aee..515fea5a3fc4 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -21,6 +21,7 @@ static inline int init_new_context(struct task_struct *tsk, INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.flush_count, 0); + mm->context.gmap_asce = 0; mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste; diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 490014c48b13..39846100682a 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -60,18 +60,25 @@ static inline void __tlb_flush_full(struct mm_struct *mm) preempt_enable(); } -/* - * Flush TLB entries for a specific ASCE on all CPUs. Should never be used - * when more than one asce (e.g. gmap) ran on this mm. - */ -static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) +static inline void __tlb_flush_mm(struct mm_struct *mm) { + unsigned long gmap_asce; + + /* + * If the machine has IDTE we prefer to do a per mm flush + * on all cpus instead of doing a local flush if the mm + * only ran on the local cpu. + */ preempt_disable(); atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_IDTE) - __tlb_flush_idte(asce); - else - __tlb_flush_global(); + gmap_asce = READ_ONCE(mm->context.gmap_asce); + if (MACHINE_HAS_IDTE && gmap_asce != -1UL) { + if (gmap_asce) + __tlb_flush_idte(gmap_asce); + __tlb_flush_idte(mm->context.asce); + } else { + __tlb_flush_full(mm); + } /* Reset TLB flush mask */ cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); atomic_dec(&mm->context.flush_count); @@ -92,7 +99,7 @@ static inline void __tlb_flush_kernel(void) /* * Flush TLB entries for a specific ASCE on all CPUs. */ -static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) +static inline void __tlb_flush_mm(struct mm_struct *mm) { __tlb_flush_local(); } @@ -103,19 +110,6 @@ static inline void __tlb_flush_kernel(void) } #endif -static inline void __tlb_flush_mm(struct mm_struct * mm) -{ - /* - * If the machine has IDTE we prefer to do a per mm flush - * on all cpus instead of doing a local flush if the mm - * only ran on the local cpu. 
- */ - if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) - __tlb_flush_asce(mm, mm->context.asce); - else - __tlb_flush_full(mm); -} - static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) { if (mm->context.flush_mm) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 2ce6bb3bab32..3ba622702ce4 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -94,6 +94,7 @@ out: struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit) { struct gmap *gmap; + unsigned long gmap_asce; gmap = gmap_alloc(limit); if (!gmap) @@ -101,6 +102,11 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit) gmap->mm = mm; spin_lock(&mm->context.gmap_lock); list_add_rcu(&gmap->list, &mm->context.gmap_list); + if (list_is_singular(&mm->context.gmap_list)) + gmap_asce = gmap->asce; + else + gmap_asce = -1UL; + WRITE_ONCE(mm->context.gmap_asce, gmap_asce); spin_unlock(&mm->context.gmap_lock); return gmap; } @@ -230,6 +236,7 @@ EXPORT_SYMBOL_GPL(gmap_put); void gmap_remove(struct gmap *gmap) { struct gmap *sg, *next; + unsigned long gmap_asce; /* Remove all shadow gmaps linked to this gmap */ if (!list_empty(&gmap->children)) { @@ -243,6 +250,14 @@ void gmap_remove(struct gmap *gmap) /* Remove gmap from the pre-mm list */ spin_lock(&gmap->mm->context.gmap_lock); list_del_rcu(&gmap->list); + if (list_empty(&gmap->mm->context.gmap_list)) + gmap_asce = 0; + else if (list_is_singular(&gmap->mm->context.gmap_list)) + gmap_asce = list_first_entry(&gmap->mm->context.gmap_list, + struct gmap, list)->asce; + else + gmap_asce = -1UL; + WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce); spin_unlock(&gmap->mm->context.gmap_lock); synchronize_rcu(); /* Put reference */ From 34eeaf376dbe53849acc3d4edc4efc2ad97ab23e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 14 Jun 2016 12:38:40 +0200 Subject: [PATCH 03/48] s390/mm: merge local / non-local IPTE helper Merge the __ptep_ipte and __ptep_ipte_local functions into a single __ptep_ipte function with an additional parameter. The __ptep_ipte_range function is still extra as the while loop makes it hard to merge.
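Callers now select the flush scope explicitly through the new parameter. As a short illustration, this is the decision pattern used by ptep_flush_direct() in the diff below:

	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_ipte(addr, ptep, IPTE_LOCAL);	/* mm runs on this cpu only */
	else
		__ptep_ipte(addr, ptep, IPTE_GLOBAL);	/* broadcast invalidation */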
Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 32 ++++++++++++++------------------ arch/s390/mm/pageattr.c | 4 ++-- arch/s390/mm/pgtable.c | 8 ++++---- 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 72c7f60bfe83..7ef2306e35f3 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -874,35 +874,31 @@ static inline pte_t pte_mkhuge(pte_t pte) } #endif -static inline void __ptep_ipte(unsigned long address, pte_t *ptep) +#define IPTE_GLOBAL 0 +#define IPTE_LOCAL 1 + +static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local) { unsigned long pto = (unsigned long) ptep; - /* Invalidation + global TLB flush for the pte */ + /* Invalidation + TLB flush for the pte */ asm volatile( - " ipte %2,%3" - : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); + " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" + : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), + [m4] "i" (local)); } -static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep) +static inline void __ptep_ipte_range(unsigned long address, int nr, + pte_t *ptep, int local) { unsigned long pto = (unsigned long) ptep; - /* Invalidation + local TLB flush for the pte */ - asm volatile( - " .insn rrf,0xb2210000,%2,%3,0,1" - : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); -} - -static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep) -{ - unsigned long pto = (unsigned long) ptep; - - /* Invalidate a range of ptes + global TLB flush of the ptes */ + /* Invalidate a range of ptes + TLB flush of the ptes */ do { asm volatile( - " .insn rrf,0xb2210000,%2,%0,%1,0" - : "+a" (address), "+a" (nr) : "a" (pto) : "memory"); + " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]" + : [r2] "+a" (address), [r3] "+a" (nr) + : [r1] "a" (pto), [m4] "i" (local) : "memory"); } while (nr != 255); } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index af7cf28cf97e..44f150312a16 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -309,11 +309,11 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr) int i; if (test_facility(13)) { - __ptep_ipte_range(address, nr - 1, pte); + __ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL); return; } for (i = 0; i < nr; i++) { - __ptep_ipte(address, pte); + __ptep_ipte(address, pte, IPTE_GLOBAL); address += PAGE_SIZE; pte++; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5f092015aaa7..1dc6cad9a5ac 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -35,9 +35,9 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_ipte_local(addr, ptep); + __ptep_ipte(addr, ptep, IPTE_LOCAL); else - __ptep_ipte(addr, ptep); + __ptep_ipte(addr, ptep, IPTE_GLOBAL); atomic_dec(&mm->context.flush_count); return old; } @@ -56,7 +56,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm, pte_val(*ptep) |= _PAGE_INVALID; mm->context.flush_mm = 1; } else - __ptep_ipte(addr, ptep); + __ptep_ipte(addr, ptep, IPTE_GLOBAL); atomic_dec(&mm->context.flush_count); return old; } @@ -620,7 +620,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { pgste = pgste_pte_notify(mm, addr, ptep, pgste); - __ptep_ipte(addr, ptep); + __ptep_ipte(addr, ptep, 
IPTE_GLOBAL); if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) pte_val(pte) |= _PAGE_PROTECT; else From 47e4d851c57840b3b5e440cf2c64c37e99b36a09 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 14 Jun 2016 12:41:35 +0200 Subject: [PATCH 04/48] s390/mm: merge local / non-local IDTE helper Merge the __p[m|u]xdp_idte and __p[m|u]dp_idte_local functions into a single __p[m|u]dp_idte function with an additional parameter. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 46 ++++++++++----------------------- arch/s390/mm/pgtable.c | 10 +++---- 2 files changed, 18 insertions(+), 38 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7ef2306e35f3..0362cd5fa187 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1235,53 +1235,33 @@ static inline void __pmdp_csp(pmd_t *pmdp) pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); } -static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) +#define IDTE_GLOBAL 0 +#define IDTE_LOCAL 1 + +static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local) { unsigned long sto; sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); asm volatile( - " .insn rrf,0xb98e0000,%2,%3,0,0" - : "=m" (*pmdp) - : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK)) + " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + : "+m" (*pmdp) + : [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)), + [m4] "i" (local) : "cc" ); } -static inline void __pudp_idte(unsigned long address, pud_t *pudp) +static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local) { unsigned long r3o; r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); r3o |= _ASCE_TYPE_REGION3; asm volatile( - " .insn rrf,0xb98e0000,%2,%3,0,0" - : "=m" (*pudp) - : "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK)) - : "cc"); -} - -static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) -{ - unsigned long sto; - - sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); - asm volatile( - " .insn rrf,0xb98e0000,%2,%3,0,1" - : "=m" (*pmdp) - : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK)) - : "cc" ); -} - -static inline void __pudp_idte_local(unsigned long address, pud_t *pudp) -{ - unsigned long r3o; - - r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); - r3o |= _ASCE_TYPE_REGION3; - asm volatile( - " .insn rrf,0xb98e0000,%2,%3,0,1" - : "=m" (*pudp) - : "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK)) + " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + : "+m" (*pudp) + : [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)), + [m4] "i" (local) : "cc"); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 1dc6cad9a5ac..7a1897c51c54 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -301,9 +301,9 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pmdp_idte_local(addr, pmdp); + __pmdp_idte(addr, pmdp, IDTE_LOCAL); else - __pmdp_idte(addr, pmdp); + __pmdp_idte(addr, pmdp, IDTE_GLOBAL); atomic_dec(&mm->context.flush_count); return old; } @@ -322,7 +322,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; } else if (MACHINE_HAS_IDTE) - __pmdp_idte(addr, pmdp); + __pmdp_idte(addr, pmdp, IDTE_GLOBAL); else __pmdp_csp(pmdp); atomic_dec(&mm->context.flush_count); @@ -374,9 +374,9 @@ 
static inline pud_t pudp_flush_direct(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pudp_idte_local(addr, pudp); + __pudp_idte(addr, pudp, IDTE_LOCAL); else - __pudp_idte(addr, pudp); + __pudp_idte(addr, pudp, IDTE_GLOBAL); atomic_dec(&mm->context.flush_count); return old; } From 6c29588578edc9ae2c9fae27ff96f443cf39c0f3 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Mon, 8 Aug 2016 04:27:15 +0200 Subject: [PATCH 05/48] s390: cio: remove redundant cio_cancel declaration cio_cancel was declared twice. Remove one of them. Signed-off-by: Dong Jia Shi Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/cio.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 93de0b46b489..f0e57aefb5f2 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -127,7 +127,6 @@ extern int cio_resume (struct subchannel *); extern int cio_halt (struct subchannel *); extern int cio_start (struct subchannel *, struct ccw1 *, __u8); extern int cio_start_key (struct subchannel *, struct ccw1 *, __u8, __u8); -extern int cio_cancel (struct subchannel *); extern int cio_set_options (struct subchannel *, int); extern int cio_update_schib(struct subchannel *sch); extern int cio_commit_config(struct subchannel *sch); From bd3a172557d6685f5fbd4b09713f74b84872e2b9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 18 Jul 2016 14:05:21 +0200 Subject: [PATCH 06/48] s390/pci: add zpci_report_error interface The 'report_error' interface for PCI devices found on s390 can be used by a user space program to inject an adapter error notification. Add a new kernel interface zpci_report_error to allow a PCI device driver to inject these error notifications without a detour over user space. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 4 ++++ arch/s390/pci/pci.c | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 0da91c4d30fd..8769cbf9d7cf 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -11,6 +11,7 @@ #include #include #include +#include #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 @@ -216,6 +217,9 @@ void zpci_debug_init_device(struct zpci_dev *, const char *); void zpci_debug_exit_device(struct zpci_dev *); void zpci_debug_info(struct zpci_dev *, struct seq_file *); +/* Error reporting */ +int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *); + #ifdef CONFIG_NUMA /* Returns the node based on PCI bus */ diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 871af75c69c2..15ffc19c8c0c 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -854,6 +854,15 @@ void zpci_stop_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_stop_device); +int zpci_report_error(struct pci_dev *pdev, + struct zpci_report_error_header *report) +{ + struct zpci_dev *zdev = to_zpci(pdev); + + return sclp_pci_report(report, zdev->fh, zdev->fid); +} +EXPORT_SYMBOL(zpci_report_error); + static inline int barsize(u8 size) { return (size) ? (1 << size) >> 10 : 0; From 2ccb5bf0e2f190c825c69087aa40c89db89ca1ad Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 20 Aug 2016 19:25:34 +0200 Subject: [PATCH 07/48] s390/tape: Use memdup_user() rather than duplicating its implementation Reuse existing functionality from memdup_user() instead of keeping duplicate source code. 
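As a minimal sketch, the change replaces a kmalloc()/copy_from_user() pair plus hand-rolled error unwinding with a single call that allocates, copies, and returns an ERR_PTR() on failure (this is the exact shape of the hunk below):

	ext_kekls = memdup_user((char __user *) arg, sizeof(*ext_kekls));
	if (IS_ERR(ext_kekls))
		return PTR_ERR(ext_kekls);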
This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Martin Schwidefsky --- drivers/s390/char/tape_3590.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index d3d1936057b4..e352047ed9f7 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -312,15 +312,10 @@ static int tape_3592_ioctl_kekl_set(struct tape_device *device, return -ENOSYS; if (!crypt_enabled(device)) return -EUNATCH; - ext_kekls = kmalloc(sizeof(*ext_kekls), GFP_KERNEL); - if (!ext_kekls) - return -ENOMEM; - if (copy_from_user(ext_kekls, (char __user *)arg, sizeof(*ext_kekls))) { - rc = -EFAULT; - goto out; - } + ext_kekls = memdup_user((char __user *)arg, sizeof(*ext_kekls)); + if (IS_ERR(ext_kekls)) + return PTR_ERR(ext_kekls); rc = tape_3592_kekl_set(device, ext_kekls); -out: kfree(ext_kekls); return rc; } From f6c1d359be6bb0aa0715b4b75d9ecf63bdb07c4a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 16 Aug 2016 10:31:10 +0200 Subject: [PATCH 08/48] KVM: s390: generate facility mask from readable list Automatically generate the KVM facility mask out of a readable list. Manually changing the masks is very error prone, especially if the special IBM bit numbering has to be considered. Signed-off-by: Heiko Carstens Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/facilities_src.h | 24 ++++++++++++++++++++++++ arch/s390/kvm/kvm-s390.c | 5 +---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/facilities_src.h b/arch/s390/include/asm/facilities_src.h index 4917728e5828..3b758f66e48b 100644 --- a/arch/s390/include/asm/facilities_src.h +++ b/arch/s390/include/asm/facilities_src.h @@ -55,4 +55,28 @@ static struct facility_def facility_defs[] = { -1 /* END */ } }, + { + .name = "FACILITIES_KVM", + .bits = (int[]){ + 0, /* N3 instructions */ + 1, /* z/Arch mode installed */ + 2, /* z/Arch mode active */ + 3, /* DAT-enhancement */ + 4, /* idte segment table */ + 5, /* idte region table */ + 6, /* ASN-and-LX reuse */ + 7, /* stfle */ + 8, /* enhanced-DAT 1 */ + 9, /* sense-running-status */ + 10, /* conditional sske */ + 13, /* ipte-range */ + 14, /* nonquiescing key-setting */ + 73, /* transactional execution */ + 75, /* access-exception-fetch/store indication */ + 76, /* msa extension 3 */ + 77, /* msa extension 4 */ + 78, /* enhanced-DAT 2 */ + -1 /* END */ + } + }, }; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3f3ae4865d57..4f484e09eeb9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -132,10 +132,7 @@ module_param(nested, int, S_IRUGO); MODULE_PARM_DESC(nested, "Nested virtualization support"); /* upper facilities limit for kvm */ -unsigned long kvm_s390_fac_list_mask[16] = { - 0xffe6000000000000UL, - 0x005e000000000000UL, -}; +unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM }; unsigned long kvm_s390_fac_list_mask_size(void) { From 41ad022039522485456802f09d8eccebc24aac3e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 14 Jul 2016 13:09:57 +0200 Subject: [PATCH 09/48] s390/time: simplify stp time syncs The way we call do_adjtimex() today is broken. It has 0 effect, as ADJ_OFFSET_SINGLESHOT (0x0001) in the kernel maps to !ADJ_ADJTIME (in contrast to user space where it maps to ADJ_OFFSET_SINGLESHOT | ADJ_ADJTIME - 0x8001). 
!ADJ_ADJTIME will silently ignore all adjustments without STA_PLL being active. We could switch to ADJ_ADJTIME or turn STA_PLL on, but still we would run into some problems: - Even when switching to nanoseconds, we lose accuracy. - Successive calls to do_adjtimex() will simply overwrite any leftovers from the previous call (if not fully handled) - Anything that NTP does using the sysctl heavily interferes with our use. - !ADJ_ADJTIME will silently round stuff > or < than 0.5 seconds Reusing do_adjtimex() here just feels wrong. The whole STP synchronization works right now *somehow* only, as do_adjtimex() does nothing and our TOD clock jumps in time, although it shouldn't. This is especially bad as the clock could jump backwards in time. We will have to find another way to fix this up. As leap seconds are also not properly handled yet, let's just get rid of all this complex logic altogether and use the correct clock_delta for fixing up the clock comparator and keeping the sched_clock monotonic. This change should have 0 effect on the current STP mechanism. Once we know how to best handle sync events and leap second updates, we'll start with a fresh implementation. Signed-off-by: David Hildenbrand Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/time.c | 50 ++++------------------------------------- 1 file changed, 4 insertions(+), 46 deletions(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 4e9949800562..c95e98d49b38 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -50,10 +50,6 @@ #include #include "entry.h" -/* change this if you have some constant time drift */ -#define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) -#define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12) - u64 sched_clock_base_cc = -1; /* Force to data section. */ EXPORT_SYMBOL_GPL(sched_clock_base_cc); @@ -318,43 +314,6 @@ void __init time_init(void) vtime_init(); } -/* - * The time is "clock". old is what we think the time is. - * Adjust the value by a multiple of jiffies and add the delta to ntp. - * "delay" is an approximation how long the synchronization took. If - * the time correction is positive, then "delay" is subtracted from - * the time difference and only the remaining part is passed to ntp. - */ -static unsigned long long adjust_time(unsigned long long old, - unsigned long long clock, - unsigned long long delay) -{ - unsigned long long delta, ticks; - struct timex adjust; - - if (clock > old) { - /* It is later than we thought. */ - delta = ticks = clock - old; - delta = ticks = (delta < delay) ? 0 : delta - delay; - delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); - adjust.offset = ticks * (1000000 / HZ); - } else { - /* It is earlier than we thought. 
*/ - delta = ticks = old - clock; - delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); - delta = -delta; - adjust.offset = -ticks * (1000000 / HZ); - } - sched_clock_base_cc += delta; - if (adjust.offset != 0) { - pr_notice("The ETR interface has adjusted the clock " - "by %li microseconds\n", adjust.offset); - adjust.modes = ADJ_OFFSET_SINGLESHOT; - do_adjtimex(&adjust); - } - return delta; -} - static DEFINE_PER_CPU(atomic_t, clock_sync_word); static DEFINE_MUTEX(clock_sync_mutex); static unsigned long clock_sync_flags; @@ -582,7 +541,7 @@ void stp_queue_work(void) static int stp_sync_clock(void *data) { static int first; - unsigned long long old_clock, delta, new_clock, clock_delta; + unsigned long long clock_delta; struct clock_sync_data *stp_sync; struct ptff_qto qto; int rc; @@ -605,18 +564,17 @@ static int stp_sync_clock(void *data) if (stp_info.todoff[0] || stp_info.todoff[1] || stp_info.todoff[2] || stp_info.todoff[3] || stp_info.tmd != 2) { - old_clock = get_tod_clock(); rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta); if (rc == 0) { - new_clock = old_clock + clock_delta; - delta = adjust_time(old_clock, new_clock, 0); + /* fixup the monotonic sched clock */ + sched_clock_base_cc += clock_delta; if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) /* Update LPAR offset */ lpar_offset = qto.tod_epoch_difference; atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &clock_delta); - fixup_clock_comparator(delta); + fixup_clock_comparator(clock_delta); rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); if (rc == 0 && stp_info.tmd != 2) From ca64f63901a054af755994648bf6835047c71cbb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 14 Jul 2016 13:38:06 +0200 Subject: [PATCH 10/48] s390/time: cleanup etr leftovers There are still some etr leftovers and wrong comments, let's clean that up. Signed-off-by: David Hildenbrand Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/time.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index c95e98d49b38..5e7237dc39fc 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -318,10 +318,8 @@ static DEFINE_PER_CPU(atomic_t, clock_sync_word); static DEFINE_MUTEX(clock_sync_mutex); static unsigned long clock_sync_flags; -#define CLOCK_SYNC_HAS_ETR 0 -#define CLOCK_SYNC_HAS_STP 1 -#define CLOCK_SYNC_ETR 2 -#define CLOCK_SYNC_STP 3 +#define CLOCK_SYNC_HAS_STP 0 +#define CLOCK_SYNC_STP 1 /* * The get_clock function for the physical clock. It will get the current @@ -343,34 +341,32 @@ int get_phys_clock(unsigned long long *clock) if (sw0 == sw1 && (sw0 & 0x80000000U)) /* Success: time is in sync. */ return 0; - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) && - !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) return -EOPNOTSUPP; - if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) && - !test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) + if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) return -EACCES; return -EAGAIN; } EXPORT_SYMBOL(get_phys_clock); /* - * Make get_sync_clock return -EAGAIN. + * Make get_phys_clock() return -EAGAIN. */ static void disable_sync_clock(void *dummy) { atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word); /* - * Clear the in-sync bit 2^31. All get_sync_clock calls will + * Clear the in-sync bit 2^31. All get_phys_clock calls will * fail until the sync bit is turned back on. 
In addition * increase the "sequence" counter to avoid the race of an - * etr event and the complete recovery against get_sync_clock. + * stp event and the complete recovery against get_phys_clock. */ atomic_andnot(0x80000000, sw_ptr); atomic_inc(sw_ptr); } /* - * Make get_sync_clock return 0 again. + * Make get_phys_clock() return 0 again. * Needs to be called from a context disabled for preemption. */ static void enable_sync_clock(void) @@ -393,7 +389,7 @@ static inline int check_sync_clock(void) return rc; } -/* Single threaded workqueue used for etr and stp sync events */ +/* Single threaded workqueue used for stp sync events */ static struct workqueue_struct *time_sync_wq; static void __init time_init_wq(void) @@ -407,20 +403,12 @@ struct clock_sync_data { atomic_t cpus; int in_sync; unsigned long long fixup_cc; - int etr_port; - struct etr_aib *etr_aib; }; static void clock_sync_cpu(struct clock_sync_data *sync) { atomic_dec(&sync->cpus); enable_sync_clock(); - /* - * This looks like a busy wait loop but it isn't. etr_sync_cpus - * is called on all other cpus while the TOD clocks is stopped. - * __udelay will stop the cpu on an enabled wait psw until the - * TOD is running again. - */ while (sync->in_sync == 0) { __udelay(1); /* From 0c00b1e00bba9c9046bee4e487ed19360da9ded0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 14 Jul 2016 14:46:56 +0200 Subject: [PATCH 11/48] s390/time: fixup the clock comparator on all cpus By leaving fixup_cc unset, only the clock comparator of the cpu actually doing the sync is fixed up until now. Signed-off-by: David Hildenbrand Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/time.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 5e7237dc39fc..5b1a199667e2 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -562,6 +562,7 @@ static int stp_sync_clock(void *data) lpar_offset = qto.tod_epoch_difference; atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &clock_delta); + stp_sync->fixup_cc = clock_delta; fixup_clock_comparator(clock_delta); rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); From 67f03de5f0ad6b4b0481bb43e4a819d1a441900b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 18 Jul 2016 17:10:17 +0200 Subject: [PATCH 12/48] s390/time: avoid races when updating tb_update_count The increment might not be atomic and we're not holding the timekeeper_lock. Therefore we might lose an update to count, resulting in VDSO being trapped in a loop. As other archs also simply update the values and count doesn't seem to have an impact on reloading of these values in VDSO code, let's just remove the update of tb_update_count. Suggested-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/time.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 5b1a199667e2..0bfcc492987e 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -278,13 +278,8 @@ extern struct timezone sys_tz; void update_vsyscall_tz(void) { - /* Make userspace gettimeofday spin until we're done. 
*/ - ++vdso_data->tb_update_count; - smp_wmb(); vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; vdso_data->tz_dsttime = sys_tz.tz_dsttime; - smp_wmb(); - ++vdso_data->tb_update_count; } /* From 0eab11c7e0d30de14a15ccd8269eef238321a8e1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 25 Aug 2016 10:40:19 +0200 Subject: [PATCH 13/48] s390/vx: allow to include vx-insn.h with .include To make the vx-insn.h more versatile avoid cpp preprocessor macros and allow to use plain numbers for vector and general purpose register operands. With that you can emit an .include from a C file into the assembler text and then use the vx-insn macros in inline assemblies. For example: asm (".include \"asm/vx-insn.h\""); static inline void xor_vec(int x, int y, int z) { asm volatile("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); } Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/vx-insn.h | 62 +++++++++++++++------------------ 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h index 4a3135620f5e..b61846dff70f 100644 --- a/arch/s390/include/asm/vx-insn.h +++ b/arch/s390/include/asm/vx-insn.h @@ -16,15 +16,13 @@ /* Macros to generate vector instruction byte code */ -#define REG_NUM_INVALID 255 - /* GR_NUM - Retrieve general-purpose register number * * @opd: Operand to store register number * @r64: String designation register in the format "%rN" */ .macro GR_NUM opd gr - \opd = REG_NUM_INVALID + \opd = 255 .ifc \gr,%r0 \opd = 0 .endif @@ -73,14 +71,11 @@ .ifc \gr,%r15 \opd = 15 .endif - .if \opd == REG_NUM_INVALID - .error "Invalid general-purpose register designation: \gr" + .if \opd == 255 + \opd = \gr .endif .endm -/* VX_R() - Macro to encode the VX_NUM into the instruction */ -#define VX_R(v) (v & 0x0F) - /* VX_NUM - Retrieve vector register number * * @opd: Operand to store register number @@ -88,11 +83,10 @@ * * The vector register number is used for as input number to the * instruction and, as well as, to compute the RXB field of the - * instruction. To encode the particular vector register number, - * use the VX_R(v) macro to extract the instruction opcode. + * instruction. 
*/ .macro VX_NUM opd vxr - \opd = REG_NUM_INVALID + \opd = 255 .ifc \vxr,%v0 \opd = 0 .endif @@ -189,8 +183,8 @@ .ifc \vxr,%v31 \opd = 31 .endif - .if \opd == REG_NUM_INVALID - .error "Invalid vector register designation: \vxr" + .if \opd == 255 + \opd = \vxr .endif .endm @@ -251,7 +245,7 @@ /* VECTOR GENERATE BYTE MASK */ .macro VGBM vr imm2 VX_NUM v1, \vr - .word (0xE700 | (VX_R(v1) << 4)) + .word (0xE700 | ((v1&15) << 4)) .word \imm2 MRXBOPC 0, 0x44, v1 .endm @@ -267,7 +261,7 @@ VX_NUM v1, \v GR_NUM b2, "%r0" GR_NUM r3, \gr - .word 0xE700 | (VX_R(v1) << 4) | r3 + .word 0xE700 | ((v1&15) << 4) | r3 .word (b2 << 12) | (\disp) MRXBOPC \m, 0x22, v1 .endm @@ -289,7 +283,7 @@ VX_NUM v1, \v GR_NUM x2, \index GR_NUM b2, \base - .word 0xE700 | (VX_R(v1) << 4) | x2 + .word 0xE700 | ((v1&15) << 4) | x2 .word (b2 << 12) | (\disp) MRXBOPC 0, 0x06, v1 .endm @@ -299,7 +293,7 @@ VX_NUM v1, \vr1 GR_NUM x2, \index GR_NUM b2, \base - .word 0xE700 | (VX_R(v1) << 4) | x2 + .word 0xE700 | ((v1&15) << 4) | x2 .word (b2 << 12) | (\disp) MRXBOPC \m3, \opc, v1 .endm @@ -319,7 +313,7 @@ /* VECTOR LOAD ELEMENT IMMEDIATE */ .macro VLEIx vr1, imm2, m3, opc VX_NUM v1, \vr1 - .word 0xE700 | (VX_R(v1) << 4) + .word 0xE700 | ((v1&15) << 4) .word \imm2 MRXBOPC \m3, \opc, v1 .endm @@ -341,7 +335,7 @@ GR_NUM r1, \gr GR_NUM b2, \base VX_NUM v3, \vr - .word 0xE700 | (r1 << 4) | VX_R(v3) + .word 0xE700 | (r1 << 4) | (v3&15) .word (b2 << 12) | (\disp) MRXBOPC \m, 0x21, v3 .endm @@ -363,7 +357,7 @@ VX_NUM v1, \vfrom VX_NUM v3, \vto GR_NUM b2, \base /* Base register */ - .word 0xE700 | (VX_R(v1) << 4) | VX_R(v3) + .word 0xE700 | ((v1&15) << 4) | (v3&15) .word (b2 << 12) | (\disp) MRXBOPC 0, 0x36, v1, v3 .endm @@ -373,7 +367,7 @@ VX_NUM v1, \vfrom VX_NUM v3, \vto GR_NUM b2, \base /* Base register */ - .word 0xE700 | (VX_R(v1) << 4) | VX_R(v3) + .word 0xE700 | ((v1&15) << 4) | (v3&15) .word (b2 << 12) | (\disp) MRXBOPC 0, 0x3E, v1, v3 .endm @@ -384,16 +378,16 @@ VX_NUM v2, \vr2 VX_NUM v3, \vr3 VX_NUM v4, \vr4 - .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2) - .word (VX_R(v3) << 12) - MRXBOPC VX_R(v4), 0x8C, v1, v2, v3, v4 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC (v4&15), 0x8C, v1, v2, v3, v4 .endm /* VECTOR UNPACK LOGICAL LOW */ .macro VUPLL vr1, vr2, m3 VX_NUM v1, \vr1 VX_NUM v2, \vr2 - .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2) + .word 0xE700 | ((v1&15) << 4) | (v2&15) .word 0x0000 MRXBOPC \m3, 0xD4, v1, v2 .endm @@ -415,8 +409,8 @@ VX_NUM v1, \vr1 VX_NUM v2, \vr2 VX_NUM v3, \vr3 - .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2) - .word (VX_R(v3) << 12) + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) MRXBOPC 0, 0x6D, v1, v2, v3 .endm @@ -425,8 +419,8 @@ VX_NUM v1, \vr1 VX_NUM v2, \vr2 VX_NUM v3, \vr3 - .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2) - .word (VX_R(v3) << 12) + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) MRXBOPC \m4, 0xB4, v1, v2, v3 .endm .macro VGFMB vr1, vr2, vr3 @@ -448,9 +442,9 @@ VX_NUM v2, \vr2 VX_NUM v3, \vr3 VX_NUM v4, \vr4 - .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2) - .word (VX_R(v3) << 12) | (\m5 << 8) - MRXBOPC VX_R(v4), 0xBC, v1, v2, v3, v4 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) | (\m5 << 8) + MRXBOPC (v4&15), 0xBC, v1, v2, v3, v4 .endm .macro VGFMAB vr1, vr2, vr3, vr4 VGFMA \vr1, \vr2, \vr3, \vr4, 0 @@ -470,8 +464,8 @@ VX_NUM v1, \vr1 VX_NUM v2, \vr2 VX_NUM v3, \vr3 - .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2) - .word (VX_R(v3) << 12) + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) MRXBOPC 0, 0x7D, v1, 
v2, v3 .endm From 7f79695cc1b6aa6d80a861780d9f8ce75d3dddcb Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 22 Aug 2016 12:06:21 +0200 Subject: [PATCH 14/48] s390/fpu: improve kernel_fpu_[begin|end] In case of nested user of the FPU or vector registers in the kernel the current code uses the mask of the FPU/vector registers of the previous contexts to decide which registers to save and restore. E.g. if the previous context used KERNEL_VXR_V0V7 and the next context wants to use KERNEL_VXR_V24V31 the first 8 vector registers are stored to the FPU state structure. But this is not necessary as the next context does not use these registers. Rework the FPU/vector register save and restore code. The new code does a few things differently: 1) A lowcore field is used instead of a per-cpu variable. 2) The kernel_fpu_end function now has two parameters just like kernel_fpu_begin. The register flags are required by both functions to save / restore the minimal register set. 3) The inline functions kernel_fpu_begin/kernel_fpu_end now do the update of the register masks. If the user space FPU registers have already been stored neither save_fpu_regs nor the __kernel_fpu_begin/__kernel_fpu_end functions have to be called for the first context. In this case kernel_fpu_begin adds 7 instructions and kernel_fpu_end adds 4 instructions. 3) The inline assemblies in __kernel_fpu_begin / __kernel_fpu_end to save / restore the vector registers are simplified a bit. Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/crc32-vx.c | 2 +- arch/s390/include/asm/fpu/api.h | 32 ++-- arch/s390/include/asm/lowcore.h | 3 +- arch/s390/kernel/fpu.c | 319 ++++++++++++-------------------- arch/s390/kernel/sysinfo.c | 2 +- 5 files changed, 148 insertions(+), 210 deletions(-) diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 2bad9d837029..992e630c227b 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -67,7 +67,7 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); \ kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ crc = ___crc32_vx(crc, data, aligned); \ - kernel_fpu_end(&vxstate); \ + kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ \ if (remaining) \ crc = ___crc32_sw(crc, data + aligned, remaining); \ diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 6aba6fc406ad..02124d66bfb5 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -64,18 +64,18 @@ static inline int test_fp_ctl(u32 fpc) return rc; } -#define KERNEL_VXR_V0V7 1 -#define KERNEL_VXR_V8V15 2 -#define KERNEL_VXR_V16V23 4 -#define KERNEL_VXR_V24V31 8 -#define KERNEL_FPR 16 -#define KERNEL_FPC 256 +#define KERNEL_FPC 1 +#define KERNEL_VXR_V0V7 2 +#define KERNEL_VXR_V8V15 4 +#define KERNEL_VXR_V16V23 8 +#define KERNEL_VXR_V24V31 16 #define KERNEL_VXR_LOW (KERNEL_VXR_V0V7|KERNEL_VXR_V8V15) #define KERNEL_VXR_MID (KERNEL_VXR_V8V15|KERNEL_VXR_V16V23) #define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31) -#define KERNEL_FPU_MASK (KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR) +#define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH) +#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_V0V7) struct kernel_fpu; @@ -87,18 +87,28 @@ struct kernel_fpu; * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions. 
*/ void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags); -void __kernel_fpu_end(struct kernel_fpu *state); +void __kernel_fpu_end(struct kernel_fpu *state, u32 flags); static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags) { preempt_disable(); - __kernel_fpu_begin(state, flags); + state->mask = S390_lowcore.fpu_flags; + if (!test_cpu_flag(CIF_FPU)) + /* Save user space FPU state and register contents */ + save_fpu_regs(); + else if (state->mask & flags) + /* Save FPU/vector register in-use by the kernel */ + __kernel_fpu_begin(state, flags); + S390_lowcore.fpu_flags |= flags; } -static inline void kernel_fpu_end(struct kernel_fpu *state) +static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags) { - __kernel_fpu_end(state); + S390_lowcore.fpu_flags = state->mask; + if (state->mask & flags) + /* Restore FPU/vector register in-use by the kernel */ + __kernel_fpu_end(state, flags); preempt_enable(); } diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index d79ba7cf75b0..7b93b78f423c 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -129,7 +129,8 @@ struct lowcore { __u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */ __u64 gmap; /* 0x0398 */ __u32 spinlock_lockval; /* 0x03a0 */ - __u8 pad_0x03a0[0x0400-0x03a4]; /* 0x03a4 */ + __u32 fpu_flags; /* 0x03a4 */ + __u8 pad_0x03a8[0x0400-0x03a8]; /* 0x03a8 */ /* Per cpu primary space access list */ __u32 paste[16]; /* 0x0400 */ diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 81d1d1887507..1235b9438df4 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -10,240 +10,167 @@ #include #include -/* - * Per-CPU variable to maintain FPU register ranges that are in use - * by the kernel. - */ -static DEFINE_PER_CPU(u32, kernel_fpu_state); - -#define KERNEL_FPU_STATE_MASK (KERNEL_FPU_MASK|KERNEL_FPC) - +asm(".include \"asm/vx-insn.h\"\n"); void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) { - if (!__this_cpu_read(kernel_fpu_state)) { - /* - * Save user space FPU state and register contents. Multiple - * calls because of interruptions do not matter and return - * immediately. This also sets CIF_FPU to lazy restore FP/VX - * register contents when returning to user space. - */ - save_fpu_regs(); - } - - /* Update flags to use the vector facility for KERNEL_FPR */ - if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) { - flags |= KERNEL_VXR_LOW | KERNEL_FPC; - flags &= ~KERNEL_FPR; - } - - /* Save and update current kernel VX state */ - state->mask = __this_cpu_read(kernel_fpu_state); - __this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK); - /* - * If this is the first call to __kernel_fpu_begin(), no additional - * work is required. + * Limit the save to the FPU/vector registers already + * in use by the previous context */ - if (!(state->mask & KERNEL_FPU_STATE_MASK)) - return; + flags &= state->mask; - /* - * If KERNEL_FPR is still set, the vector facility is not available - * and, thus, save floating-point control and registers only. 
- */ - if (state->mask & KERNEL_FPR) { - asm volatile("stfpc %0" : "=Q" (state->fpc)); - asm volatile("std 0,%0" : "=Q" (state->fprs[0])); - asm volatile("std 1,%0" : "=Q" (state->fprs[1])); - asm volatile("std 2,%0" : "=Q" (state->fprs[2])); - asm volatile("std 3,%0" : "=Q" (state->fprs[3])); - asm volatile("std 4,%0" : "=Q" (state->fprs[4])); - asm volatile("std 5,%0" : "=Q" (state->fprs[5])); - asm volatile("std 6,%0" : "=Q" (state->fprs[6])); - asm volatile("std 7,%0" : "=Q" (state->fprs[7])); - asm volatile("std 8,%0" : "=Q" (state->fprs[8])); - asm volatile("std 9,%0" : "=Q" (state->fprs[9])); - asm volatile("std 10,%0" : "=Q" (state->fprs[10])); - asm volatile("std 11,%0" : "=Q" (state->fprs[11])); - asm volatile("std 12,%0" : "=Q" (state->fprs[12])); - asm volatile("std 13,%0" : "=Q" (state->fprs[13])); - asm volatile("std 14,%0" : "=Q" (state->fprs[14])); - asm volatile("std 15,%0" : "=Q" (state->fprs[15])); - return; - } - - /* - * If this is a nested call to __kernel_fpu_begin(), check the saved - * state mask to save and later restore the vector registers that - * are already in use. Let's start with checking floating-point - * controls. - */ - if (state->mask & KERNEL_FPC) + if (flags & KERNEL_FPC) + /* Save floating point control */ asm volatile("stfpc %0" : "=m" (state->fpc)); + if (!MACHINE_HAS_VX) { + if (flags & KERNEL_VXR_V0V7) { + /* Save floating-point registers */ + asm volatile("std 0,%0" : "=Q" (state->fprs[0])); + asm volatile("std 1,%0" : "=Q" (state->fprs[1])); + asm volatile("std 2,%0" : "=Q" (state->fprs[2])); + asm volatile("std 3,%0" : "=Q" (state->fprs[3])); + asm volatile("std 4,%0" : "=Q" (state->fprs[4])); + asm volatile("std 5,%0" : "=Q" (state->fprs[5])); + asm volatile("std 6,%0" : "=Q" (state->fprs[6])); + asm volatile("std 7,%0" : "=Q" (state->fprs[7])); + asm volatile("std 8,%0" : "=Q" (state->fprs[8])); + asm volatile("std 9,%0" : "=Q" (state->fprs[9])); + asm volatile("std 10,%0" : "=Q" (state->fprs[10])); + asm volatile("std 11,%0" : "=Q" (state->fprs[11])); + asm volatile("std 12,%0" : "=Q" (state->fprs[12])); + asm volatile("std 13,%0" : "=Q" (state->fprs[13])); + asm volatile("std 14,%0" : "=Q" (state->fprs[14])); + asm volatile("std 15,%0" : "=Q" (state->fprs[15])); + } + return; + } + /* Test and save vector registers */ asm volatile ( /* * Test if any vector register must be saved and, if so, * test if all register can be saved. */ - " tmll %[m],15\n" /* KERNEL_VXR_MASK */ - " jz 20f\n" /* no work -> done */ " la 1,%[vxrs]\n" /* load save area */ - " jo 18f\n" /* -> save V0..V31 */ - + " tmll %[m],30\n" /* KERNEL_VXR */ + " jz 7f\n" /* no work -> done */ + " jo 5f\n" /* -> save V0..V31 */ /* - * Test if V8..V23 can be saved at once... this speeds up - * for KERNEL_fpu_MID only. Otherwise continue to split the - * range of vector registers into two halves and test them - * separately. + * Test for special case KERNEL_FPU_MID only. 
In this + * case a vstm V8..V23 is the best instruction */ - " tmll %[m],6\n" /* KERNEL_VXR_MID */ - " jo 17f\n" /* -> save V8..V23 */ - + " chi %[m],12\n" /* KERNEL_VXR_MID */ + " jne 0f\n" /* -> save V8..V23 */ + " VSTM 8,23,128,1\n" /* vstm %v8,%v23,128(%r1) */ + " j 7f\n" /* Test and save the first half of 16 vector registers */ - "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ - " jz 10f\n" /* -> KERNEL_VXR_HIGH */ + "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */ + " jz 3f\n" /* -> KERNEL_VXR_HIGH */ " jo 2f\n" /* 11 -> save V0..V15 */ - " brc 4,3f\n" /* 01 -> save V0..V7 */ - " brc 2,4f\n" /* 10 -> save V8..V15 */ - + " brc 2,1f\n" /* 10 -> save V8..V15 */ + " VSTM 0,7,0,1\n" /* vstm %v0,%v7,0(%r1) */ + " j 3f\n" + "1: VSTM 8,15,128,1\n" /* vstm %v8,%v15,128(%r1) */ + " j 3f\n" + "2: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */ /* Test and save the second half of 16 vector registers */ - "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ - " jo 19f\n" /* 11 -> save V16..V31 */ - " brc 4,11f\n" /* 01 -> save V16..V23 */ - " brc 2,12f\n" /* 10 -> save V24..V31 */ - " j 20f\n" /* 00 -> done */ - - /* - * Below are the vstm combinations to save multiple vector - * registers at once. - */ - "2: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ - " j 10b\n" /* -> VXR_HIGH */ - "3: .word 0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */ - " j 10b\n" /* -> VXR_HIGH */ - "4: .word 0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */ - " j 10b\n" /* -> VXR_HIGH */ - "\n" - "11: .word 0xe707,0x1100,0x0c3e\n" /* vstm 16,23,256(1) */ - " j 20f\n" /* -> done */ - "12: .word 0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */ - " j 20f\n" /* -> done */ - "\n" - "17: .word 0xe787,0x1080,0x043e\n" /* vstm 8,23,128(1) */ - " nill %[m],249\n" /* m &= ~VXR_MID */ - " j 1b\n" /* -> VXR_LOW */ - "\n" - "18: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ - "19: .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ - "20:" + "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */ + " jz 7f\n" + " jo 6f\n" /* 11 -> save V16..V31 */ + " brc 2,4f\n" /* 10 -> save V24..V31 */ + " VSTM 16,23,256,1\n" /* vstm %v16,%v23,256(%r1) */ + " j 7f\n" + "4: VSTM 24,31,384,1\n" /* vstm %v24,%v31,384(%r1) */ + " j 7f\n" + "5: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */ + "6: VSTM 16,31,256,1\n" /* vstm %v16,%v31,256(%r1) */ + "7:" : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs) - : [m] "d" (state->mask) + : [m] "d" (flags) : "1", "cc"); } EXPORT_SYMBOL(__kernel_fpu_begin); -void __kernel_fpu_end(struct kernel_fpu *state) +void __kernel_fpu_end(struct kernel_fpu *state, u32 flags) { - /* Just update the per-CPU state if there is nothing to restore */ - if (!(state->mask & KERNEL_FPU_STATE_MASK)) - goto update_fpu_state; - /* - * If KERNEL_FPR is specified, the vector facility is not available - * and, thus, restore floating-point control and registers only. 
+ * Limit the restore to the FPU/vector registers of the + * previous context that have been overwritte by the + * current context */ - if (state->mask & KERNEL_FPR) { - asm volatile("lfpc %0" : : "Q" (state->fpc)); - asm volatile("ld 0,%0" : : "Q" (state->fprs[0])); - asm volatile("ld 1,%0" : : "Q" (state->fprs[1])); - asm volatile("ld 2,%0" : : "Q" (state->fprs[2])); - asm volatile("ld 3,%0" : : "Q" (state->fprs[3])); - asm volatile("ld 4,%0" : : "Q" (state->fprs[4])); - asm volatile("ld 5,%0" : : "Q" (state->fprs[5])); - asm volatile("ld 6,%0" : : "Q" (state->fprs[6])); - asm volatile("ld 7,%0" : : "Q" (state->fprs[7])); - asm volatile("ld 8,%0" : : "Q" (state->fprs[8])); - asm volatile("ld 9,%0" : : "Q" (state->fprs[9])); - asm volatile("ld 10,%0" : : "Q" (state->fprs[10])); - asm volatile("ld 11,%0" : : "Q" (state->fprs[11])); - asm volatile("ld 12,%0" : : "Q" (state->fprs[12])); - asm volatile("ld 13,%0" : : "Q" (state->fprs[13])); - asm volatile("ld 14,%0" : : "Q" (state->fprs[14])); - asm volatile("ld 15,%0" : : "Q" (state->fprs[15])); - goto update_fpu_state; - } + flags &= state->mask; - /* Test and restore floating-point controls */ - if (state->mask & KERNEL_FPC) + if (flags & KERNEL_FPC) + /* Restore floating-point controls */ asm volatile("lfpc %0" : : "Q" (state->fpc)); + if (!MACHINE_HAS_VX) { + if (flags & KERNEL_VXR_V0V7) { + /* Restore floating-point registers */ + asm volatile("ld 0,%0" : : "Q" (state->fprs[0])); + asm volatile("ld 1,%0" : : "Q" (state->fprs[1])); + asm volatile("ld 2,%0" : : "Q" (state->fprs[2])); + asm volatile("ld 3,%0" : : "Q" (state->fprs[3])); + asm volatile("ld 4,%0" : : "Q" (state->fprs[4])); + asm volatile("ld 5,%0" : : "Q" (state->fprs[5])); + asm volatile("ld 6,%0" : : "Q" (state->fprs[6])); + asm volatile("ld 7,%0" : : "Q" (state->fprs[7])); + asm volatile("ld 8,%0" : : "Q" (state->fprs[8])); + asm volatile("ld 9,%0" : : "Q" (state->fprs[9])); + asm volatile("ld 10,%0" : : "Q" (state->fprs[10])); + asm volatile("ld 11,%0" : : "Q" (state->fprs[11])); + asm volatile("ld 12,%0" : : "Q" (state->fprs[12])); + asm volatile("ld 13,%0" : : "Q" (state->fprs[13])); + asm volatile("ld 14,%0" : : "Q" (state->fprs[14])); + asm volatile("ld 15,%0" : : "Q" (state->fprs[15])); + } + return; + } + /* Test and restore (load) vector registers */ asm volatile ( /* - * Test if any vector registers must be loaded and, if so, + * Test if any vector register must be loaded and, if so, * test if all registers can be loaded at once. */ - " tmll %[m],15\n" /* KERNEL_VXR_MASK */ - " jz 20f\n" /* no work -> done */ - " la 1,%[vxrs]\n" /* load load area */ - " jo 18f\n" /* -> load V0..V31 */ - + " la 1,%[vxrs]\n" /* load restore area */ + " tmll %[m],30\n" /* KERNEL_VXR */ + " jz 7f\n" /* no work -> done */ + " jo 5f\n" /* -> restore V0..V31 */ /* - * Test if V8..V23 can be restored at once... this speeds up - * for KERNEL_VXR_MID only. Otherwise continue to split the - * range of vector registers into two halves and test them - * separately. + * Test for special case KERNEL_FPU_MID only. 
In this + * case a vlm V8..V23 is the best instruction */ - " tmll %[m],6\n" /* KERNEL_VXR_MID */ - " jo 17f\n" /* -> load V8..V23 */ - - /* Test and load the first half of 16 vector registers */ - "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ - " jz 10f\n" /* -> KERNEL_VXR_HIGH */ - " jo 2f\n" /* 11 -> load V0..V15 */ - " brc 4,3f\n" /* 01 -> load V0..V7 */ - " brc 2,4f\n" /* 10 -> load V8..V15 */ - - /* Test and load the second half of 16 vector registers */ - "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ - " jo 19f\n" /* 11 -> load V16..V31 */ - " brc 4,11f\n" /* 01 -> load V16..V23 */ - " brc 2,12f\n" /* 10 -> load V24..V31 */ - " j 20f\n" /* 00 -> done */ - - /* - * Below are the vstm combinations to load multiple vector - * registers at once. - */ - "2: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ - " j 10b\n" /* -> VXR_HIGH */ - "3: .word 0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */ - " j 10b\n" /* -> VXR_HIGH */ - "4: .word 0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */ - " j 10b\n" /* -> VXR_HIGH */ - "\n" - "11: .word 0xe707,0x1100,0x0c36\n" /* vlm 16,23,256(1) */ - " j 20f\n" /* -> done */ - "12: .word 0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */ - " j 20f\n" /* -> done */ - "\n" - "17: .word 0xe787,0x1080,0x0436\n" /* vlm 8,23,128(1) */ - " nill %[m],249\n" /* m &= ~VXR_MID */ - " j 1b\n" /* -> VXR_LOW */ - "\n" - "18: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ - "19: .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ - "20:" - : - : [vxrs] "Q" (*(struct vx_array *) &state->vxrs), - [m] "d" (state->mask) + " chi %[m],12\n" /* KERNEL_VXR_MID */ + " jne 0f\n" /* -> restore V8..V23 */ + " VLM 8,23,128,1\n" /* vlm %v8,%v23,128(%r1) */ + " j 7f\n" + /* Test and restore the first half of 16 vector registers */ + "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */ + " jz 3f\n" /* -> KERNEL_VXR_HIGH */ + " jo 2f\n" /* 11 -> restore V0..V15 */ + " brc 2,1f\n" /* 10 -> restore V8..V15 */ + " VLM 0,7,0,1\n" /* vlm %v0,%v7,0(%r1) */ + " j 3f\n" + "1: VLM 8,15,128,1\n" /* vlm %v8,%v15,128(%r1) */ + " j 3f\n" + "2: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */ + /* Test and restore the second half of 16 vector registers */ + "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */ + " jz 7f\n" + " jo 6f\n" /* 11 -> restore V16..V31 */ + " brc 2,4f\n" /* 10 -> restore V24..V31 */ + " VLM 16,23,256,1\n" /* vlm %v16,%v23,256(%r1) */ + " j 7f\n" + "4: VLM 24,31,384,1\n" /* vlm %v24,%v31,384(%r1) */ + " j 7f\n" + "5: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */ + "6: VLM 16,31,256,1\n" /* vlm %v16,%v31,256(%r1) */ + "7:" + : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs) + : [m] "d" (flags) : "1", "cc"); - -update_fpu_state: - /* Update current kernel VX state */ - __this_cpu_write(kernel_fpu_state, state->mask); } EXPORT_SYMBOL(__kernel_fpu_end); diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 050b8d067d3b..bfda6aa40280 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -454,7 +454,7 @@ void s390_adjust_jiffies(void) : "Q" (info->capability), "d" (10000000), "d" (0) : "cc" ); - kernel_fpu_end(&fpu); + kernel_fpu_end(&fpu, KERNEL_FPR); } else /* * Really old machine without stsi block for basic From 8f149ea6e91534c3e0b4cfcd843323bd94273087 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 22 Aug 2016 14:40:06 +0200 Subject: [PATCH 15/48] s390/nmi: improve revalidation of fpu / vector registers The machine check handler will do one of two things if the floating-point control, a floating point register or a vector register can not be revalidated: 1) if the PSW 
indicates user mode the process is terminated 2) if the PSW indicates kernel mode the system is stopped To unconditionally stop the system for 2) is incorrect. There are three possible outcomes if the floating-point control, a floating point register or a vector registers can not be revalidated: 1) The kernel is inside a kernel_fpu_begin/kernel_fpu_end block and needs the register. The system is stopped. 2) No active kernel_fpu_begin/kernel_fpu_end block and the CIF_CPU bit is not set. The user space process needs the register and is killed. 3) No active kernel_fpu_begin/kernel_fpu_end block and the CIF_FPU bit is set. Neither the kernel nor the user space process needs the lost register. Just revalidate it and continue. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/nmi.c | 67 ++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 29376f0e725c..9a32f7419d78 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -98,7 +98,7 @@ EXPORT_SYMBOL_GPL(s390_handle_mcck); * returns 0 if all registers could be validated * returns 1 otherwise */ -static int notrace s390_validate_registers(union mci mci) +static int notrace s390_validate_registers(union mci mci, int umode) { int kill_task; u64 zero; @@ -110,26 +110,41 @@ static int notrace s390_validate_registers(union mci mci) if (!mci.gr) { /* * General purpose registers couldn't be restored and have - * unknown contents. Process needs to be terminated. + * unknown contents. Stop system or terminate process. */ + if (!umode) + s390_handle_damage(); kill_task = 1; } if (!mci.fp) { /* - * Floating point registers can't be restored and - * therefore the process needs to be terminated. + * Floating point registers can't be restored. If the + * kernel currently uses floating point registers the + * system is stopped. If the process has its floating + * pointer registers loaded it is terminated. + * Otherwise just revalidate the registers. */ - kill_task = 1; + if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7) + s390_handle_damage(); + if (!test_cpu_flag(CIF_FPU)) + kill_task = 1; } fpt_save_area = &S390_lowcore.floating_pt_save_area; fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area; if (!mci.fc) { /* * Floating point control register can't be restored. - * Task will be terminated. + * If the kernel currently uses the floating pointer + * registers and needs the FPC register the system is + * stopped. If the process has its floating pointer + * registers loaded it is terminated. Otherwiese the + * FPC is just revalidated. */ + if (S390_lowcore.fpu_flags & KERNEL_FPC) + s390_handle_damage(); asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero)); - kill_task = 1; + if (!test_cpu_flag(CIF_FPU)) + kill_task = 1; } else asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); @@ -159,10 +174,16 @@ static int notrace s390_validate_registers(union mci mci) if (!mci.vr) { /* - * Vector registers can't be restored and therefore - * the process needs to be terminated. + * Vector registers can't be restored. If the kernel + * currently uses vector registers the system is + * stopped. If the process has its vector registers + * loaded it is terminated. Otherwise just revalidate + * the registers. 
*/ - kill_task = 1; + if (S390_lowcore.fpu_flags & KERNEL_VXR) + s390_handle_damage(); + if (!test_cpu_flag(CIF_FPU)) + kill_task = 1; } cr0.val = S390_lowcore.cregs_save_area[0]; cr0.afp = cr0.vx = 1; @@ -250,13 +271,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs) struct mcck_struct *mcck; unsigned long long tmp; union mci mci; - int umode; nmi_enter(); inc_irq_stat(NMI_NMI); mci.val = S390_lowcore.mcck_interruption_code; mcck = this_cpu_ptr(&cpu_mcck); - umode = user_mode(regs); if (mci.sd) { /* System damage -> stopping machine */ @@ -297,22 +316,14 @@ void notrace s390_do_machine_check(struct pt_regs *regs) s390_handle_damage(); } } - if (s390_validate_registers(mci)) { - if (umode) { - /* - * Couldn't restore all register contents while in - * user mode -> mark task for termination. - */ - mcck->kill_task = 1; - mcck->mcck_code = mci.val; - set_cpu_flag(CIF_MCCK_PENDING); - } else { - /* - * Couldn't restore all register contents while in - * kernel mode -> stopping machine. - */ - s390_handle_damage(); - } + if (s390_validate_registers(mci, user_mode(regs))) { + /* + * Couldn't restore all register contents for the + * user space process -> mark task for termination. + */ + mcck->kill_task = 1; + mcck->mcck_code = mci.val; + set_cpu_flag(CIF_MCCK_PENDING); } if (mci.cd) { /* Timing facility damage */ From 474fd6e80fe529e9adeeb7ea9d4e5d6c4da0b7fe Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 23 Aug 2016 13:30:24 +0200 Subject: [PATCH 16/48] RAID/s390: add SIMD implementation for raid6 gen/xor Using vector registers is slightly faster: raid6: vx128x8 gen() 19705 MB/s raid6: vx128x8 xor() 11886 MB/s raid6: using algorithm vx128x8 gen() 19705 MB/s raid6: .... xor() 11886 MB/s, rmw enabled vs the software algorithms: raid6: int64x1 gen() 3018 MB/s raid6: int64x1 xor() 1429 MB/s raid6: int64x2 gen() 4661 MB/s raid6: int64x2 xor() 3143 MB/s raid6: int64x4 gen() 5392 MB/s raid6: int64x4 xor() 3509 MB/s raid6: int64x8 gen() 4441 MB/s raid6: int64x8 xor() 3207 MB/s raid6: using algorithm int64x4 gen() 5392 MB/s raid6: .... 
xor() 3509 MB/s, rmw enabled Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/vx-insn.h | 86 ++++++++++++++++ include/linux/raid/pq.h | 1 + lib/raid6/.gitignore | 1 + lib/raid6/Makefile | 6 ++ lib/raid6/algos.c | 3 + lib/raid6/s390vx.uc | 168 ++++++++++++++++++++++++++++++++ 6 files changed, 265 insertions(+) create mode 100644 lib/raid6/s390vx.uc diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h index b61846dff70f..49c24a2afce0 100644 --- a/arch/s390/include/asm/vx-insn.h +++ b/arch/s390/include/asm/vx-insn.h @@ -278,6 +278,15 @@ VLVG \v, \gr, \index, 3 .endm +/* VECTOR LOAD REGISTER */ +.macro VLR v1, v2 + VX_NUM v1, \v1 + VX_NUM v2, \v2 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word 0 + MRXBOPC 0, 0x56, v1, v2 +.endm + /* VECTOR LOAD */ .macro VL v, disp, index="%r0", base VX_NUM v1, \v @@ -404,6 +413,16 @@ /* Vector integer instructions */ +/* VECTOR AND */ +.macro VN vr1, vr2, vr3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC 0, 0x68, v1, v2, v3 +.endm + /* VECTOR EXCLUSIVE OR */ .macro VX vr1, vr2, vr3 VX_NUM v1, \vr1 @@ -469,6 +488,73 @@ MRXBOPC 0, 0x7D, v1, v2, v3 .endm +/* VECTOR REPLICATE IMMEDIATE */ +.macro VREPI vr1, imm2, m3 + VX_NUM v1, \vr1 + .word 0xE700 | ((v1&15) << 4) + .word \imm2 + MRXBOPC \m3, 0x45, v1 +.endm +.macro VREPIB vr1, imm2 + VREPI \vr1, \imm2, 0 +.endm +.macro VREPIH vr1, imm2 + VREPI \vr1, \imm2, 1 +.endm +.macro VREPIF vr1, imm2 + VREPI \vr1, \imm2, 2 +.endm +.macro VREPIG vr1, imm2 + VREPI \vr1, \imm2, 3 +.endm + +/* VECTOR ADD */ +.macro VA vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0xF3, v1, v2, v3 +.endm +.macro VAB vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 0 +.endm +.macro VAH vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 1 +.endm +.macro VAF vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 2 +.endm +.macro VAG vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 3 +.endm +.macro VAQ vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 4 +.endm + +/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */ +.macro VESRAV vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x7A, v1, v2, v3 +.endm + +.macro VESRAVB vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 0 +.endm +.macro VESRAVH vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 1 +.endm +.macro VESRAVF vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 2 +.endm +.macro VESRAVG vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 3 +.endm #endif /* __ASSEMBLY__ */ #endif /* __ASM_S390_VX_INSN_H */ diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index a0118d5929a9..c032a6a408a6 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -103,6 +103,7 @@ extern const struct raid6_calls raid6_avx2x1; extern const struct raid6_calls raid6_avx2x2; extern const struct raid6_calls raid6_avx2x4; extern const struct raid6_calls raid6_tilegx8; +extern const struct raid6_calls raid6_s390vx8; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore index 0a7e494b2bcd..f01b1cb04f91 100644 --- a/lib/raid6/.gitignore +++ b/lib/raid6/.gitignore @@ -3,3 +3,4 @@ altivec*.c int*.c tables.c neon?.c +s390vx?.c diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 3b10a48fa040..667b9607f8c0 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -7,6 +7,7 @@ raid6_pq-$(CONFIG_X86) += 
recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o raid6_pq-$(CONFIG_TILEGX) += tilegx8.o +raid6_pq-$(CONFIG_S390) += s390vx8.o hostprogs-y += mktables @@ -116,6 +117,11 @@ $(obj)/tilegx8.c: UNROLL := 8 $(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) +targets += s390vx8.c +$(obj)/s390vx8.c: UNROLL := 8 +$(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + quiet_cmd_mktable = TABLE $@ cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 975c6e0434bd..e1923b602bbc 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -68,6 +68,9 @@ const struct raid6_calls * const raid6_algos[] = { #endif #if defined(CONFIG_TILEGX) &raid6_tilegx8, +#endif +#if defined(CONFIG_S390) + &raid6_s390vx8, #endif &raid6_intx1, &raid6_intx2, diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc new file mode 100644 index 000000000000..7b45191a655f --- /dev/null +++ b/lib/raid6/s390vx.uc @@ -0,0 +1,168 @@ +/* + * raid6_vx$#.c + * + * $#-way unrolled RAID6 gen/xor functions for s390 + * based on the vector facility + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky + * + * This file is postprocessed using unroll.awk. + */ + +#include <linux/raid/pq.h> +#include <asm/fpu/api.h> + +asm(".include \"asm/vx-insn.h\"\n"); + +#define NSIZE 16 + +static inline void LOAD_CONST(void) +{ + asm volatile("VREPIB %v24,7"); + asm volatile("VREPIB %v25,0x1d"); +} + +/* + * The SHLBYTE() operation shifts each of the 16 bytes in + * vector register y left by 1 bit and stores the result in + * vector register x. + */ +static inline void SHLBYTE(int x, int y) +{ + asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y)); +} + +/* + * For each of the 16 bytes in the vector register y the MASK() + * operation returns 0xFF if the high bit of the byte is 1, + * or 0x00 if the high bit is 0. The result is stored in vector + * register x. 
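+ * Together with AND() against the 0x1d pattern in %v25, SHLBYTE() + * and XOR() this implements the RAID-6 multiply-by-2 in GF(2^8): + * q = (q << 1) ^ (0x1d & mask) for each byte.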
+ */ +static inline void MASK(int x, int y) +{ + asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y)); +} + +static inline void AND(int x, int y, int z) +{ + asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); +} + +static inline void XOR(int x, int y, int z) +{ + asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); +} + +static inline void LOAD_DATA(int x, int n, u8 *ptr) +{ + typedef struct { u8 _[16*n]; } addrtype; + register addrtype *__ptr asm("1") = (addrtype *) ptr; + + asm volatile ("VLM %2,%3,0,%r1" + : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1)); +} + +static inline void STORE_DATA(int x, int n, u8 *ptr) +{ + typedef struct { u8 _[16*n]; } addrtype; + register addrtype *__ptr asm("1") = (addrtype *) ptr; + + asm volatile ("VSTM %2,%3,0,1" + : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1)); +} + +static inline void COPY_VEC(int x, int y) +{ + asm volatile ("VLR %0,%1" : : "i" (x), "i" (y)); +} + +static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + struct kernel_fpu vxstate; + u8 **dptr, *p, *q; + int d, z, z0; + + kernel_fpu_begin(&vxstate, KERNEL_VXR); + LOAD_CONST(); + + dptr = (u8 **) ptrs; + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0 + 1]; /* XOR parity */ + q = dptr[z0 + 2]; /* RS syndrome */ + + for (d = 0; d < bytes; d += $#*NSIZE) { + LOAD_DATA(0,$#,&dptr[z0][d]); + COPY_VEC(8+$$,0+$$); + for (z = z0 - 1; z >= 0; z--) { + MASK(16+$$,8+$$); + AND(16+$$,16+$$,25); + SHLBYTE(8+$$,8+$$); + XOR(8+$$,8+$$,16+$$); + LOAD_DATA(16,$#,&dptr[z][d]); + XOR(0+$$,0+$$,16+$$); + XOR(8+$$,8+$$,16+$$); + } + STORE_DATA(0,$#,&p[d]); + STORE_DATA(8,$#,&q[d]); + } + kernel_fpu_end(&vxstate, KERNEL_VXR); +} + +static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + struct kernel_fpu vxstate; + u8 **dptr, *p, *q; + int d, z, z0; + + dptr = (u8 **) ptrs; + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks - 2]; /* XOR parity */ + q = dptr[disks - 1]; /* RS syndrome */ + + kernel_fpu_begin(&vxstate, KERNEL_VXR); + LOAD_CONST(); + + for (d = 0; d < bytes; d += $#*NSIZE) { + /* P/Q data pages */ + LOAD_DATA(0,$#,&dptr[z0][d]); + COPY_VEC(8+$$,0+$$); + for (z = z0 - 1; z >= start; z--) { + MASK(16+$$,8+$$); + AND(16+$$,16+$$,25); + SHLBYTE(8+$$,8+$$); + XOR(8+$$,8+$$,16+$$); + LOAD_DATA(16,$#,&dptr[z][d]); + XOR(0+$$,0+$$,16+$$); + XOR(8+$$,8+$$,16+$$); + } + /* P/Q left side optimization */ + for (z = start - 1; z >= 0; z--) { + MASK(16+$$,8+$$); + AND(16+$$,16+$$,25); + SHLBYTE(8+$$,8+$$); + XOR(8+$$,8+$$,16+$$); + } + LOAD_DATA(16,$#,&p[d]); + XOR(16+$$,16+$$,0+$$); + STORE_DATA(16,$#,&p[d]); + LOAD_DATA(16,$#,&q[d]); + XOR(16+$$,16+$$,8+$$); + STORE_DATA(16,$#,&q[d]); + } + kernel_fpu_end(&vxstate, KERNEL_VXR); +} + +static int raid6_s390vx$#_valid(void) +{ + return MACHINE_HAS_VX; +} + +const struct raid6_calls raid6_s390vx$# = { + raid6_s390vx$#_gen_syndrome, + raid6_s390vx$#_xor_syndrome, + raid6_s390vx$#_valid, + "vx128x$#", + 1 +}; From edc63a3785b48455e05793e848f0174e21f38d09 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 15 Aug 2016 09:19:16 +0200 Subject: [PATCH 17/48] s390/crypto: cleanup cpacf function codes Use a separate define for the decryption modifier bit instead of duplicating the function codes for encryption / decryption. In addition use an unsigned type for the function code.
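To make the scheme concrete: every cipher keeps a single base function code, and the decipher variant is derived by ORing in the modifier bit. A minimal sketch using the constants introduced below (the helper km_fc() is made up for illustration and is not part of the patch):

	#define CPACF_DECRYPT		0x80	/* decipher modifier bit */
	#define CPACF_KM_AES_128	0x12	/* one code per cipher */

	static unsigned long km_fc(unsigned long base, int decrypt)
	{
		/* 0x12 | 0x80 == 0x92, i.e. the old CPACF_KM_AES_128_DEC */
		return decrypt ? (base | CPACF_DECRYPT) : base;
	}

This mirrors what the converted code paths do with sctx->fc versus sctx->fc | CPACF_DECRYPT.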
Reviewed-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 98 +++++++++++++++-------------------- arch/s390/crypto/des_s390.c | 47 ++++++++++------- arch/s390/include/asm/cpacf.h | 98 +++++++++++++++-------------------- 3 files changed, 110 insertions(+), 133 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 2ea18b050309..9da54698b87a 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -41,9 +41,8 @@ static char keylen_flag; struct s390_aes_ctx { u8 key[AES_MAX_KEY_SIZE]; - long enc; - long dec; int key_len; + unsigned long fc; union { struct crypto_skcipher *blk; struct crypto_cipher *cip; @@ -61,9 +60,8 @@ struct pcc_param { struct s390_xts_ctx { u8 key[32]; u8 pcc_key[32]; - long enc; - long dec; int key_len; + unsigned long fc; struct crypto_skcipher *fallback; }; @@ -146,16 +144,16 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) switch (sctx->key_len) { case 16: - cpacf_km(CPACF_KM_AES_128_ENC, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_128, + &sctx->key, out, in, AES_BLOCK_SIZE); break; case 24: - cpacf_km(CPACF_KM_AES_192_ENC, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_192, + &sctx->key, out, in, AES_BLOCK_SIZE); break; case 32: - cpacf_km(CPACF_KM_AES_256_ENC, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_256, + &sctx->key, out, in, AES_BLOCK_SIZE); break; } } @@ -171,16 +169,16 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) switch (sctx->key_len) { case 16: - cpacf_km(CPACF_KM_AES_128_DEC, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_128 | CPACF_DECRYPT, + &sctx->key, out, in, AES_BLOCK_SIZE); break; case 24: - cpacf_km(CPACF_KM_AES_192_DEC, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_192 | CPACF_DECRYPT, + &sctx->key, out, in, AES_BLOCK_SIZE); break; case 32: - cpacf_km(CPACF_KM_AES_256_DEC, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_256 | CPACF_DECRYPT, + &sctx->key, out, in, AES_BLOCK_SIZE); break; } } @@ -301,16 +299,13 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, switch (key_len) { case 16: - sctx->enc = CPACF_KM_AES_128_ENC; - sctx->dec = CPACF_KM_AES_128_DEC; + sctx->fc = CPACF_KM_AES_128; break; case 24: - sctx->enc = CPACF_KM_AES_192_ENC; - sctx->dec = CPACF_KM_AES_192_DEC; + sctx->fc = CPACF_KM_AES_192; break; case 32: - sctx->enc = CPACF_KM_AES_256_ENC; - sctx->dec = CPACF_KM_AES_256_DEC; + sctx->fc = CPACF_KM_AES_256; break; } @@ -351,7 +346,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, return fallback_blk_enc(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk); + return ecb_aes_crypt(desc, sctx->fc, sctx->key, &walk); } static int ecb_aes_decrypt(struct blkcipher_desc *desc, @@ -365,7 +360,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, return fallback_blk_dec(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk); + return ecb_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, sctx->key, &walk); } static int fallback_init_blk(struct crypto_tfm *tfm) @@ -430,16 +425,13 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, switch (key_len) { case 16: - sctx->enc = CPACF_KMC_AES_128_ENC; - sctx->dec = CPACF_KMC_AES_128_DEC; + sctx->fc = CPACF_KMC_AES_128; break; case 24: - sctx->enc 
= CPACF_KMC_AES_192_ENC; - sctx->dec = CPACF_KMC_AES_192_DEC; + sctx->fc = CPACF_KMC_AES_192; break; case 32: - sctx->enc = CPACF_KMC_AES_256_ENC; - sctx->dec = CPACF_KMC_AES_256_DEC; + sctx->fc = CPACF_KMC_AES_256; break; } @@ -492,7 +484,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, return fallback_blk_enc(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, sctx->enc, &walk); + return cbc_aes_crypt(desc, sctx->fc, &walk); } static int cbc_aes_decrypt(struct blkcipher_desc *desc, @@ -506,7 +498,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, return fallback_blk_dec(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, sctx->dec, &walk); + return cbc_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, &walk); } static struct crypto_alg cbc_aes_alg = { @@ -603,19 +595,16 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, switch (key_len) { case 32: - xts_ctx->enc = CPACF_KM_XTS_128_ENC; - xts_ctx->dec = CPACF_KM_XTS_128_DEC; + xts_ctx->fc = CPACF_KM_XTS_128; memcpy(xts_ctx->key + 16, in_key, 16); memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16); break; case 48: - xts_ctx->enc = 0; - xts_ctx->dec = 0; + xts_ctx->fc = 0; xts_fallback_setkey(tfm, in_key, key_len); break; case 64: - xts_ctx->enc = CPACF_KM_XTS_256_ENC; - xts_ctx->dec = CPACF_KM_XTS_256_DEC; + xts_ctx->fc = CPACF_KM_XTS_256; memcpy(xts_ctx->key, in_key, 32); memcpy(xts_ctx->pcc_key, in_key + 32, 32); break; @@ -685,7 +674,7 @@ static int xts_aes_encrypt(struct blkcipher_desc *desc, return xts_fallback_encrypt(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk); + return xts_aes_crypt(desc, xts_ctx->fc, xts_ctx, &walk); } static int xts_aes_decrypt(struct blkcipher_desc *desc, @@ -699,7 +688,7 @@ static int xts_aes_decrypt(struct blkcipher_desc *desc, return xts_fallback_decrypt(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk); + return xts_aes_crypt(desc, xts_ctx->fc | CPACF_DECRYPT, xts_ctx, &walk); } static int xts_fallback_init(struct crypto_tfm *tfm) @@ -759,16 +748,13 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, switch (key_len) { case 16: - sctx->enc = CPACF_KMCTR_AES_128_ENC; - sctx->dec = CPACF_KMCTR_AES_128_DEC; + sctx->fc = CPACF_KMCTR_AES_128; break; case 24: - sctx->enc = CPACF_KMCTR_AES_192_ENC; - sctx->dec = CPACF_KMCTR_AES_192_DEC; + sctx->fc = CPACF_KMCTR_AES_192; break; case 32: - sctx->enc = CPACF_KMCTR_AES_256_ENC; - sctx->dec = CPACF_KMCTR_AES_256_DEC; + sctx->fc = CPACF_KMCTR_AES_256; break; } @@ -865,7 +851,7 @@ static int ctr_aes_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_aes_crypt(desc, sctx->enc, sctx, &walk); + return ctr_aes_crypt(desc, sctx->fc, sctx, &walk); } static int ctr_aes_decrypt(struct blkcipher_desc *desc, @@ -876,7 +862,7 @@ static int ctr_aes_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_aes_crypt(desc, sctx->dec, sctx, &walk); + return ctr_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, sctx, &walk); } static struct crypto_alg ctr_aes_alg = { @@ -906,11 +892,11 @@ static int __init aes_s390_init(void) { int ret; - if (cpacf_query(CPACF_KM, CPACF_KM_AES_128_ENC)) + if (cpacf_query(CPACF_KM, CPACF_KM_AES_128)) keylen_flag |= 
AES_KEYLEN_128; - if (cpacf_query(CPACF_KM, CPACF_KM_AES_192_ENC)) + if (cpacf_query(CPACF_KM, CPACF_KM_AES_192)) keylen_flag |= AES_KEYLEN_192; - if (cpacf_query(CPACF_KM, CPACF_KM_AES_256_ENC)) + if (cpacf_query(CPACF_KM, CPACF_KM_AES_256)) keylen_flag |= AES_KEYLEN_256; if (!keylen_flag) @@ -933,17 +919,17 @@ static int __init aes_s390_init(void) if (ret) goto cbc_aes_err; - if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128_ENC) && - cpacf_query(CPACF_KM, CPACF_KM_XTS_256_ENC)) { + if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128) && + cpacf_query(CPACF_KM, CPACF_KM_XTS_256)) { ret = crypto_register_alg(&xts_aes_alg); if (ret) goto xts_aes_err; xts_aes_alg_reg = 1; } - if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128_ENC) && - cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_192_ENC) && - cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_256_ENC)) { + if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128) && + cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_192) && + cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_256)) { ctrblk = (u8 *) __get_free_page(GFP_KERNEL); if (!ctrblk) { ret = -ENOMEM; diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 697e71a75fc2..fadd474bf8bb 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -53,14 +53,15 @@ static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - cpacf_km(CPACF_KM_DEA_ENC, ctx->key, out, in, DES_BLOCK_SIZE); + cpacf_km(CPACF_KM_DEA, ctx->key, out, in, DES_BLOCK_SIZE); } static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - cpacf_km(CPACF_KM_DEA_DEC, ctx->key, out, in, DES_BLOCK_SIZE); + cpacf_km(CPACF_KM_DEA | CPACF_DECRYPT, + ctx->key, out, in, DES_BLOCK_SIZE); } static struct crypto_alg des_alg = { @@ -148,7 +149,7 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_DEA_ENC, ctx->key, &walk); + return ecb_desall_crypt(desc, CPACF_KM_DEA, ctx->key, &walk); } static int ecb_des_decrypt(struct blkcipher_desc *desc, @@ -159,7 +160,8 @@ static int ecb_des_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_DEA_DEC, ctx->key, &walk); + return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, + ctx->key, &walk); } static struct crypto_alg ecb_des_alg = { @@ -189,7 +191,7 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_DEA_ENC, &walk); + return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk); } static int cbc_des_decrypt(struct blkcipher_desc *desc, @@ -199,7 +201,7 @@ static int cbc_des_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_DEA_DEC, &walk); + return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk); } static struct crypto_alg cbc_des_alg = { @@ -257,14 +259,15 @@ static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - cpacf_km(CPACF_KM_TDEA_192_ENC, ctx->key, dst, src, DES_BLOCK_SIZE); + cpacf_km(CPACF_KM_TDEA_192, ctx->key, dst, src, DES_BLOCK_SIZE); } static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); 
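	/* as with single DES above, decryption uses base code | CPACF_DECRYPT */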
- cpacf_km(CPACF_KM_TDEA_192_DEC, ctx->key, dst, src, DES_BLOCK_SIZE); + cpacf_km(CPACF_KM_TDEA_192 | CPACF_DECRYPT, + ctx->key, dst, src, DES_BLOCK_SIZE); } static struct crypto_alg des3_alg = { @@ -294,7 +297,7 @@ static int ecb_des3_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_ENC, ctx->key, &walk); + return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, ctx->key, &walk); } static int ecb_des3_decrypt(struct blkcipher_desc *desc, @@ -305,7 +308,8 @@ static int ecb_des3_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_DEC, ctx->key, &walk); + return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT, + ctx->key, &walk); } static struct crypto_alg ecb_des3_alg = { @@ -335,7 +339,7 @@ static int cbc_des3_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_ENC, &walk); + return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk); } static int cbc_des3_decrypt(struct blkcipher_desc *desc, @@ -345,7 +349,8 @@ static int cbc_des3_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_DEC, &walk); + return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT, + &walk); } static struct crypto_alg cbc_des3_alg = { @@ -456,7 +461,7 @@ static int ctr_des_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_ENC, ctx, &walk); + return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, ctx, &walk); } static int ctr_des_decrypt(struct blkcipher_desc *desc, @@ -467,7 +472,8 @@ static int ctr_des_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_DEC, ctx, &walk); + return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, + ctx, &walk); } static struct crypto_alg ctr_des_alg = { @@ -499,7 +505,7 @@ static int ctr_des3_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_ENC, ctx, &walk); + return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, ctx, &walk); } static int ctr_des3_decrypt(struct blkcipher_desc *desc, @@ -510,7 +516,8 @@ static int ctr_des3_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_DEC, ctx, &walk); + return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT, + ctx, &walk); } static struct crypto_alg ctr_des3_alg = { @@ -538,8 +545,8 @@ static int __init des_s390_init(void) { int ret; - if (!cpacf_query(CPACF_KM, CPACF_KM_DEA_ENC) || - !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192_ENC)) + if (!cpacf_query(CPACF_KM, CPACF_KM_DEA) || + !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192)) return -EOPNOTSUPP; ret = crypto_register_alg(&des_alg); @@ -561,8 +568,8 @@ static int __init des_s390_init(void) if (ret) goto cbc_des3_err; - if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA_ENC) && - cpacf_query(CPACF_KMCTR, CPACF_KMCTR_TDEA_192_ENC)) { + if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA) && + cpacf_query(CPACF_KMCTR, 
CPACF_KMCTR_TDEA_192)) { ret = crypto_register_alg(&ctr_des_alg); if (ret) goto ctr_des_err; diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index d28621de8e0b..ca884198805c 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -28,67 +28,51 @@ #define CPACF_PPNO 0xb93c /* MSA5 */ /* - * Function codes for the KM (CIPHER MESSAGE) - * instruction (0x80 is the decipher modifier bit) + * Decryption modifier bit + */ +#define CPACF_DECRYPT 0x80 + +/* + * Function codes for the KM (CIPHER MESSAGE) instruction */ #define CPACF_KM_QUERY 0x00 -#define CPACF_KM_DEA_ENC 0x01 -#define CPACF_KM_DEA_DEC 0x81 -#define CPACF_KM_TDEA_128_ENC 0x02 -#define CPACF_KM_TDEA_128_DEC 0x82 -#define CPACF_KM_TDEA_192_ENC 0x03 -#define CPACF_KM_TDEA_192_DEC 0x83 -#define CPACF_KM_AES_128_ENC 0x12 -#define CPACF_KM_AES_128_DEC 0x92 -#define CPACF_KM_AES_192_ENC 0x13 -#define CPACF_KM_AES_192_DEC 0x93 -#define CPACF_KM_AES_256_ENC 0x14 -#define CPACF_KM_AES_256_DEC 0x94 -#define CPACF_KM_XTS_128_ENC 0x32 -#define CPACF_KM_XTS_128_DEC 0xb2 -#define CPACF_KM_XTS_256_ENC 0x34 -#define CPACF_KM_XTS_256_DEC 0xb4 +#define CPACF_KM_DEA 0x01 +#define CPACF_KM_TDEA_128 0x02 +#define CPACF_KM_TDEA_192 0x03 +#define CPACF_KM_AES_128 0x12 +#define CPACF_KM_AES_192 0x13 +#define CPACF_KM_AES_256 0x14 +#define CPACF_KM_XTS_128 0x32 +#define CPACF_KM_XTS_256 0x34 /* * Function codes for the KMC (CIPHER MESSAGE WITH CHAINING) - * instruction (0x80 is the decipher modifier bit) + * instruction */ #define CPACF_KMC_QUERY 0x00 -#define CPACF_KMC_DEA_ENC 0x01 -#define CPACF_KMC_DEA_DEC 0x81 -#define CPACF_KMC_TDEA_128_ENC 0x02 -#define CPACF_KMC_TDEA_128_DEC 0x82 -#define CPACF_KMC_TDEA_192_ENC 0x03 -#define CPACF_KMC_TDEA_192_DEC 0x83 -#define CPACF_KMC_AES_128_ENC 0x12 -#define CPACF_KMC_AES_128_DEC 0x92 -#define CPACF_KMC_AES_192_ENC 0x13 -#define CPACF_KMC_AES_192_DEC 0x93 -#define CPACF_KMC_AES_256_ENC 0x14 -#define CPACF_KMC_AES_256_DEC 0x94 +#define CPACF_KMC_DEA 0x01 +#define CPACF_KMC_TDEA_128 0x02 +#define CPACF_KMC_TDEA_192 0x03 +#define CPACF_KMC_AES_128 0x12 +#define CPACF_KMC_AES_192 0x13 +#define CPACF_KMC_AES_256 0x14 #define CPACF_KMC_PRNG 0x43 /* * Function codes for the KMCTR (CIPHER MESSAGE WITH COUNTER) - * instruction (0x80 is the decipher modifier bit) + * instruction */ -#define CPACF_KMCTR_QUERY 0x00 -#define CPACF_KMCTR_DEA_ENC 0x01 -#define CPACF_KMCTR_DEA_DEC 0x81 -#define CPACF_KMCTR_TDEA_128_ENC 0x02 -#define CPACF_KMCTR_TDEA_128_DEC 0x82 -#define CPACF_KMCTR_TDEA_192_ENC 0x03 -#define CPACF_KMCTR_TDEA_192_DEC 0x83 -#define CPACF_KMCTR_AES_128_ENC 0x12 -#define CPACF_KMCTR_AES_128_DEC 0x92 -#define CPACF_KMCTR_AES_192_ENC 0x13 -#define CPACF_KMCTR_AES_192_DEC 0x93 -#define CPACF_KMCTR_AES_256_ENC 0x14 -#define CPACF_KMCTR_AES_256_DEC 0x94 +#define CPACF_KMCTR_QUERY 0x00 +#define CPACF_KMCTR_DEA 0x01 +#define CPACF_KMCTR_TDEA_128 0x02 +#define CPACF_KMCTR_TDEA_192 0x03 +#define CPACF_KMCTR_AES_128 0x12 +#define CPACF_KMCTR_AES_192 0x13 +#define CPACF_KMCTR_AES_256 0x14 /* * Function codes for the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) - * instruction (0x80 is the decipher modifier bit) + * instruction */ #define CPACF_KIMD_QUERY 0x00 #define CPACF_KIMD_SHA_1 0x01 @@ -98,7 +82,7 @@ /* * Function codes for the KLMD (COMPUTE LAST MESSAGE DIGEST) - * instruction (0x80 is the decipher modifier bit) + * instruction */ #define CPACF_KLMD_QUERY 0x00 #define CPACF_KLMD_SHA_1 0x01 @@ -107,7 +91,7 @@ /* * function codes for the KMAC (COMPUTE MESSAGE 
AUTHENTICATION CODE) - * instruction (0x80 is the decipher modifier bit) + * instruction */ #define CPACF_KMAC_QUERY 0x00 #define CPACF_KMAC_DEA 0x01 @@ -116,7 +100,7 @@ /* * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION) - * instruction (0x80 is the decipher modifier bit) + * instruction */ #define CPACF_PPNO_QUERY 0x00 #define CPACF_PPNO_SHA512_DRNG_GEN 0x03 @@ -194,7 +178,7 @@ static inline int cpacf_query(unsigned int opcode, unsigned int func) * Returns 0 for the query func, number of processed bytes for * encryption/decryption funcs */ -static inline int cpacf_km(long func, void *param, +static inline int cpacf_km(unsigned long func, void *param, u8 *dest, const u8 *src, long src_len) { register unsigned long r0 asm("0") = (unsigned long) func; @@ -224,7 +208,7 @@ static inline int cpacf_km(long func, void *param, * Returns 0 for the query func, number of processed bytes for * encryption/decryption funcs */ -static inline int cpacf_kmc(long func, void *param, +static inline int cpacf_kmc(unsigned long func, void *param, u8 *dest, const u8 *src, long src_len) { register unsigned long r0 asm("0") = (unsigned long) func; @@ -253,7 +237,7 @@ static inline int cpacf_kmc(long func, void *param, * * Returns 0 for the query func, number of processed bytes for digest funcs */ -static inline int cpacf_kimd(long func, void *param, +static inline int cpacf_kimd(unsigned long func, void *param, const u8 *src, long src_len) { register unsigned long r0 asm("0") = (unsigned long) func; @@ -280,7 +264,7 @@ static inline int cpacf_kimd(long func, void *param, * * Returns 0 for the query func, number of processed bytes for digest funcs */ -static inline int cpacf_klmd(long func, void *param, +static inline int cpacf_klmd(unsigned long func, void *param, const u8 *src, long src_len) { register unsigned long r0 asm("0") = (unsigned long) func; @@ -308,7 +292,7 @@ static inline int cpacf_klmd(long func, void *param, * * Returns 0 for the query func, number of processed bytes for digest funcs */ -static inline int cpacf_kmac(long func, void *param, +static inline int cpacf_kmac(unsigned long func, void *param, const u8 *src, long src_len) { register unsigned long r0 asm("0") = (unsigned long) func; @@ -338,7 +322,7 @@ static inline int cpacf_kmac(long func, void *param, * Returns 0 for the query func, number of processed bytes for * encryption/decryption funcs */ -static inline int cpacf_kmctr(long func, void *param, u8 *dest, +static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest, const u8 *src, long src_len, u8 *counter) { register unsigned long r0 asm("0") = (unsigned long) func; @@ -372,7 +356,7 @@ static inline int cpacf_kmctr(long func, void *param, u8 *dest, * Returns 0 for the query func, number of random bytes stored in * dest buffer for generate function */ -static inline int cpacf_ppno(long func, void *param, +static inline int cpacf_ppno(unsigned long func, void *param, u8 *dest, long dest_len, const u8 *seed, long seed_len) { @@ -402,7 +386,7 @@ static inline int cpacf_ppno(long func, void *param, * * Returns 0. 
*/ -static inline int cpacf_pcc(long func, void *param) +static inline int cpacf_pcc(unsigned long func, void *param) { register unsigned long r0 asm("0") = (unsigned long) func; register unsigned long r1 asm("1") = (unsigned long) param; From 0177db01adf26cf9c5dfe1feaf17087de4b9e40e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 15 Aug 2016 10:41:52 +0200 Subject: [PATCH 18/48] s390/crypto: simplify return code handling The CPACF instructions can complete with three different condition codes: CC=0 for successful completion, CC=1 if the protected key verification failed, and CC=3 for partial completion. The inline functions will restart the CPACF instruction for partial completion, which removes the CC=3 case. The CC=1 case is only relevant for the protected key functions of the KM, KMC, KMAC and KMCTR instructions. As the protected key functions are not used by the current code, there is no need for any kind of return code handling. Reviewed-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 28 +++-------- arch/s390/crypto/des_s390.c | 20 ++------ arch/s390/crypto/ghash_s390.c | 18 ++----- arch/s390/crypto/prng.c | 95 +++++++++-------------------- arch/s390/crypto/sha_common.c | 23 +++------ arch/s390/include/asm/cpacf.h | 33 +++--------- 6 files changed, 52 insertions(+), 165 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 9da54698b87a..4eb8de417419 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -324,9 +324,7 @@ static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param, u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = cpacf_km(func, param, out, in, n); - if (ret < 0 || ret != n) - return -EIO; + cpacf_km(func, param, out, in, n); nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); @@ -460,9 +458,7 @@ static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = cpacf_kmc(func, &param, out, in, n); - if (ret < 0 || ret != n) - return -EIO; + cpacf_kmc(func, &param, out, in, n); nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); @@ -640,9 +636,7 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func, memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); memcpy(pcc_param.key, xts_ctx->pcc_key, 32); /* remove decipher modifier bit from 'func' and call PCC */ - ret = cpacf_pcc(func & 0x7f, &pcc_param.key[offset]); - if (ret < 0) - return -EIO; + cpacf_pcc(func & 0x7f, &pcc_param.key[offset]); memcpy(xts_param.key, xts_ctx->key, 32); memcpy(xts_param.init, pcc_param.xts, 16); @@ -652,9 +646,7 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func, out = walk->dst.virt.addr; in = walk->src.virt.addr; - ret = cpacf_km(func, &xts_param.key[offset], out, in, n); - if (ret < 0 || ret != n) - return -EIO; + cpacf_km(func, &xts_param.key[offset], out, in, n); nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); @@ -798,12 +790,7 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, n = __ctrblk_init(ctrptr, nbytes); else n = AES_BLOCK_SIZE; - ret = cpacf_kmctr(func, sctx->key, out, in, n, ctrptr); - if (ret < 0 || ret != n) { - if (ctrptr == ctrblk) - spin_unlock(&ctrblk_lock); - return -EIO; - } + cpacf_kmctr(func, sctx->key, out, in, n, ctrptr); if (n > AES_BLOCK_SIZE) memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE, AES_BLOCK_SIZE); @@ -830,10 +817,7 @@ 
static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, if (nbytes) { out = walk->dst.virt.addr; in = walk->src.virt.addr; - ret = cpacf_kmctr(func, sctx->key, buf, in, - AES_BLOCK_SIZE, ctrbuf); - if (ret < 0 || ret != AES_BLOCK_SIZE) - return -EIO; + cpacf_kmctr(func, sctx->key, buf, in, AES_BLOCK_SIZE, ctrbuf); memcpy(out, buf, nbytes); crypto_inc(ctrbuf, AES_BLOCK_SIZE); ret = blkcipher_walk_done(desc, walk, 0); diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index fadd474bf8bb..999878597331 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -95,9 +95,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = cpacf_km(func, key, out, in, n); - if (ret < 0 || ret != n) - return -EIO; + cpacf_km(func, key, out, in, n); nbytes &= DES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); @@ -128,9 +126,7 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = cpacf_kmc(func, &param, out, in, n); - if (ret < 0 || ret != n) - return -EIO; + cpacf_kmc(func, &param, out, in, n); nbytes &= DES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); @@ -411,12 +407,7 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, n = __ctrblk_init(ctrptr, nbytes); else n = DES_BLOCK_SIZE; - ret = cpacf_kmctr(func, ctx->key, out, in, n, ctrptr); - if (ret < 0 || ret != n) { - if (ctrptr == ctrblk) - spin_unlock(&ctrblk_lock); - return -EIO; - } + cpacf_kmctr(func, ctx->key, out, in, n, ctrptr); if (n > DES_BLOCK_SIZE) memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE, DES_BLOCK_SIZE); @@ -441,10 +432,7 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, if (nbytes) { out = walk->dst.virt.addr; in = walk->src.virt.addr; - ret = cpacf_kmctr(func, ctx->key, buf, in, - DES_BLOCK_SIZE, ctrbuf); - if (ret < 0 || ret != DES_BLOCK_SIZE) - return -EIO; + cpacf_kmctr(func, ctx->key, buf, in, DES_BLOCK_SIZE, ctrbuf); memcpy(out, buf, nbytes); crypto_inc(ctrbuf, DES_BLOCK_SIZE); ret = blkcipher_walk_done(desc, walk, 0); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index ab68de72e795..8e87f5176799 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -58,7 +58,6 @@ static int ghash_update(struct shash_desc *desc, struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); unsigned int n; u8 *buf = dctx->buffer; - int ret; if (dctx->bytes) { u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); @@ -71,18 +70,14 @@ static int ghash_update(struct shash_desc *desc, src += n; if (!dctx->bytes) { - ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, - GHASH_BLOCK_SIZE); - if (ret != GHASH_BLOCK_SIZE) - return -EIO; + cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, + GHASH_BLOCK_SIZE); } } n = srclen & ~(GHASH_BLOCK_SIZE - 1); if (n) { - ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n); - if (ret != n) - return -EIO; + cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n); src += n; srclen -= n; } @@ -98,17 +93,12 @@ static int ghash_update(struct shash_desc *desc, static int ghash_flush(struct ghash_desc_ctx *dctx) { u8 *buf = dctx->buffer; - int ret; if (dctx->bytes) { u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); memset(pos, 0, dctx->bytes); - - ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); - if (ret != GHASH_BLOCK_SIZE) - return -EIO; - + cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); dctx->bytes = 0; } diff 
--git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 41527b113f5a..bbf2af74c549 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -135,12 +135,7 @@ static int generate_entropy(u8 *ebuf, size_t nbytes) else h = ebuf; /* generate sha256 from this page */ - if (cpacf_kimd(CPACF_KIMD_SHA_256, h, - pg, PAGE_SIZE) != PAGE_SIZE) { - prng_errorflag = PRNG_GEN_ENTROPY_FAILED; - ret = -EIO; - goto out; - } + cpacf_kimd(CPACF_KIMD_SHA_256, h, pg, PAGE_SIZE); if (n < sizeof(hash)) memcpy(ebuf, hash, n); ret += n; @@ -148,7 +143,6 @@ static int generate_entropy(u8 *ebuf, size_t nbytes) nbytes -= n; } -out: free_page((unsigned long)pg); return ret; } @@ -160,13 +154,11 @@ static void prng_tdes_add_entropy(void) { __u64 entropy[4]; unsigned int i; - int ret; for (i = 0; i < 16; i++) { - ret = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block, - (char *)entropy, (char *)entropy, - sizeof(entropy)); - BUG_ON(ret < 0 || ret != sizeof(entropy)); + cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block, + (char *) entropy, (char *) entropy, + sizeof(entropy)); memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy)); } } @@ -303,21 +295,14 @@ static int __init prng_sha512_selftest(void) 0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c, 0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 }; - int ret = 0; u8 buf[sizeof(random)]; struct ppno_ws_s ws; memset(&ws, 0, sizeof(ws)); /* initial seed */ - ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, &ws, NULL, 0, - seed, sizeof(seed)); - if (ret < 0) { - pr_err("The prng self test seed operation for the " - "SHA-512 mode failed with rc=%d\n", ret); - prng_errorflag = PRNG_SELFTEST_FAILED; - return -EIO; - } + cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, + &ws, NULL, 0, seed, sizeof(seed)); /* check working states V and C */ if (memcmp(ws.V, V0, sizeof(V0)) != 0 @@ -329,22 +314,10 @@ static int __init prng_sha512_selftest(void) } /* generate random bytes */ - ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, - &ws, buf, sizeof(buf), NULL, 0); - if (ret < 0) { - pr_err("The prng self test generate operation for " - "the SHA-512 mode failed with rc=%d\n", ret); - prng_errorflag = PRNG_SELFTEST_FAILED; - return -EIO; - } - ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, - &ws, buf, sizeof(buf), NULL, 0); - if (ret < 0) { - pr_err("The prng self test generate operation for " - "the SHA-512 mode failed with rc=%d\n", ret); - prng_errorflag = PRNG_SELFTEST_FAILED; - return -EIO; - } + cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + &ws, buf, sizeof(buf), NULL, 0); + cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + &ws, buf, sizeof(buf), NULL, 0); /* check against expected data */ if (memcmp(buf, random, sizeof(random)) != 0) { @@ -392,26 +365,16 @@ static int __init prng_sha512_instantiate(void) get_tod_clock_ext(seed + 48); /* initial seed of the ppno drng */ - ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, - &prng_data->ppnows, NULL, 0, seed, sizeof(seed)); - if (ret < 0) { - prng_errorflag = PRNG_SEED_FAILED; - ret = -EIO; - goto outfree; - } + cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, + &prng_data->ppnows, NULL, 0, seed, sizeof(seed)); /* if fips mode is enabled, generate a first block of random bytes for the FIPS 140-2 Conditional Self Test */ if (fips_enabled) { prng_data->prev = prng_data->buf + prng_chunk_size; - ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, - &prng_data->ppnows, - prng_data->prev, prng_chunk_size, NULL, 0); - if (ret < 0 || ret != prng_chunk_size) { - prng_errorflag = PRNG_GEN_FAILED; - ret = -EIO; - goto outfree; - } + 
cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + &prng_data->ppnows, + prng_data->prev, prng_chunk_size, NULL, 0); } return 0; @@ -440,12 +403,8 @@ static int prng_sha512_reseed(void) return ret; /* do a reseed of the ppno drng with this bytestring */ - ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, - &prng_data->ppnows, NULL, 0, seed, sizeof(seed)); - if (ret) { - prng_errorflag = PRNG_RESEED_FAILED; - return -EIO; - } + cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, + &prng_data->ppnows, NULL, 0, seed, sizeof(seed)); return 0; } @@ -463,12 +422,8 @@ static int prng_sha512_generate(u8 *buf, size_t nbytes) } /* PPNO generate */ - ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, - &prng_data->ppnows, buf, nbytes, NULL, 0); - if (ret < 0 || ret != nbytes) { - prng_errorflag = PRNG_GEN_FAILED; - return -EIO; - } + cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + &prng_data->ppnows, buf, nbytes, NULL, 0); /* FIPS 140-2 Conditional Self Test */ if (fips_enabled) { @@ -494,7 +449,7 @@ static int prng_open(struct inode *inode, struct file *file) static ssize_t prng_tdes_read(struct file *file, char __user *ubuf, size_t nbytes, loff_t *ppos) { - int chunk, n, tmp, ret = 0; + int chunk, n, ret = 0; /* lock prng_data struct */ if (mutex_lock_interruptible(&prng_data->mutex)) @@ -545,13 +500,9 @@ static ssize_t prng_tdes_read(struct file *file, char __user *ubuf, * * Note: you can still get strict X9.17 conformity by setting * prng_chunk_size to 8 bytes. - */ - tmp = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block, - prng_data->buf, prng_data->buf, n); - if (tmp < 0 || tmp != n) { - ret = -EIO; - break; - } + */ + cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block, + prng_data->buf, prng_data->buf, n); prng_data->prngws.byte_counter += n; prng_data->prngws.reseed_counter += n; diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index 8e908166c3ee..c740f77285b2 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -22,8 +22,7 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct s390_sha_ctx *ctx = shash_desc_ctx(desc); unsigned int bsize = crypto_shash_blocksize(desc->tfm); - unsigned int index; - int ret; + unsigned int index, n; /* how much is already in the buffer? */ index = ctx->count & (bsize - 1); @@ -35,9 +34,7 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) /* process one stored block */ if (index) { memcpy(ctx->buf + index, data, bsize - index); - ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize); - if (ret != bsize) - return -EIO; + cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize); data += bsize - index; len -= bsize - index; index = 0; @@ -45,12 +42,10 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) /* process as many blocks as possible */ if (len >= bsize) { - ret = cpacf_kimd(ctx->func, ctx->state, data, - len & ~(bsize - 1)); - if (ret != (len & ~(bsize - 1))) - return -EIO; - data += ret; - len -= ret; + n = len & ~(bsize - 1); + cpacf_kimd(ctx->func, ctx->state, data, n); + data += n; + len -= n; } store: if (len) @@ -66,7 +61,6 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) unsigned int bsize = crypto_shash_blocksize(desc->tfm); u64 bits; unsigned int index, end, plen; - int ret; /* SHA-512 uses 128 bit padding length */ plen = (bsize > SHA256_BLOCK_SIZE) ? 
16 : 8; @@ -88,10 +82,7 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) */ bits = ctx->count * 8; memcpy(ctx->buf + end - 8, &bits, sizeof(bits)); - - ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, end); - if (ret != end) - return -EIO; + cpacf_kimd(ctx->func, ctx->state, ctx->buf, end); /* copy digest to out */ memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm)); diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index ca884198805c..c226c9ba24ac 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -234,11 +234,9 @@ static inline int cpacf_kmc(unsigned long func, void *param, * @param: address of parameter block; see POP for details on each func * @src: address of source memory area * @src_len: length of src operand in bytes - * - * Returns 0 for the query func, number of processed bytes for digest funcs */ -static inline int cpacf_kimd(unsigned long func, void *param, - const u8 *src, long src_len) +static inline void cpacf_kimd(unsigned long func, void *param, + const u8 *src, long src_len) { register unsigned long r0 asm("0") = (unsigned long) func; register unsigned long r1 asm("1") = (unsigned long) param; @@ -251,8 +249,6 @@ static inline int cpacf_kimd(unsigned long func, void *param, : [src] "+a" (r2), [len] "+d" (r3) : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KIMD) : "cc", "memory"); - - return src_len - r3; } /** @@ -261,11 +257,9 @@ static inline int cpacf_kimd(unsigned long func, void *param, * @param: address of parameter block; see POP for details on each func * @src: address of source memory area * @src_len: length of src operand in bytes - * - * Returns 0 for the query func, number of processed bytes for digest funcs */ -static inline int cpacf_klmd(unsigned long func, void *param, - const u8 *src, long src_len) +static inline void cpacf_klmd(unsigned long func, void *param, + const u8 *src, long src_len) { register unsigned long r0 asm("0") = (unsigned long) func; register unsigned long r1 asm("1") = (unsigned long) param; @@ -278,8 +272,6 @@ static inline int cpacf_klmd(unsigned long func, void *param, : [src] "+a" (r2), [len] "+d" (r3) : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KLMD) : "cc", "memory"); - - return src_len - r3; } /** @@ -352,13 +344,10 @@ static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest, * @dest_len: size of destination memory area in bytes * @seed: address of seed data * @seed_len: size of seed data in bytes - * - * Returns 0 for the query func, number of random bytes stored in - * dest buffer for generate function */ -static inline int cpacf_ppno(unsigned long func, void *param, - u8 *dest, long dest_len, - const u8 *seed, long seed_len) +static inline void cpacf_ppno(unsigned long func, void *param, + u8 *dest, long dest_len, + const u8 *seed, long seed_len) { register unsigned long r0 asm("0") = (unsigned long) func; register unsigned long r1 asm("1") = (unsigned long) param; @@ -374,8 +363,6 @@ static inline int cpacf_ppno(unsigned long func, void *param, : [fc] "d" (r0), [pba] "a" (r1), [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PPNO) : "cc", "memory"); - - return dest_len - r3; } /** @@ -383,10 +370,8 @@ static inline int cpacf_ppno(unsigned long func, void *param, * instruction * @func: the function code passed to PCC; see CPACF_KM_xxx defines * @param: address of parameter block; see POP for details on each func - * - * Returns 0. 
*/ -static inline int cpacf_pcc(unsigned long func, void *param) +static inline void cpacf_pcc(unsigned long func, void *param) { register unsigned long r0 asm("0") = (unsigned long) func; register unsigned long r1 asm("1") = (unsigned long) param; @@ -397,8 +382,6 @@ static inline int cpacf_pcc(unsigned long func, void *param) : : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCC) : "cc", "memory"); - - return 0; } #endif /* _ASM_S390_CPACF_H */ From d863d5945f2be0abfcd9d36b1a7c605f3eaef517 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 18 Aug 2016 12:34:34 +0200 Subject: [PATCH 19/48] s390/crypto: simplify init / exit functions The aes and the des module register multiple crypto algorithms dependent on the availability of specific CPACF instructions. To simplify the deregistration with crypto_unregister_alg add an array with pointers to the successfully registered algorithms and use it for the error handling in the init function and in the module exit function. Reviewed-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 79 +++++++++++++----------------- arch/s390/crypto/des_s390.c | 98 +++++++++++++++++-------------------- 2 files changed, 79 insertions(+), 98 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 4eb8de417419..be87575270ff 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -731,8 +731,6 @@ static struct crypto_alg xts_aes_alg = { } }; -static int xts_aes_alg_reg; - static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { @@ -870,7 +868,26 @@ static struct crypto_alg ctr_aes_alg = { } }; -static int ctr_aes_alg_reg; +static struct crypto_alg *aes_s390_algs_ptr[5]; +static int aes_s390_algs_num; + +static int aes_s390_register_alg(struct crypto_alg *alg) +{ + int ret; + + ret = crypto_register_alg(alg); + if (!ret) + aes_s390_algs_ptr[aes_s390_algs_num++] = alg; + return ret; +} + +static void aes_s390_fini(void) +{ + while (aes_s390_algs_num--) + crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]); + if (ctrblk) + free_page((unsigned long) ctrblk); +} static int __init aes_s390_init(void) { @@ -891,24 +908,23 @@ static int __init aes_s390_init(void) pr_info("AES hardware acceleration is only available for" " 128-bit keys\n"); - ret = crypto_register_alg(&aes_alg); + ret = aes_s390_register_alg(&aes_alg); if (ret) - goto aes_err; + goto out_err; - ret = crypto_register_alg(&ecb_aes_alg); + ret = aes_s390_register_alg(&ecb_aes_alg); if (ret) - goto ecb_aes_err; + goto out_err; - ret = crypto_register_alg(&cbc_aes_alg); + ret = aes_s390_register_alg(&cbc_aes_alg); if (ret) - goto cbc_aes_err; + goto out_err; if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128) && cpacf_query(CPACF_KM, CPACF_KM_XTS_256)) { - ret = crypto_register_alg(&xts_aes_alg); + ret = aes_s390_register_alg(&xts_aes_alg); if (ret) - goto xts_aes_err; - xts_aes_alg_reg = 1; + goto out_err; } if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128) && @@ -917,42 +933,17 @@ static int __init aes_s390_init(void) ctrblk = (u8 *) __get_free_page(GFP_KERNEL); if (!ctrblk) { ret = -ENOMEM; - goto ctr_aes_err; + goto out_err; } - ret = crypto_register_alg(&ctr_aes_alg); - if (ret) { - free_page((unsigned long) ctrblk); - goto ctr_aes_err; - } - ctr_aes_alg_reg = 1; + ret = aes_s390_register_alg(&ctr_aes_alg); + if (ret) + goto out_err; } -out: + return 0; +out_err: + aes_s390_fini(); return ret; - -ctr_aes_err: - crypto_unregister_alg(&xts_aes_alg); -xts_aes_err: - 
crypto_unregister_alg(&cbc_aes_alg); -cbc_aes_err: - crypto_unregister_alg(&ecb_aes_alg); -ecb_aes_err: - crypto_unregister_alg(&aes_alg); -aes_err: - goto out; -} - -static void __exit aes_s390_fini(void) -{ - if (ctr_aes_alg_reg) { - crypto_unregister_alg(&ctr_aes_alg); - free_page((unsigned long) ctrblk); - } - if (xts_aes_alg_reg) - crypto_unregister_alg(&xts_aes_alg); - crypto_unregister_alg(&cbc_aes_alg); - crypto_unregister_alg(&ecb_aes_alg); - crypto_unregister_alg(&aes_alg); } module_cpu_feature_match(MSA, aes_s390_init); diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 999878597331..b77a546f1e76 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -529,6 +529,27 @@ static struct crypto_alg ctr_des3_alg = { } }; +static struct crypto_alg *des_s390_algs_ptr[8]; +static int des_s390_algs_num; + +static int des_s390_register_alg(struct crypto_alg *alg) +{ + int ret; + + ret = crypto_register_alg(alg); + if (!ret) + des_s390_algs_ptr[des_s390_algs_num++] = alg; + return ret; +} + +static void des_s390_exit(void) +{ + while (des_s390_algs_num--) + crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]); + if (ctrblk) + free_page((unsigned long) ctrblk); +} + static int __init des_s390_init(void) { int ret; @@ -537,75 +558,44 @@ static int __init des_s390_init(void) !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192)) return -EOPNOTSUPP; - ret = crypto_register_alg(&des_alg); + ret = des_s390_register_alg(&des_alg); if (ret) - goto des_err; - ret = crypto_register_alg(&ecb_des_alg); + goto out_err; + ret = des_s390_register_alg(&ecb_des_alg); if (ret) - goto ecb_des_err; - ret = crypto_register_alg(&cbc_des_alg); + goto out_err; + ret = des_s390_register_alg(&cbc_des_alg); if (ret) - goto cbc_des_err; - ret = crypto_register_alg(&des3_alg); + goto out_err; + ret = des_s390_register_alg(&des3_alg); if (ret) - goto des3_err; - ret = crypto_register_alg(&ecb_des3_alg); + goto out_err; + ret = des_s390_register_alg(&ecb_des3_alg); if (ret) - goto ecb_des3_err; - ret = crypto_register_alg(&cbc_des3_alg); + goto out_err; + ret = des_s390_register_alg(&cbc_des3_alg); if (ret) - goto cbc_des3_err; + goto out_err; if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA) && cpacf_query(CPACF_KMCTR, CPACF_KMCTR_TDEA_192)) { - ret = crypto_register_alg(&ctr_des_alg); - if (ret) - goto ctr_des_err; - ret = crypto_register_alg(&ctr_des3_alg); - if (ret) - goto ctr_des3_err; ctrblk = (u8 *) __get_free_page(GFP_KERNEL); if (!ctrblk) { ret = -ENOMEM; - goto ctr_mem_err; + goto out_err; } + ret = des_s390_register_alg(&ctr_des_alg); + if (ret) + goto out_err; + ret = des_s390_register_alg(&ctr_des3_alg); + if (ret) + goto out_err; } -out: + + return 0; +out_err: + des_s390_exit(); return ret; - -ctr_mem_err: - crypto_unregister_alg(&ctr_des3_alg); -ctr_des3_err: - crypto_unregister_alg(&ctr_des_alg); -ctr_des_err: - crypto_unregister_alg(&cbc_des3_alg); -cbc_des3_err: - crypto_unregister_alg(&ecb_des3_alg); -ecb_des3_err: - crypto_unregister_alg(&des3_alg); -des3_err: - crypto_unregister_alg(&cbc_des_alg); -cbc_des_err: - crypto_unregister_alg(&ecb_des_alg); -ecb_des_err: - crypto_unregister_alg(&des_alg); -des_err: - goto out; -} - -static void __exit des_s390_exit(void) -{ - if (ctrblk) { - crypto_unregister_alg(&ctr_des_alg); - crypto_unregister_alg(&ctr_des3_alg); - free_page((unsigned long) ctrblk); - } - crypto_unregister_alg(&cbc_des3_alg); - crypto_unregister_alg(&ecb_des3_alg); - crypto_unregister_alg(&des3_alg); - crypto_unregister_alg(&cbc_des_alg); - 
crypto_unregister_alg(&ecb_des_alg); - crypto_unregister_alg(&des_alg); } module_cpu_feature_match(MSA, des_s390_init); From 69c0e360f990c2dc737681f40a361195066cef02 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 18 Aug 2016 12:59:46 +0200 Subject: [PATCH 20/48] s390/crypto: cpacf function detection The CPACF code makes some assumptions about the availability of hardware support. E.g. if the machine supports KM(AES-256) without chaining it is assumed that KMC(AES-256) with chaining is available as well. For the existing CPUs this is true but the architecturally correct way is to check each CPACF function on its own. This is what the query function of each instruction is all about. Reviewed-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 299 +++++++++++++-------------------- arch/s390/crypto/des_s390.c | 73 ++++---- arch/s390/crypto/ghash_s390.c | 2 +- arch/s390/crypto/prng.c | 4 +- arch/s390/crypto/sha1_s390.c | 2 +- arch/s390/crypto/sha256_s390.c | 2 +- arch/s390/crypto/sha512_s390.c | 2 +- arch/s390/include/asm/cpacf.h | 55 +++--- arch/s390/kvm/kvm-s390.c | 33 ++-- 9 files changed, 221 insertions(+), 251 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index be87575270ff..f4ad96ebb7e9 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -31,13 +31,10 @@ #include #include -#define AES_KEYLEN_128 1 -#define AES_KEYLEN_192 2 -#define AES_KEYLEN_256 4 - static u8 *ctrblk; static DEFINE_SPINLOCK(ctrblk_lock); -static char keylen_flag; + +static cpacf_mask_t km_functions, kmc_functions, kmctr_functions; struct s390_aes_ctx { u8 key[AES_MAX_KEY_SIZE]; @@ -65,33 +62,6 @@ struct s390_xts_ctx { struct crypto_skcipher *fallback; }; -/* - * Check if the key_len is supported by the HW. - * Returns 0 if it is, a positive number if it is not and software fallback is - * required or a negative number in case the key size is not valid - */ -static int need_fallback(unsigned int key_len) -{ - switch (key_len) { - case 16: - if (!(keylen_flag & AES_KEYLEN_128)) - return 1; - break; - case 24: - if (!(keylen_flag & AES_KEYLEN_192)) - return 1; - break; - case 32: - if (!(keylen_flag & AES_KEYLEN_256)) - return 1; - break; - default: - return -1; - break; - } - return 0; -} - static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { @@ -115,72 +85,44 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - int ret; + unsigned long fc; - ret = need_fallback(key_len); - if (ret < 0) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } + /* Pick the correct function code based on the key length */ + fc = (key_len == 16) ? CPACF_KM_AES_128 : + (key_len == 24) ? CPACF_KM_AES_192 : + (key_len == 32) ? CPACF_KM_AES_256 : 0; + + /* Check if the function code is available */ + sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? 
fc : 0; + if (!sctx->fc) + return setkey_fallback_cip(tfm, in_key, key_len); sctx->key_len = key_len; - if (!ret) { - memcpy(sctx->key, in_key, key_len); - return 0; - } - - return setkey_fallback_cip(tfm, in_key, key_len); + memcpy(sctx->key, in_key, key_len); + return 0; } static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - if (unlikely(need_fallback(sctx->key_len))) { + if (unlikely(!sctx->fc)) { crypto_cipher_encrypt_one(sctx->fallback.cip, out, in); return; } - - switch (sctx->key_len) { - case 16: - cpacf_km(CPACF_KM_AES_128, - &sctx->key, out, in, AES_BLOCK_SIZE); - break; - case 24: - cpacf_km(CPACF_KM_AES_192, - &sctx->key, out, in, AES_BLOCK_SIZE); - break; - case 32: - cpacf_km(CPACF_KM_AES_256, - &sctx->key, out, in, AES_BLOCK_SIZE); - break; - } + cpacf_km(sctx->fc, &sctx->key, out, in, AES_BLOCK_SIZE); } static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - if (unlikely(need_fallback(sctx->key_len))) { + if (unlikely(!sctx->fc)) { crypto_cipher_decrypt_one(sctx->fallback.cip, out, in); return; } - - switch (sctx->key_len) { - case 16: - cpacf_km(CPACF_KM_AES_128 | CPACF_DECRYPT, - &sctx->key, out, in, AES_BLOCK_SIZE); - break; - case 24: - cpacf_km(CPACF_KM_AES_192 | CPACF_DECRYPT, - &sctx->key, out, in, AES_BLOCK_SIZE); - break; - case 32: - cpacf_km(CPACF_KM_AES_256 | CPACF_DECRYPT, - &sctx->key, out, in, AES_BLOCK_SIZE); - break; - } + cpacf_km(sctx->fc | CPACF_DECRYPT, + &sctx->key, out, in, AES_BLOCK_SIZE); } static int fallback_init_cip(struct crypto_tfm *tfm) @@ -289,27 +231,21 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - int ret; + unsigned long fc; - ret = need_fallback(key_len); - if (ret > 0) { - sctx->key_len = key_len; + /* Pick the correct function code based on the key length */ + fc = (key_len == 16) ? CPACF_KM_AES_128 : + (key_len == 24) ? CPACF_KM_AES_192 : + (key_len == 32) ? CPACF_KM_AES_256 : 0; + + /* Check if the function code is available */ + sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? 
fc : 0; + if (!sctx->fc) return setkey_fallback_blk(tfm, in_key, key_len); - } - switch (key_len) { - case 16: - sctx->fc = CPACF_KM_AES_128; - break; - case 24: - sctx->fc = CPACF_KM_AES_192; - break; - case 32: - sctx->fc = CPACF_KM_AES_256; - break; - } - - return aes_set_key(tfm, in_key, key_len); + sctx->key_len = key_len; + memcpy(sctx->key, in_key, key_len); + return 0; } static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param, @@ -340,7 +276,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; - if (unlikely(need_fallback(sctx->key_len))) + if (unlikely(!sctx->fc)) return fallback_blk_enc(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); @@ -354,7 +290,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; - if (unlikely(need_fallback(sctx->key_len))) + if (unlikely(!sctx->fc)) return fallback_blk_dec(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); @@ -413,27 +349,21 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - int ret; + unsigned long fc; - ret = need_fallback(key_len); - if (ret > 0) { - sctx->key_len = key_len; + /* Pick the correct function code based on the key length */ + fc = (key_len == 16) ? CPACF_KMC_AES_128 : + (key_len == 24) ? CPACF_KMC_AES_192 : + (key_len == 32) ? CPACF_KMC_AES_256 : 0; + + /* Check if the function code is available */ + sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0; + if (!sctx->fc) return setkey_fallback_blk(tfm, in_key, key_len); - } - switch (key_len) { - case 16: - sctx->fc = CPACF_KMC_AES_128; - break; - case 24: - sctx->fc = CPACF_KMC_AES_192; - break; - case 32: - sctx->fc = CPACF_KMC_AES_256; - break; - } - - return aes_set_key(tfm, in_key, key_len); + sctx->key_len = key_len; + memcpy(sctx->key, in_key, key_len); + return 0; } static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, @@ -476,7 +406,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; - if (unlikely(need_fallback(sctx->key_len))) + if (unlikely(!sctx->fc)) return fallback_blk_enc(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); @@ -490,7 +420,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; - if (unlikely(need_fallback(sctx->key_len))) + if (unlikely(!sctx->fc)) return fallback_blk_dec(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); @@ -582,33 +512,27 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; + unsigned long fc; int err; err = xts_check_key(tfm, in_key, key_len); if (err) return err; - switch (key_len) { - case 32: - xts_ctx->fc = CPACF_KM_XTS_128; - memcpy(xts_ctx->key + 16, in_key, 16); - memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16); - break; - case 48: - xts_ctx->fc = 0; - xts_fallback_setkey(tfm, in_key, key_len); - break; - case 64: - xts_ctx->fc = CPACF_KM_XTS_256; - memcpy(xts_ctx->key, in_key, 32); - memcpy(xts_ctx->pcc_key, in_key + 32, 32); - break; - default: - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return 
-EINVAL; - } + /* Pick the correct function code based on the key length */ + fc = (key_len == 32) ? CPACF_KM_XTS_128 : + (key_len == 64) ? CPACF_KM_XTS_256 : 0; + + /* Check if the function code is available */ + xts_ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; + if (!xts_ctx->fc) + return xts_fallback_setkey(tfm, in_key, key_len); + + /* Split the XTS key into the two subkeys */ + key_len = key_len / 2; xts_ctx->key_len = key_len; + memcpy(xts_ctx->key, in_key, key_len); + memcpy(xts_ctx->pcc_key, in_key + key_len, key_len); return 0; } @@ -616,7 +540,7 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func, struct s390_xts_ctx *xts_ctx, struct blkcipher_walk *walk) { - unsigned int offset = (xts_ctx->key_len >> 1) & 0x10; + unsigned int offset = xts_ctx->key_len & 0x10; int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes = walk->nbytes; unsigned int n; @@ -634,11 +558,11 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func, memset(pcc_param.bit, 0, sizeof(pcc_param.bit)); memset(pcc_param.xts, 0, sizeof(pcc_param.xts)); memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); - memcpy(pcc_param.key, xts_ctx->pcc_key, 32); + memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len); /* remove decipher modifier bit from 'func' and call PCC */ cpacf_pcc(func & 0x7f, &pcc_param.key[offset]); - memcpy(xts_param.key, xts_ctx->key, 32); + memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len); memcpy(xts_param.init, pcc_param.xts, 16); do { /* only use complete blocks */ @@ -662,7 +586,7 @@ static int xts_aes_encrypt(struct blkcipher_desc *desc, struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; - if (unlikely(xts_ctx->key_len == 48)) + if (unlikely(!xts_ctx->fc)) return xts_fallback_encrypt(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); @@ -676,7 +600,7 @@ static int xts_aes_decrypt(struct blkcipher_desc *desc, struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; - if (unlikely(xts_ctx->key_len == 48)) + if (unlikely(!xts_ctx->fc)) return xts_fallback_decrypt(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); @@ -735,20 +659,21 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + unsigned long fc; - switch (key_len) { - case 16: - sctx->fc = CPACF_KMCTR_AES_128; - break; - case 24: - sctx->fc = CPACF_KMCTR_AES_192; - break; - case 32: - sctx->fc = CPACF_KMCTR_AES_256; - break; - } + /* Pick the correct function code based on the key length */ + fc = (key_len == 16) ? CPACF_KMCTR_AES_128 : + (key_len == 24) ? CPACF_KMCTR_AES_192 : + (key_len == 32) ? CPACF_KMCTR_AES_256 : 0; - return aes_set_key(tfm, in_key, key_len); + /* Check if the function code is available */ + sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? 
fc : 0; + if (!sctx->fc) + return setkey_fallback_blk(tfm, in_key, key_len); + + sctx->key_len = key_len; + memcpy(sctx->key, in_key, key_len); + return 0; } static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) @@ -832,6 +757,9 @@ static int ctr_aes_encrypt(struct blkcipher_desc *desc, struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; + if (unlikely(!sctx->fc)) + return fallback_blk_enc(desc, dst, src, nbytes); + blkcipher_walk_init(&walk, dst, src, nbytes); return ctr_aes_crypt(desc, sctx->fc, sctx, &walk); } @@ -843,6 +771,9 @@ static int ctr_aes_decrypt(struct blkcipher_desc *desc, struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; + if (unlikely(!sctx->fc)) + return fallback_blk_dec(desc, dst, src, nbytes); + blkcipher_walk_init(&walk, dst, src, nbytes); return ctr_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, sctx, &walk); } @@ -851,11 +782,14 @@ static struct crypto_alg ctr_aes_alg = { .cra_name = "ctr(aes)", .cra_driver_name = "ctr-aes-s390", .cra_priority = 400, /* combo: aes + ctr */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct s390_aes_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, + .cra_init = fallback_init_blk, + .cra_exit = fallback_exit_blk, .cra_u = { .blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, @@ -893,43 +827,40 @@ static int __init aes_s390_init(void) { int ret; - if (cpacf_query(CPACF_KM, CPACF_KM_AES_128)) - keylen_flag |= AES_KEYLEN_128; - if (cpacf_query(CPACF_KM, CPACF_KM_AES_192)) - keylen_flag |= AES_KEYLEN_192; - if (cpacf_query(CPACF_KM, CPACF_KM_AES_256)) - keylen_flag |= AES_KEYLEN_256; + /* Query available functions for KM, KMC and KMCTR */ + cpacf_query(CPACF_KM, &km_functions); + cpacf_query(CPACF_KMC, &kmc_functions); + cpacf_query(CPACF_KMCTR, &kmctr_functions); - if (!keylen_flag) - return -EOPNOTSUPP; + if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) || + cpacf_test_func(&km_functions, CPACF_KM_AES_192) || + cpacf_test_func(&km_functions, CPACF_KM_AES_256)) { + ret = aes_s390_register_alg(&aes_alg); + if (ret) + goto out_err; + ret = aes_s390_register_alg(&ecb_aes_alg); + if (ret) + goto out_err; + } - /* z9 109 and z9 BC/EC only support 128 bit key length */ - if (keylen_flag == AES_KEYLEN_128) - pr_info("AES hardware acceleration is only available for" - " 128-bit keys\n"); + if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) || + cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) || + cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) { + ret = aes_s390_register_alg(&cbc_aes_alg); + if (ret) + goto out_err; + } - ret = aes_s390_register_alg(&aes_alg); - if (ret) - goto out_err; - - ret = aes_s390_register_alg(&ecb_aes_alg); - if (ret) - goto out_err; - - ret = aes_s390_register_alg(&cbc_aes_alg); - if (ret) - goto out_err; - - if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128) && - cpacf_query(CPACF_KM, CPACF_KM_XTS_256)) { + if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) || + cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) { ret = aes_s390_register_alg(&xts_aes_alg); if (ret) goto out_err; } - if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128) && - cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_192) && - cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_256)) { + if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_128) || + cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_192) || + cpacf_test_func(&kmctr_functions, 
CPACF_KMCTR_AES_256)) { ctrblk = (u8 *) __get_free_page(GFP_KERNEL); if (!ctrblk) { ret = -ENOMEM; diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index b77a546f1e76..965587eefc39 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -27,6 +27,8 @@ static u8 *ctrblk; static DEFINE_SPINLOCK(ctrblk_lock); +static cpacf_mask_t km_functions, kmc_functions, kmctr_functions; + struct s390_des_ctx { u8 iv[DES_BLOCK_SIZE]; u8 key[DES3_KEY_SIZE]; @@ -36,12 +38,12 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; u32 tmp[DES_EXPKEY_WORDS]; /* check for weak keys */ - if (!des_ekey(tmp, key) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { - *flags |= CRYPTO_TFM_RES_WEAK_KEY; + if (!des_ekey(tmp, key) && + (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } @@ -238,13 +240,12 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], DES_KEY_SIZE)) && - (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { - *flags |= CRYPTO_TFM_RES_WEAK_KEY; + (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } memcpy(ctx->key, key, key_len); @@ -554,39 +555,53 @@ static int __init des_s390_init(void) { int ret; - if (!cpacf_query(CPACF_KM, CPACF_KM_DEA) || - !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192)) - return -EOPNOTSUPP; + /* Query available functions for KM, KMC and KMCTR */ + cpacf_query(CPACF_KM, &km_functions); + cpacf_query(CPACF_KMC, &kmc_functions); + cpacf_query(CPACF_KMCTR, &kmctr_functions); - ret = des_s390_register_alg(&des_alg); - if (ret) - goto out_err; - ret = des_s390_register_alg(&ecb_des_alg); - if (ret) - goto out_err; - ret = des_s390_register_alg(&cbc_des_alg); - if (ret) - goto out_err; - ret = des_s390_register_alg(&des3_alg); - if (ret) - goto out_err; - ret = des_s390_register_alg(&ecb_des3_alg); - if (ret) - goto out_err; - ret = des_s390_register_alg(&cbc_des3_alg); - if (ret) - goto out_err; + if (cpacf_test_func(&km_functions, CPACF_KM_DEA)) { + ret = des_s390_register_alg(&des_alg); + if (ret) + goto out_err; + ret = des_s390_register_alg(&ecb_des_alg); + if (ret) + goto out_err; + } + if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) { + ret = des_s390_register_alg(&cbc_des_alg); + if (ret) + goto out_err; + } + if (cpacf_test_func(&km_functions, CPACF_KM_TDEA_192)) { + ret = des_s390_register_alg(&des3_alg); + if (ret) + goto out_err; + ret = des_s390_register_alg(&ecb_des3_alg); + if (ret) + goto out_err; + } + if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) { + ret = des_s390_register_alg(&cbc_des3_alg); + if (ret) + goto out_err; + } - if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA) && - cpacf_query(CPACF_KMCTR, CPACF_KMCTR_TDEA_192)) { + if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA) || + cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) { ctrblk = (u8 *) __get_free_page(GFP_KERNEL); if (!ctrblk) { ret = -ENOMEM; goto out_err; } + } + + if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) { ret = des_s390_register_alg(&ctr_des_alg); if (ret) goto out_err; + } + if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) { ret = des_s390_register_alg(&ctr_des3_alg); if (ret) 
goto out_err; diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 8e87f5176799..564616d48d8b 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -136,7 +136,7 @@ static struct shash_alg ghash_alg = { static int __init ghash_mod_init(void) { - if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_GHASH)) + if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH)) return -EOPNOTSUPP; return crypto_register_shash(&ghash_alg); diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index bbf2af74c549..79e3a1f6313a 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -757,13 +757,13 @@ static int __init prng_init(void) int ret; /* check if the CPU has a PRNG */ - if (!cpacf_query(CPACF_KMC, CPACF_KMC_PRNG)) + if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) return -EOPNOTSUPP; /* choose prng mode */ if (prng_mode != PRNG_MODE_TDES) { /* check for MSA5 support for PPNO operations */ - if (!cpacf_query(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) { + if (!cpacf_query_func(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) { if (prng_mode == PRNG_MODE_SHA512) { pr_err("The prng module cannot " "start in SHA-512 mode\n"); diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 5fbf91bbb478..c7de53d8da75 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -91,7 +91,7 @@ static struct shash_alg alg = { static int __init sha1_s390_init(void) { - if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_1)) + if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1)) return -EOPNOTSUPP; return crypto_register_shash(&alg); } diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 10aac0b11988..53c277999a28 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -123,7 +123,7 @@ static int __init sha256_s390_init(void) { int ret; - if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_256)) + if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256)) return -EOPNOTSUPP; ret = crypto_register_shash(&sha256_alg); if (ret < 0) diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index ea85757be407..2f4caa1ef123 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -133,7 +133,7 @@ static int __init init(void) { int ret; - if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_512)) + if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512)) return -EOPNOTSUPP; if ((ret = crypto_register_shash(&sha512_alg)) < 0) goto out; diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index c226c9ba24ac..2c680db7e5c1 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -106,6 +106,8 @@ #define CPACF_PPNO_SHA512_DRNG_GEN 0x03 #define CPACF_PPNO_SHA512_DRNG_SEED 0x83 +typedef struct { unsigned char bytes[16]; } cpacf_mask_t; + /** * cpacf_query() - check if a specific CPACF function is available * @opcode: the opcode of the crypto instruction @@ -116,55 +118,66 @@ * * Returns 1 if @func is available for @opcode, 0 otherwise */ -static inline void __cpacf_query(unsigned int opcode, unsigned char *status) +static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask) { - typedef struct { unsigned char _[16]; } status_type; register unsigned long r0 asm("0") = 0; /* query function */ - register unsigned long r1 asm("1") = (unsigned long) status; + register unsigned long r1 asm("1") = (unsigned long) mask; asm volatile( " spm 0\n" /* pckmo doesn't change the cc */ /* Parameter registers are ignored, but 
may not be 0 */ "0: .insn rrf,%[opc] << 16,2,2,2,0\n" " brc 1,0b\n" /* handle partial completion */ - : "=m" (*(status_type *) status) + : "=m" (*mask) : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode) : "cc"); } -static inline int cpacf_query(unsigned int opcode, unsigned int func) +static inline int __cpacf_check_opcode(unsigned int opcode) { - unsigned char status[16]; - switch (opcode) { case CPACF_KMAC: case CPACF_KM: case CPACF_KMC: case CPACF_KIMD: case CPACF_KLMD: - if (!test_facility(17)) /* check for MSA */ - return 0; - break; + return test_facility(17); /* check for MSA */ case CPACF_PCKMO: - if (!test_facility(76)) /* check for MSA3 */ - return 0; - break; + return test_facility(76); /* check for MSA3 */ case CPACF_KMF: case CPACF_KMO: case CPACF_PCC: case CPACF_KMCTR: - if (!test_facility(77)) /* check for MSA4 */ - return 0; - break; + return test_facility(77); /* check for MSA4 */ case CPACF_PPNO: - if (!test_facility(57)) /* check for MSA5 */ - return 0; - break; + return test_facility(57); /* check for MSA5 */ default: BUG(); } - __cpacf_query(opcode, status); - return (status[func >> 3] & (0x80 >> (func & 7))) != 0; +} + +static inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask) +{ + if (__cpacf_check_opcode(opcode)) { + __cpacf_query(opcode, mask); + return 1; + } + memset(mask, 0, sizeof(*mask)); + return 0; +} + +static inline int cpacf_test_func(cpacf_mask_t *mask, unsigned int func) +{ + return (mask->bytes[func >> 3] & (0x80 >> (func & 7))) != 0; +} + +static inline int cpacf_query_func(unsigned int opcode, unsigned int func) +{ + cpacf_mask_t mask; + + if (cpacf_query(opcode, &mask)) + return cpacf_test_func(&mask, func); + return 0; } /** diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fd2f1201204e..d6e7e527f0bf 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -245,22 +245,33 @@ static void kvm_s390_cpu_feat_init(void) PTFF_QAF); if (test_facility(17)) { /* MSA */ - __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac); - __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc); - __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km); - __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd); - __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd); + __cpacf_query(CPACF_KMAC, (cpacf_mask_t *) + kvm_s390_available_subfunc.kmac); + __cpacf_query(CPACF_KMC, (cpacf_mask_t *) + kvm_s390_available_subfunc.kmc); + __cpacf_query(CPACF_KM, (cpacf_mask_t *) + kvm_s390_available_subfunc.km); + __cpacf_query(CPACF_KIMD, (cpacf_mask_t *) + kvm_s390_available_subfunc.kimd); + __cpacf_query(CPACF_KLMD, (cpacf_mask_t *) + kvm_s390_available_subfunc.klmd); } if (test_facility(76)) /* MSA3 */ - __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo); + __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *) + kvm_s390_available_subfunc.pckmo); if (test_facility(77)) { /* MSA4 */ - __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr); - __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf); - __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo); - __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc); + __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *) + kvm_s390_available_subfunc.kmctr); + __cpacf_query(CPACF_KMF, (cpacf_mask_t *) + kvm_s390_available_subfunc.kmf); + __cpacf_query(CPACF_KMO, (cpacf_mask_t *) + kvm_s390_available_subfunc.kmo); + __cpacf_query(CPACF_PCC, (cpacf_mask_t *) + kvm_s390_available_subfunc.pcc); } if (test_facility(57)) /* MSA5 */ - __cpacf_query(CPACF_PPNO, 
kvm_s390_available_subfunc.ppno); + __cpacf_query(CPACF_PPNO, (cpacf_mask_t *) + kvm_s390_available_subfunc.ppno); if (MACHINE_HAS_ESOP) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); From 7bac4f5b8e3a607f7ba1d3a652f5922a657fa9e8 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 15 Aug 2016 15:17:52 +0200 Subject: [PATCH 21/48] s390/crypto: simplify CPACF encryption / decryption functions The double while loops of the CTR mode encryption / decryption functions are overly complex for little gain. Simplify the functions to a single while loop at the cost of an additional memcpy of a few bytes for every 4K page worth of data. Adapt the other crypto functions to make them all look alike. Reviewed-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 196 +++++++++++++++--------------------- arch/s390/crypto/des_s390.c | 168 ++++++++++++------------------- 2 files changed, 143 insertions(+), 221 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index f4ad96ebb7e9..303d28eb03a2 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -46,14 +46,6 @@ struct s390_aes_ctx { } fallback; }; -struct pcc_param { - u8 key[32]; - u8 tweak[16]; - u8 block[16]; - u8 bit[16]; - u8 xts[16]; -}; - struct s390_xts_ctx { u8 key[32]; u8 pcc_key[32]; @@ -248,22 +240,20 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param, +static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, struct blkcipher_walk *walk) { - int ret = blkcipher_walk_virt(desc, walk); - unsigned int nbytes; + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int nbytes, n; + int ret; - while ((nbytes = walk->nbytes)) { + ret = blkcipher_walk_virt(desc, walk); + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { /* only use complete blocks */ - unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1); - u8 *out = walk->dst.virt.addr; - u8 *in = walk->src.virt.addr; - - cpacf_km(func, param, out, in, n); - - nbytes &= AES_BLOCK_SIZE - 1; - ret = blkcipher_walk_done(desc, walk, nbytes); + n = nbytes & ~(AES_BLOCK_SIZE - 1); + cpacf_km(sctx->fc | modifier, sctx->key, + walk->dst.virt.addr, walk->src.virt.addr, n); + ret = blkcipher_walk_done(desc, walk, nbytes - n); } return ret; @@ -280,7 +270,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, return fallback_blk_enc(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_aes_crypt(desc, sctx->fc, sctx->key, &walk); + return ecb_aes_crypt(desc, 0, &walk); } static int ecb_aes_decrypt(struct blkcipher_desc *desc, @@ -294,7 +284,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, return fallback_blk_dec(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, sctx->key, &walk); + return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk); } static int fallback_init_blk(struct crypto_tfm *tfm) @@ -366,36 +356,28 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, +static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, struct blkcipher_walk *walk) { struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - int ret = blkcipher_walk_virt(desc, walk); - unsigned int nbytes = walk->nbytes; + unsigned int nbytes, n; + int ret; struct { u8 
iv[AES_BLOCK_SIZE]; u8 key[AES_MAX_KEY_SIZE]; } param; - if (!nbytes) - goto out; - + ret = blkcipher_walk_virt(desc, walk); memcpy(param.iv, walk->iv, AES_BLOCK_SIZE); memcpy(param.key, sctx->key, sctx->key_len); - do { + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { /* only use complete blocks */ - unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1); - u8 *out = walk->dst.virt.addr; - u8 *in = walk->src.virt.addr; - - cpacf_kmc(func, ¶m, out, in, n); - - nbytes &= AES_BLOCK_SIZE - 1; - ret = blkcipher_walk_done(desc, walk, nbytes); - } while ((nbytes = walk->nbytes)); + n = nbytes & ~(AES_BLOCK_SIZE - 1); + cpacf_kmc(sctx->fc | modifier, ¶m, + walk->dst.virt.addr, walk->src.virt.addr, n); + ret = blkcipher_walk_done(desc, walk, nbytes - n); + } memcpy(walk->iv, param.iv, AES_BLOCK_SIZE); - -out: return ret; } @@ -410,7 +392,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, return fallback_blk_enc(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, sctx->fc, &walk); + return cbc_aes_crypt(desc, 0, &walk); } static int cbc_aes_decrypt(struct blkcipher_desc *desc, @@ -424,7 +406,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, return fallback_blk_dec(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, &walk); + return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk); } static struct crypto_alg cbc_aes_alg = { @@ -536,46 +518,43 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static int xts_aes_crypt(struct blkcipher_desc *desc, long func, - struct s390_xts_ctx *xts_ctx, +static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, struct blkcipher_walk *walk) { - unsigned int offset = xts_ctx->key_len & 0x10; - int ret = blkcipher_walk_virt(desc, walk); - unsigned int nbytes = walk->nbytes; - unsigned int n; - u8 *in, *out; - struct pcc_param pcc_param; + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int offset, nbytes, n; + int ret; + struct { + u8 key[32]; + u8 tweak[16]; + u8 block[16]; + u8 bit[16]; + u8 xts[16]; + } pcc_param; struct { u8 key[32]; u8 init[16]; } xts_param; - if (!nbytes) - goto out; - + ret = blkcipher_walk_virt(desc, walk); + offset = xts_ctx->key_len & 0x10; memset(pcc_param.block, 0, sizeof(pcc_param.block)); memset(pcc_param.bit, 0, sizeof(pcc_param.bit)); memset(pcc_param.xts, 0, sizeof(pcc_param.xts)); memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len); - /* remove decipher modifier bit from 'func' and call PCC */ - cpacf_pcc(func & 0x7f, &pcc_param.key[offset]); + cpacf_pcc(xts_ctx->fc, pcc_param.key + offset); memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len); memcpy(xts_param.init, pcc_param.xts, 16); - do { + + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); - out = walk->dst.virt.addr; - in = walk->src.virt.addr; - - cpacf_km(func, &xts_param.key[offset], out, in, n); - - nbytes &= AES_BLOCK_SIZE - 1; - ret = blkcipher_walk_done(desc, walk, nbytes); - } while ((nbytes = walk->nbytes)); -out: + cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset, + walk->dst.virt.addr, walk->src.virt.addr, n); + ret = blkcipher_walk_done(desc, walk, nbytes - n); + } return ret; } @@ -590,7 +569,7 @@ static int xts_aes_encrypt(struct blkcipher_desc *desc, return xts_fallback_encrypt(desc, dst, 
src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_aes_crypt(desc, xts_ctx->fc, xts_ctx, &walk); + return xts_aes_crypt(desc, 0, &walk); } static int xts_aes_decrypt(struct blkcipher_desc *desc, @@ -604,7 +583,7 @@ static int xts_aes_decrypt(struct blkcipher_desc *desc, return xts_fallback_decrypt(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_aes_crypt(desc, xts_ctx->fc | CPACF_DECRYPT, xts_ctx, &walk); + return xts_aes_crypt(desc, CPACF_DECRYPT, &walk); } static int xts_fallback_init(struct crypto_tfm *tfm) @@ -676,75 +655,58 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) { unsigned int i, n; /* only use complete blocks, max. PAGE_SIZE */ + memcpy(ctrptr, iv, AES_BLOCK_SIZE); n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); - for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { - memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE, - AES_BLOCK_SIZE); - crypto_inc(ctrptr + i, AES_BLOCK_SIZE); + for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) { + memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE); + crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE); + ctrptr += AES_BLOCK_SIZE; } return n; } -static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, - struct s390_aes_ctx *sctx, struct blkcipher_walk *walk) +static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, + struct blkcipher_walk *walk) { - int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + u8 buf[AES_BLOCK_SIZE], *ctrptr; unsigned int n, nbytes; - u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE]; - u8 *out, *in, *ctrptr = ctrbuf; + int ret, locked; - if (!walk->nbytes) - return ret; + locked = spin_trylock(&ctrblk_lock); - if (spin_trylock(&ctrblk_lock)) - ctrptr = ctrblk; - - memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE); + ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { - out = walk->dst.virt.addr; - in = walk->src.virt.addr; - while (nbytes >= AES_BLOCK_SIZE) { - if (ctrptr == ctrblk) - n = __ctrblk_init(ctrptr, nbytes); - else - n = AES_BLOCK_SIZE; - cpacf_kmctr(func, sctx->key, out, in, n, ctrptr); - if (n > AES_BLOCK_SIZE) - memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE, - AES_BLOCK_SIZE); - crypto_inc(ctrptr, AES_BLOCK_SIZE); - out += n; - in += n; - nbytes -= n; - } - ret = blkcipher_walk_done(desc, walk, nbytes); + n = AES_BLOCK_SIZE; + if (nbytes >= 2*AES_BLOCK_SIZE && locked) + n = __ctrblk_init(ctrblk, walk->iv, nbytes); + ctrptr = (n > AES_BLOCK_SIZE) ? 
ctrblk : walk->iv; + cpacf_kmctr(sctx->fc | modifier, sctx->key, + walk->dst.virt.addr, walk->src.virt.addr, + n, ctrptr); + if (ctrptr == ctrblk) + memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(walk->iv, AES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, nbytes - n); } - if (ctrptr == ctrblk) { - if (nbytes) - memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE); - else - memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE); + if (locked) spin_unlock(&ctrblk_lock); - } else { - if (!nbytes) - memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE); - } /* * final block may be < AES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { - out = walk->dst.virt.addr; - in = walk->src.virt.addr; - cpacf_kmctr(func, sctx->key, buf, in, AES_BLOCK_SIZE, ctrbuf); - memcpy(out, buf, nbytes); - crypto_inc(ctrbuf, AES_BLOCK_SIZE); + cpacf_kmctr(sctx->fc | modifier, sctx->key, + buf, walk->src.virt.addr, + AES_BLOCK_SIZE, walk->iv); + memcpy(walk->dst.virt.addr, buf, nbytes); + crypto_inc(walk->iv, AES_BLOCK_SIZE); ret = blkcipher_walk_done(desc, walk, 0); - memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE); } return ret; @@ -761,7 +723,7 @@ static int ctr_aes_encrypt(struct blkcipher_desc *desc, return fallback_blk_enc(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_aes_crypt(desc, sctx->fc, sctx, &walk); + return ctr_aes_crypt(desc, 0, &walk); } static int ctr_aes_decrypt(struct blkcipher_desc *desc, @@ -775,7 +737,7 @@ static int ctr_aes_decrypt(struct blkcipher_desc *desc, return fallback_blk_dec(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, sctx, &walk); + return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk); } static struct crypto_alg ctr_aes_alg = { diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 965587eefc39..8b83144206eb 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -85,57 +85,46 @@ static struct crypto_alg des_alg = { } }; -static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, - u8 *key, struct blkcipher_walk *walk) -{ - int ret = blkcipher_walk_virt(desc, walk); - unsigned int nbytes; - - while ((nbytes = walk->nbytes)) { - /* only use complete blocks */ - unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); - u8 *out = walk->dst.virt.addr; - u8 *in = walk->src.virt.addr; - - cpacf_km(func, key, out, in, n); - - nbytes &= DES_BLOCK_SIZE - 1; - ret = blkcipher_walk_done(desc, walk, nbytes); - } - - return ret; -} - -static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, +static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, struct blkcipher_walk *walk) { struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - int ret = blkcipher_walk_virt(desc, walk); - unsigned int nbytes = walk->nbytes; + unsigned int nbytes, n; + int ret; + + ret = blkcipher_walk_virt(desc, walk); + while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + /* only use complete blocks */ + n = nbytes & ~(DES_BLOCK_SIZE - 1); + cpacf_km(fc, ctx->key, walk->dst.virt.addr, + walk->src.virt.addr, n); + ret = blkcipher_walk_done(desc, walk, nbytes - n); + } + return ret; +} + +static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, + struct blkcipher_walk *walk) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int nbytes, n; + int ret; struct { u8 iv[DES_BLOCK_SIZE]; u8 key[DES3_KEY_SIZE]; } param; - if (!nbytes) - goto out; - + ret = blkcipher_walk_virt(desc, walk); memcpy(param.iv, 
walk->iv, DES_BLOCK_SIZE); memcpy(param.key, ctx->key, DES3_KEY_SIZE); - do { + while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { /* only use complete blocks */ - unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); - u8 *out = walk->dst.virt.addr; - u8 *in = walk->src.virt.addr; - - cpacf_kmc(func, ¶m, out, in, n); - - nbytes &= DES_BLOCK_SIZE - 1; - ret = blkcipher_walk_done(desc, walk, nbytes); - } while ((nbytes = walk->nbytes)); + n = nbytes & ~(DES_BLOCK_SIZE - 1); + cpacf_kmc(fc, ¶m, walk->dst.virt.addr, + walk->src.virt.addr, n); + ret = blkcipher_walk_done(desc, walk, nbytes - n); + } memcpy(walk->iv, param.iv, DES_BLOCK_SIZE); - -out: return ret; } @@ -143,23 +132,20 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_DEA, ctx->key, &walk); + return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk); } static int ecb_des_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, - ctx->key, &walk); + return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk); } static struct crypto_alg ecb_des_alg = { @@ -290,23 +276,21 @@ static int ecb_des3_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, ctx->key, &walk); + return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk); } static int ecb_des3_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT, - ctx->key, &walk); + &walk); } static struct crypto_alg ecb_des3_alg = { @@ -371,73 +355,54 @@ static struct crypto_alg cbc_des3_alg = { } }; -static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) { unsigned int i, n; /* align to block size, max. PAGE_SIZE */ n = (nbytes > PAGE_SIZE) ? 
PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1); - for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { - memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE); - crypto_inc(ctrptr + i, DES_BLOCK_SIZE); + memcpy(ctrptr, iv, DES_BLOCK_SIZE); + for (i = (n / DES_BLOCK_SIZE) - 1; i > 0; i--) { + memcpy(ctrptr + DES_BLOCK_SIZE, ctrptr, DES_BLOCK_SIZE); + crypto_inc(ctrptr + DES_BLOCK_SIZE, DES_BLOCK_SIZE); + ctrptr += DES_BLOCK_SIZE; } return n; } -static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, - struct s390_des_ctx *ctx, +static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, struct blkcipher_walk *walk) { - int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 buf[DES_BLOCK_SIZE], *ctrptr; unsigned int n, nbytes; - u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE]; - u8 *out, *in, *ctrptr = ctrbuf; + int ret, locked; - if (!walk->nbytes) - return ret; + locked = spin_trylock(&ctrblk_lock); - if (spin_trylock(&ctrblk_lock)) - ctrptr = ctrblk; - - memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE); + ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { - out = walk->dst.virt.addr; - in = walk->src.virt.addr; - while (nbytes >= DES_BLOCK_SIZE) { - if (ctrptr == ctrblk) - n = __ctrblk_init(ctrptr, nbytes); - else - n = DES_BLOCK_SIZE; - cpacf_kmctr(func, ctx->key, out, in, n, ctrptr); - if (n > DES_BLOCK_SIZE) - memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE, - DES_BLOCK_SIZE); - crypto_inc(ctrptr, DES_BLOCK_SIZE); - out += n; - in += n; - nbytes -= n; - } - ret = blkcipher_walk_done(desc, walk, nbytes); + n = DES_BLOCK_SIZE; + if (nbytes >= 2*DES_BLOCK_SIZE && locked) + n = __ctrblk_init(ctrblk, walk->iv, nbytes); + ctrptr = (n > DES_BLOCK_SIZE) ? 
ctrblk : walk->iv; + cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr, + walk->src.virt.addr, n, ctrptr); + if (ctrptr == ctrblk) + memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE, + DES_BLOCK_SIZE); + crypto_inc(walk->iv, DES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, nbytes - n); } - if (ctrptr == ctrblk) { - if (nbytes) - memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE); - else - memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE); + if (locked) spin_unlock(&ctrblk_lock); - } else { - if (!nbytes) - memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE); - } /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { - out = walk->dst.virt.addr; - in = walk->src.virt.addr; - cpacf_kmctr(func, ctx->key, buf, in, DES_BLOCK_SIZE, ctrbuf); - memcpy(out, buf, nbytes); - crypto_inc(ctrbuf, DES_BLOCK_SIZE); + cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr, + DES_BLOCK_SIZE, walk->iv); + memcpy(walk->dst.virt.addr, buf, nbytes); + crypto_inc(walk->iv, DES_BLOCK_SIZE); ret = blkcipher_walk_done(desc, walk, 0); - memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE); } return ret; } @@ -446,23 +411,20 @@ static int ctr_des_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, ctx, &walk); + return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk); } static int ctr_des_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, - ctx, &walk); + return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk); } static struct crypto_alg ctr_des_alg = { @@ -490,23 +452,21 @@ static int ctr_des3_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, ctx, &walk); + return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk); } static int ctr_des3_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT, - ctx, &walk); + &walk); } static struct crypto_alg ctr_des3_alg = { From f5b55fa1f81d518925d68b50d2316850c525d1ad Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 31 Aug 2016 09:27:35 +0200 Subject: [PATCH 22/48] RAID/s390: provide raid6 recovery optimization The XC instruction can be used to improve the speed of the raid6 recovery. The loops now operate on blocks of 256 bytes. 
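
For reference, a portable C restatement of what one 256-byte chunk of the
two-failure recovery loop computes; dp/dq are the dead data blocks, p/q the
recomputed syndromes, and pbmul/qmul the GF(2^8) multiplier tables picked by
the code in this patch. The in-kernel version replaces the three byte-wise
XOR passes with single XC instructions:

	static void recov_256(u8 *dp, u8 *dq, const u8 *p, const u8 *q,
			      const u8 *pbmul, const u8 *qmul)
	{
		int i;

		for (i = 0; i < 256; i++) {
			dp[i] ^= p[i];                      /* delta P (xor_block) */
			dq[i] ^= q[i];                      /* delta Q (xor_block) */
			dq[i] = pbmul[dp[i]] ^ qmul[dq[i]]; /* recovered block B */
			dp[i] ^= dq[i];                     /* recovered block A (xor_block) */
		}
	}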
Signed-off-by: Martin Schwidefsky --- include/linux/raid/pq.h | 1 + lib/raid6/Makefile | 2 +- lib/raid6/algos.c | 3 + lib/raid6/recov_s390xc.c | 116 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 lib/raid6/recov_s390xc.c diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index c032a6a408a6..395a4c674168 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -116,6 +116,7 @@ struct raid6_recov_calls { extern const struct raid6_recov_calls raid6_recov_intx1; extern const struct raid6_recov_calls raid6_recov_ssse3; extern const struct raid6_recov_calls raid6_recov_avx2; +extern const struct raid6_recov_calls raid6_recov_s390xc; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 667b9607f8c0..29f503ebfd60 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -7,7 +7,7 @@ raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o raid6_pq-$(CONFIG_TILEGX) += tilegx8.o -raid6_pq-$(CONFIG_S390) += s390vx8.o +raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o hostprogs-y += mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index e1923b602bbc..592ff49df47d 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -97,6 +97,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = { #endif #ifdef CONFIG_AS_SSSE3 &raid6_recov_ssse3, +#endif +#ifdef CONFIG_S390 + &raid6_recov_s390xc, #endif &raid6_recov_intx1, NULL diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c new file mode 100644 index 000000000000..b042dac826cc --- /dev/null +++ b/lib/raid6/recov_s390xc.c @@ -0,0 +1,116 @@ +/* + * RAID-6 data recovery in dual failure mode based on the XC instruction. + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky + */ + +#include +#include + +static inline void xor_block(u8 *p1, u8 *p2) +{ + typedef struct { u8 _[256]; } addrtype; + + asm volatile( + " xc 0(256,%[p1]),0(%[p2])\n" + : "+m" (*(addrtype *) p1) : "m" (*(addrtype *) p2), + [p1] "a" (p1), [p2] "a" (p2) : "cc"); +} + +/* Recover two failed data blocks. */ +static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + int i; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data pages + Use the dead data pages as temporary storage for + delta p and delta q */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks-2] = p; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; + + /* Now do it... 
*/ + while (bytes) { + xor_block(dp, p); + xor_block(dq, q); + for (i = 0; i < 256; i++) + dq[i] = pbmul[dp[i]] ^ qmul[dq[i]]; + xor_block(dp, dq); + p += 256; + q += 256; + dp += 256; + dq += 256; + bytes -= 256; + } +} + +/* Recover failure of one data block plus the P block */ +static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + int i; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data page + Use the dead data page as temporary storage for delta q */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + /* Now do it... */ + while (bytes) { + xor_block(dq, q); + for (i = 0; i < 256; i++) + dq[i] = qmul[dq[i]]; + xor_block(p, dq); + p += 256; + q += 256; + dq += 256; + bytes -= 256; + } +} + + +const struct raid6_recov_calls raid6_recov_s390xc = { + .data2 = raid6_2data_recov_s390xc, + .datap = raid6_datap_recov_s390xc, + .valid = NULL, + .name = "s390xc", + .priority = 1, +}; From e68f1d4ca99e08652066f40d7778b6007f0149d9 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Wed, 31 Aug 2016 01:57:20 +0530 Subject: [PATCH 23/48] s390: Remove deprecated create_singlethread_workqueue The workqueue "appldata_wq" has been replaced with an ordered dedicated workqueue. WQ_MEM_RECLAIM has not been set since the workqueue is not being used on a memory reclaim path. The adapter->work_queue queues multiple work items viz &adapter->scan_work, &port->rport_work, &adapter->ns_up_work, &adapter->stat_work, adapter->work_queue, &adapter->events.work, &port->gid_pn_work, &port->test_link_work. Hence, an ordered dedicated workqueue has been used. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. 
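
As a minimal sketch of the two conversion patterns (taken from the diffs
below): appldata's queue is not used on a memory-reclaim path, while zfcp's
adapter queue is, hence the differing WQ_MEM_RECLAIM choice:

	/* appldata: ordered queue, not on a memory-reclaim path */
	appldata_wq = alloc_ordered_workqueue("appldata", 0);

	/* zfcp: ordered queue on the I/O path, must be able to make
	 * forward progress under memory pressure */
	adapter->work_queue = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);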
Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 2 +- drivers/s390/scsi/zfcp_aux.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 15c94246b600..f587c4811faf 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -542,7 +542,7 @@ static int __init appldata_init(void) rc = PTR_ERR(appldata_pdev); goto out_driver; } - appldata_wq = create_singlethread_workqueue("appldata"); + appldata_wq = alloc_ordered_workqueue("appldata", 0); if (!appldata_wq) { rc = -ENOMEM; goto out_device; diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index c00ac4650dce..bcc8f3dfd4c4 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -310,7 +310,7 @@ static int zfcp_setup_adapter_work_queue(struct zfcp_adapter *adapter) snprintf(name, sizeof(name), "zfcp_q_%s", dev_name(&adapter->ccw_device->dev)); - adapter->work_queue = create_singlethread_workqueue(name); + adapter->work_queue = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (adapter->work_queue) return 0; From 6512391a30f6b158488e941214541e84473b6bf9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 6 Sep 2016 09:01:31 +0200 Subject: [PATCH 24/48] s390/crypto: avoid returning garbage value Static analysis with cppcheck detected that ret is not initialized and hence garbage is potentially being returned in the case where prng_data->ppnows.reseed_counter <= prng_reseed_limit. Thanks to Martin Schwidefsky for spotting a mistake in my original fix. Fixes: 0177db01adf26cf9 ("s390/crypto: simplify return code handling") Signed-off-by: Colin Ian King Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/prng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 79e3a1f6313a..9cc050f9536c 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -434,7 +434,7 @@ static int prng_sha512_generate(u8 *buf, size_t nbytes) memcpy(prng_data->prev, buf, nbytes); } - return ret; + return nbytes; } From c783b91ebdbab67e848889c29dd0611c2b2c9fea Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 6 Sep 2016 10:46:36 +0200 Subject: [PATCH 25/48] s390: add assembler include path for vx-insn.h With git commit 0eab11c7e0d30de14a15ccd8269eef238321a8e1 "s390/vx: allow to include vx-insn.h with .include" and an older gcc we get errors like this: {standard input}:6: Error: can't open asm/vx-insn.h for reading: No such file or directory arch/s390/kernel/fpu.c:57: Error: Unrecognized opcode: `vstm' To solve this issue simply add the path to arch/s390/include to all assembler runs. 
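For illustration, a minimal sketch of the usage that needs this: the macro
file is pulled in by the assembler via .include, not by the C preprocessor,
so the C include search path does not help here:

	/* processed by gas at assembly time; gas must find the file on its
	 * own search path, which -Wa,-I$(srctree)/arch/$(ARCH)/include
	 * provides */
	asm(".include \"asm/vx-insn.h\"\n");
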
Signed-off-by: Martin Schwidefsky
---
 arch/s390/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 224b42734f0d..54e00526b8df 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -46,6 +46,8 @@ cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196
 cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12
 cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13
 
+cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
+
 #KBUILD_IMAGE is necessary for make rpm
 KBUILD_IMAGE :=arch/s390/boot/image

From 9078a54996a0989e25a04bbb9276bc340a52a673 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Mon, 12 Sep 2016 13:13:38 +0200
Subject: [PATCH 26/48] s390: claim efficient unaligned access

Most unaligned accesses are reasonably efficient (no kernel emulation)
on s390, so let's announce it.

This also
- removes the ubsan false positives for unaligned accesses on s390 with
  the default config
- uses simpler arithmetic in several functions in several other areas of
  the kernel, like ethernet frame classification

Signed-off-by: Christian Borntraeger
Signed-off-by: Martin Schwidefsky
---
 arch/s390/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index e751fe25d6ab..f4989fb4ac8d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -137,6 +137,7 @@ config S390
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_EXIT_THREAD
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER

From f296190e41ee1e1e6912f0ddae09b28e9cfae48d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Tue, 13 Sep 2016 03:10:39 +0900
Subject: [PATCH 27/48] s390/crashdump: use list_first_entry_or_null

The combination of a list_empty() check and a return of
list_first_entry() can be replaced with list_first_entry_or_null().

Signed-off-by: Masahiro Yamada
Signed-off-by: Heiko Carstens
Signed-off-by: Martin Schwidefsky
---
 arch/s390/kernel/crash_dump.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 29df8484282b..f9293bfefb7f 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -71,9 +71,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
  */
 struct save_area * __init save_area_boot_cpu(void)
 {
-	if (list_empty(&dump_save_areas))
-		return NULL;
-	return list_first_entry(&dump_save_areas, struct save_area, list);
+	return list_first_entry_or_null(&dump_save_areas, struct save_area, list);
 }
 
 /*

From 725c4d22bbc4fcac5779963e0ff9cdf232afbb90 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Mon, 12 Sep 2016 14:37:19 +0200
Subject: [PATCH 28/48] ubsan: allow to disable the null sanitizer

Some architectures use a hardware-defined structure at address zero.
Checking for a null pointer will result in many ubsan reports.
Allow users to disable the null sanitizer.
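
A hedged sketch of what the instrumentation amounts to; the handler name
below is a stand-in for illustration, not the real ubsan entry point:

	extern void report_null_deref(void);	/* stand-in for the ubsan handler */

	static int deref(int *p)
	{
		/* check inserted by -fsanitize=null; with UBSAN_NULL=n the
		 * check is not emitted, so legitimate accesses through a
		 * zero pointer stay silent */
		if (p == 0)
			report_null_deref();
		return *p;
	}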
Signed-off-by: Christian Borntraeger Acked-by: Andrey Ryabinin Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- lib/Kconfig.ubsan | 11 +++++++++++ scripts/Makefile.ubsan | 5 ++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 39494af9a84a..bc6e651df68c 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -1,6 +1,9 @@ config ARCH_HAS_UBSAN_SANITIZE_ALL bool +config ARCH_WANTS_UBSAN_NO_NULL + def_bool n + config UBSAN bool "Undefined behaviour sanity checker" help @@ -34,3 +37,11 @@ config UBSAN_ALIGNMENT This option enables detection of unaligned memory accesses. Enabling this option on architectures that support unaligned accesses may produce a lot of false positives. + +config UBSAN_NULL + bool "Enable checking of null pointers" + depends on UBSAN + default y if !ARCH_WANTS_UBSAN_NO_NULL + help + This option enables detection of memory accesses via a + null pointer. diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan index 8ab68679cfb5..dd779c40c8e6 100644 --- a/scripts/Makefile.ubsan +++ b/scripts/Makefile.ubsan @@ -3,7 +3,6 @@ ifdef CONFIG_UBSAN CFLAGS_UBSAN += $(call cc-option, -fsanitize=integer-divide-by-zero) CFLAGS_UBSAN += $(call cc-option, -fsanitize=unreachable) CFLAGS_UBSAN += $(call cc-option, -fsanitize=vla-bound) - CFLAGS_UBSAN += $(call cc-option, -fsanitize=null) CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow) CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds) CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size) @@ -14,4 +13,8 @@ ifdef CONFIG_UBSAN ifdef CONFIG_UBSAN_ALIGNMENT CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment) endif + +ifdef CONFIG_UBSAN_NULL + CFLAGS_UBSAN += $(call cc-option, -fsanitize=null) +endif endif From c42d8c7dbe596d849b43b7581bcc39b51f148c48 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 12 Sep 2016 14:37:20 +0200 Subject: [PATCH 29/48] s390: enable UBSAN This enables UBSAN for s390. We have to disable the null sanitizer as s390 code does access memory via a null pointer (the prefix page). 
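
For illustration, the kind of access that makes the null sanitizer unusable
here; this is a simplified sketch of the lowcore accessor, not a quote of
the real header:

	struct lowcore;		/* hardware-defined structure in the prefix page */

	/* address zero is a valid, architecturally defined mapping on s390,
	 * but -fsanitize=null would flag every access through it */
	#define S390_lowcore (*((struct lowcore *) 0))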
Signed-off-by: Christian Borntraeger Acked-by: Andrey Ryabinin Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 2 ++ arch/s390/boot/compressed/Makefile | 1 + arch/s390/kernel/Makefile | 3 +++ arch/s390/kernel/vdso32/Makefile | 3 ++- arch/s390/kernel/vdso64/Makefile | 3 ++- 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f4989fb4ac8d..608f4eabb2de 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -74,6 +74,7 @@ config S390 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV select ARCH_HAS_SG_CHAIN + select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK select ARCH_INLINE_READ_LOCK_BH @@ -110,6 +111,7 @@ config S390 select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANTS_PROT_NUMA_PROT_NONE + select ARCH_WANTS_UBSAN_NO_NULL select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS2 diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 33ba697c782d..0daa070d6c9d 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -17,6 +17,7 @@ KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) GCOV_PROFILE := n +UBSAN_SANITIZE := n OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o sclp.o ebcdic.o als.o) OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 3234817c7d47..72ccc41444dc 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -48,6 +48,9 @@ AFLAGS_head.o += -march=z900 endif GCOV_PROFILE_sclp.o := n GCOV_PROFILE_als.o := n +UBSAN_SANITIZE_als.o := n +UBSAN_SANITIZE_early.o := n +UBSAN_SANITIZE_sclp.o := n obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 68145456fee2..6cc947896c77 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -24,8 +24,9 @@ obj-y += vdso32_wrapper.o extra-y += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) -# Disable gcov profiling for VDSO code +# Disable gcov profiling and ubsan for VDSO code GCOV_PROFILE := n +UBSAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 0b0fd22c869a..2d54c18089eb 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -24,8 +24,9 @@ obj-y += vdso64_wrapper.o extra-y += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) -# Disable gcov profiling for VDSO code +# Disable gcov profiling and ubsan for VDSO code GCOV_PROFILE := n +UBSAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so From eed5c4b117d1f77553d517072584c4ac779af0ba Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 31 Aug 2016 13:31:10 +0200 Subject: [PATCH 30/48] s390/dasd: add missing KOBJ_CHANGE event for unformatted devices The DASD device driver generates change events for the DASD block device after online processing is done, so that udev rules can act on them. The change event was missing for unformatted devices.
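The mechanism behind this fix is the standard kobject uevent API; as a sketch of the pattern (context simplified), the driver emits a change event for the block device so that udev rules matching ACTION=="change" are re-triggered:

    /* sketch: notify udev that the device state changed */
    struct gendisk *disk = device->block->gdp;

    kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);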
Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index fb1b56a71475..5245d7e37a46 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -336,6 +336,7 @@ static int dasd_state_basic_to_ready(struct dasd_device *device) { int rc; struct dasd_block *block; + struct gendisk *disk; rc = 0; block = device->block; @@ -346,6 +347,9 @@ static int dasd_state_basic_to_ready(struct dasd_device *device) if (rc) { if (rc != -EAGAIN) { device->state = DASD_STATE_UNFMT; + disk = device->block->gdp; + kobject_uevent(&disk_to_dev(disk)->kobj, + KOBJ_CHANGE); goto out; } return rc; From f622b517563b0d3be6c41e932124e0b717149ad8 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 19 Aug 2016 19:57:49 +0200 Subject: [PATCH 31/48] s390/vmur: fix irq pointer dereference in int handler "irb" in vmur's int handler can be an error pointer. Don't dereference this pointer in that case. Reported-by: Dan Carpenter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/char/vmur.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index 6c30e93ab8fa..ff18f373af9a 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -306,10 +306,11 @@ static void ur_int_handler(struct ccw_device *cdev, unsigned long intparm, { struct urdev *urd; - TRACE("ur_int_handler: intparm=0x%lx cstat=%02x dstat=%02x res=%u\n", - intparm, irb->scsw.cmd.cstat, irb->scsw.cmd.dstat, - irb->scsw.cmd.count); - + if (!IS_ERR(irb)) { + TRACE("ur_int_handler: intparm=0x%lx cstat=%02x dstat=%02x res=%u\n", + intparm, irb->scsw.cmd.cstat, irb->scsw.cmd.dstat, + irb->scsw.cmd.count); + } if (!intparm) { TRACE("ur_int_handler: unsolicited interrupt\n"); return; From ecc6410abf898c580077e028dd8eb123bfbda502 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 16 Sep 2016 17:01:46 +0200 Subject: [PATCH 32/48] s390: export header for CLP ioctl Export clp.h for use by userspace. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 08fe6dad9026..cc44b09c25fc 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -6,6 +6,7 @@ header-y += bitsperlong.h header-y += byteorder.h header-y += chpid.h header-y += chsc.h +header-y += clp.h header-y += cmb.h header-y += dasd.h header-y += debug.h From dcc096c540d794456c1edbe6d55b9d611c86e8db Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 19 Sep 2016 17:54:56 -0400 Subject: [PATCH 33/48] s390: migrate exception table users off module.h and onto extable.h These files were only including module.h for exception table related functions. We've now separated that content out into its own file, "extable.h", so move over to that and avoid all the extra header content in module.h that we don't really need to compile these files.
The additions of uaccess.h are to deal with implicit includes like: arch/s390/kernel/traps.c: In function 'do_report_trap': arch/s390/kernel/traps.c:56:4: error: implicit declaration of function 'extable_fixup' [-Werror=implicit-function-declaration] arch/s390/kernel/traps.c: In function 'illegal_op': arch/s390/kernel/traps.c:173:3: error: implicit declaration of function 'get_user' [-Werror=implicit-function-declaration] Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Signed-off-by: Paul Gortmaker Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 2 +- arch/s390/kernel/kprobes.c | 2 ++ arch/s390/kernel/traps.c | 3 ++- arch/s390/mm/fault.c | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 717b03aa16b5..2374c5b46bbc 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -13,7 +13,7 @@ #include #include #include -#include <linux/module.h> +#include <linux/extable.h> #include #include #include diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index dd6306c51bd6..fdb40424acfe 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -26,12 +26,14 @@ #include #include #include +#include <linux/extable.h> #include #include #include #include #include #include +#include <asm/uaccess.h> #include DEFINE_PER_CPU(struct kprobe *, current_kprobe); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index dd97a3e8a34a..d0539f76fd24 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -14,11 +14,12 @@ */ #include #include -#include <linux/module.h> +#include <linux/extable.h> #include #include #include #include +#include <asm/uaccess.h> #include #include "entry.h" diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a58bca62a93b..cca73881e672 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -24,7 +24,7 @@ #include #include #include -#include <linux/module.h> +#include <linux/extable.h> #include #include #include From bb2b7ffbc4e25b0c4839317b6c31cb768efe8b1f Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 23 Aug 2016 15:59:15 +0200 Subject: [PATCH 34/48] iommu/s390: simplify registration of I/O address translation parameters When a new function is attached to an iommu domain we need to register I/O address translation parameters. Since commit 69eea95c ("s390/pci_dma: fix DMA table corruption with > 4 TB main memory") start_dma and end_dma correctly describe the range of usable I/O addresses. Simplify the code by using these values directly. Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- drivers/iommu/s390-iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index a04d491cf431..3b44b1d82f3b 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -101,8 +101,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, zpci_dma_exit_device(zdev); zdev->dma_table = s390_domain->dma_table; - rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET, - zdev->start_dma + zdev->iommu_size - 1, + rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, (u64) zdev->dma_table); if (rc) goto out_restore; From 3b13f1fea1be44f29be4150246624502a0227ebd Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 17 Aug 2016 13:39:46 +0200 Subject: [PATCH 35/48] s390/pci_dma: remove dma address range check We calculate dma addresses using an iommu bitmap.
Since commit 69eea95c ("s390/pci_dma: fix DMA table corruption with > 4 TB main memory") we've made sure that addresses created using that bitmap are below the maximum reported by firmware. Thus the additional check for that address to be within range can be removed. Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_dma.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 7297fce9bf80..658123961f42 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -305,12 +305,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, /* Use rounded up size */ size = nr_pages * PAGE_SIZE; - dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE; - if (dma_addr + size > zdev->end_dma) { - ret = -ERANGE; - goto out_free; - } if (direction == DMA_NONE || direction == DMA_TO_DEVICE) flags |= ZPCI_TABLE_PROTECTED; From 8cb63b78791eef67ea95831c6ef5e6039c572b14 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 17 Aug 2016 13:51:11 +0200 Subject: [PATCH 36/48] s390/pci_dma: simplify dma address calculation Simplify the code we use to calculate dma addresses by putting everything related in a dma_alloc_address function. Also provide a dma_free_address counterpart. Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_dma.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 658123961f42..12b58b6b7f79 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -230,34 +230,36 @@ static unsigned long __dma_alloc_iommu(struct device *dev, boundary_size, 0); } -static unsigned long dma_alloc_iommu(struct device *dev, int size) +static dma_addr_t dma_alloc_address(struct device *dev, int size) { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long offset, flags; - int wrap = 0; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); offset = __dma_alloc_iommu(dev, zdev->next_bit, size); if (offset == -1) { /* wrap-around */ offset = __dma_alloc_iommu(dev, 0, size); - wrap = 1; - } - - if (offset != -1) { - zdev->next_bit = offset + size; - if (!zdev->tlb_refresh && !s390_iommu_strict && wrap) + if (offset == -1) { + spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); + return DMA_ERROR_CODE; + } + if (!zdev->tlb_refresh && !s390_iommu_strict) /* global flush after wrap-around with lazy unmap */ zpci_refresh_global(zdev); } + zdev->next_bit = offset + size; spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - return offset; + + return zdev->start_dma + offset * PAGE_SIZE; } -static void dma_free_iommu(struct device *dev, unsigned long offset, int size) +static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long flags; + unsigned long flags, offset; + + offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); if (!zdev->iommu_bitmap) @@ -289,23 +291,22 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, unsigned long attrs) { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long nr_pages, iommu_page_index; unsigned long pa = page_to_phys(page) + offset; int flags = ZPCI_PTE_VALID; + unsigned long nr_pages; dma_addr_t dma_addr; int ret; /* This rounds up number of pages based on size 
and offset */ nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); - iommu_page_index = dma_alloc_iommu(dev, nr_pages); - if (iommu_page_index == -1) { + dma_addr = dma_alloc_address(dev, nr_pages); + if (dma_addr == DMA_ERROR_CODE) { ret = -ENOSPC; goto out_err; } /* Use rounded up size */ size = nr_pages * PAGE_SIZE; - dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE; if (direction == DMA_NONE || direction == DMA_TO_DEVICE) flags |= ZPCI_TABLE_PROTECTED; @@ -318,7 +319,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, return dma_addr + (offset & ~PAGE_MASK); out_free: - dma_free_iommu(dev, iommu_page_index, nr_pages); + dma_free_address(dev, dma_addr, nr_pages); out_err: zpci_err("map error:\n"); zpci_err_dma(ret, pa); @@ -330,7 +331,6 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, unsigned long attrs) { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long iommu_page_index; int npages, ret; npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); @@ -344,8 +344,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, } atomic64_add(npages, &zdev->unmapped_pages); - iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; - dma_free_iommu(dev, iommu_page_index, npages); + dma_free_address(dev, dma_addr, npages); } static void *s390_dma_alloc(struct device *dev, size_t size, From ee877b81c6b92c190e7186c1ffd054804b426c02 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 19 Aug 2016 09:12:09 +0200 Subject: [PATCH 37/48] s390/pci_dma: improve map_sg Our map_sg implementation mapped sg entries independently of each other. For ease of use and possible performance improvements this patch changes the implementation to try to map as many (likely physically non-contiguous) sglist entries as possible into a contiguous DMA segment. 
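Before the diff, it may help to see the merge rule in isolation. A simplified sketch (the helper name is invented; the real code below open-codes the inverted test): consecutive sglist entries are collected into one DMA segment only while each new entry starts at page offset 0, the segment built so far is a whole number of pages, and the device's maximum segment size is not exceeded:

    /* sketch: can sg entry s be appended to the current DMA segment? */
    static bool s390_sg_can_merge(unsigned int seg_size,
    			      struct scatterlist *s, unsigned int max)
    {
    	return s->offset == 0 &&		/* entry starts on a page boundary */
    	       (seg_size & ~PAGE_MASK) == 0 &&	/* segment so far is page aligned */
    	       seg_size + s->length <= max;	/* respects dma_get_max_seg_size() */
    }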
Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_dma.c | 108 ++++++++++++++++++++++++++++++---------- 1 file changed, 83 insertions(+), 25 deletions(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 12b58b6b7f79..54cb54cbc764 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -388,37 +388,94 @@ static void s390_dma_free(struct device *dev, size_t size, free_pages((unsigned long) pa, get_order(size)); } +/* Map a segment into a contiguous dma address area */ +static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, + size_t size, dma_addr_t *handle, + enum dma_data_direction dir) +{ + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); + dma_addr_t dma_addr_base, dma_addr; + int flags = ZPCI_PTE_VALID; + struct scatterlist *s; + unsigned long pa; + int ret; + + size = PAGE_ALIGN(size); + dma_addr_base = dma_alloc_address(dev, size >> PAGE_SHIFT); + if (dma_addr_base == DMA_ERROR_CODE) + return -ENOMEM; + + dma_addr = dma_addr_base; + if (dir == DMA_NONE || dir == DMA_TO_DEVICE) + flags |= ZPCI_TABLE_PROTECTED; + + for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { + pa = page_to_phys(sg_page(s)) + s->offset; + ret = dma_update_trans(zdev, pa, dma_addr, s->length, flags); + if (ret) + goto unmap; + + dma_addr += s->length; + } + *handle = dma_addr_base; + atomic64_add(size >> PAGE_SHIFT, &zdev->mapped_pages); + + return ret; + +unmap: + dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, + ZPCI_PTE_INVALID); + dma_free_address(dev, dma_addr_base, size >> PAGE_SHIFT); + zpci_err("map error:\n"); + zpci_err_dma(ret, pa); + return ret; +} + static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, int nr_elements, enum dma_data_direction dir, unsigned long attrs) { - int mapped_elements = 0; - struct scatterlist *s; - int i; + struct scatterlist *s = sg, *start = sg, *dma = sg; + unsigned int max = dma_get_max_seg_size(dev); + unsigned int size = s->offset + s->length; + unsigned int offset = s->offset; + int count = 0, i; - for_each_sg(sg, s, nr_elements, i) { - struct page *page = sg_page(s); - s->dma_address = s390_dma_map_pages(dev, page, s->offset, - s->length, dir, 0); - if (!dma_mapping_error(dev, s->dma_address)) { - s->dma_length = s->length; - mapped_elements++; - } else - goto unmap; - } -out: - return mapped_elements; + for (i = 1; i < nr_elements; i++) { + s = sg_next(s); -unmap: - for_each_sg(sg, s, mapped_elements, i) { - if (s->dma_address) - s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, - dir, 0); - s->dma_address = 0; + s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; + + if (s->offset || (size & ~PAGE_MASK) || + size + s->length > max) { + if (__s390_dma_map_sg(dev, start, size, + &dma->dma_address, dir)) + goto unmap; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + size = offset = s->offset; + start = s; + dma = sg_next(dma); + count++; + } + size += s->length; } - mapped_elements = 0; - goto out; + if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir)) + goto unmap; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + return count + 1; +unmap: + for_each_sg(sg, s, count, i) + s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), + dir, attrs); + + return 0; } static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, @@ -429,8 +486,9 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, 
s, nr_elements, i) { - s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, - 0); + if (s->dma_length) + s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, + dir, attrs); s->dma_address = 0; s->dma_length = 0; } From 1f166e9e5c7cd5d1fe2a5da7c97c1688d4c93fbb Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 5 Sep 2016 17:49:17 +0200 Subject: [PATCH 38/48] s390/pci_dma: split dma_update_trans Split dma_update_trans into __dma_update_trans, which handles updating the dma translation tables, and __dma_purge_tlb, which takes care of purging associated entries in the dma translation lookaside buffer. The map_sg API makes use of this split approach by calling __dma_update_trans once per physically contiguous address range but __dma_purge_tlb only once per dma contiguous address range. This results in fewer invocations of the expensive RPCIT instruction when using map_sg. Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_dma.c | 65 ++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 54cb54cbc764..9e5f2ecf7f25 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -129,12 +129,11 @@ void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags) entry_clr_protected(entry); } -static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, - dma_addr_t dma_addr, size_t size, int flags) +static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa, + dma_addr_t dma_addr, size_t size, int flags) { unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; u8 *page_addr = (u8 *) (pa & PAGE_MASK); - dma_addr_t start_dma_addr = dma_addr; unsigned long irq_flags; unsigned long *entry; int i, rc = 0; @@ -145,7 +144,7 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); if (!zdev->dma_table) { rc = -EINVAL; - goto out_unlock; } for (i = 0; i < nr_pages; i++) { @@ -159,20 +158,6 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, dma_addr += PAGE_SIZE; } - /* - * With zdev->tlb_refresh == 0, rpcit is not required to establish new - * translations when previously invalid translation-table entries are - * validated. With lazy unmap, it also is skipped for previously valid - * entries, but a global rpcit is then required before any address can - * be re-used, i.e. after each iommu bitmap wrap-around. - */ - if (!zdev->tlb_refresh && - (!s390_iommu_strict || - ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))) - goto no_refresh; - - rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, - nr_pages * PAGE_SIZE); undo_cpu_trans: if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { flags = ZPCI_PTE_INVALID; @@ -185,12 +170,46 @@ undo_cpu_trans: dma_update_cpu_trans(entry, page_addr, flags); } } - -no_refresh: +out_unlock: spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); return rc; } +static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, + size_t size, int flags) +{ + /* + * With zdev->tlb_refresh == 0, rpcit is not required to establish new + * translations when previously invalid translation-table entries are + * validated. With lazy unmap, it also is skipped for previously valid + * entries, but a global rpcit is then required before any address can + * be re-used, i.e. after each iommu bitmap wrap-around.
+ */ + if (!zdev->tlb_refresh && + (!s390_iommu_strict || + ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))) + return 0; + + return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, + PAGE_ALIGN(size)); +} + +static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, + dma_addr_t dma_addr, size_t size, int flags) +{ + int rc; + + rc = __dma_update_trans(zdev, pa, dma_addr, size, flags); + if (rc) + return rc; + + rc = __dma_purge_tlb(zdev, dma_addr, size, flags); + if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) + __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID); + + return rc; +} + void dma_free_seg_table(unsigned long entry) { unsigned long *sto = get_rt_sto(entry); @@ -411,12 +430,16 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { pa = page_to_phys(sg_page(s)) + s->offset; - ret = dma_update_trans(zdev, pa, dma_addr, s->length, flags); + ret = __dma_update_trans(zdev, pa, dma_addr, s->length, flags); if (ret) goto unmap; dma_addr += s->length; } + ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); + if (ret) + goto unmap; + *handle = dma_addr_base; atomic64_add(size >> PAGE_SHIFT, &zdev->mapped_pages); From 13954fd6913acff8f8b8c21612074b57051ba457 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 8 Sep 2016 13:25:01 +0200 Subject: [PATCH 39/48] s390/pci_dma: improve lazy flush for unmap Lazy unmap (defer tlb flush after unmap until dma address reuse) can greatly reduce the number of RPCIT instructions in the best case. In reality we are often far away from the best case scenario because our implementation suffers from the following problem: To create dma addresses we maintain an iommu bitmap and a pointer into that bitmap to mark the start of the next search. That pointer moves from the start to the end of that bitmap and we issue a global tlb flush once that pointer wraps around. To prevent address reuse before we issue the tlb flush we even have to move the next pointer during unmaps - when clearing a bit > next. This could lead to a situation where we only use the rear part of that bitmap and issue more tlb flushes than expected. To fix this we no longer clear bits during unmap but maintain a 2nd bitmap which we use to mark addresses that can't be reused until we issue the global tlb flush after wrap around. 
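Reduced to its core, the scheme described above looks like this (a conceptual sketch without locking, error handling, or the tlb_refresh/s390_iommu_strict checks; the diff below integrates it into dma_alloc_address() and dma_free_address()):

    /* sketch: unmap only marks the range; nothing is reusable yet */
    static void lazy_free(struct zpci_dev *zdev, unsigned long offset, int size)
    {
    	bitmap_set(zdev->lazy_bitmap, offset, size);
    }

    /* sketch: at wrap-around, one global flush reclaims all lazily freed bits */
    static void flush_and_reclaim(struct zpci_dev *zdev)
    {
    	zpci_refresh_global(zdev);
    	bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
    		      zdev->lazy_bitmap, zdev->iommu_pages);
    	bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
    }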
Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 1 + arch/s390/pci/pci_dma.c | 47 ++++++++++++++++++++++++++----------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 8769cbf9d7cf..6611f798d2be 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -118,6 +118,7 @@ struct zpci_dev { spinlock_t iommu_bitmap_lock; unsigned long *iommu_bitmap; + unsigned long *lazy_bitmap; unsigned long iommu_size; unsigned long iommu_pages; unsigned int next_bit; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 9e5f2ecf7f25..7350c8bc13a2 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -257,20 +257,28 @@ static dma_addr_t dma_alloc_address(struct device *dev, int size) spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); offset = __dma_alloc_iommu(dev, zdev->next_bit, size); if (offset == -1) { + if (!zdev->tlb_refresh && !s390_iommu_strict) { + /* global flush before DMA addresses are reused */ + if (zpci_refresh_global(zdev)) + goto out_error; + + bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, + zdev->lazy_bitmap, zdev->iommu_pages); + bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); + } /* wrap-around */ offset = __dma_alloc_iommu(dev, 0, size); - if (offset == -1) { - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - return DMA_ERROR_CODE; - } - if (!zdev->tlb_refresh && !s390_iommu_strict) - /* global flush after wrap-around with lazy unmap */ - zpci_refresh_global(zdev); + if (offset == -1) + goto out_error; } zdev->next_bit = offset + size; spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); return zdev->start_dma + offset * PAGE_SIZE; + +out_error: + spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); + return DMA_ERROR_CODE; } static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) @@ -283,13 +291,12 @@ static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); if (!zdev->iommu_bitmap) goto out; - bitmap_clear(zdev->iommu_bitmap, offset, size); - /* - * Lazy flush for unmap: need to move next_bit to avoid address re-use - * until wrap-around. 
- */ - if (!s390_iommu_strict && offset >= zdev->next_bit) - zdev->next_bit = offset + size; + + if (zdev->tlb_refresh || s390_iommu_strict) + bitmap_clear(zdev->iommu_bitmap, offset, size); + else + bitmap_set(zdev->lazy_bitmap, offset, size); + out: spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); } @@ -557,7 +564,14 @@ int zpci_dma_init_device(struct zpci_dev *zdev) rc = -ENOMEM; goto free_dma_table; } + if (!zdev->tlb_refresh && !s390_iommu_strict) { + zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8); + if (!zdev->lazy_bitmap) { + rc = -ENOMEM; + goto free_bitmap; + } + } rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, (u64) zdev->dma_table); if (rc) @@ -567,6 +581,8 @@ free_bitmap: vfree(zdev->iommu_bitmap); zdev->iommu_bitmap = NULL; + vfree(zdev->lazy_bitmap); + zdev->lazy_bitmap = NULL; free_dma_table: dma_free_cpu_table(zdev->dma_table); zdev->dma_table = NULL; @@ -588,6 +604,9 @@ void zpci_dma_exit_device(struct zpci_dev *zdev) zdev->dma_table = NULL; vfree(zdev->iommu_bitmap); zdev->iommu_bitmap = NULL; + vfree(zdev->lazy_bitmap); + zdev->lazy_bitmap = NULL; + zdev->next_bit = 0; } From a9f6273ff9c80dd2c226f7a2d5c16272e5092d3e Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 20 Sep 2016 10:29:22 +0200 Subject: [PATCH 40/48] s390/dasd: fix hanging offline processing Internal I/O is processed by the _sleep_on function, which might wait for a device to get operational. During offline processing this will never happen, and therefore the refcount of the device will not drop to zero and the offline processing blocks as well. Fix by letting requests fail in the _sleep_on function during offline processing. No further handling of the requests is necessary since this is internal I/O and the device is thrown away afterwards. Reviewed-by: Peter Oberparleiter Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 5245d7e37a46..706ae0ac94c9 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2277,6 +2277,15 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) cqr->intrc = -ENOLINK; continue; } + /* + * Don't try to start requests if device is in + * offline processing, it might wait forever + */ + if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) { + cqr->status = DASD_CQR_FAILED; + cqr->intrc = -ENODEV; + continue; + } /* * Don't try to start requests if device is stopped except path verification requests From c020d722b110a44c613ef71e657e6dd4116e09d9 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 20 Sep 2016 10:42:38 +0200 Subject: [PATCH 41/48] s390/dasd: fix panic during offline processing A DASD device consists of the device itself and a discipline with a corresponding private structure. These fields are set up during online processing right after the device is created and before it is processed by the state machine and made available for I/O. During offline processing the discipline pointer and the private data get freed within the state machine and without protection of the existing reference count. This might lead to a kernel panic because a function might have taken a device reference and accesses the discipline pointer and/or private data of the device while they have already been freed.
Fix by freeing the discipline pointer and the private data after ensuring that there is no reference to the device left. Reviewed-by: Peter Oberparleiter Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 26 ++++++++++++++++---------- drivers/s390/block/dasd_devmap.c | 1 + drivers/s390/block/dasd_int.h | 1 + 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 706ae0ac94c9..1de089019268 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -212,16 +212,6 @@ static int dasd_state_known_to_new(struct dasd_device *device) { /* Disable extended error reporting for this device. */ dasd_eer_disable(device); - /* Forget the discipline information. */ - if (device->discipline) { - if (device->discipline->uncheck_device) - device->discipline->uncheck_device(device); - module_put(device->discipline->owner); - } - device->discipline = NULL; - if (device->base_discipline) - module_put(device->base_discipline->owner); - device->base_discipline = NULL; device->state = DASD_STATE_NEW; if (device->block) @@ -3377,6 +3367,22 @@ int dasd_generic_probe(struct ccw_device *cdev, } EXPORT_SYMBOL_GPL(dasd_generic_probe); +void dasd_generic_free_discipline(struct dasd_device *device) +{ + /* Forget the discipline information. */ + if (device->discipline) { + if (device->discipline->uncheck_device) + device->discipline->uncheck_device(device); + module_put(device->discipline->owner); + device->discipline = NULL; + } + if (device->base_discipline) { + module_put(device->base_discipline->owner); + device->base_discipline = NULL; + } +} +EXPORT_SYMBOL_GPL(dasd_generic_free_discipline); + /* * This will one day be called from a global not_oper handler. * It is also used by driver_unregister during module unload. diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 3cdbce45e464..15a1a70cace9 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -617,6 +617,7 @@ dasd_delete_device(struct dasd_device *device) /* Wait for reference counter to drop to zero. */ wait_event(dasd_delete_wq, atomic_read(&device->ref_count) == 0); + dasd_generic_free_discipline(device); /* Disconnect dasd_device structure from ccw_device structure. */ cdev = device->cdev; device->cdev = NULL; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index ac7027e6d52b..87ff6cef872f 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -725,6 +725,7 @@ void dasd_block_clear_timer(struct dasd_block *); int dasd_cancel_req(struct dasd_ccw_req *); int dasd_flush_device_queue(struct dasd_device *); int dasd_generic_probe (struct ccw_device *, struct dasd_discipline *); +void dasd_generic_free_discipline(struct dasd_device *); void dasd_generic_remove (struct ccw_device *cdev); int dasd_generic_set_online(struct ccw_device *, struct dasd_discipline *); int dasd_generic_set_offline (struct ccw_device *cdev); From f50af850f407df7b548802461bc248c7683c6dd3 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 22 Sep 2016 10:49:40 +0200 Subject: [PATCH 42/48] s390/dasd: make query host access interruptible If the DASD device gets blocked for any reason, e.g. because it is reserved somewhere, the host_access_count sysfs entry or the host_access_list debugfs entry may sleep forever. Make it interruptible so that userspace can use ^C to abort the operation. 
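The generic kernel idiom behind the change is the interruptible wait, sketched here with invented names for the wait queue and condition: the wait returns nonzero when a signal arrives, so the caller can bail out with -ERESTARTSYS instead of sleeping forever, which is what dasd_sleep_on_interruptible provides over dasd_sleep_on:

    /* sketch: abort the wait when the user sends a signal such as ^C */
    if (wait_event_interruptible(wait_q, request_done(req)))
    	return -ERESTARTSYS;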
Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 98bbec44bcd0..831935af7389 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -5201,7 +5201,7 @@ static int dasd_eckd_query_host_access(struct dasd_device *device, cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; - rc = dasd_sleep_on(cqr); + rc = dasd_sleep_on_interruptible(cqr); if (rc == 0) { *data = *host_access; } else { From fdcebf6f18ee87c6da69327c9972d57b8ce58166 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 26 Sep 2016 19:13:04 +0200 Subject: [PATCH 43/48] s390/config: Enable config options for Docker The following config options are required/recommended for running Docker: Networking: - CONFIG_NF_NAT_MASQUERADE_IPV4=m - CONFIG_NF_NAT_MASQUERADE_IPV6=m - CONFIG_IPVLAN=m - CGROUP_NET_PRIO=y Storage drivers: - CONFIG_DM_THIN_PROVISIONING=m - CONFIG_OVERLAY_FS=m Scheduling: - CONFIG_FAIR_GROUP_SCHED=y - CONFIG_CFS_BANDWIDTH=y Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/configs/default_defconfig | 11 ++++++++--- arch/s390/configs/gcov_defconfig | 13 ++++++++++--- arch/s390/configs/performance_defconfig | 13 ++++++++++--- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 26e0c7f08814..ced7e7ae627c 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -260,7 +260,6 @@ CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NF_TABLES_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NF_TABLES_ARP=m -CONFIG_NF_NAT_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -269,6 +268,8 @@ CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m @@ -281,7 +282,6 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NF_NAT_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m @@ -299,6 +299,8 @@ CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP6_NF_MANGLE=m CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_SECURITY=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NET_SCTPPROBE=m CONFIG_RDS=m @@ -359,6 +361,7 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y +CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m CONFIG_NET_TCPPROBE=m @@ -409,6 +412,7 @@ CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m +CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -428,6 +432,7 @@ CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m +CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_TUN=m CONFIG_VETH=m @@ -453,7 +458,6 @@ CONFIG_PPP_SYNC_TTY=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y CONFIG_LEGACY_PTY_COUNT=0 CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m @@ -495,6 +499,7 @@ CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m +CONFIG_OVERLAY_FS=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y diff --git 
a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 24879dab47bc..d72500603551 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -15,6 +15,8 @@ CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y @@ -255,7 +257,6 @@ CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NF_TABLES_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NF_TABLES_ARP=m -CONFIG_NF_NAT_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -264,6 +265,8 @@ CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m @@ -276,7 +279,6 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NF_NAT_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m @@ -294,6 +296,8 @@ CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP6_NF_MANGLE=m CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_SECURITY=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NET_SCTPPROBE=m CONFIG_RDS=m @@ -353,6 +357,7 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y +CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m CONFIG_NET_TCPPROBE=m @@ -403,6 +408,7 @@ CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m +CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -422,6 +428,7 @@ CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m +CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_TUN=m CONFIG_VETH=m @@ -447,7 +454,6 @@ CONFIG_PPP_SYNC_TTY=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y CONFIG_LEGACY_PTY_COUNT=0 CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m @@ -487,6 +493,7 @@ CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m +CONFIG_OVERLAY_FS=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index a5c1e5f2a0ca..bf99c386d5c3 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -16,6 +16,8 @@ CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y @@ -255,7 +257,6 @@ CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NF_TABLES_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NF_TABLES_ARP=m -CONFIG_NF_NAT_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -264,6 +265,8 @@ CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m @@ -276,7 +279,6 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NF_NAT_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m @@ -294,6 +296,8 @@ CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP6_NF_MANGLE=m CONFIG_IP6_NF_RAW=m 
CONFIG_IP6_NF_SECURITY=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NET_SCTPPROBE=m CONFIG_RDS=m @@ -353,6 +357,7 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y +CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m CONFIG_NET_TCPPROBE=m @@ -403,6 +408,7 @@ CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m +CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -422,6 +428,7 @@ CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m +CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_TUN=m CONFIG_VETH=m @@ -447,7 +454,6 @@ CONFIG_PPP_SYNC_TTY=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y CONFIG_LEGACY_PTY_COUNT=0 CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m @@ -488,6 +494,7 @@ CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m +CONFIG_OVERLAY_FS=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y From 871f8bf0c477ef20ac1457bb241416d7d8749732 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 27 Sep 2016 12:13:18 -0700 Subject: [PATCH 44/48] s390/dasd: add missing \n to end of dev_err messages Trivial fix: the dev_err messages are missing a \n, so add it. Signed-off-by: Colin Ian King Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_erp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c index e1e88486b2b4..d138d0116734 100644 --- a/drivers/s390/block/dasd_erp.c +++ b/drivers/s390/block/dasd_erp.c @@ -169,12 +169,12 @@ dasd_log_sense(struct dasd_ccw_req *cqr, struct irb *irb) device = cqr->startdev; if (cqr->intrc == -ETIMEDOUT) { dev_err(&device->cdev->dev, - "A timeout error occurred for cqr %p", cqr); + "A timeout error occurred for cqr %p\n", cqr); return; } if (cqr->intrc == -ENOLINK) { dev_err(&device->cdev->dev, - "A transport error occurred for cqr %p", cqr); + "A transport error occurred for cqr %p\n", cqr); return; } /* dump sense data */ From d53c51f26145657aa7c55fa396f93677e613548d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 28 Sep 2016 13:36:19 +0200 Subject: [PATCH 45/48] s390/cio: fix accidental interrupt enabling during resume Since commit 9f3d6d7, chsc_get_channel_measurement_chars is called with interrupts disabled during resume from hibernate. Because this function used spin_unlock_irq, interrupts were enabled accidentally. Fix this by using the irqsave variant. Since we can't guarantee the IRQ-enablement state for all (future/external) callers, change the locking in related functions to prevent similar bugs in the future.
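The locking idiom the fix switches to, shown in isolation (a sketch): spin_lock_irqsave() records whether interrupts were enabled in the caller, and spin_unlock_irqrestore() restores exactly that state rather than unconditionally re-enabling interrupts:

    unsigned long flags;

    spin_lock_irqsave(&chsc_page_lock, flags);
    /* ... work on the shared chsc_page buffer ... */
    spin_unlock_irqrestore(&chsc_page_lock, flags);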
Fixes: 9f3d6d7 ("s390/cio: update measurement characteristics") Signed-off-by: Sebastian Ott Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 940e725bde1e..11674698b36d 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -95,12 +95,13 @@ struct chsc_ssd_area { int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd) { struct chsc_ssd_area *ssd_area; + unsigned long flags; int ccode; int ret; int i; int mask; - spin_lock_irq(&chsc_page_lock); + spin_lock_irqsave(&chsc_page_lock, flags); memset(chsc_page, 0, PAGE_SIZE); ssd_area = chsc_page; ssd_area->request.length = 0x0010; @@ -144,7 +145,7 @@ int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd) ssd->fla[i] = ssd_area->fla[i]; } out: - spin_unlock_irq(&chsc_page_lock); + spin_unlock_irqrestore(&chsc_page_lock, flags); return ret; } @@ -832,9 +833,10 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable) u32 fmt : 4; u32 : 16; } __attribute__ ((packed)) *secm_area; + unsigned long flags; int ret, ccode; - spin_lock_irq(&chsc_page_lock); + spin_lock_irqsave(&chsc_page_lock, flags); memset(chsc_page, 0, PAGE_SIZE); secm_area = chsc_page; secm_area->request.length = 0x0050; @@ -864,7 +866,7 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable) CIO_CRW_EVENT(2, "chsc: secm failed (rc=%04x)\n", secm_area->response.code); out: - spin_unlock_irq(&chsc_page_lock); + spin_unlock_irqrestore(&chsc_page_lock, flags); return ret; } @@ -992,6 +994,7 @@ chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv, int chsc_get_channel_measurement_chars(struct channel_path *chp) { + unsigned long flags; int ccode, ret; struct { @@ -1021,7 +1024,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp) if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm) return -EINVAL; - spin_lock_irq(&chsc_page_lock); + spin_lock_irqsave(&chsc_page_lock, flags); memset(chsc_page, 0, PAGE_SIZE); scmc_area = chsc_page; scmc_area->request.length = 0x0010; @@ -1053,7 +1056,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp) chsc_initialize_cmg_chars(chp, scmc_area->cmcv, (struct cmg_chars *) &scmc_area->data); out: - spin_unlock_irq(&chsc_page_lock); + spin_unlock_irqrestore(&chsc_page_lock, flags); return ret; } @@ -1134,6 +1137,7 @@ struct css_chsc_char css_chsc_characteristics; int __init chsc_determine_css_characteristics(void) { + unsigned long flags; int result; struct { struct chsc_header request; @@ -1146,7 +1150,7 @@ chsc_determine_css_characteristics(void) u32 chsc_char[508]; } __attribute__ ((packed)) *scsc_area; - spin_lock_irq(&chsc_page_lock); + spin_lock_irqsave(&chsc_page_lock, flags); memset(chsc_page, 0, PAGE_SIZE); scsc_area = chsc_page; scsc_area->request.length = 0x0010; @@ -1168,7 +1172,7 @@ chsc_determine_css_characteristics(void) CIO_CRW_EVENT(2, "chsc: scsc failed (rc=%04x)\n", scsc_area->response.code); exit: - spin_unlock_irq(&chsc_page_lock); + spin_unlock_irqrestore(&chsc_page_lock, flags); return result; } From 38b7f07a0503271cf8e08399ecda7063c371a181 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 29 Sep 2016 12:13:53 +0200 Subject: [PATCH 46/48] MAINTAINERS: update DASD maintainer Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/MAINTAINERS b/MAINTAINERS index 0bbe4b105c34..30601dba5347 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10076,8 +10076,8 @@ S: Supported F: drivers/s390/cio/ S390 DASD DRIVER -M: Stefan Weinhuber -M: Stefan Haberland +M: Stefan Haberland +M: Jan Hoeppner L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported From c14f2aac7aa147861793eed9f41f91dd530f0be1 Mon Sep 17 00:00:00 2001 From: Sascha Silbe Date: Thu, 11 Aug 2016 21:34:54 +0200 Subject: [PATCH 47/48] s390/con3270: fix use of uninitialised data con3270 contains an optimisation that reduces the amount of data to be transmitted to the 3270 terminal by putting a Repeat to Address (RA) order into the data stream. The RA order itself takes up space, so con3270 only uses it if there's enough space left in the line buffer. Otherwise it just pads out the line manually. For lines too long to include the RA order, one byte was left uninitialised. This was caused by an off-by-one bug in the loop that pads out the line. Since the buffer is allocated from a common pool, the single byte left uninitialised contained some previous buffer content. Usually this was just a space or some character (which can result in clutter but is otherwise harmless). Sometimes, however, it was a Repeat to Address order, messing up the entire screen layout and causing the display to send the entire buffer content on every keystroke. Fixes: f51320a5 ("[PATCH] s390: new 3270 driver.") (tglx/history.git) Reported-by: Liu Jing Tested-by: Jing Liu Tested-by: Yang Chen Signed-off-by: Sascha Silbe Signed-off-by: Martin Schwidefsky --- drivers/s390/char/con3270.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index 6b1577c73fe7..0c161dbec73d 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -464,7 +464,7 @@ con3270_cline_end(struct con3270 *cp) s->string[s->len - 4] = TO_RA; s->string[s->len - 1] = 0; } else { - while (--size > cp->cline->len) + while (--size >= cp->cline->len) s->string[size] = cp->view.ascebc[' ']; } /* Replace cline with allocated line s and reset cline. */ From 6cd997db911f28f2510b771691270c52b63ed2e6 Mon Sep 17 00:00:00 2001 From: Sascha Silbe Date: Tue, 20 Sep 2016 19:09:07 +0200 Subject: [PATCH 48/48] s390/con3270: fix insufficient space padding con3270 contains an optimisation that reduces the amount of data to be transmitted to the 3270 terminal by putting a Repeat to Address (RA) order into the data stream. The RA order itself takes up space, so con3270 only uses it if there's enough space left in the line buffer. Otherwise it just pads out the line manually. For lines that were _just_ short enough that the RA order still fit in the line buffer, the line was instead padded with an insufficient number of spaces. This was caused by examining the size of the allocated line buffer rather than the length of the string to be displayed. For con3270_cline_end(), we just compare against the line length. For con3270_update_string(), however, that isn't available anymore, so we check whether the Repeat to Address order is present.
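For reference, both con3270 fixes revolve around the 4-byte tail that the RA order occupies in the line buffer; the byte roles below are inferred from the diffs and the 3270 data stream format (illustrative, not authoritative):

    /*
     * Tail of a con3270 line buffer when the RA order is used:
     *
     *   string[len - 4]            TO_RA order code
     *   string[len - 3 .. len - 2] target buffer address, written by
     *                              raw3270_buffer_address()
     *   string[len - 1]            character to repeat (0 here)
     *
     * Hence the s->len < 4 guard and the TO_RA test at len - 4 in the
     * hunk below.
     */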
Fixes: f51320a5 ("[PATCH] s390: new 3270 driver.") (tglx/history.git) Tested-by: Jing Liu Tested-by: Yang Chen Signed-off-by: Sascha Silbe Signed-off-by: Martin Schwidefsky --- drivers/s390/char/con3270.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index 0c161dbec73d..285b4006f44b 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -124,7 +124,12 @@ con3270_create_status(struct con3270 *cp) static void con3270_update_string(struct con3270 *cp, struct string *s, int nr) { - if (s->len >= cp->view.cols - 5) + if (s->len < 4) { + /* This indicates a bug, but printing a warning would + * cause a deadlock. */ + return; + } + if (s->string[s->len - 4] != TO_RA) return; raw3270_buffer_address(cp->view.dev, s->string + s->len - 3, cp->view.cols * (nr + 1)); @@ -460,7 +465,7 @@ con3270_cline_end(struct con3270 *cp) cp->cline->len + 4 : cp->view.cols; s = con3270_alloc_string(cp, size); memcpy(s->string, cp->cline->string, cp->cline->len); - if (s->len < cp->view.cols - 5) { + if (cp->cline->len < cp->view.cols - 5) { s->string[s->len - 4] = TO_RA; s->string[s->len - 1] = 0; } else {