diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 903fc926860b..95ca68d663a4 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -176,7 +176,8 @@ Architectures: s390 3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH -Allows user space to set/get the TOD clock extension (u8). +Allows user space to set/get the TOD clock extension (u8) (superseded by +KVM_S390_VM_TOD_EXT). Parameters: address of a buffer in user space to store the data (u8) to Returns: -EFAULT if the given address is not accessible from kernel space @@ -190,6 +191,17 @@ the POP (u64). Parameters: address of a buffer in user space to store the data (u64) to Returns: -EFAULT if the given address is not accessible from kernel space +3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT +Allows user space to set/get bits 0-63 of the TOD clock register as defined in +the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it +also allows user space to get/set it. If the guest CPU model does not support +it, it is stored as 0 and not allowed to be set to a value != 0. + +Parameters: address of a buffer in user space to store the data + (kvm_s390_vm_tod_clock) to +Returns: -EFAULT if the given address is not accessible from kernel space + -EINVAL if setting the TOD clock extension to != 0 is not supported + 4. GROUP: KVM_S390_VM_CRYPTO Architectures: s390 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a409d5991934..51375e766e90 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -226,7 +226,9 @@ struct kvm_s390_sie_block { #define ECB3_RI 0x01 __u8 ecb3; /* 0x0063 */ __u32 scaol; /* 0x0064 */ - __u8 reserved68[4]; /* 0x0068 */ + __u8 reserved68; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 reserved6a[2]; /* 0x006a */ __u32 todpr; /* 0x006c */ __u8 reserved70[16]; /* 0x0070 */ __u64 mso; /* 0x0080 */ @@ -265,6 +267,7 @@ struct kvm_s390_sie_block { __u64 cbrlo; /* 0x01b8 */ __u8 reserved1c0[8]; /* 0x01c0 */ #define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 __u32 ecd; /* 0x01c8 */ __u8 reserved1cc[18]; /* 0x01cc */ __u64 pp; /* 0x01de */ @@ -739,6 +742,7 @@ struct kvm_arch{ struct kvm_s390_cpu_model model; struct kvm_s390_crypto crypto; struct kvm_s390_vsie vsie; + u8 epdx; u64 epoch; struct kvm_s390_migration_state *migration_state; /* subset of available cpu features enabled by user space */ diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h index 42267a2fe29e..22b0f49e87c1 100644 --- a/arch/s390/include/asm/page-states.h +++ b/arch/s390/include/asm/page-states.h @@ -13,7 +13,8 @@ #define ESSA_SET_POT_VOLATILE 4 #define ESSA_SET_STABLE_RESIDENT 5 #define ESSA_SET_STABLE_IF_RESIDENT 6 +#define ESSA_SET_STABLE_NODAT 7 -#define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT +#define ESSA_MAX ESSA_SET_STABLE_NODAT #endif diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 624deaa44230..b463df89f344 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -133,6 +133,9 @@ static inline int page_reset_referenced(unsigned long addr) struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); +void arch_set_page_dat(struct page *page, int order); +void arch_set_page_nodat(struct page *page, int order); +int arch_test_page_nodat(struct page *page); void arch_set_page_states(int make_stable); static inline int devmem_is_allowed(unsigned long pfn) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 57057fb1cc07..bb59a0aa3249 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -376,6 +376,7 @@ static inline int is_module_addr(void *addr) /* Guest Page State used for virtualization */ #define _PGSTE_GPS_ZERO 0x0000000080000000UL +#define _PGSTE_GPS_NODAT 0x0000000040000000UL #define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL #define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL #define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL @@ -952,15 +953,30 @@ static inline pte_t pte_mkhuge(pte_t pte) #define IPTE_GLOBAL 0 #define IPTE_LOCAL 1 -static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local) +#define IPTE_NODAT 0x400 +#define IPTE_GUEST_ASCE 0x800 + +static inline void __ptep_ipte(unsigned long address, pte_t *ptep, + unsigned long opt, unsigned long asce, + int local) { unsigned long pto = (unsigned long) ptep; - /* Invalidation + TLB flush for the pte */ + if (__builtin_constant_p(opt) && opt == 0) { + /* Invalidation + TLB flush for the pte */ + asm volatile( + " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" + : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), + [m4] "i" (local)); + return; + } + + /* Invalidate ptes with options + TLB flush of the ptes */ + opt = opt | (asce & _ASCE_ORIGIN); asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" - : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), - [m4] "i" (local)); + " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]" + : [r2] "+a" (address), [r3] "+a" (opt) + : [r1] "a" (pto), [m4] "i" (local) : "memory"); } static inline void __ptep_ipte_range(unsigned long address, int nr, @@ -1341,31 +1357,61 @@ static inline void __pmdp_csp(pmd_t *pmdp) #define IDTE_GLOBAL 0 #define IDTE_LOCAL 1 -static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local) +#define IDTE_PTOA 0x0800 +#define IDTE_NODAT 0x1000 +#define IDTE_GUEST_ASCE 0x2000 + +static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, + unsigned long opt, unsigned long asce, + int local) { unsigned long sto; - sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); - asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" - : "+m" (*pmdp) - : [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)), - [m4] "i" (local) - : "cc" ); + sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t); + if (__builtin_constant_p(opt) && opt == 0) { + /* flush without guest asce */ + asm volatile( + " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + : "+m" (*pmdp) + : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)), + [m4] "i" (local) + : "cc" ); + } else { + /* flush with guest asce */ + asm volatile( + " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" + : "+m" (*pmdp) + : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt), + [r3] "a" (asce), [m4] "i" (local) + : "cc" ); + } } -static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local) +static inline void __pudp_idte(unsigned long addr, pud_t *pudp, + unsigned long opt, unsigned long asce, + int local) { unsigned long r3o; - r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); + r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t); r3o |= _ASCE_TYPE_REGION3; - asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" - : "+m" (*pudp) - : [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)), - [m4] "i" (local) - : "cc"); + if (__builtin_constant_p(opt) && opt == 0) { + /* flush without guest asce */ + asm volatile( + " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + : "+m" (*pudp) + : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)), + [m4] "i" (local) + : "cc"); + } else { + /* flush with guest asce */ + asm volatile( + " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" + : "+m" (*pudp) + : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt), + [r3] "a" (asce), [m4] "i" (local) + : "cc" ); + } } pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index cd78155b1829..49c425903894 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -29,8 +29,9 @@ #define MACHINE_FLAG_TE _BITUL(11) #define MACHINE_FLAG_TLB_LC _BITUL(12) #define MACHINE_FLAG_VX _BITUL(13) -#define MACHINE_FLAG_NX _BITUL(14) -#define MACHINE_FLAG_GS _BITUL(15) +#define MACHINE_FLAG_TLB_GUEST _BITUL(14) +#define MACHINE_FLAG_NX _BITUL(15) +#define MACHINE_FLAG_GS _BITUL(16) #define LPP_MAGIC _BITUL(31) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) @@ -68,6 +69,7 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) +#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST) #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) #define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) @@ -106,7 +108,8 @@ extern void pfault_fini(void); void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); -extern void cmma_init(void); +void cmma_init(void); +void cmma_init_nodat(void); extern void (*_machine_restart)(char *command); extern void (*_machine_halt)(void); diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 39846100682a..4d759f8f4bc7 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -20,10 +20,15 @@ static inline void __tlb_flush_local(void) */ static inline void __tlb_flush_idte(unsigned long asce) { + unsigned long opt; + + opt = IDTE_PTOA; + if (MACHINE_HAS_TLB_GUEST) + opt |= IDTE_GUEST_ASCE; /* Global TLB flush for the mm */ asm volatile( " .insn rrf,0xb98e0000,0,%0,%1,0" - : : "a" (2048), "a" (asce) : "cc"); + : : "a" (opt), "a" (asce) : "cc"); } #ifdef CONFIG_SMP diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 69d09c39bbcd..cd7359e23d86 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req { /* kvm attributes for KVM_S390_VM_TOD */ #define KVM_S390_VM_TOD_LOW 0 #define KVM_S390_VM_TOD_HIGH 1 +#define KVM_S390_VM_TOD_EXT 2 + +struct kvm_s390_vm_tod_clock { + __u8 epoch_idx; + __u64 tod; +}; /* kvm attributes for KVM_S390_VM_CPU_MODEL */ /* processor related attributes are r/w */ diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index 39e2f41b6cf0..c8ea715bfe10 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -98,10 +98,16 @@ int page_key_alloc(unsigned long pages) */ void page_key_read(unsigned long *pfn) { + struct page *page; unsigned long addr; + unsigned char key; - addr = (unsigned long) page_address(pfn_to_page(*pfn)); - *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); + page = pfn_to_page(*pfn); + addr = (unsigned long) page_address(page); + key = (unsigned char) page_get_storage_key(addr) & 0x7f; + if (arch_test_page_nodat(page)) + key |= 0x80; + *(unsigned char *) pfn = key; } /* @@ -126,8 +132,16 @@ void page_key_memorize(unsigned long *pfn) */ void page_key_write(void *address) { - page_set_storage_key((unsigned long) address, - page_key_rp->data[page_key_rx], 0); + struct page *page; + unsigned char key; + + key = page_key_rp->data[page_key_rx]; + page_set_storage_key((unsigned long) address, key & 0x7f, 0); + page = virt_to_page(address); + if (key & 0x80) + arch_set_page_nodat(page, 0); + else + arch_set_page_dat(page, 0); if (++page_key_rx >= PAGE_KEY_DATA_SIZE) return; page_key_rp = page_key_rp->next; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index b89d19f6f2ab..eacda05b45d7 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -157,6 +157,8 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore) page_frame = get_zeroed_page(GFP_KERNEL); if (!segment_table || !page_table || !page_frame) goto out; + arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER); + arch_set_page_dat(virt_to_page(page_table), 0); /* Initialize per-cpu vdso data page */ vd = (struct vdso_per_cpu_data *) page_frame; diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index c2e0ddc1356e..bcbd86621d01 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -308,7 +308,7 @@ static inline int in_addr_range(u64 addr, u64 a, u64 b) return (addr >= a) && (addr <= b); else /* "overflowing" interval */ - return (addr <= a) && (addr >= b); + return (addr >= a) || (addr <= b); } #define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a619ddae610d..a832ad031cee 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2479,6 +2479,7 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, struct kvm_s390_mchk_info *mchk; union mci mci; __u64 cr14 = 0; /* upper bits are not used */ + int rc; mci.val = mcck_info->mcic; if (mci.sr) @@ -2496,12 +2497,13 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, if (mci.ck) { /* Inject the floating machine check */ inti.type = KVM_S390_MCHK; - WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti)); + rc = __inject_vm(vcpu->kvm, &inti); } else { /* Inject the machine check to specified vcpu */ irq.type = KVM_S390_MCHK; - WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); + rc = kvm_s390_inject_vcpu(vcpu, &irq); } + WARN_ON_ONCE(rc); } int kvm_set_routing_entry(struct kvm *kvm, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9f23a9e81a91..40d0a1a97889 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -130,6 +130,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +struct kvm_s390_tod_clock_ext { + __u8 epoch_idx; + __u64 tod; + __u8 reserved[7]; +} __packed; + /* allow nested virtualization in KVM (if enabled by user space) */ static int nested; module_param(nested, int, S_IRUGO); @@ -874,6 +880,26 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm, return 0; } +static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_tod_clock gtod; + + if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) + return -EFAULT; + + if (test_kvm_facility(kvm, 139)) + kvm_s390_set_tod_clock_ext(kvm, >od); + else if (gtod.epoch_idx == 0) + kvm_s390_set_tod_clock(kvm, gtod.tod); + else + return -EINVAL; + + VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", + gtod.epoch_idx, gtod.tod); + + return 0; +} + static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) { u8 gtod_high; @@ -909,6 +935,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; switch (attr->attr) { + case KVM_S390_VM_TOD_EXT: + ret = kvm_s390_set_tod_ext(kvm, attr); + break; case KVM_S390_VM_TOD_HIGH: ret = kvm_s390_set_tod_high(kvm, attr); break; @@ -922,6 +951,43 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) return ret; } +static void kvm_s390_get_tod_clock_ext(struct kvm *kvm, + struct kvm_s390_vm_tod_clock *gtod) +{ + struct kvm_s390_tod_clock_ext htod; + + preempt_disable(); + + get_tod_clock_ext((char *)&htod); + + gtod->tod = htod.tod + kvm->arch.epoch; + gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx; + + if (gtod->tod < htod.tod) + gtod->epoch_idx += 1; + + preempt_enable(); +} + +static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_tod_clock gtod; + + memset(>od, 0, sizeof(gtod)); + + if (test_kvm_facility(kvm, 139)) + kvm_s390_get_tod_clock_ext(kvm, >od); + else + gtod.tod = kvm_s390_get_tod_clock_fast(kvm); + + if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) + return -EFAULT; + + VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx", + gtod.epoch_idx, gtod.tod); + return 0; +} + static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) { u8 gtod_high = 0; @@ -954,6 +1020,9 @@ static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; switch (attr->attr) { + case KVM_S390_VM_TOD_EXT: + ret = kvm_s390_get_tod_ext(kvm, attr); + break; case KVM_S390_VM_TOD_HIGH: ret = kvm_s390_get_tod_high(kvm, attr); break; @@ -1505,7 +1574,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm, if (r < 0) pgstev = 0; /* save the value */ - res[i++] = (pgstev >> 24) & 0x3; + res[i++] = (pgstev >> 24) & 0x43; /* * if the next bit is too far away, stop. * if we reached the previous "next", find the next one @@ -1583,7 +1652,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm, pgstev = bits[i]; pgstev = pgstev << 24; - mask &= _PGSTE_GPS_USAGE_MASK; + mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; set_pgste_bits(kvm->mm, hva, mask, pgstev); } srcu_read_unlock(&kvm->srcu, srcu_idx); @@ -1858,8 +1927,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); + /* we are always in czam mode - even on pre z14 machines */ + set_kvm_facility(kvm->arch.model.fac_mask, 138); + set_kvm_facility(kvm->arch.model.fac_list, 138); + /* we emulate STHYI in kvm */ set_kvm_facility(kvm->arch.model.fac_mask, 74); set_kvm_facility(kvm->arch.model.fac_list, 74); + if (MACHINE_HAS_TLB_GUEST) { + set_kvm_facility(kvm->arch.model.fac_mask, 147); + set_kvm_facility(kvm->arch.model.fac_list, 147); + } kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); kvm->arch.model.ibc = sclp.ibc & 0x0fff; @@ -2369,6 +2446,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= ECA_VX; vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; } + if (test_kvm_facility(vcpu->kvm, 139)) + vcpu->arch.sie_block->ecd |= ECD_MEF; + vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) | SDNXC; vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; @@ -2860,6 +2940,35 @@ retry: return 0; } +void kvm_s390_set_tod_clock_ext(struct kvm *kvm, + const struct kvm_s390_vm_tod_clock *gtod) +{ + struct kvm_vcpu *vcpu; + struct kvm_s390_tod_clock_ext htod; + int i; + + mutex_lock(&kvm->lock); + preempt_disable(); + + get_tod_clock_ext((char *)&htod); + + kvm->arch.epoch = gtod->tod - htod.tod; + kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; + + if (kvm->arch.epoch > gtod->tod) + kvm->arch.epdx -= 1; + + kvm_s390_vcpu_block_all(kvm); + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu->arch.sie_block->epoch = kvm->arch.epoch; + vcpu->arch.sie_block->epdx = kvm->arch.epdx; + } + + kvm_s390_vcpu_unblock_all(kvm); + preempt_enable(); + mutex_unlock(&kvm->lock); +} + void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) { struct kvm_vcpu *vcpu; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 6fedc8bc7a37..9f8fdd7b2311 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -272,6 +272,8 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); int handle_sthyi(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ +void kvm_s390_set_tod_clock_ext(struct kvm *kvm, + const struct kvm_s390_vm_tod_clock *gtod); void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 8a1dac793d6b..91dc4a87ad61 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -988,6 +988,8 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) if (pgstev & _PGSTE_GPS_ZERO) res |= 1; } + if (pgstev & _PGSTE_GPS_NODAT) + res |= 0x20; vcpu->run->s.regs.gprs[r1] = res; /* * It is possible that all the normal 511 slots were full, in which case @@ -1027,7 +1029,9 @@ static int handle_essa(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); /* Check for invalid operation request code */ orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; - if (orc > ESSA_MAX) + /* ORCs 0-6 are always valid */ + if (orc > (test_kvm_facility(vcpu->kvm, 147) ? ESSA_SET_STABLE_NODAT + : ESSA_SET_STABLE_IF_RESIDENT)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); if (likely(!vcpu->kvm->arch.migration_state)) { diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 1a252f537081..9d592ef4104b 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -155,29 +155,26 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, return rc; } -static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) +static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter, + u64 *status_reg) { - int rc; unsigned int i; struct kvm_vcpu *v; + bool all_stopped = true; - switch (parameter & 0xff) { - case 0: - rc = SIGP_CC_NOT_OPERATIONAL; - break; - case 1: - case 2: - kvm_for_each_vcpu(i, v, vcpu->kvm) { - v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; - kvm_clear_async_pf_completion_queue(v); - } - - rc = SIGP_CC_ORDER_CODE_ACCEPTED; - break; - default: - rc = -EOPNOTSUPP; + kvm_for_each_vcpu(i, v, vcpu->kvm) { + if (v == vcpu) + continue; + if (!is_vcpu_stopped(v)) + all_stopped = false; } - return rc; + + *status_reg &= 0xffffffff00000000UL; + + /* Reject set arch order, with czam we're always in z/Arch mode. */ + *status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER : + SIGP_STATUS_INCORRECT_STATE); + return SIGP_CC_STATUS_STORED; } static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, @@ -446,7 +443,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) switch (order_code) { case SIGP_SET_ARCHITECTURE: vcpu->stat.instruction_sigp_arch++; - rc = __sigp_set_arch(vcpu, parameter); + rc = __sigp_set_arch(vcpu, parameter, + &vcpu->run->s.regs.gprs[r1]); break; default: rc = handle_sigp_dst(vcpu, order_code, cpu_addr, diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 926b5244263e..395926b8c1ed 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -394,7 +394,7 @@ static int sthyi(u64 vaddr) "srl %[cc],28\n" : [cc] "=d" (cc) : [code] "d" (code), [addr] "a" (addr) - : "memory", "cc"); + : "3", "memory", "cc"); return cc; } @@ -425,7 +425,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr); trace_kvm_s390_handle_sthyi(vcpu, code, addr); - if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK) + if (reg1 == reg2 || reg1 & 1 || reg2 & 1) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); if (code & 0xffff) { @@ -433,13 +433,8 @@ int handle_sthyi(struct kvm_vcpu *vcpu) goto out; } - /* - * If the page has not yet been faulted in, we want to do that - * now and not after all the expensive calculations. - */ - r = write_guest(vcpu, addr, reg2, &cc, 1); - if (r) - return kvm_s390_inject_prog_cond(vcpu, r); + if (addr & ~PAGE_MASK) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); sctns = (void *)get_zeroed_page(GFP_KERNEL); if (!sctns) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 715c19c45d9a..fbe46dd0e55d 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -349,6 +349,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->eca |= scb_o->eca & ECA_IB; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) scb_s->eca |= scb_o->eca & ECA_CEI; + /* Epoch Extension */ + if (test_kvm_facility(vcpu->kvm, 139)) + scb_s->ecd |= scb_o->ecd & ECD_MEF; prepare_ibc(vcpu, vsie_page); rc = shadow_crycb(vcpu, vsie_page); @@ -806,8 +809,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; - struct mcck_volatile_info *mcck_info; - struct sie_page *sie_page; int rc; handle_last_fault(vcpu, vsie_page); @@ -831,9 +832,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if (rc == -EINTR) { VCPU_EVENT(vcpu, 3, "%s", "machine check"); - sie_page = container_of(scb_s, struct sie_page, sie_block); - mcck_info = &sie_page->mcck_info; - kvm_s390_reinject_machine_check(vcpu, mcck_info); + kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info); return 0; } @@ -919,6 +918,13 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu, */ preempt_disable(); scb_s->epoch += vcpu->kvm->arch.epoch; + + if (scb_s->ecd & ECD_MEF) { + scb_s->epdx += vcpu->kvm->arch.epdx; + if (scb_s->epoch < vcpu->kvm->arch.epoch) + scb_s->epdx += 1; + } + preempt_enable(); } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8111694ce55a..3aee54b2ba60 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -137,6 +137,8 @@ void __init mem_init(void) free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ + cmma_init_nodat(); + mem_init_print_info(NULL); } diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 69a7b01ae746..07fa7b8ae233 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -10,9 +10,10 @@ #include #include #include +#include #include #include - +#include #include static int cmma_flag = 1; @@ -36,14 +37,16 @@ __setup("cmma=", cmma); static inline int cmma_test_essa(void) { register unsigned long tmp asm("0") = 0; - register int rc asm("1") = -EOPNOTSUPP; + register int rc asm("1"); + /* test ESSA_GET_STATE */ asm volatile( - " .insn rrf,0xb9ab0000,%1,%1,0,0\n" + " .insn rrf,0xb9ab0000,%1,%1,%2,0\n" "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) - : "+&d" (rc), "+&d" (tmp)); + : "=&d" (rc), "+&d" (tmp) + : "i" (ESSA_GET_STATE), "0" (-EOPNOTSUPP)); return rc; } @@ -51,11 +54,26 @@ void __init cmma_init(void) { if (!cmma_flag) return; - if (cmma_test_essa()) + if (cmma_test_essa()) { cmma_flag = 0; + return; + } + if (test_facility(147)) + cmma_flag = 2; } -static inline void set_page_unstable(struct page *page, int order) +static inline unsigned char get_page_state(struct page *page) +{ + unsigned char state; + + asm volatile(" .insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (state) + : "a" (page_to_phys(page)), + "i" (ESSA_GET_STATE)); + return state & 0x3f; +} + +static inline void set_page_unused(struct page *page, int order) { int i, rc; @@ -66,14 +84,7 @@ static inline void set_page_unstable(struct page *page, int order) "i" (ESSA_SET_UNUSED)); } -void arch_free_page(struct page *page, int order) -{ - if (!cmma_flag) - return; - set_page_unstable(page, order); -} - -static inline void set_page_stable(struct page *page, int order) +static inline void set_page_stable_dat(struct page *page, int order) { int i, rc; @@ -84,11 +95,162 @@ static inline void set_page_stable(struct page *page, int order) "i" (ESSA_SET_STABLE)); } +static inline void set_page_stable_nodat(struct page *page, int order) +{ + int i, rc; + + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" (page_to_phys(page + i)), + "i" (ESSA_SET_STABLE_NODAT)); +} + +static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end) +{ + unsigned long next; + struct page *page; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none(*pmd) || pmd_large(*pmd)) + continue; + page = virt_to_page(pmd_val(*pmd)); + set_bit(PG_arch_1, &page->flags); + } while (pmd++, addr = next, addr != end); +} + +static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end) +{ + unsigned long next; + struct page *page; + pud_t *pud; + int i; + + pud = pud_offset(p4d, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none(*pud) || pud_large(*pud)) + continue; + if (!pud_folded(*pud)) { + page = virt_to_page(pud_val(*pud)); + for (i = 0; i < 3; i++) + set_bit(PG_arch_1, &page[i].flags); + } + mark_kernel_pmd(pud, addr, next); + } while (pud++, addr = next, addr != end); +} + +static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + unsigned long next; + struct page *page; + p4d_t *p4d; + int i; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none(*p4d)) + continue; + if (!p4d_folded(*p4d)) { + page = virt_to_page(p4d_val(*p4d)); + for (i = 0; i < 3; i++) + set_bit(PG_arch_1, &page[i].flags); + } + mark_kernel_pud(p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + +static void mark_kernel_pgd(void) +{ + unsigned long addr, next; + struct page *page; + pgd_t *pgd; + int i; + + addr = 0; + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, MODULES_END); + if (pgd_none(*pgd)) + continue; + if (!pgd_folded(*pgd)) { + page = virt_to_page(pgd_val(*pgd)); + for (i = 0; i < 3; i++) + set_bit(PG_arch_1, &page[i].flags); + } + mark_kernel_p4d(pgd, addr, next); + } while (pgd++, addr = next, addr != MODULES_END); +} + +void __init cmma_init_nodat(void) +{ + struct memblock_region *reg; + struct page *page; + unsigned long start, end, ix; + + if (cmma_flag < 2) + return; + /* Mark pages used in kernel page tables */ + mark_kernel_pgd(); + + /* Set all kernel pages not used for page tables to stable/no-dat */ + for_each_memblock(memory, reg) { + start = memblock_region_memory_base_pfn(reg); + end = memblock_region_memory_end_pfn(reg); + page = pfn_to_page(start); + for (ix = start; ix < end; ix++, page++) { + if (__test_and_clear_bit(PG_arch_1, &page->flags)) + continue; /* skip page table pages */ + if (!list_empty(&page->lru)) + continue; /* skip free pages */ + set_page_stable_nodat(page, 0); + } + } +} + +void arch_free_page(struct page *page, int order) +{ + if (!cmma_flag) + return; + set_page_unused(page, order); +} + void arch_alloc_page(struct page *page, int order) { if (!cmma_flag) return; - set_page_stable(page, order); + if (cmma_flag < 2) + set_page_stable_dat(page, order); + else + set_page_stable_nodat(page, order); +} + +void arch_set_page_dat(struct page *page, int order) +{ + if (!cmma_flag) + return; + set_page_stable_dat(page, order); +} + +void arch_set_page_nodat(struct page *page, int order) +{ + if (cmma_flag < 2) + return; + set_page_stable_nodat(page, order); +} + +int arch_test_page_nodat(struct page *page) +{ + unsigned char state; + + if (cmma_flag < 2) + return 0; + state = get_page_state(page); + return !!(state & 0x20); } void arch_set_page_states(int make_stable) @@ -108,9 +270,9 @@ void arch_set_page_states(int make_stable) list_for_each(l, &zone->free_area[order].free_list[t]) { page = list_entry(l, struct page, lru); if (make_stable) - set_page_stable(page, order); + set_page_stable_dat(page, 0); else - set_page_unstable(page, order); + set_page_unused(page, order); } } spin_unlock_irqrestore(&zone->lock, flags); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 180481589246..567fe92e2bb8 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -328,7 +328,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr) return; } for (i = 0; i < nr; i++) { - __ptep_ipte(address, pte, IPTE_GLOBAL); + __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL); address += PAGE_SIZE; pte++; } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 18918e394ce4..a4de34ce392c 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -57,6 +57,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm) if (!page) return NULL; + arch_set_page_dat(page, 2); return (unsigned long *) page_to_phys(page); } @@ -214,6 +215,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) __free_page(page); return NULL; } + arch_set_page_dat(page, 0); /* Initialize page table */ table = (unsigned long *) page_to_phys(page); if (mm_alloc_pgste(mm)) { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4a1f7366b17a..ae677f814bc0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -25,8 +25,49 @@ #include #include +static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int nodat) +{ + unsigned long opt, asce; + + if (MACHINE_HAS_TLB_GUEST) { + opt = 0; + asce = READ_ONCE(mm->context.gmap_asce); + if (asce == 0UL || nodat) + opt |= IPTE_NODAT; + if (asce != -1UL) { + asce = asce ? : mm->context.asce; + opt |= IPTE_GUEST_ASCE; + } + __ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL); + } else { + __ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL); + } +} + +static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int nodat) +{ + unsigned long opt, asce; + + if (MACHINE_HAS_TLB_GUEST) { + opt = 0; + asce = READ_ONCE(mm->context.gmap_asce); + if (asce == 0UL || nodat) + opt |= IPTE_NODAT; + if (asce != -1UL) { + asce = asce ? : mm->context.asce; + opt |= IPTE_GUEST_ASCE; + } + __ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL); + } else { + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); + } +} + static inline pte_t ptep_flush_direct(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + int nodat) { pte_t old; @@ -36,15 +77,16 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_ipte(addr, ptep, IPTE_LOCAL); + ptep_ipte_local(mm, addr, ptep, nodat); else - __ptep_ipte(addr, ptep, IPTE_GLOBAL); + ptep_ipte_global(mm, addr, ptep, nodat); atomic_dec(&mm->context.flush_count); return old; } static inline pte_t ptep_flush_lazy(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + int nodat) { pte_t old; @@ -57,7 +99,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm, pte_val(*ptep) |= _PAGE_INVALID; mm->context.flush_mm = 1; } else - __ptep_ipte(addr, ptep, IPTE_GLOBAL); + ptep_ipte_global(mm, addr, ptep, nodat); atomic_dec(&mm->context.flush_count); return old; } @@ -229,10 +271,12 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, { pgste_t pgste; pte_t old; + int nodat; preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); - old = ptep_flush_direct(mm, addr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + old = ptep_flush_direct(mm, addr, ptep, nodat); old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; @@ -244,10 +288,12 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, { pgste_t pgste; pte_t old; + int nodat; preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); - old = ptep_flush_lazy(mm, addr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + old = ptep_flush_lazy(mm, addr, ptep, nodat); old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; @@ -259,10 +305,12 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, { pgste_t pgste; pte_t old; + int nodat; preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); - old = ptep_flush_lazy(mm, addr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + old = ptep_flush_lazy(mm, addr, ptep, nodat); if (mm_has_pgste(mm)) { pgste = pgste_update_all(old, pgste, mm); pgste_set(ptep, pgste); @@ -290,6 +338,28 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(ptep_modify_prot_commit); +static inline void pmdp_idte_local(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_LOCAL); + else + __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL); +} + +static inline void pmdp_idte_global(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); +} + static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { @@ -298,16 +368,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, old = *pmdp; if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; - if (!MACHINE_HAS_IDTE) { - __pmdp_csp(pmdp); - return old; - } atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pmdp_idte(addr, pmdp, IDTE_LOCAL); + pmdp_idte_local(mm, addr, pmdp); else - __pmdp_idte(addr, pmdp, IDTE_GLOBAL); + pmdp_idte_global(mm, addr, pmdp); atomic_dec(&mm->context.flush_count); return old; } @@ -325,10 +391,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; - } else if (MACHINE_HAS_IDTE) - __pmdp_idte(addr, pmdp, IDTE_GLOBAL); - else - __pmdp_csp(pmdp); + } else { + pmdp_idte_global(mm, addr, pmdp); + } atomic_dec(&mm->context.flush_count); return old; } @@ -359,6 +424,32 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(pmdp_xchg_lazy); +static inline void pudp_idte_local(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_LOCAL); + else + __pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL); +} + +static inline void pudp_idte_global(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); + else + /* + * Invalid bit position is the same for pmd and pud, so we can + * re-use _pmd_csp() here + */ + __pmdp_csp((pmd_t *) pudp); +} + static inline pud_t pudp_flush_direct(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { @@ -367,20 +458,12 @@ static inline pud_t pudp_flush_direct(struct mm_struct *mm, old = *pudp; if (pud_val(old) & _REGION_ENTRY_INVALID) return old; - if (!MACHINE_HAS_IDTE) { - /* - * Invalid bit position is the same for pmd and pud, so we can - * re-use _pmd_csp() here - */ - __pmdp_csp((pmd_t *) pudp); - return old; - } atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pudp_idte(addr, pudp, IDTE_LOCAL); + pudp_idte_local(mm, addr, pudp); else - __pudp_idte(addr, pudp, IDTE_GLOBAL); + pudp_idte_global(mm, addr, pudp); atomic_dec(&mm->context.flush_count); return old; } @@ -482,7 +565,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, { pte_t entry; pgste_t pgste; - int pte_i, pte_p; + int pte_i, pte_p, nodat; pgste = pgste_get_lock(ptep); entry = *ptep; @@ -495,13 +578,14 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, return -EAGAIN; } /* Change access rights and set pgste bit */ + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); if (prot == PROT_NONE && !pte_i) { - ptep_flush_direct(mm, addr, ptep); + ptep_flush_direct(mm, addr, ptep, nodat); pgste = pgste_update_all(entry, pgste, mm); pte_val(entry) |= _PAGE_INVALID; } if (prot == PROT_READ && !pte_p) { - ptep_flush_direct(mm, addr, ptep); + ptep_flush_direct(mm, addr, ptep, nodat); pte_val(entry) &= ~_PAGE_INVALID; pte_val(entry) |= _PAGE_PROTECT; } @@ -541,10 +625,12 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) { pgste_t pgste; + int nodat; pgste = pgste_get_lock(ptep); /* notifier is called by the caller */ - ptep_flush_direct(mm, saddr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + ptep_flush_direct(mm, saddr, ptep, nodat); /* don't touch the storage key - it belongs to parent pgste */ pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); pgste_set_unlock(ptep, pgste); @@ -617,6 +703,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) pte_t *ptep; pte_t pte; bool dirty; + int nodat; pgd = pgd_offset(mm, addr); p4d = p4d_alloc(mm, pgd, addr); @@ -645,7 +732,8 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { pgste = pgste_pte_notify(mm, addr, ptep, pgste); - __ptep_ipte(addr, ptep, IPTE_GLOBAL); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + ptep_ipte_global(mm, addr, ptep, nodat); if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) pte_val(pte) |= _PAGE_PROTECT; else @@ -831,7 +919,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, case ESSA_GET_STATE: break; case ESSA_SET_STABLE: - pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgstev |= _PGSTE_GPS_USAGE_STABLE; break; case ESSA_SET_UNUSED: @@ -877,6 +965,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, pgstev |= _PGSTE_GPS_USAGE_STABLE; } break; + case ESSA_SET_STABLE_NODAT: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT; + break; default: /* we should never get here! */ break; diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 025ea20fc4b4..19f12cef5bdf 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -80,6 +80,7 @@ static struct facility_def facility_defs[] = { 78, /* enhanced-DAT 2 */ 130, /* instruction-execution-protection */ 131, /* enhanced-SOP 2 and side-effect */ + 139, /* multiple epoch facility */ 146, /* msa extension 8 */ -1 /* END */ } diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index efd84d1d178b..bc1fc00910b0 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -39,7 +39,7 @@ struct read_info_sccb { u8 fac84; /* 84 */ u8 fac85; /* 85 */ u8 _pad_86[91 - 86]; /* 86-90 */ - u8 flags; /* 91 */ + u8 fac91; /* 91 */ u8 _pad_92[98 - 92]; /* 92-97 */ u8 fac98; /* 98 */ u8 hamaxpow; /* 99 */ @@ -103,6 +103,8 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) sclp.has_kss = !!(sccb->fac98 & 0x01); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; + if (sccb->fac91 & 0x40) + S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST; sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp.rzm <<= 20; @@ -139,7 +141,7 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) /* Save IPL information */ sclp_ipl_info.is_valid = 1; - if (sccb->flags & 0x2) + if (sccb->fac91 & 0x2) sclp_ipl_info.has_dump = 1; memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN);