KVM: s390: initial host large page support
- must be enabled via module parameter hpage=1 - cannot be used together with nested - does support migration - does support hugetlbfs - no THP yet -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.22 (GNU/Linux) iQIcBAABAgAGBQJbX4AwAAoJEBF7vIC1phx85eMP/ifsNHwqfAOrZBdlJuLVPla5 47J8iY4i4DOKGhKI4YOTcJQhn1izKZhECXS8d8hghB/sQUCE2CLVr1X/r1Udy2Pq bpKG4apYtcJZBF6qn7yDMjBGkIRK4OCBD1pkuKEq2NyvUgPsHUVUgpuq2gngMTBk ZN9MIfRQMdIEJsT389D6T9as0lwABJ0MJap5AudkQwguN2dDhQGeZv8l0QYV8C2I WqRI2VsI1QEo3cJr1lJ5li/F9fC7q0l6QwlvPVocIHJAnq01zJvOekeAgQ4hzz16 JIoQckJq8m4d4PqZ7aWmAaMEemoQ9llmCavovspJNtFT79jho6cWWtBEvq+t0GLQ qTsG9Yi20hONZMWAw+JIdSdOuFMD0HCpOWdUtSMjENFRbr8LLHUr91dGIxRLjF8Z gv3vDJrbGzCQ+b9qPA8SrAN7U3VNCZG384MEmobwTuv5hxOopWp6chcK7RCriV/m 7cFDfO7+2pZymdW7D4DWlFiZl4mWpwOxip32C9tCt0CQveqeYSZsb5Qb9Pe+50vr JhpB74UL79Wffvd65InGlu5jx1SdGG0QAzmBOkdOsAhX+0WMmXRB1ddn4whu7HPU ssNtdKgLt9KkM/kIsB9RC/YLvUFK1lBVHrfnzUmLw3CBHP3QeO+V+arLwdVLVDjV PA/LPECBWtGtQtxGWb2H =Y0Wl -----END PGP SIGNATURE----- Merge tag 'hlp_stage1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into features Pull hlp_stage1 from Christian Borntraeger with the following changes: KVM: s390: initial host large page support - must be enabled via module parameter hpage=1 - cannot be used together with nested - does support migration - does support hugetlbfs - no THP yet
This commit is contained in:
commit
03760d44b1
|
@ -4391,6 +4391,22 @@ all such vmexits.
|
||||||
|
|
||||||
Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
|
Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
|
||||||
|
|
||||||
|
7.14 KVM_CAP_S390_HPAGE_1M
|
||||||
|
|
||||||
|
Architectures: s390
|
||||||
|
Parameters: none
|
||||||
|
Returns: 0 on success, -EINVAL if hpage module parameter was not set
|
||||||
|
or cmma is enabled, -EBUSY if VCPUs have already been created
|
||||||
|
|
||||||
|
With this capability the KVM support for memory backing with 1m pages
|
||||||
|
through hugetlbfs can be enabled for a VM. After the capability is
|
||||||
|
enabled, cmma can't be enabled anymore and pfmfi and the storage key
|
||||||
|
interpretation are disabled. If cmma has already been enabled or the
|
||||||
|
hpage module parameter is not set to 1, -EINVAL is returned.
|
||||||
|
|
||||||
|
While it is generally possible to create a huge page backed VM without
|
||||||
|
this capability, the VM will not be able to run.
|
||||||
|
|
||||||
8. Other capabilities.
|
8. Other capabilities.
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,14 @@
|
||||||
#ifndef _ASM_S390_GMAP_H
|
#ifndef _ASM_S390_GMAP_H
|
||||||
#define _ASM_S390_GMAP_H
|
#define _ASM_S390_GMAP_H
|
||||||
|
|
||||||
|
/* Generic bits for GMAP notification on DAT table entry changes. */
|
||||||
|
#define GMAP_NOTIFY_SHADOW 0x2
|
||||||
|
#define GMAP_NOTIFY_MPROT 0x1
|
||||||
|
|
||||||
|
/* Status bits only for huge segment entries */
|
||||||
|
#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */
|
||||||
|
#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct gmap_struct - guest address space
|
* struct gmap_struct - guest address space
|
||||||
* @list: list head for the mm->context gmap list
|
* @list: list head for the mm->context gmap list
|
||||||
|
@ -132,4 +140,6 @@ void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *,
|
||||||
int gmap_mprotect_notify(struct gmap *, unsigned long start,
|
int gmap_mprotect_notify(struct gmap *, unsigned long start,
|
||||||
unsigned long len, int prot);
|
unsigned long len, int prot);
|
||||||
|
|
||||||
|
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
|
||||||
|
unsigned long gaddr, unsigned long vmaddr);
|
||||||
#endif /* _ASM_S390_GMAP_H */
|
#endif /* _ASM_S390_GMAP_H */
|
||||||
|
|
|
@ -37,7 +37,10 @@ static inline int prepare_hugepage_range(struct file *file,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define arch_clear_hugepage_flags(page) do { } while (0)
|
static inline void arch_clear_hugepage_flags(struct page *page)
|
||||||
|
{
|
||||||
|
clear_bit(PG_arch_1, &page->flags);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
|
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
|
||||||
pte_t *ptep, unsigned long sz)
|
pte_t *ptep, unsigned long sz)
|
||||||
|
|
|
@ -24,6 +24,8 @@ typedef struct {
|
||||||
unsigned int uses_skeys:1;
|
unsigned int uses_skeys:1;
|
||||||
/* The mmu context uses CMM. */
|
/* The mmu context uses CMM. */
|
||||||
unsigned int uses_cmm:1;
|
unsigned int uses_cmm:1;
|
||||||
|
/* The gmaps associated with this context are allowed to use huge pages. */
|
||||||
|
unsigned int allow_gmap_hpage_1m:1;
|
||||||
} mm_context_t;
|
} mm_context_t;
|
||||||
|
|
||||||
#define INIT_MM_CONTEXT(name) \
|
#define INIT_MM_CONTEXT(name) \
|
||||||
|
|
|
@ -32,6 +32,7 @@ static inline int init_new_context(struct task_struct *tsk,
|
||||||
mm->context.has_pgste = 0;
|
mm->context.has_pgste = 0;
|
||||||
mm->context.uses_skeys = 0;
|
mm->context.uses_skeys = 0;
|
||||||
mm->context.uses_cmm = 0;
|
mm->context.uses_cmm = 0;
|
||||||
|
mm->context.allow_gmap_hpage_1m = 0;
|
||||||
#endif
|
#endif
|
||||||
switch (mm->context.asce_limit) {
|
switch (mm->context.asce_limit) {
|
||||||
case _REGION2_SIZE:
|
case _REGION2_SIZE:
|
||||||
|
|
|
@ -268,8 +268,10 @@ static inline int is_module_addr(void *addr)
|
||||||
#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
|
#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
|
||||||
|
|
||||||
/* Bits in the segment table entry */
|
/* Bits in the segment table entry */
|
||||||
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
|
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
|
||||||
#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
|
#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
|
||||||
|
#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL
|
||||||
|
#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL
|
||||||
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
|
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
|
||||||
#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */
|
#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */
|
||||||
#define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */
|
#define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */
|
||||||
|
@ -1101,7 +1103,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
|
||||||
pte_t *sptep, pte_t *tptep, pte_t pte);
|
pte_t *sptep, pte_t *tptep, pte_t pte);
|
||||||
void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);
|
void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);
|
||||||
|
|
||||||
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
|
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address,
|
||||||
|
pte_t *ptep);
|
||||||
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||||
unsigned char key, bool nq);
|
unsigned char key, bool nq);
|
||||||
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||||
|
@ -1116,6 +1119,10 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
|
||||||
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
|
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
|
||||||
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
|
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
|
||||||
unsigned long *oldpte, unsigned long *oldpgste);
|
unsigned long *oldpte, unsigned long *oldpgste);
|
||||||
|
void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr);
|
||||||
|
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr);
|
||||||
|
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);
|
||||||
|
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Certain architectures need to do special things when PTEs
|
* Certain architectures need to do special things when PTEs
|
||||||
|
|
|
@ -172,6 +172,10 @@ static int nested;
|
||||||
module_param(nested, int, S_IRUGO);
|
module_param(nested, int, S_IRUGO);
|
||||||
MODULE_PARM_DESC(nested, "Nested virtualization support");
|
MODULE_PARM_DESC(nested, "Nested virtualization support");
|
||||||
|
|
||||||
|
/* allow 1m huge page guest backing, if !nested */
|
||||||
|
static int hpage;
|
||||||
|
module_param(hpage, int, 0444);
|
||||||
|
MODULE_PARM_DESC(hpage, "1m huge page backing support");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For now we handle at most 16 double words as this is what the s390 base
|
* For now we handle at most 16 double words as this is what the s390 base
|
||||||
|
@ -475,6 +479,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||||
case KVM_CAP_S390_AIS_MIGRATION:
|
case KVM_CAP_S390_AIS_MIGRATION:
|
||||||
r = 1;
|
r = 1;
|
||||||
break;
|
break;
|
||||||
|
case KVM_CAP_S390_HPAGE_1M:
|
||||||
|
r = 0;
|
||||||
|
if (hpage)
|
||||||
|
r = 1;
|
||||||
|
break;
|
||||||
case KVM_CAP_S390_MEM_OP:
|
case KVM_CAP_S390_MEM_OP:
|
||||||
r = MEM_OP_MAX_SIZE;
|
r = MEM_OP_MAX_SIZE;
|
||||||
break;
|
break;
|
||||||
|
@ -511,19 +520,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
|
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
|
||||||
struct kvm_memory_slot *memslot)
|
struct kvm_memory_slot *memslot)
|
||||||
{
|
{
|
||||||
|
int i;
|
||||||
gfn_t cur_gfn, last_gfn;
|
gfn_t cur_gfn, last_gfn;
|
||||||
unsigned long address;
|
unsigned long gaddr, vmaddr;
|
||||||
struct gmap *gmap = kvm->arch.gmap;
|
struct gmap *gmap = kvm->arch.gmap;
|
||||||
|
DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
|
||||||
|
|
||||||
/* Loop over all guest pages */
|
/* Loop over all guest segments */
|
||||||
|
cur_gfn = memslot->base_gfn;
|
||||||
last_gfn = memslot->base_gfn + memslot->npages;
|
last_gfn = memslot->base_gfn + memslot->npages;
|
||||||
for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
|
for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
|
||||||
address = gfn_to_hva_memslot(memslot, cur_gfn);
|
gaddr = gfn_to_gpa(cur_gfn);
|
||||||
|
vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
|
||||||
|
if (kvm_is_error_hva(vmaddr))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
bitmap_zero(bitmap, _PAGE_ENTRIES);
|
||||||
|
gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
|
||||||
|
for (i = 0; i < _PAGE_ENTRIES; i++) {
|
||||||
|
if (test_bit(i, bitmap))
|
||||||
|
mark_page_dirty(kvm, cur_gfn + i);
|
||||||
|
}
|
||||||
|
|
||||||
if (test_and_clear_guest_dirty(gmap->mm, address))
|
|
||||||
mark_page_dirty(kvm, cur_gfn);
|
|
||||||
if (fatal_signal_pending(current))
|
if (fatal_signal_pending(current))
|
||||||
return;
|
return;
|
||||||
cond_resched();
|
cond_resched();
|
||||||
|
@ -667,6 +687,27 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
|
||||||
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
|
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
|
||||||
r ? "(not available)" : "(success)");
|
r ? "(not available)" : "(success)");
|
||||||
break;
|
break;
|
||||||
|
case KVM_CAP_S390_HPAGE_1M:
|
||||||
|
mutex_lock(&kvm->lock);
|
||||||
|
if (kvm->created_vcpus)
|
||||||
|
r = -EBUSY;
|
||||||
|
else if (!hpage || kvm->arch.use_cmma)
|
||||||
|
r = -EINVAL;
|
||||||
|
else {
|
||||||
|
r = 0;
|
||||||
|
kvm->mm->context.allow_gmap_hpage_1m = 1;
|
||||||
|
/*
|
||||||
|
* We might have to create fake 4k page
|
||||||
|
* tables. To avoid that the hardware works on
|
||||||
|
* stale PGSTEs, we emulate these instructions.
|
||||||
|
*/
|
||||||
|
kvm->arch.use_skf = 0;
|
||||||
|
kvm->arch.use_pfmfi = 0;
|
||||||
|
}
|
||||||
|
mutex_unlock(&kvm->lock);
|
||||||
|
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
|
||||||
|
r ? "(not available)" : "(success)");
|
||||||
|
break;
|
||||||
case KVM_CAP_S390_USER_STSI:
|
case KVM_CAP_S390_USER_STSI:
|
||||||
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
|
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
|
||||||
kvm->arch.user_stsi = 1;
|
kvm->arch.user_stsi = 1;
|
||||||
|
@ -714,10 +755,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
|
||||||
if (!sclp.has_cmma)
|
if (!sclp.has_cmma)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
ret = -EBUSY;
|
|
||||||
VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
|
VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
|
||||||
mutex_lock(&kvm->lock);
|
mutex_lock(&kvm->lock);
|
||||||
if (!kvm->created_vcpus) {
|
if (kvm->created_vcpus)
|
||||||
|
ret = -EBUSY;
|
||||||
|
else if (kvm->mm->context.allow_gmap_hpage_1m)
|
||||||
|
ret = -EINVAL;
|
||||||
|
else {
|
||||||
kvm->arch.use_cmma = 1;
|
kvm->arch.use_cmma = 1;
|
||||||
/* Not compatible with cmma. */
|
/* Not compatible with cmma. */
|
||||||
kvm->arch.use_pfmfi = 0;
|
kvm->arch.use_pfmfi = 0;
|
||||||
|
@ -1540,6 +1584,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
|
||||||
uint8_t *keys;
|
uint8_t *keys;
|
||||||
uint64_t hva;
|
uint64_t hva;
|
||||||
int srcu_idx, i, r = 0;
|
int srcu_idx, i, r = 0;
|
||||||
|
bool unlocked;
|
||||||
|
|
||||||
if (args->flags != 0)
|
if (args->flags != 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -1564,9 +1609,11 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
|
||||||
if (r)
|
if (r)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
i = 0;
|
||||||
down_read(¤t->mm->mmap_sem);
|
down_read(¤t->mm->mmap_sem);
|
||||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||||
for (i = 0; i < args->count; i++) {
|
while (i < args->count) {
|
||||||
|
unlocked = false;
|
||||||
hva = gfn_to_hva(kvm, args->start_gfn + i);
|
hva = gfn_to_hva(kvm, args->start_gfn + i);
|
||||||
if (kvm_is_error_hva(hva)) {
|
if (kvm_is_error_hva(hva)) {
|
||||||
r = -EFAULT;
|
r = -EFAULT;
|
||||||
|
@ -1580,8 +1627,14 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
|
||||||
}
|
}
|
||||||
|
|
||||||
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
|
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
|
||||||
if (r)
|
if (r) {
|
||||||
break;
|
r = fixup_user_fault(current, current->mm, hva,
|
||||||
|
FAULT_FLAG_WRITE, &unlocked);
|
||||||
|
if (r)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!r)
|
||||||
|
i++;
|
||||||
}
|
}
|
||||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||||
up_read(¤t->mm->mmap_sem);
|
up_read(¤t->mm->mmap_sem);
|
||||||
|
@ -4082,6 +4135,11 @@ static int __init kvm_s390_init(void)
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nested && hpage) {
|
||||||
|
pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < 16; i++)
|
for (i = 0; i < 16; i++)
|
||||||
kvm_s390_fac_base[i] |=
|
kvm_s390_fac_base[i] |=
|
||||||
S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
|
S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
|
||||||
|
|
|
@ -246,9 +246,10 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
|
||||||
|
|
||||||
static int handle_iske(struct kvm_vcpu *vcpu)
|
static int handle_iske(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
unsigned long addr;
|
unsigned long gaddr, vmaddr;
|
||||||
unsigned char key;
|
unsigned char key;
|
||||||
int reg1, reg2;
|
int reg1, reg2;
|
||||||
|
bool unlocked;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
vcpu->stat.instruction_iske++;
|
vcpu->stat.instruction_iske++;
|
||||||
|
@ -262,18 +263,28 @@ static int handle_iske(struct kvm_vcpu *vcpu)
|
||||||
|
|
||||||
kvm_s390_get_regs_rre(vcpu, ®1, ®2);
|
kvm_s390_get_regs_rre(vcpu, ®1, ®2);
|
||||||
|
|
||||||
addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
|
gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
|
||||||
addr = kvm_s390_logical_to_effective(vcpu, addr);
|
gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
|
||||||
addr = kvm_s390_real_to_abs(vcpu, addr);
|
gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
|
||||||
addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
|
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
|
||||||
if (kvm_is_error_hva(addr))
|
if (kvm_is_error_hva(vmaddr))
|
||||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
|
retry:
|
||||||
|
unlocked = false;
|
||||||
down_read(¤t->mm->mmap_sem);
|
down_read(¤t->mm->mmap_sem);
|
||||||
rc = get_guest_storage_key(current->mm, addr, &key);
|
rc = get_guest_storage_key(current->mm, vmaddr, &key);
|
||||||
up_read(¤t->mm->mmap_sem);
|
|
||||||
|
if (rc) {
|
||||||
|
rc = fixup_user_fault(current, current->mm, vmaddr,
|
||||||
|
FAULT_FLAG_WRITE, &unlocked);
|
||||||
|
if (!rc) {
|
||||||
|
up_read(¤t->mm->mmap_sem);
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (rc)
|
if (rc)
|
||||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
|
up_read(¤t->mm->mmap_sem);
|
||||||
vcpu->run->s.regs.gprs[reg1] &= ~0xff;
|
vcpu->run->s.regs.gprs[reg1] &= ~0xff;
|
||||||
vcpu->run->s.regs.gprs[reg1] |= key;
|
vcpu->run->s.regs.gprs[reg1] |= key;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -281,8 +292,9 @@ static int handle_iske(struct kvm_vcpu *vcpu)
|
||||||
|
|
||||||
static int handle_rrbe(struct kvm_vcpu *vcpu)
|
static int handle_rrbe(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
unsigned long addr;
|
unsigned long vmaddr, gaddr;
|
||||||
int reg1, reg2;
|
int reg1, reg2;
|
||||||
|
bool unlocked;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
vcpu->stat.instruction_rrbe++;
|
vcpu->stat.instruction_rrbe++;
|
||||||
|
@ -296,19 +308,27 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
|
||||||
|
|
||||||
kvm_s390_get_regs_rre(vcpu, ®1, ®2);
|
kvm_s390_get_regs_rre(vcpu, ®1, ®2);
|
||||||
|
|
||||||
addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
|
gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
|
||||||
addr = kvm_s390_logical_to_effective(vcpu, addr);
|
gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
|
||||||
addr = kvm_s390_real_to_abs(vcpu, addr);
|
gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
|
||||||
addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
|
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
|
||||||
if (kvm_is_error_hva(addr))
|
if (kvm_is_error_hva(vmaddr))
|
||||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
|
retry:
|
||||||
|
unlocked = false;
|
||||||
down_read(¤t->mm->mmap_sem);
|
down_read(¤t->mm->mmap_sem);
|
||||||
rc = reset_guest_reference_bit(current->mm, addr);
|
rc = reset_guest_reference_bit(current->mm, vmaddr);
|
||||||
up_read(¤t->mm->mmap_sem);
|
if (rc < 0) {
|
||||||
|
rc = fixup_user_fault(current, current->mm, vmaddr,
|
||||||
|
FAULT_FLAG_WRITE, &unlocked);
|
||||||
|
if (!rc) {
|
||||||
|
up_read(¤t->mm->mmap_sem);
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (rc < 0)
|
if (rc < 0)
|
||||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
|
up_read(¤t->mm->mmap_sem);
|
||||||
kvm_s390_set_psw_cc(vcpu, rc);
|
kvm_s390_set_psw_cc(vcpu, rc);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -323,6 +343,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
|
||||||
unsigned long start, end;
|
unsigned long start, end;
|
||||||
unsigned char key, oldkey;
|
unsigned char key, oldkey;
|
||||||
int reg1, reg2;
|
int reg1, reg2;
|
||||||
|
bool unlocked;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
vcpu->stat.instruction_sske++;
|
vcpu->stat.instruction_sske++;
|
||||||
|
@ -355,19 +376,28 @@ static int handle_sske(struct kvm_vcpu *vcpu)
|
||||||
}
|
}
|
||||||
|
|
||||||
while (start != end) {
|
while (start != end) {
|
||||||
unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
|
unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
|
||||||
|
unlocked = false;
|
||||||
|
|
||||||
if (kvm_is_error_hva(addr))
|
if (kvm_is_error_hva(vmaddr))
|
||||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
|
|
||||||
down_read(¤t->mm->mmap_sem);
|
down_read(¤t->mm->mmap_sem);
|
||||||
rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey,
|
rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
|
||||||
m3 & SSKE_NQ, m3 & SSKE_MR,
|
m3 & SSKE_NQ, m3 & SSKE_MR,
|
||||||
m3 & SSKE_MC);
|
m3 & SSKE_MC);
|
||||||
up_read(¤t->mm->mmap_sem);
|
|
||||||
if (rc < 0)
|
if (rc < 0) {
|
||||||
|
rc = fixup_user_fault(current, current->mm, vmaddr,
|
||||||
|
FAULT_FLAG_WRITE, &unlocked);
|
||||||
|
rc = !rc ? -EAGAIN : rc;
|
||||||
|
}
|
||||||
|
if (rc == -EFAULT)
|
||||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
start += PAGE_SIZE;
|
|
||||||
|
up_read(¤t->mm->mmap_sem);
|
||||||
|
if (rc >= 0)
|
||||||
|
start += PAGE_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m3 & (SSKE_MC | SSKE_MR)) {
|
if (m3 & (SSKE_MC | SSKE_MR)) {
|
||||||
|
@ -948,15 +978,16 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
|
||||||
}
|
}
|
||||||
|
|
||||||
while (start != end) {
|
while (start != end) {
|
||||||
unsigned long useraddr;
|
unsigned long vmaddr;
|
||||||
|
bool unlocked = false;
|
||||||
|
|
||||||
/* Translate guest address to host address */
|
/* Translate guest address to host address */
|
||||||
useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
|
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
|
||||||
if (kvm_is_error_hva(useraddr))
|
if (kvm_is_error_hva(vmaddr))
|
||||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
|
|
||||||
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
|
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
|
||||||
if (clear_user((void __user *)useraddr, PAGE_SIZE))
|
if (clear_user((void __user *)vmaddr, PAGE_SIZE))
|
||||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -966,14 +997,20 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
|
||||||
if (rc)
|
if (rc)
|
||||||
return rc;
|
return rc;
|
||||||
down_read(¤t->mm->mmap_sem);
|
down_read(¤t->mm->mmap_sem);
|
||||||
rc = cond_set_guest_storage_key(current->mm, useraddr,
|
rc = cond_set_guest_storage_key(current->mm, vmaddr,
|
||||||
key, NULL, nq, mr, mc);
|
key, NULL, nq, mr, mc);
|
||||||
up_read(¤t->mm->mmap_sem);
|
if (rc < 0) {
|
||||||
if (rc < 0)
|
rc = fixup_user_fault(current, current->mm, vmaddr,
|
||||||
|
FAULT_FLAG_WRITE, &unlocked);
|
||||||
|
rc = !rc ? -EAGAIN : rc;
|
||||||
|
}
|
||||||
|
if (rc == -EFAULT)
|
||||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
}
|
|
||||||
|
|
||||||
start += PAGE_SIZE;
|
up_read(¤t->mm->mmap_sem);
|
||||||
|
if (rc >= 0)
|
||||||
|
start += PAGE_SIZE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
|
if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
|
||||||
if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
|
if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
|
||||||
|
|
|
@ -2,8 +2,10 @@
|
||||||
/*
|
/*
|
||||||
* KVM guest address space mapping code
|
* KVM guest address space mapping code
|
||||||
*
|
*
|
||||||
* Copyright IBM Corp. 2007, 2016
|
* Copyright IBM Corp. 2007, 2016, 2018
|
||||||
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
|
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
|
||||||
|
* David Hildenbrand <david@redhat.com>
|
||||||
|
* Janosch Frank <frankja@linux.vnet.ibm.com>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
|
@ -521,6 +523,9 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table,
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
|
||||||
|
unsigned long gaddr);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* gmap_link - set up shadow page tables to connect a host to a guest address
|
* gmap_link - set up shadow page tables to connect a host to a guest address
|
||||||
* @gmap: pointer to guest mapping meta data structure
|
* @gmap: pointer to guest mapping meta data structure
|
||||||
|
@ -541,6 +546,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
|
||||||
p4d_t *p4d;
|
p4d_t *p4d;
|
||||||
pud_t *pud;
|
pud_t *pud;
|
||||||
pmd_t *pmd;
|
pmd_t *pmd;
|
||||||
|
u64 unprot;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
BUG_ON(gmap_is_shadow(gmap));
|
BUG_ON(gmap_is_shadow(gmap));
|
||||||
|
@ -584,8 +590,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
pmd = pmd_offset(pud, vmaddr);
|
pmd = pmd_offset(pud, vmaddr);
|
||||||
VM_BUG_ON(pmd_none(*pmd));
|
VM_BUG_ON(pmd_none(*pmd));
|
||||||
/* large pmds cannot yet be handled */
|
/* Are we allowed to use huge pages? */
|
||||||
if (pmd_large(*pmd))
|
if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
/* Link gmap segment table entry location to page table. */
|
/* Link gmap segment table entry location to page table. */
|
||||||
rc = radix_tree_preload(GFP_KERNEL);
|
rc = radix_tree_preload(GFP_KERNEL);
|
||||||
|
@ -596,10 +602,22 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
|
||||||
if (*table == _SEGMENT_ENTRY_EMPTY) {
|
if (*table == _SEGMENT_ENTRY_EMPTY) {
|
||||||
rc = radix_tree_insert(&gmap->host_to_guest,
|
rc = radix_tree_insert(&gmap->host_to_guest,
|
||||||
vmaddr >> PMD_SHIFT, table);
|
vmaddr >> PMD_SHIFT, table);
|
||||||
if (!rc)
|
if (!rc) {
|
||||||
*table = pmd_val(*pmd);
|
if (pmd_large(*pmd)) {
|
||||||
} else
|
*table = (pmd_val(*pmd) &
|
||||||
rc = 0;
|
_SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
|
||||||
|
| _SEGMENT_ENTRY_GMAP_UC;
|
||||||
|
} else
|
||||||
|
*table = pmd_val(*pmd) &
|
||||||
|
_SEGMENT_ENTRY_HARDWARE_BITS;
|
||||||
|
}
|
||||||
|
} else if (*table & _SEGMENT_ENTRY_PROTECT &&
|
||||||
|
!(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
|
||||||
|
unprot = (u64)*table;
|
||||||
|
unprot &= ~_SEGMENT_ENTRY_PROTECT;
|
||||||
|
unprot |= _SEGMENT_ENTRY_GMAP_UC;
|
||||||
|
gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
|
||||||
|
}
|
||||||
spin_unlock(&gmap->guest_table_lock);
|
spin_unlock(&gmap->guest_table_lock);
|
||||||
spin_unlock(ptl);
|
spin_unlock(ptl);
|
||||||
radix_tree_preload_end();
|
radix_tree_preload_end();
|
||||||
|
@ -690,6 +708,12 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
|
||||||
vmaddr |= gaddr & ~PMD_MASK;
|
vmaddr |= gaddr & ~PMD_MASK;
|
||||||
/* Find vma in the parent mm */
|
/* Find vma in the parent mm */
|
||||||
vma = find_vma(gmap->mm, vmaddr);
|
vma = find_vma(gmap->mm, vmaddr);
|
||||||
|
/*
|
||||||
|
* We do not discard pages that are backed by
|
||||||
|
* hugetlbfs, so we don't have to refault them.
|
||||||
|
*/
|
||||||
|
if (vma && is_vm_hugetlb_page(vma))
|
||||||
|
continue;
|
||||||
size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
|
size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
|
||||||
zap_page_range(vma, vmaddr, size);
|
zap_page_range(vma, vmaddr, size);
|
||||||
}
|
}
|
||||||
|
@ -864,7 +888,128 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
|
||||||
*/
|
*/
|
||||||
static void gmap_pte_op_end(spinlock_t *ptl)
|
static void gmap_pte_op_end(spinlock_t *ptl)
|
||||||
{
|
{
|
||||||
spin_unlock(ptl);
|
if (ptl)
|
||||||
|
spin_unlock(ptl);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
|
||||||
|
* and return the pmd pointer
|
||||||
|
* @gmap: pointer to guest mapping meta data structure
|
||||||
|
* @gaddr: virtual address in the guest address space
|
||||||
|
*
|
||||||
|
* Returns a pointer to the pmd for a guest address, or NULL
|
||||||
|
*/
|
||||||
|
static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
|
||||||
|
{
|
||||||
|
pmd_t *pmdp;
|
||||||
|
|
||||||
|
BUG_ON(gmap_is_shadow(gmap));
|
||||||
|
spin_lock(&gmap->guest_table_lock);
|
||||||
|
pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
|
||||||
|
|
||||||
|
if (!pmdp || pmd_none(*pmdp)) {
|
||||||
|
spin_unlock(&gmap->guest_table_lock);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
|
||||||
|
if (!pmd_large(*pmdp))
|
||||||
|
spin_unlock(&gmap->guest_table_lock);
|
||||||
|
return pmdp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gmap_pmd_op_end - release the guest_table_lock if needed
|
||||||
|
* @gmap: pointer to the guest mapping meta data structure
|
||||||
|
* @pmdp: pointer to the pmd
|
||||||
|
*/
|
||||||
|
static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
|
||||||
|
{
|
||||||
|
if (pmd_large(*pmdp))
|
||||||
|
spin_unlock(&gmap->guest_table_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* gmap_protect_pmd - remove access rights to memory and set pmd notification bits
|
||||||
|
* @pmdp: pointer to the pmd to be protected
|
||||||
|
* @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
|
||||||
|
* @bits: notification bits to set
|
||||||
|
*
|
||||||
|
* Returns:
|
||||||
|
* 0 if successfully protected
|
||||||
|
* -EAGAIN if a fixup is needed
|
||||||
|
* -EINVAL if unsupported notifier bits have been specified
|
||||||
|
*
|
||||||
|
* Expected to be called with gmap->mm->mmap_sem in read and
|
||||||
|
* guest_table_lock held.
|
||||||
|
*/
|
||||||
|
static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
|
||||||
|
pmd_t *pmdp, int prot, unsigned long bits)
|
||||||
|
{
|
||||||
|
int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
|
||||||
|
int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
|
||||||
|
pmd_t new = *pmdp;
|
||||||
|
|
||||||
|
/* Fixup needed */
|
||||||
|
if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
|
||||||
|
return -EAGAIN;
|
||||||
|
|
||||||
|
if (prot == PROT_NONE && !pmd_i) {
|
||||||
|
pmd_val(new) |= _SEGMENT_ENTRY_INVALID;
|
||||||
|
gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (prot == PROT_READ && !pmd_p) {
|
||||||
|
pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID;
|
||||||
|
pmd_val(new) |= _SEGMENT_ENTRY_PROTECT;
|
||||||
|
gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bits & GMAP_NOTIFY_MPROT)
|
||||||
|
pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN;
|
||||||
|
|
||||||
|
/* Shadow GMAP protection needs split PMDs */
|
||||||
|
if (bits & GMAP_NOTIFY_SHADOW)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* gmap_protect_pte - remove access rights to memory and set pgste bits
|
||||||
|
* @gmap: pointer to guest mapping meta data structure
|
||||||
|
* @gaddr: virtual address in the guest address space
|
||||||
|
* @pmdp: pointer to the pmd associated with the pte
|
||||||
|
* @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
|
||||||
|
* @bits: notification bits to set
|
||||||
|
*
|
||||||
|
* Returns 0 if successfully protected, -ENOMEM if out of memory and
|
||||||
|
* -EAGAIN if a fixup is needed.
|
||||||
|
*
|
||||||
|
* Expected to be called with gmap->mm->mmap_sem in read
|
||||||
|
*/
|
||||||
|
static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
|
||||||
|
pmd_t *pmdp, int prot, unsigned long bits)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
pte_t *ptep;
|
||||||
|
spinlock_t *ptl = NULL;
|
||||||
|
unsigned long pbits = 0;
|
||||||
|
|
||||||
|
if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
|
||||||
|
return -EAGAIN;
|
||||||
|
|
||||||
|
ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
|
||||||
|
if (!ptep)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
|
||||||
|
pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
|
||||||
|
/* Protect and unlock. */
|
||||||
|
rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
|
||||||
|
gmap_pte_op_end(ptl);
|
||||||
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -883,30 +1028,45 @@ static void gmap_pte_op_end(spinlock_t *ptl)
|
||||||
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
|
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
|
||||||
unsigned long len, int prot, unsigned long bits)
|
unsigned long len, int prot, unsigned long bits)
|
||||||
{
|
{
|
||||||
unsigned long vmaddr;
|
unsigned long vmaddr, dist;
|
||||||
spinlock_t *ptl;
|
pmd_t *pmdp;
|
||||||
pte_t *ptep;
|
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
BUG_ON(gmap_is_shadow(gmap));
|
BUG_ON(gmap_is_shadow(gmap));
|
||||||
while (len) {
|
while (len) {
|
||||||
rc = -EAGAIN;
|
rc = -EAGAIN;
|
||||||
ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
|
pmdp = gmap_pmd_op_walk(gmap, gaddr);
|
||||||
if (ptep) {
|
if (pmdp) {
|
||||||
rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits);
|
if (!pmd_large(*pmdp)) {
|
||||||
gmap_pte_op_end(ptl);
|
rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
|
||||||
|
bits);
|
||||||
|
if (!rc) {
|
||||||
|
len -= PAGE_SIZE;
|
||||||
|
gaddr += PAGE_SIZE;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
|
||||||
|
bits);
|
||||||
|
if (!rc) {
|
||||||
|
dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
|
||||||
|
len = len < dist ? 0 : len - dist;
|
||||||
|
gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gmap_pmd_op_end(gmap, pmdp);
|
||||||
}
|
}
|
||||||
if (rc) {
|
if (rc) {
|
||||||
|
if (rc == -EINVAL)
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
/* -EAGAIN, fixup of userspace mm and gmap */
|
||||||
vmaddr = __gmap_translate(gmap, gaddr);
|
vmaddr = __gmap_translate(gmap, gaddr);
|
||||||
if (IS_ERR_VALUE(vmaddr))
|
if (IS_ERR_VALUE(vmaddr))
|
||||||
return vmaddr;
|
return vmaddr;
|
||||||
rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
|
rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
|
||||||
if (rc)
|
if (rc)
|
||||||
return rc;
|
return rc;
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
gaddr += PAGE_SIZE;
|
|
||||||
len -= PAGE_SIZE;
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -935,7 +1095,7 @@ int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
|
||||||
if (!MACHINE_HAS_ESOP && prot == PROT_READ)
|
if (!MACHINE_HAS_ESOP && prot == PROT_READ)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
down_read(&gmap->mm->mmap_sem);
|
down_read(&gmap->mm->mmap_sem);
|
||||||
rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT);
|
rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
|
||||||
up_read(&gmap->mm->mmap_sem);
|
up_read(&gmap->mm->mmap_sem);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
@ -1474,6 +1634,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
|
||||||
unsigned long limit;
|
unsigned long limit;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
|
BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
|
||||||
BUG_ON(gmap_is_shadow(parent));
|
BUG_ON(gmap_is_shadow(parent));
|
||||||
spin_lock(&parent->shadow_lock);
|
spin_lock(&parent->shadow_lock);
|
||||||
sg = gmap_find_shadow(parent, asce, edat_level);
|
sg = gmap_find_shadow(parent, asce, edat_level);
|
||||||
|
@ -1526,7 +1687,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
|
||||||
down_read(&parent->mm->mmap_sem);
|
down_read(&parent->mm->mmap_sem);
|
||||||
rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
|
rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
|
||||||
((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
|
((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
|
||||||
PROT_READ, PGSTE_VSIE_BIT);
|
PROT_READ, GMAP_NOTIFY_SHADOW);
|
||||||
up_read(&parent->mm->mmap_sem);
|
up_read(&parent->mm->mmap_sem);
|
||||||
spin_lock(&parent->shadow_lock);
|
spin_lock(&parent->shadow_lock);
|
||||||
new->initialized = true;
|
new->initialized = true;
|
||||||
|
@ -2092,6 +2253,225 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(ptep_notify);
|
EXPORT_SYMBOL_GPL(ptep_notify);
|
||||||
|
|
||||||
|
/*
 * pmdp_notify_gmap - drop the notification bit of a gmap pmd and notify
 * @gmap: pointer to the guest address space structure
 * @pmdp: pointer to the pmd entry
 * @gaddr: guest address of the start of the segment
 *
 * Clears _SEGMENT_ENTRY_GMAP_IN in the entry and calls the gmap notifiers
 * for the HPAGE_SIZE range starting at @gaddr. Both steps are performed
 * unconditionally, whether or not the bit was set beforehand.
 */
static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
			     unsigned long gaddr)
{
	const unsigned long last = gaddr + HPAGE_SIZE - 1;

	pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN;
	gmap_call_notifier(gmap, gaddr, last);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gmap_pmdp_xchg - exchange a gmap pmd with another
|
||||||
|
* @gmap: pointer to the guest address space structure
|
||||||
|
* @pmdp: pointer to the pmd entry
|
||||||
|
* @new: replacement entry
|
||||||
|
* @gaddr: the affected guest address
|
||||||
|
*
|
||||||
|
* This function is assumed to be called with the guest_table_lock
|
||||||
|
* held.
|
||||||
|
*/
|
||||||
|
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
|
||||||
|
unsigned long gaddr)
|
||||||
|
{
|
||||||
|
gaddr &= HPAGE_MASK;
|
||||||
|
pmdp_notify_gmap(gmap, pmdp, gaddr);
|
||||||
|
pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN;
|
||||||
|
if (MACHINE_HAS_TLB_GUEST)
|
||||||
|
__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
|
||||||
|
IDTE_GLOBAL);
|
||||||
|
else if (MACHINE_HAS_IDTE)
|
||||||
|
__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
|
||||||
|
else
|
||||||
|
__pmdp_csp(pmdp);
|
||||||
|
*pmdp = new;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
|
||||||
|
int purge)
|
||||||
|
{
|
||||||
|
pmd_t *pmdp;
|
||||||
|
struct gmap *gmap;
|
||||||
|
unsigned long gaddr;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
|
||||||
|
spin_lock(&gmap->guest_table_lock);
|
||||||
|
pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
|
||||||
|
vmaddr >> PMD_SHIFT);
|
||||||
|
if (pmdp) {
|
||||||
|
gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
|
||||||
|
pmdp_notify_gmap(gmap, pmdp, gaddr);
|
||||||
|
WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
|
||||||
|
_SEGMENT_ENTRY_GMAP_UC));
|
||||||
|
if (purge)
|
||||||
|
__pmdp_csp(pmdp);
|
||||||
|
pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
|
||||||
|
}
|
||||||
|
spin_unlock(&gmap->guest_table_lock);
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
 *                        flushing
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 *
 * Thin wrapper around gmap_pmdp_clear() with purging disabled: the guest
 * entries are notified and emptied, but no __pmdp_csp() is issued.
 */
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 0 /* purge: no */);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);
|
||||||
|
|
||||||
|
/**
 * gmap_pmdp_csp - csp all affected guest pmd entries
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 *
 * Thin wrapper around gmap_pmdp_clear() with purging enabled: each
 * affected guest entry gets a __pmdp_csp() before it is emptied.
 */
void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 1 /* purge: yes */);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
|
||||||
|
* @mm: pointer to the process mm_struct
|
||||||
|
* @vmaddr: virtual address in the process address space
|
||||||
|
*/
|
||||||
|
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
|
||||||
|
{
|
||||||
|
unsigned long *entry, gaddr;
|
||||||
|
struct gmap *gmap;
|
||||||
|
pmd_t *pmdp;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
|
||||||
|
spin_lock(&gmap->guest_table_lock);
|
||||||
|
entry = radix_tree_delete(&gmap->host_to_guest,
|
||||||
|
vmaddr >> PMD_SHIFT);
|
||||||
|
if (entry) {
|
||||||
|
pmdp = (pmd_t *)entry;
|
||||||
|
gaddr = __gmap_segment_gaddr(entry);
|
||||||
|
pmdp_notify_gmap(gmap, pmdp, gaddr);
|
||||||
|
WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
|
||||||
|
_SEGMENT_ENTRY_GMAP_UC));
|
||||||
|
if (MACHINE_HAS_TLB_GUEST)
|
||||||
|
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
|
||||||
|
gmap->asce, IDTE_LOCAL);
|
||||||
|
else if (MACHINE_HAS_IDTE)
|
||||||
|
__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
|
||||||
|
*entry = _SEGMENT_ENTRY_EMPTY;
|
||||||
|
}
|
||||||
|
spin_unlock(&gmap->guest_table_lock);
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
|
||||||
|
* @mm: pointer to the process mm_struct
|
||||||
|
* @vmaddr: virtual address in the process address space
|
||||||
|
*/
|
||||||
|
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
|
||||||
|
{
|
||||||
|
unsigned long *entry, gaddr;
|
||||||
|
struct gmap *gmap;
|
||||||
|
pmd_t *pmdp;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
|
||||||
|
spin_lock(&gmap->guest_table_lock);
|
||||||
|
entry = radix_tree_delete(&gmap->host_to_guest,
|
||||||
|
vmaddr >> PMD_SHIFT);
|
||||||
|
if (entry) {
|
||||||
|
pmdp = (pmd_t *)entry;
|
||||||
|
gaddr = __gmap_segment_gaddr(entry);
|
||||||
|
pmdp_notify_gmap(gmap, pmdp, gaddr);
|
||||||
|
WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
|
||||||
|
_SEGMENT_ENTRY_GMAP_UC));
|
||||||
|
if (MACHINE_HAS_TLB_GUEST)
|
||||||
|
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
|
||||||
|
gmap->asce, IDTE_GLOBAL);
|
||||||
|
else if (MACHINE_HAS_IDTE)
|
||||||
|
__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
|
||||||
|
else
|
||||||
|
__pmdp_csp(pmdp);
|
||||||
|
*entry = _SEGMENT_ENTRY_EMPTY;
|
||||||
|
}
|
||||||
|
spin_unlock(&gmap->guest_table_lock);
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
|
||||||
|
* @gmap: pointer to guest address space
|
||||||
|
* @pmdp: pointer to the pmd to be tested
|
||||||
|
* @gaddr: virtual address in the guest address space
|
||||||
|
*
|
||||||
|
* This function is assumed to be called with the guest_table_lock
|
||||||
|
* held.
|
||||||
|
*/
|
||||||
|
bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
|
||||||
|
unsigned long gaddr)
|
||||||
|
{
|
||||||
|
if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Already protected memory, which did not change is clean */
|
||||||
|
if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
|
||||||
|
!(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Clear UC indication and reset protection */
|
||||||
|
pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC;
|
||||||
|
gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
|
||||||
|
* @gmap: pointer to guest address space
|
||||||
|
* @bitmap: dirty bitmap for this pmd
|
||||||
|
* @gaddr: virtual address in the guest address space
|
||||||
|
* @vmaddr: virtual address in the host address space
|
||||||
|
*
|
||||||
|
* This function is assumed to be called with the guest_table_lock
|
||||||
|
* held.
|
||||||
|
*/
|
||||||
|
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
|
||||||
|
unsigned long gaddr, unsigned long vmaddr)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
pmd_t *pmdp;
|
||||||
|
pte_t *ptep;
|
||||||
|
spinlock_t *ptl;
|
||||||
|
|
||||||
|
pmdp = gmap_pmd_op_walk(gmap, gaddr);
|
||||||
|
if (!pmdp)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (pmd_large(*pmdp)) {
|
||||||
|
if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
|
||||||
|
bitmap_fill(bitmap, _PAGE_ENTRIES);
|
||||||
|
} else {
|
||||||
|
for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
|
||||||
|
ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
|
||||||
|
if (!ptep)
|
||||||
|
continue;
|
||||||
|
if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
|
||||||
|
set_bit(i, bitmap);
|
||||||
|
spin_unlock(ptl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gmap_pmd_op_end(gmap, pmdp);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
|
||||||
|
|
||||||
static inline void thp_split_mm(struct mm_struct *mm)
|
static inline void thp_split_mm(struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||||
|
@ -2168,17 +2548,45 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
|
||||||
* Enable storage key handling from now on and initialize the storage
|
* Enable storage key handling from now on and initialize the storage
|
||||||
* keys with the default key.
|
* keys with the default key.
|
||||||
*/
|
*/
|
||||||
static int __s390_enable_skey(pte_t *pte, unsigned long addr,
|
static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
|
||||||
unsigned long next, struct mm_walk *walk)
|
unsigned long next, struct mm_walk *walk)
|
||||||
{
|
{
|
||||||
/* Clear storage key */
|
/* Clear storage key */
|
||||||
ptep_zap_key(walk->mm, addr, pte);
|
ptep_zap_key(walk->mm, addr, pte);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * hugetlb_entry callback of the s390_enable_skey() page walk: initialise
 * the storage keys of one huge page and mark its page with PG_arch_1.
 * Always returns 0.
 */
static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
				      unsigned long hmask, unsigned long next,
				      struct mm_walk *walk)
{
	pmd_t *pmd = (pmd_t *)pte;
	struct page *page = pmd_page(*pmd);
	const unsigned long entry = pmd_val(*pmd);
	unsigned long first, last;

	/*
	 * The write check makes sure we do not set a key on shared
	 * memory. This is needed as the walker does not differentiate
	 * between actual guest memory and the process executable or
	 * shared libraries.
	 */
	if (!(entry & _SEGMENT_ENTRY_INVALID) &&
	    (entry & _SEGMENT_ENTRY_WRITE)) {
		/* Range derived from the entry, not from the walk's addr. */
		first = entry & HPAGE_MASK;
		last = first + HPAGE_SIZE - 1;
		__storage_key_init_range(first, last);
		set_bit(PG_arch_1, &page->flags);
	}
	return 0;
}
|
||||||
|
|
||||||
int s390_enable_skey(void)
|
int s390_enable_skey(void)
|
||||||
{
|
{
|
||||||
struct mm_walk walk = { .pte_entry = __s390_enable_skey };
|
struct mm_walk walk = {
|
||||||
|
.hugetlb_entry = __s390_enable_skey_hugetlb,
|
||||||
|
.pte_entry = __s390_enable_skey_pte,
|
||||||
|
};
|
||||||
struct mm_struct *mm = current->mm;
|
struct mm_struct *mm = current->mm;
|
||||||
struct vm_area_struct *vma;
|
struct vm_area_struct *vma;
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
|
|
|
@ -123,6 +123,29 @@ static inline pte_t __rste_to_pte(unsigned long rste)
|
||||||
return pte;
|
return pte;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
|
||||||
|
{
|
||||||
|
struct page *page;
|
||||||
|
unsigned long size, paddr;
|
||||||
|
|
||||||
|
if (!mm_uses_skeys(mm) ||
|
||||||
|
rste & _SEGMENT_ENTRY_INVALID)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
|
||||||
|
page = pud_page(__pud(rste));
|
||||||
|
size = PUD_SIZE;
|
||||||
|
paddr = rste & PUD_MASK;
|
||||||
|
} else {
|
||||||
|
page = pmd_page(__pmd(rste));
|
||||||
|
size = PMD_SIZE;
|
||||||
|
paddr = rste & PMD_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!test_and_set_bit(PG_arch_1, &page->flags))
|
||||||
|
__storage_key_init_range(paddr, paddr + size - 1);
|
||||||
|
}
|
||||||
|
|
||||||
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
|
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||||
pte_t *ptep, pte_t pte)
|
pte_t *ptep, pte_t pte)
|
||||||
{
|
{
|
||||||
|
@ -137,6 +160,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||||
rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
|
rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
|
||||||
else
|
else
|
||||||
rste |= _SEGMENT_ENTRY_LARGE;
|
rste |= _SEGMENT_ENTRY_LARGE;
|
||||||
|
clear_huge_pte_skeys(mm, rste);
|
||||||
pte_val(*ptep) = rste;
|
pte_val(*ptep) = rste;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
|
|
||||||
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
|
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
|
||||||
{
|
{
|
||||||
asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0"
|
asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
|
||||||
: [addr] "+a" (addr) : [skey] "d" (skey));
|
: [addr] "+a" (addr) : [skey] "d" (skey));
|
||||||
return addr;
|
return addr;
|
||||||
}
|
}
|
||||||
|
@ -23,8 +23,6 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
|
||||||
{
|
{
|
||||||
unsigned long boundary, size;
|
unsigned long boundary, size;
|
||||||
|
|
||||||
if (!PAGE_DEFAULT_KEY)
|
|
||||||
return;
|
|
||||||
while (start < end) {
|
while (start < end) {
|
||||||
if (MACHINE_HAS_EDAT1) {
|
if (MACHINE_HAS_EDAT1) {
|
||||||
/* set storage keys for a 1MB frame */
|
/* set storage keys for a 1MB frame */
|
||||||
|
@ -37,7 +35,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
|
page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
|
||||||
start += PAGE_SIZE;
|
start += PAGE_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -347,18 +347,27 @@ static inline void pmdp_idte_local(struct mm_struct *mm,
|
||||||
mm->context.asce, IDTE_LOCAL);
|
mm->context.asce, IDTE_LOCAL);
|
||||||
else
|
else
|
||||||
__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
|
__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
|
||||||
|
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
|
||||||
|
gmap_pmdp_idte_local(mm, addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void pmdp_idte_global(struct mm_struct *mm,
|
static inline void pmdp_idte_global(struct mm_struct *mm,
|
||||||
unsigned long addr, pmd_t *pmdp)
|
unsigned long addr, pmd_t *pmdp)
|
||||||
{
|
{
|
||||||
if (MACHINE_HAS_TLB_GUEST)
|
if (MACHINE_HAS_TLB_GUEST) {
|
||||||
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
|
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
|
||||||
mm->context.asce, IDTE_GLOBAL);
|
mm->context.asce, IDTE_GLOBAL);
|
||||||
else if (MACHINE_HAS_IDTE)
|
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
|
||||||
|
gmap_pmdp_idte_global(mm, addr);
|
||||||
|
} else if (MACHINE_HAS_IDTE) {
|
||||||
__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
|
__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
|
||||||
else
|
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
|
||||||
|
gmap_pmdp_idte_global(mm, addr);
|
||||||
|
} else {
|
||||||
__pmdp_csp(pmdp);
|
__pmdp_csp(pmdp);
|
||||||
|
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
|
||||||
|
gmap_pmdp_csp(mm, addr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
|
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
|
||||||
|
@ -392,6 +401,8 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
|
||||||
cpumask_of(smp_processor_id()))) {
|
cpumask_of(smp_processor_id()))) {
|
||||||
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
|
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
|
||||||
mm->context.flush_mm = 1;
|
mm->context.flush_mm = 1;
|
||||||
|
if (mm_has_pgste(mm))
|
||||||
|
gmap_pmdp_invalidate(mm, addr);
|
||||||
} else {
|
} else {
|
||||||
pmdp_idte_global(mm, addr, pmdp);
|
pmdp_idte_global(mm, addr, pmdp);
|
||||||
}
|
}
|
||||||
|
@ -399,6 +410,24 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
|
||||||
return old;
|
return old;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
|
||||||
|
{
|
||||||
|
pgd_t *pgd;
|
||||||
|
p4d_t *p4d;
|
||||||
|
pud_t *pud;
|
||||||
|
pmd_t *pmd;
|
||||||
|
|
||||||
|
pgd = pgd_offset(mm, addr);
|
||||||
|
p4d = p4d_alloc(mm, pgd, addr);
|
||||||
|
if (!p4d)
|
||||||
|
return NULL;
|
||||||
|
pud = pud_alloc(mm, p4d, addr);
|
||||||
|
if (!pud)
|
||||||
|
return NULL;
|
||||||
|
pmd = pmd_alloc(mm, pud, addr);
|
||||||
|
return pmd;
|
||||||
|
}
|
||||||
|
|
||||||
pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
|
pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
|
||||||
pmd_t *pmdp, pmd_t new)
|
pmd_t *pmdp, pmd_t new)
|
||||||
{
|
{
|
||||||
|
@ -693,40 +722,14 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
||||||
/*
|
/*
|
||||||
* Test and reset if a guest page is dirty
|
* Test and reset if a guest page is dirty
|
||||||
*/
|
*/
|
||||||
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
|
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
|
||||||
|
pte_t *ptep)
|
||||||
{
|
{
|
||||||
spinlock_t *ptl;
|
|
||||||
pgd_t *pgd;
|
|
||||||
p4d_t *p4d;
|
|
||||||
pud_t *pud;
|
|
||||||
pmd_t *pmd;
|
|
||||||
pgste_t pgste;
|
pgste_t pgste;
|
||||||
pte_t *ptep;
|
|
||||||
pte_t pte;
|
pte_t pte;
|
||||||
bool dirty;
|
bool dirty;
|
||||||
int nodat;
|
int nodat;
|
||||||
|
|
||||||
pgd = pgd_offset(mm, addr);
|
|
||||||
p4d = p4d_alloc(mm, pgd, addr);
|
|
||||||
if (!p4d)
|
|
||||||
return false;
|
|
||||||
pud = pud_alloc(mm, p4d, addr);
|
|
||||||
if (!pud)
|
|
||||||
return false;
|
|
||||||
pmd = pmd_alloc(mm, pud, addr);
|
|
||||||
if (!pmd)
|
|
||||||
return false;
|
|
||||||
/* We can't run guests backed by huge pages, but userspace can
|
|
||||||
* still set them up and then try to migrate them without any
|
|
||||||
* migration support.
|
|
||||||
*/
|
|
||||||
if (pmd_large(*pmd))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
|
|
||||||
if (unlikely(!ptep))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
pgste = pgste_get_lock(ptep);
|
pgste = pgste_get_lock(ptep);
|
||||||
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
|
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
|
||||||
pgste_val(pgste) &= ~PGSTE_UC_BIT;
|
pgste_val(pgste) &= ~PGSTE_UC_BIT;
|
||||||
|
@ -742,21 +745,43 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
|
||||||
*ptep = pte;
|
*ptep = pte;
|
||||||
}
|
}
|
||||||
pgste_set_unlock(ptep, pgste);
|
pgste_set_unlock(ptep, pgste);
|
||||||
|
|
||||||
spin_unlock(ptl);
|
|
||||||
return dirty;
|
return dirty;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
|
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
|
||||||
|
|
||||||
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||||
unsigned char key, bool nq)
|
unsigned char key, bool nq)
|
||||||
{
|
{
|
||||||
unsigned long keyul;
|
unsigned long keyul, paddr;
|
||||||
spinlock_t *ptl;
|
spinlock_t *ptl;
|
||||||
pgste_t old, new;
|
pgste_t old, new;
|
||||||
|
pmd_t *pmdp;
|
||||||
pte_t *ptep;
|
pte_t *ptep;
|
||||||
|
|
||||||
ptep = get_locked_pte(mm, addr, &ptl);
|
pmdp = pmd_alloc_map(mm, addr);
|
||||||
|
if (unlikely(!pmdp))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
ptl = pmd_lock(mm, pmdp);
|
||||||
|
if (!pmd_present(*pmdp)) {
|
||||||
|
spin_unlock(ptl);
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pmd_large(*pmdp)) {
|
||||||
|
paddr = pmd_val(*pmdp) & HPAGE_MASK;
|
||||||
|
paddr |= addr & ~HPAGE_MASK;
|
||||||
|
/*
|
||||||
|
* Huge pmds need quiescing operations, they are
|
||||||
|
* always mapped.
|
||||||
|
*/
|
||||||
|
page_set_storage_key(paddr, key, 1);
|
||||||
|
spin_unlock(ptl);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
spin_unlock(ptl);
|
||||||
|
|
||||||
|
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
|
||||||
if (unlikely(!ptep))
|
if (unlikely(!ptep))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
|
@ -767,14 +792,14 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||||
pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
|
pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
|
||||||
pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
|
pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
|
||||||
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
|
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
|
||||||
unsigned long address, bits, skey;
|
unsigned long bits, skey;
|
||||||
|
|
||||||
address = pte_val(*ptep) & PAGE_MASK;
|
paddr = pte_val(*ptep) & PAGE_MASK;
|
||||||
skey = (unsigned long) page_get_storage_key(address);
|
skey = (unsigned long) page_get_storage_key(paddr);
|
||||||
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
|
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
|
||||||
skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
|
skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
|
||||||
/* Set storage key ACC and FP */
|
/* Set storage key ACC and FP */
|
||||||
page_set_storage_key(address, skey, !nq);
|
page_set_storage_key(paddr, skey, !nq);
|
||||||
/* Merge host changed & referenced into pgste */
|
/* Merge host changed & referenced into pgste */
|
||||||
pgste_val(new) |= bits << 52;
|
pgste_val(new) |= bits << 52;
|
||||||
}
|
}
|
||||||
|
@ -830,11 +855,32 @@ EXPORT_SYMBOL(cond_set_guest_storage_key);
|
||||||
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
|
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
|
||||||
{
|
{
|
||||||
spinlock_t *ptl;
|
spinlock_t *ptl;
|
||||||
|
unsigned long paddr;
|
||||||
pgste_t old, new;
|
pgste_t old, new;
|
||||||
|
pmd_t *pmdp;
|
||||||
pte_t *ptep;
|
pte_t *ptep;
|
||||||
int cc = 0;
|
int cc = 0;
|
||||||
|
|
||||||
ptep = get_locked_pte(mm, addr, &ptl);
|
pmdp = pmd_alloc_map(mm, addr);
|
||||||
|
if (unlikely(!pmdp))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
ptl = pmd_lock(mm, pmdp);
|
||||||
|
if (!pmd_present(*pmdp)) {
|
||||||
|
spin_unlock(ptl);
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pmd_large(*pmdp)) {
|
||||||
|
paddr = pmd_val(*pmdp) & HPAGE_MASK;
|
||||||
|
paddr |= addr & ~HPAGE_MASK;
|
||||||
|
cc = page_reset_referenced(paddr);
|
||||||
|
spin_unlock(ptl);
|
||||||
|
return cc;
|
||||||
|
}
|
||||||
|
spin_unlock(ptl);
|
||||||
|
|
||||||
|
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
|
||||||
if (unlikely(!ptep))
|
if (unlikely(!ptep))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
|
@ -843,7 +889,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
|
||||||
pgste_val(new) &= ~PGSTE_GR_BIT;
|
pgste_val(new) &= ~PGSTE_GR_BIT;
|
||||||
|
|
||||||
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
|
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
|
||||||
cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
|
paddr = pte_val(*ptep) & PAGE_MASK;
|
||||||
|
cc = page_reset_referenced(paddr);
|
||||||
/* Merge real referenced bit into host-set */
|
/* Merge real referenced bit into host-set */
|
||||||
pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
|
pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
|
||||||
}
|
}
|
||||||
|
@ -862,18 +909,42 @@ EXPORT_SYMBOL(reset_guest_reference_bit);
|
||||||
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||||
unsigned char *key)
|
unsigned char *key)
|
||||||
{
|
{
|
||||||
|
unsigned long paddr;
|
||||||
spinlock_t *ptl;
|
spinlock_t *ptl;
|
||||||
pgste_t pgste;
|
pgste_t pgste;
|
||||||
|
pmd_t *pmdp;
|
||||||
pte_t *ptep;
|
pte_t *ptep;
|
||||||
|
|
||||||
ptep = get_locked_pte(mm, addr, &ptl);
|
pmdp = pmd_alloc_map(mm, addr);
|
||||||
|
if (unlikely(!pmdp))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
ptl = pmd_lock(mm, pmdp);
|
||||||
|
if (!pmd_present(*pmdp)) {
|
||||||
|
/* Not yet mapped memory has a zero key */
|
||||||
|
spin_unlock(ptl);
|
||||||
|
*key = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pmd_large(*pmdp)) {
|
||||||
|
paddr = pmd_val(*pmdp) & HPAGE_MASK;
|
||||||
|
paddr |= addr & ~HPAGE_MASK;
|
||||||
|
*key = page_get_storage_key(paddr);
|
||||||
|
spin_unlock(ptl);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
spin_unlock(ptl);
|
||||||
|
|
||||||
|
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
|
||||||
if (unlikely(!ptep))
|
if (unlikely(!ptep))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
pgste = pgste_get_lock(ptep);
|
pgste = pgste_get_lock(ptep);
|
||||||
*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
|
*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
|
||||||
|
paddr = pte_val(*ptep) & PAGE_MASK;
|
||||||
if (!(pte_val(*ptep) & _PAGE_INVALID))
|
if (!(pte_val(*ptep) & _PAGE_INVALID))
|
||||||
*key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
|
*key = page_get_storage_key(paddr);
|
||||||
/* Reflect guest's logical view, not physical */
|
/* Reflect guest's logical view, not physical */
|
||||||
*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
|
*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
|
||||||
pgste_set_unlock(ptep, pgste);
|
pgste_set_unlock(ptep, pgste);
|
||||||
|
|
|
@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt {
|
||||||
#define KVM_CAP_GET_MSR_FEATURES 153
|
#define KVM_CAP_GET_MSR_FEATURES 153
|
||||||
#define KVM_CAP_HYPERV_EVENTFD 154
|
#define KVM_CAP_HYPERV_EVENTFD 154
|
||||||
#define KVM_CAP_HYPERV_TLBFLUSH 155
|
#define KVM_CAP_HYPERV_TLBFLUSH 155
|
||||||
|
#define KVM_CAP_S390_HPAGE_1M 156
|
||||||
|
|
||||||
#ifdef KVM_CAP_IRQ_ROUTING
|
#ifdef KVM_CAP_IRQ_ROUTING
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue