From d3383632d4e8e9ae747f582eaee8c2e79f828ae6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 17 Apr 2013 10:53:39 +0200 Subject: [PATCH] s390/mm: add pte invalidation notifier for kvm Add a notifier for kvm to get control before a page table entry is invalidated. The notifier is only called for ptes of an address space with pgstes that have been explicitly marked to require notification. Kvm will use this to get control before prefix pages of virtual CPU are unmapped. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 66 ++++++++++++++--- arch/s390/mm/pgtable.c | 121 ++++++++++++++++++++++++++++++-- 2 files changed, 173 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b4622915bd15..4105b8221fdd 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -306,6 +306,7 @@ extern unsigned long MODULES_END; #define RCP_HC_BIT 0x00200000UL #define RCP_GR_BIT 0x00040000UL #define RCP_GC_BIT 0x00020000UL +#define RCP_IN_BIT 0x00008000UL /* IPTE notify bit */ /* User dirty / referenced bit for KVM's migration feature */ #define KVM_UR_BIT 0x00008000UL @@ -373,6 +374,7 @@ extern unsigned long MODULES_END; #define RCP_HC_BIT 0x0020000000000000UL #define RCP_GR_BIT 0x0004000000000000UL #define RCP_GC_BIT 0x0002000000000000UL +#define RCP_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ /* User dirty / referenced bit for KVM's migration feature */ #define KVM_UR_BIT 0x0000800000000000UL @@ -746,30 +748,42 @@ struct gmap { /** * struct gmap_rmap - reverse mapping for segment table entries - * @next: pointer to the next gmap_rmap structure in the list + * @gmap: pointer to the gmap_struct * @entry: pointer to a segment table entry + * @vmaddr: virtual address in the guest address space */ struct gmap_rmap { struct list_head list; + struct gmap *gmap; unsigned long *entry; + unsigned long vmaddr; }; /** * struct gmap_pgtable - gmap information attached to a page table * @vmaddr: address of the 1MB segment in the process virtual memory - * @mapper: list of segment table entries maping a page table + * @mapper: list of segment table entries mapping a page table */ struct gmap_pgtable { unsigned long vmaddr; struct list_head mapper; }; +/** + * struct gmap_notifier - notify function block for page invalidation + * @notifier_call: address of callback function + */ +struct gmap_notifier { + struct list_head list; + void (*notifier_call)(struct gmap *gmap, unsigned long address); +}; + struct gmap *gmap_alloc(struct mm_struct *mm); void gmap_free(struct gmap *gmap); void gmap_enable(struct gmap *gmap); void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long length); + unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(unsigned long address, struct gmap *); unsigned long gmap_translate(unsigned long address, struct gmap *); @@ -777,6 +791,24 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); void gmap_discard(unsigned long from, unsigned long to, struct gmap *); +void gmap_register_ipte_notifier(struct gmap_notifier *); +void gmap_unregister_ipte_notifier(struct gmap_notifier *); +int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); +void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); + +static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + if (pgste_val(pgste) & RCP_IN_BIT) { + pgste_val(pgste) &= ~RCP_IN_BIT; + gmap_do_ipte_notify(mm, addr, ptep); + } +#endif + return pgste; +} + /* * Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following @@ -1032,8 +1064,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pte_t pte; mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!mm_exclusive(mm)) @@ -1052,11 +1086,14 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long address, pte_t *ptep) { + pgste_t pgste; pte_t pte; mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) - pgste_get_lock(ptep); + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!mm_exclusive(mm)) @@ -1082,8 +1119,10 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pgste_t pgste; pte_t pte; - if (mm_has_pgste(vma->vm_mm)) + if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + } pte = *ptep; __ptep_ipte(address, ptep); @@ -1111,8 +1150,11 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pgste_t pgste; pte_t pte; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + if (!full) + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!full) @@ -1135,8 +1177,10 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, if (pte_write(pte)) { mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } if (!mm_exclusive(mm)) __ptep_ipte(address, ptep); @@ -1160,8 +1204,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, if (pte_same(*ptep, entry)) return 0; - if (mm_has_pgste(vma->vm_mm)) + if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + } __ptep_ipte(address, ptep); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index bd954e96f51c..7805ddca833d 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -454,9 +454,8 @@ unsigned long gmap_translate(unsigned long address, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_translate); -static int gmap_connect_pgtable(unsigned long segment, - unsigned long *segment_ptr, - struct gmap *gmap) +static int gmap_connect_pgtable(unsigned long address, unsigned long segment, + unsigned long *segment_ptr, struct gmap *gmap) { unsigned long vmaddr; struct vm_area_struct *vma; @@ -491,7 +490,9 @@ static int gmap_connect_pgtable(unsigned long segment, /* Link gmap segment table entry location to page table. */ page = pmd_page(*pmd); mp = (struct gmap_pgtable *) page->index; + rmap->gmap = gmap; rmap->entry = segment_ptr; + rmap->vmaddr = address; spin_lock(&mm->page_table_lock); if (*segment_ptr == segment) { list_add(&rmap->list, &mp->mapper); @@ -553,7 +554,7 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) if (!(segment & _SEGMENT_ENTRY_RO)) /* Nothing mapped in the gmap address space. */ break; - rc = gmap_connect_pgtable(segment, segment_ptr, gmap); + rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); if (rc) return rc; } @@ -619,6 +620,118 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_discard); +static LIST_HEAD(gmap_notifier_list); +static DEFINE_SPINLOCK(gmap_notifier_lock); + +/** + * gmap_register_ipte_notifier - register a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_register_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_add(&nb->list, &gmap_notifier_list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); + +/** + * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_del_init(&nb->list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); + +/** + * gmap_ipte_notify - mark a range of ptes for invalidation notification + * @gmap: pointer to guest mapping meta data structure + * @address: virtual address in the guest address space + * @len: size of area + * + * Returns 0 if for each page in the given range a gmap mapping exists and + * the invalidation notification could be set. If the gmap mapping is missing + * for one or more pages -EFAULT is returned. If no memory could be allocated + * -ENOMEM is returned. This function establishes missing page table entries. + */ +int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) +{ + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep, entry; + pgste_t pgste; + int rc = 0; + + if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) + return -EINVAL; + down_read(&gmap->mm->mmap_sem); + while (len) { + /* Convert gmap address and connect the page tables */ + addr = __gmap_fault(start, gmap); + if (IS_ERR_VALUE(addr)) { + rc = addr; + break; + } + /* Get the page mapped */ + if (get_user_pages(current, gmap->mm, addr, 1, 1, 0, + NULL, NULL) != 1) { + rc = -EFAULT; + break; + } + /* Walk the process page table, lock and get pte pointer */ + ptep = get_locked_pte(gmap->mm, addr, &ptl); + if (unlikely(!ptep)) + continue; + /* Set notification bit in the pgste of the pte */ + entry = *ptep; + if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { + pgste = pgste_get_lock(ptep); + pgste_val(pgste) |= RCP_IN_BIT; + pgste_set_unlock(ptep, pgste); + start += PAGE_SIZE; + len -= PAGE_SIZE; + } + spin_unlock(ptl); + } + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_ipte_notify); + +/** + * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. + * @mm: pointer to the process mm_struct + * @addr: virtual address in the process address space + * @pte: pointer to the page table entry + * + * This function is assumed to be called with the page table lock held + * for the pte to notify. + */ +void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) +{ + unsigned long segment_offset; + struct gmap_notifier *nb; + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct page *page; + + segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + segment_offset = segment_offset * (4096 / sizeof(pte_t)); + page = pfn_to_page(__pa(pte) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + spin_lock(&gmap_notifier_lock); + list_for_each_entry(rmap, &mp->mapper, list) { + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(rmap->gmap, + rmap->vmaddr + segment_offset); + } + spin_unlock(&gmap_notifier_lock); +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) {