From 8b271d57b57585a3e3e8cd7abc5f4d7710a0e62d Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Thu, 30 Jan 2014 12:52:59 +0000 Subject: [PATCH 1/6] arm/xen: Initialize event channels earlier Event channels driver needs to be initialized very early. Until now, Xen initialization was done after all CPUs was bring up. We can safely move the initialization to an early initcall. Also use a cpu notifier to: - Register the VCPU when the CPU is prepared - Enable event channel IRQ when the CPU is running Signed-off-by: Julien Grall Signed-off-by: Stefano Stabellini --- arch/arm/xen/enlighten.c | 71 +++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 2162172c0ddc..3465f25d7702 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -154,7 +155,7 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); -static void __init xen_percpu_init(void *unused) +static void xen_percpu_init(void) { struct vcpu_register_vcpu_info info; struct vcpu_info *vcpup; @@ -193,6 +194,31 @@ static void xen_power_off(void) BUG(); } +static int xen_cpu_notification(struct notifier_block *self, + unsigned long action, + void *hcpu) +{ + switch (action) { + case CPU_STARTING: + xen_percpu_init(); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block xen_cpu_notifier = { + .notifier_call = xen_cpu_notification, +}; + +static irqreturn_t xen_arm_callback(int irq, void *arg) +{ + xen_hvm_evtchn_do_upcall(); + return IRQ_HANDLED; +} + /* * see Documentation/devicetree/bindings/arm/xen.txt for the * documentation of the Xen Device Tree format. @@ -229,6 +255,10 @@ static int __init xen_guest_init(void) xen_events_irq = irq_of_parse_and_map(node, 0); pr_info("Xen %s support found, events_irq=%d gnttab_frame_pfn=%lx\n", version, xen_events_irq, (grant_frames >> PAGE_SHIFT)); + + if (xen_events_irq < 0) + return -ENODEV; + xen_domain_type = XEN_HVM_DOMAIN; xen_setup_features(); @@ -281,9 +311,21 @@ static int __init xen_guest_init(void) disable_cpuidle(); disable_cpufreq(); + xen_init_IRQ(); + + if (request_percpu_irq(xen_events_irq, xen_arm_callback, + "events", &xen_vcpu)) { + pr_err("Error request IRQ %d\n", xen_events_irq); + return -EINVAL; + } + + xen_percpu_init(); + + register_cpu_notifier(&xen_cpu_notifier); + return 0; } -core_initcall(xen_guest_init); +early_initcall(xen_guest_init); static int __init xen_pm_init(void) { @@ -297,31 +339,6 @@ static int __init xen_pm_init(void) } late_initcall(xen_pm_init); -static irqreturn_t xen_arm_callback(int irq, void *arg) -{ - xen_hvm_evtchn_do_upcall(); - return IRQ_HANDLED; -} - -static int __init xen_init_events(void) -{ - if (!xen_domain() || xen_events_irq < 0) - return -ENODEV; - - xen_init_IRQ(); - - if (request_percpu_irq(xen_events_irq, xen_arm_callback, - "events", &xen_vcpu)) { - pr_err("Error requesting IRQ %d\n", xen_events_irq); - return -EINVAL; - } - - on_each_cpu(xen_percpu_init, NULL, 0); - - return 0; -} -postcore_initcall(xen_init_events); - /* In the hypervisor.S file. */ EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op); EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op); From e17b2f114cba5420fb28fa4bfead57d406a16533 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 20 Jan 2014 11:30:41 +0000 Subject: [PATCH 2/6] xen: swiotlb: handle sizeof(dma_addr_t) != sizeof(phys_addr_t) The use of phys_to_machine and machine_to_phys in the phys<=>bus conversions causes us to lose the top bits of the DMA address if the size of a DMA address is not the same as the size of the phyiscal address. This can happen in practice on ARM where foreign pages can be above 4GB even though the local kernel does not have LPAE page tables enabled (which is totally reasonable if the guest does not itself have >4GB of RAM). In this case the kernel still maps the foreign pages at a phys addr below 4G (as it must) but the resulting DMA address (returned by the grant map operation) is much higher. This is analogous to a hardware device which has its view of RAM mapped up high for some reason. This patch makes I/O to foreign pages (specifically blkif) work on 32-bit ARM systems with more than 4GB of RAM. Signed-off-by: Ian Campbell Signed-off-by: Stefano Stabellini --- arch/arm/Kconfig | 1 + drivers/xen/swiotlb-xen.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c1f1a7eee953..24307dc85d08 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1885,6 +1885,7 @@ config XEN depends on !GENERIC_ATOMIC64 select ARM_PSCI select SWIOTLB_XEN + select ARCH_DMA_ADDR_T_64BIT help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM. diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 1eac0731c349..ebd8f218a788 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -75,14 +75,32 @@ static unsigned long xen_io_tlb_nslabs; static u64 start_dma_addr; +/* + * Both of these functions should avoid PFN_PHYS because phys_addr_t + * can be 32bit when dma_addr_t is 64bit leading to a loss in + * information if the shift is done before casting to 64bit. + */ static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr) { - return phys_to_machine(XPADDR(paddr)).maddr; + unsigned long mfn = pfn_to_mfn(PFN_DOWN(paddr)); + dma_addr_t dma = (dma_addr_t)mfn << PAGE_SHIFT; + + dma |= paddr & ~PAGE_MASK; + + return dma; } static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr) { - return machine_to_phys(XMADDR(baddr)).paddr; + unsigned long pfn = mfn_to_pfn(PFN_DOWN(baddr)); + dma_addr_t dma = (dma_addr_t)pfn << PAGE_SHIFT; + phys_addr_t paddr = dma; + + BUG_ON(paddr != dma); /* truncation has occurred, should never happen */ + + paddr |= baddr & ~PAGE_MASK; + + return paddr; } static inline dma_addr_t xen_virt_to_bus(void *address) From 47c542050d306e50f09512eb6339dbf2fc02fddd Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Thu, 30 Jan 2014 12:56:34 +0000 Subject: [PATCH 3/6] xen/gnttab: Use phys_addr_t to describe the grant frame base address On ARM, address size can be 32 bits or 64 bits (if CONFIG_ARCH_PHYS_ADDR_T_64BIT is enabled). We can't assume that the grant frame base address will always fits in an unsigned long. Use phys_addr_t instead of unsigned long as argument for gnttab_setup_auto_xlat_frames. Signed-off-by: Julien Grall Signed-off-by: Stefano Stabellini Acked-by: Ian Campbell Reviewed-by: David Vrabel --- arch/arm/xen/enlighten.c | 6 +++--- drivers/xen/grant-table.c | 6 +++--- include/xen/grant_table.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 3465f25d7702..b96723e258a0 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -234,7 +234,7 @@ static int __init xen_guest_init(void) const char *version = NULL; const char *xen_prefix = "xen,xen-"; struct resource res; - unsigned long grant_frames; + phys_addr_t grant_frames; node = of_find_compatible_node(NULL, NULL, "xen,xen"); if (!node) { @@ -253,8 +253,8 @@ static int __init xen_guest_init(void) return 0; grant_frames = res.start; xen_events_irq = irq_of_parse_and_map(node, 0); - pr_info("Xen %s support found, events_irq=%d gnttab_frame_pfn=%lx\n", - version, xen_events_irq, (grant_frames >> PAGE_SHIFT)); + pr_info("Xen %s support found, events_irq=%d gnttab_frame=%pa\n", + version, xen_events_irq, &grant_frames); if (xen_events_irq < 0) return -ENODEV; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 1ce1c40331f3..b84e3ab839aa 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -837,7 +837,7 @@ unsigned int gnttab_max_grant_frames(void) } EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); -int gnttab_setup_auto_xlat_frames(unsigned long addr) +int gnttab_setup_auto_xlat_frames(phys_addr_t addr) { xen_pfn_t *pfn; unsigned int max_nr_gframes = __max_nr_grant_frames(); @@ -849,8 +849,8 @@ int gnttab_setup_auto_xlat_frames(unsigned long addr) vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes); if (vaddr == NULL) { - pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n", - addr); + pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n", + &addr); return -ENOMEM; } pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL); diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 5acb1e4ac0d3..a5af2a26d94f 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -185,7 +185,7 @@ struct grant_frames { }; extern struct grant_frames xen_auto_xlat_grant_frames; unsigned int gnttab_max_grant_frames(void); -int gnttab_setup_auto_xlat_frames(unsigned long addr); +int gnttab_setup_auto_xlat_frames(phys_addr_t addr); void gnttab_free_auto_xlat_frames(void); #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) From 08ece5bb2312b4510b161a6ef6682f37f4eac8a1 Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Thu, 23 Jan 2014 21:23:44 +0000 Subject: [PATCH 4/6] xen/grant-table: Avoid m2p_override during mapping The grant mapping API does m2p_override unnecessarily: only gntdev needs it, for blkback and future netback patches it just cause a lock contention, as those pages never go to userspace. Therefore this series does the following: - the original functions were renamed to __gnttab_[un]map_refs, with a new parameter m2p_override - based on m2p_override either they follow the original behaviour, or just set the private flag and call set_phys_to_machine - gnttab_[un]map_refs are now a wrapper to call __gnttab_[un]map_refs with m2p_override false - a new function gnttab_[un]map_refs_userspace provides the old behaviour It also removes a stray space from page.h and change ret to 0 if XENFEAT_auto_translated_physmap, as that is the only possible return value there. v2: - move the storing of the old mfn in page->index to gnttab_map_refs - move the function header update to a separate patch v3: - a new approach to retain old behaviour where it needed - squash the patches into one v4: - move out the common bits from m2p* functions, and pass pfn/mfn as parameter - clear page->private before doing anything with the page, so m2p_find_override won't race with this v5: - change return value handling in __gnttab_[un]map_refs - remove a stray space in page.h - add detail why ret = 0 now at some places v6: - don't pass pfn to m2p* functions, just get it locally Signed-off-by: Zoltan Kiss Suggested-by: David Vrabel Acked-by: David Vrabel Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/page.h | 5 +- arch/x86/xen/p2m.c | 17 +----- drivers/block/xen-blkback/blkback.c | 15 ++--- drivers/xen/gntdev.c | 13 +++-- drivers/xen/grant-table.c | 89 ++++++++++++++++++++++++----- include/xen/grant_table.h | 8 ++- 6 files changed, 101 insertions(+), 46 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 3e276eb23d1b..787e1bb5aafc 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -52,7 +52,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op); extern int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op); + struct gnttab_map_grant_ref *kmap_op, + unsigned long mfn); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); @@ -121,7 +122,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) pfn = m2p_find_override_pfn(mfn, ~0); } - /* + /* * pfn is ~0 if there are no entries in the m2p for mfn or if the * entry doesn't map back to the mfn and m2p_override doesn't have a * valid entry for it. diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 696c694986d0..8009acbe41e4 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -899,13 +899,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, "m2p_add_override: pfn %lx not mapped", pfn)) return -EINVAL; } - WARN_ON(PagePrivate(page)); - SetPagePrivate(page); - set_page_private(page, mfn); - page->index = pfn_to_mfn(pfn); - - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) - return -ENOMEM; if (kmap_op != NULL) { if (!PageHighMem(page)) { @@ -944,19 +937,16 @@ int m2p_add_override(unsigned long mfn, struct page *page, } EXPORT_SYMBOL_GPL(m2p_add_override); int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op) + struct gnttab_map_grant_ref *kmap_op, + unsigned long mfn) { unsigned long flags; - unsigned long mfn; unsigned long pfn; unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; pfn = page_to_pfn(page); - mfn = get_phys_to_machine(pfn); - if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) - return -EINVAL; if (!PageHighMem(page)) { address = (unsigned long)__va(pfn << PAGE_SHIFT); @@ -970,10 +960,7 @@ int m2p_remove_override(struct page *page, spin_lock_irqsave(&m2p_override_lock, flags); list_del(&page->lru); spin_unlock_irqrestore(&m2p_override_lock, flags); - WARN_ON(!PagePrivate(page)); - ClearPagePrivate(page); - set_phys_to_machine(pfn, page->index); if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 6620b73d0490..875025f299b6 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -285,8 +285,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || !rb_next(&persistent_gnt->node)) { - ret = gnttab_unmap_refs(unmap, NULL, pages, - segs_to_unmap); + ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; @@ -321,8 +320,7 @@ static void unmap_purged_grants(struct work_struct *work) pages[segs_to_unmap] = persistent_gnt->page; if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) { - ret = gnttab_unmap_refs(unmap, NULL, pages, - segs_to_unmap); + ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; @@ -330,7 +328,7 @@ static void unmap_purged_grants(struct work_struct *work) kfree(persistent_gnt); } if (segs_to_unmap > 0) { - ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); + ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); } @@ -670,15 +668,14 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif, GNTMAP_host_map, pages[i]->handle); pages[i]->handle = BLKBACK_INVALID_HANDLE; if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) { - ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, - invcount); + ret = gnttab_unmap_refs(unmap, unmap_pages, invcount); BUG_ON(ret); put_free_pages(blkif, unmap_pages, invcount); invcount = 0; } } if (invcount) { - ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); + ret = gnttab_unmap_refs(unmap, unmap_pages, invcount); BUG_ON(ret); put_free_pages(blkif, unmap_pages, invcount); } @@ -740,7 +737,7 @@ again: } if (segs_to_map) { - ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map); + ret = gnttab_map_refs(map, pages_to_gnt, segs_to_map); BUG_ON(ret); } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 073b4a19a8b0..34a2704fbc88 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -284,8 +284,10 @@ static int map_grant_pages(struct grant_map *map) } pr_debug("map %d+%d\n", map->index, map->count); - err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, - map->pages, map->count); + err = gnttab_map_refs_userspace(map->map_ops, + use_ptemod ? map->kmap_ops : NULL, + map->pages, + map->count); if (err) return err; @@ -315,9 +317,10 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) } } - err = gnttab_unmap_refs(map->unmap_ops + offset, - use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset, - pages); + err = gnttab_unmap_refs_userspace(map->unmap_ops + offset, + use_ptemod ? map->kmap_ops + offset : NULL, + map->pages + offset, + pages); if (err) return err; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index b84e3ab839aa..8ee13e2e45e2 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -928,15 +928,17 @@ void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count) } EXPORT_SYMBOL_GPL(gnttab_batch_copy); -int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, +int __gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) + struct page **pages, unsigned int count, + bool m2p_override) { int i, ret; bool lazy = false; pte_t *pte; - unsigned long mfn; + unsigned long mfn, pfn; + BUG_ON(kmap_ops && !m2p_override); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count); if (ret) return ret; @@ -955,10 +957,12 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, set_phys_to_machine(map_ops[i].host_addr >> PAGE_SHIFT, map_ops[i].dev_bus_addr >> PAGE_SHIFT); } - return ret; + return 0; } - if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + if (m2p_override && + !in_interrupt() && + paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { arch_enter_lazy_mmu_mode(); lazy = true; } @@ -975,8 +979,20 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, } else { mfn = PFN_DOWN(map_ops[i].dev_bus_addr); } - ret = m2p_add_override(mfn, pages[i], kmap_ops ? - &kmap_ops[i] : NULL); + pfn = page_to_pfn(pages[i]); + + WARN_ON(PagePrivate(pages[i])); + SetPagePrivate(pages[i]); + set_page_private(pages[i], mfn); + + pages[i]->index = pfn_to_mfn(pfn); + if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { + ret = -ENOMEM; + goto out; + } + if (m2p_override) + ret = m2p_add_override(mfn, pages[i], kmap_ops ? + &kmap_ops[i] : NULL); if (ret) goto out; } @@ -987,15 +1003,32 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, return ret; } + +int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, + struct page **pages, unsigned int count) +{ + return __gnttab_map_refs(map_ops, NULL, pages, count, false); +} EXPORT_SYMBOL_GPL(gnttab_map_refs); -int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, +int gnttab_map_refs_userspace(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) +{ + return __gnttab_map_refs(map_ops, kmap_ops, pages, count, true); +} +EXPORT_SYMBOL_GPL(gnttab_map_refs_userspace); + +int __gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) + struct page **pages, unsigned int count, + bool m2p_override) { int i, ret; bool lazy = false; + unsigned long pfn, mfn; + BUG_ON(kmap_ops && !m2p_override); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); if (ret) return ret; @@ -1006,17 +1039,33 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, set_phys_to_machine(unmap_ops[i].host_addr >> PAGE_SHIFT, INVALID_P2M_ENTRY); } - return ret; + return 0; } - if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + if (m2p_override && + !in_interrupt() && + paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { arch_enter_lazy_mmu_mode(); lazy = true; } for (i = 0; i < count; i++) { - ret = m2p_remove_override(pages[i], kmap_ops ? - &kmap_ops[i] : NULL); + pfn = page_to_pfn(pages[i]); + mfn = get_phys_to_machine(pfn); + if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { + ret = -EINVAL; + goto out; + } + + set_page_private(pages[i], INVALID_P2M_ENTRY); + WARN_ON(!PagePrivate(pages[i])); + ClearPagePrivate(pages[i]); + set_phys_to_machine(pfn, pages[i]->index); + if (m2p_override) + ret = m2p_remove_override(pages[i], + kmap_ops ? + &kmap_ops[i] : NULL, + mfn); if (ret) goto out; } @@ -1027,8 +1076,22 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, return ret; } + +int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *map_ops, + struct page **pages, unsigned int count) +{ + return __gnttab_unmap_refs(map_ops, NULL, pages, count, false); +} EXPORT_SYMBOL_GPL(gnttab_unmap_refs); +int gnttab_unmap_refs_userspace(struct gnttab_unmap_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) +{ + return __gnttab_unmap_refs(map_ops, kmap_ops, pages, count, true); +} +EXPORT_SYMBOL_GPL(gnttab_unmap_refs_userspace); + static unsigned nr_status_frames(unsigned nr_grant_frames) { BUG_ON(grefs_per_grant_frame == 0); diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index a5af2a26d94f..7ad033dbc845 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -191,11 +191,15 @@ void gnttab_free_auto_xlat_frames(void); #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, - struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); +int gnttab_map_refs_userspace(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, - struct gnttab_map_grant_ref *kunmap_ops, struct page **pages, unsigned int count); +int gnttab_unmap_refs_userspace(struct gnttab_unmap_grant_ref *unmap_ops, + struct gnttab_map_grant_ref *kunmap_ops, + struct page **pages, unsigned int count); /* Perform a batch of grant map/copy operations. Retry every batch slot * for which the hypervisor returns GNTST_eagain. This is typically due From bc1b0df59e3fc4573f92bc1aab9652047a0aeaa7 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Wed, 22 Jan 2014 14:57:44 +0800 Subject: [PATCH 5/6] drivers: xen: deaggressive selfballoon driver Current xen-selfballoon driver is too aggressive which may cause OOM be triggered more often. Eg. this bug reported by James: https://lkml.org/lkml/2013/11/21/158 There are two mainly reasons: 1) The original goal_page didn't consider some pages used by kernel space, like slab pages and pages used by device drivers. 2) The balloon driver may not give back memory to guest OS fast enough when the workload suddenly aquries a lot of physical memory. In both cases, the guest OS will suffer from memory pressure and OOM may be triggered. The fix is make xen-selfballoon driver not that aggressive by adding extra 10% of total ram pages to goal_page. It's more valuable to keep the guest system reliable and response faster than balloon out these 10% pages to XEN. Signed-off-by: Bob Liu Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-selfballoon.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 21e18c18c7a1..745ad79c1d8e 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -175,6 +175,7 @@ static void frontswap_selfshrink(void) #endif /* CONFIG_FRONTSWAP */ #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) +#define PAGES2MB(pages) ((pages) >> (20 - PAGE_SHIFT)) /* * Use current balloon size, the goal (vm_committed_as), and hysteresis @@ -525,6 +526,7 @@ EXPORT_SYMBOL(register_xen_selfballooning); int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink) { bool enable = false; + unsigned long reserve_pages; if (!xen_domain()) return -ENODEV; @@ -549,6 +551,26 @@ int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink) if (!enable) return -ENODEV; + /* + * Give selfballoon_reserved_mb a default value(10% of total ram pages) + * to make selfballoon not so aggressive. + * + * There are mainly two reasons: + * 1) The original goal_page didn't consider some pages used by kernel + * space, like slab pages and memory used by device drivers. + * + * 2) The balloon driver may not give back memory to guest OS fast + * enough when the workload suddenly aquries a lot of physical memory. + * + * In both cases, the guest OS will suffer from memory pressure and + * OOM killer may be triggered. + * By reserving extra 10% of total ram pages, we can keep the system + * much more reliably and response faster in some cases. + */ + if (!selfballoon_reserved_mb) { + reserve_pages = totalram_pages / 10; + selfballoon_reserved_mb = PAGES2MB(reserve_pages); + } schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ); return 0; From f93576e1ac34fd7a93d6f3432e71295bbe6a27ce Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 27 Jan 2014 10:56:09 -0500 Subject: [PATCH 6/6] xen/pvh: Fix misplaced kfree from xlated_setup_gnttab_pages Passing a freed 'pages' to free_xenballooned_pages will end badly on kernels with slub debug enabled. This looks out of place between the rc assign and the check, but we do want to kfree pages regardless of which path we take. Signed-off-by: Dave Jones Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/grant-table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 103c93f874b2..c98583588580 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -162,14 +162,15 @@ static int __init xlated_setup_gnttab_pages(void) rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames, &xen_auto_xlat_grant_frames.vaddr); - kfree(pages); if (rc) { pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, nr_grant_frames, rc); free_xenballooned_pages(nr_grant_frames, pages); + kfree(pages); kfree(pfns); return rc; } + kfree(pages); xen_auto_xlat_grant_frames.pfn = pfns; xen_auto_xlat_grant_frames.count = nr_grant_frames;