x86/xen: allow larger contiguous memory regions in PV guests

[ Upstream commit e93ec87286bd1fd30b7389e7a387cfb259f297e3 ]

Today a PV guest (including dom0) can create 2MB contiguous memory
regions for DMA buffers at max. This has led to problems at least
with the megaraid_sas driver, which wants to allocate a 2.3MB DMA
buffer.

The limiting factor is the frame array used to do the hypercall for
making the memory contiguous, which has 512 entries and is just a
static array in mmu_pv.c.

In order to not waste memory for non-PV guests, put the initial
frame array into .init.data section and dynamically allocate an array
from the .init_after_bootmem hook of PV guests.

In case a contiguous memory area larger than the initially supported
2MB is requested, allocate a larger buffer for the frame list. Note
that such an allocation is tried only after memory management has been
initialized properly, which is tested via a flag being set in the
.init_after_bootmem hook.

Fixes: 9f40ec84a797 ("xen/swiotlb: add alignment check for dma buffers")
Signed-off-by: Juergen Gross <jgross@suse.com>
Tested-by: Alan Robinson <Alan.Robinson@fujitsu.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Juergen Gross 2025-02-11 11:16:28 +01:00 committed by Greg Kroah-Hartman
parent 461d9e8aca
commit 5a32765ac7
1 changed files with 62 additions and 9 deletions

View File

@ -113,6 +113,51 @@ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
*/
static DEFINE_SPINLOCK(xen_reservation_lock);
/* Protected by xen_reservation_lock. */
#define MIN_CONTIG_ORDER 9 /* 2MB */
static unsigned int discontig_frames_order = MIN_CONTIG_ORDER;
static unsigned long discontig_frames_early[1UL << MIN_CONTIG_ORDER] __initdata;
static unsigned long *discontig_frames __refdata = discontig_frames_early;
static bool discontig_frames_dyn;
static int alloc_discontig_frames(unsigned int order)
{
unsigned long *new_array, *old_array;
unsigned int old_order;
unsigned long flags;
BUG_ON(order < MIN_CONTIG_ORDER);
BUILD_BUG_ON(sizeof(discontig_frames_early) != PAGE_SIZE);
new_array = (unsigned long *)__get_free_pages(GFP_KERNEL,
order - MIN_CONTIG_ORDER);
if (!new_array)
return -ENOMEM;
spin_lock_irqsave(&xen_reservation_lock, flags);
old_order = discontig_frames_order;
if (order > discontig_frames_order || !discontig_frames_dyn) {
if (!discontig_frames_dyn)
old_array = NULL;
else
old_array = discontig_frames;
discontig_frames = new_array;
discontig_frames_order = order;
discontig_frames_dyn = true;
} else {
old_array = new_array;
}
spin_unlock_irqrestore(&xen_reservation_lock, flags);
free_pages((unsigned long)old_array, old_order - MIN_CONTIG_ORDER);
return 0;
}
/*
* Note about cr3 (pagetable base) values:
*
@ -813,6 +858,9 @@ static void __init xen_after_bootmem(void)
SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
if (alloc_discontig_frames(MIN_CONTIG_ORDER))
BUG();
}
static void xen_unpin_page(struct mm_struct *mm, struct page *page,
@ -2199,10 +2247,6 @@ void __init xen_init_mmu_ops(void)
memset(dummy_mapping, 0xff, PAGE_SIZE);
}
/* Protected by xen_reservation_lock. */
#define MAX_CONTIG_ORDER 9 /* 2MB */
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
#define VOID_PTE (mfn_pte(0, __pgprot(0)))
static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
unsigned long *in_frames,
@ -2319,18 +2363,25 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
unsigned int address_bits,
dma_addr_t *dma_handle)
{
unsigned long *in_frames = discontig_frames, out_frame;
unsigned long *in_frames, out_frame;
unsigned long flags;
int success;
unsigned long vstart = (unsigned long)phys_to_virt(pstart);
if (unlikely(order > MAX_CONTIG_ORDER))
return -ENOMEM;
if (unlikely(order > discontig_frames_order)) {
if (!discontig_frames_dyn)
return -ENOMEM;
if (alloc_discontig_frames(order))
return -ENOMEM;
}
memset((void *) vstart, 0, PAGE_SIZE << order);
spin_lock_irqsave(&xen_reservation_lock, flags);
in_frames = discontig_frames;
/* 1. Zap current PTEs, remembering MFNs. */
xen_zap_pfn_range(vstart, order, in_frames, NULL);
@ -2354,12 +2405,12 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
unsigned long *out_frames = discontig_frames, in_frame;
unsigned long *out_frames, in_frame;
unsigned long flags;
int success;
unsigned long vstart;
if (unlikely(order > MAX_CONTIG_ORDER))
if (unlikely(order > discontig_frames_order))
return;
vstart = (unsigned long)phys_to_virt(pstart);
@ -2367,6 +2418,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
spin_lock_irqsave(&xen_reservation_lock, flags);
out_frames = discontig_frames;
/* 1. Find start MFN of contiguous extent. */
in_frame = virt_to_mfn((void *)vstart);