memremap: add a migrate_to_ram method to struct dev_pagemap_ops

This replaces the hacky ->fault callback, which is currently called directly
from common code through an hmm-specific data structure, an exercise in
layering violations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Author: Christoph Hellwig, 2019-06-26 14:27:11 +02:00 (committed by Jason Gunthorpe)
parent f6a55e1a3f
commit 897e6365cd
6 changed files with 17 additions and 67 deletions

include/linux/hmm.h

@@ -692,11 +692,6 @@ struct hmm_devmem_ops {
  * chunk, as an optimization. It must, however, prioritize the faulting address
  * over all the others.
  */
-typedef vm_fault_t (*dev_page_fault_t)(struct vm_area_struct *vma,
-				unsigned long addr,
-				const struct page *page,
-				unsigned int flags,
-				pmd_t *pmdp);
 
 struct hmm_devmem {
 	struct completion		completion;
@@ -707,7 +702,6 @@ struct hmm_devmem {
 	struct dev_pagemap		pagemap;
 	const struct hmm_devmem_ops	*ops;
 	struct percpu_ref		ref;
-	dev_page_fault_t		page_fault;
 };
 
 /*

include/linux/memremap.h

@@ -80,6 +80,12 @@ struct dev_pagemap_ops {
 	 * Wait for refcount in struct dev_pagemap to be idle and reap it.
 	 */
 	void (*cleanup)(struct dev_pagemap *pgmap);
+
+	/*
+	 * Used for private (un-addressable) device memory only. Must migrate
+	 * the page back to a CPU accessible page.
+	 */
+	vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf);
 };
 
 /**
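
As a rough, hedged illustration of the driver side of the new interface (not part of this commit): a driver providing MEMORY_DEVICE_PRIVATE memory now supplies the callback in its dev_pagemap_ops table. All my_* names below are hypothetical, and the actual copy-back work is stubbed out.

#include <linux/memremap.h>
#include <linux/mm.h>

/*
 * Placeholder for the driver-specific work: allocate a system page,
 * copy the device page's contents, and fix up the page tables.
 */
static int my_copy_back_to_ram(struct vm_fault *vmf)
{
	return -EIO;		/* stub only */
}

static vm_fault_t my_migrate_to_ram(struct vm_fault *vmf)
{
	/*
	 * vmf->page is the faulting device private page.  If it cannot be
	 * migrated back to a CPU accessible page, return VM_FAULT_SIGBUS,
	 * per the comment added to struct dev_pagemap_ops above.
	 */
	if (my_copy_back_to_ram(vmf))
		return VM_FAULT_SIGBUS;
	return 0;
}

static const struct dev_pagemap_ops my_pagemap_ops = {
	/* ->page_free, ->kill and ->cleanup omitted for brevity */
	.migrate_to_ram	= my_migrate_to_ram,
};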

include/linux/swapops.h

@@ -129,12 +129,6 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 {
 	return pfn_to_page(swp_offset(entry));
 }
-
-vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
-		       unsigned long addr,
-		       swp_entry_t entry,
-		       unsigned int flags,
-		       pmd_t *pmdp);
 #else /* CONFIG_DEVICE_PRIVATE */
 static inline swp_entry_t make_device_private_entry(struct page *page, bool write)
 {
@@ -164,15 +158,6 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 {
 	return NULL;
 }
-
-static inline vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
-		       unsigned long addr,
-		       swp_entry_t entry,
-		       unsigned int flags,
-		       pmd_t *pmdp)
-{
-	return VM_FAULT_SIGBUS;
-}
 #endif /* CONFIG_DEVICE_PRIVATE */
 
 #ifdef CONFIG_MIGRATION

kernel/memremap.c

@@ -11,7 +11,6 @@
 #include <linux/types.h>
 #include <linux/wait_bit.h>
 #include <linux/xarray.h>
-#include <linux/hmm.h>
 
 static DEFINE_XARRAY(pgmap_array);
 #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
@@ -46,36 +45,6 @@ static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
 }
 #endif /* CONFIG_DEV_PAGEMAP_OPS */
 
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
-vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
-		       unsigned long addr,
-		       swp_entry_t entry,
-		       unsigned int flags,
-		       pmd_t *pmdp)
-{
-	struct page *page = device_private_entry_to_page(entry);
-	struct hmm_devmem *devmem;
-
-	devmem = container_of(page->pgmap, typeof(*devmem), pagemap);
-
-	/*
-	 * The page_fault() callback must migrate page back to system memory
-	 * so that CPU can access it. This might fail for various reasons
-	 * (device issue, device was unsafely unplugged, ...). When such
-	 * error conditions happen, the callback must return VM_FAULT_SIGBUS.
-	 *
-	 * Note that because memory cgroup charges are accounted to the device
-	 * memory, this should never fail because of memory restrictions (but
-	 * allocation of regular system page might still fail because we are
-	 * out of memory).
-	 *
-	 * There is a more in-depth description of what that callback can and
-	 * cannot do, in include/linux/memremap.h
-	 */
-	return devmem->page_fault(vma, addr, page, flags, pmdp);
-}
-#endif /* CONFIG_DEVICE_PRIVATE */
-
 static void pgmap_array_delete(struct resource *res)
 {
 	xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end),
@@ -193,6 +162,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 			WARN(1, "Device private memory not supported\n");
 			return ERR_PTR(-EINVAL);
 		}
+		if (!pgmap->ops || !pgmap->ops->migrate_to_ram) {
+			WARN(1, "Missing migrate_to_ram method\n");
+			return ERR_PTR(-EINVAL);
+		}
 		break;
 	case MEMORY_DEVICE_FS_DAX:
 		if (!IS_ENABLED(CONFIG_ZONE_DEVICE) ||
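
For context, a hedged sketch of the registration side under the new check (again not part of the commit; my_pagemap_ops is the hypothetical ops table from the sketch above): a MEMORY_DEVICE_PRIVATE pagemap whose ops table does not wire up ->migrate_to_ram is now rejected with -EINVAL up front instead of failing at fault time.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/ioport.h>
#include <linux/memremap.h>

static int my_devmem_register(struct device *dev, struct dev_pagemap *pgmap,
			      struct resource *res)
{
	void *addr;

	/*
	 * pgmap->ref and the other mandatory ops are assumed to be set up
	 * elsewhere; only the parts relevant to this patch are shown.
	 */
	pgmap->type = MEMORY_DEVICE_PRIVATE;
	pgmap->res = *res;
	pgmap->ops = &my_pagemap_ops;	/* must provide ->migrate_to_ram */

	addr = devm_memremap_pages(dev, pgmap);
	if (IS_ERR(addr))	/* now -EINVAL if migrate_to_ram is missing */
		return PTR_ERR(addr);
	return 0;
}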

mm/hmm.c

@@ -1366,15 +1366,12 @@ static void hmm_devmem_ref_kill(struct dev_pagemap *pgmap)
 	percpu_ref_kill(pgmap->ref);
 }
 
-static vm_fault_t hmm_devmem_fault(struct vm_area_struct *vma,
-			    unsigned long addr,
-			    const struct page *page,
-			    unsigned int flags,
-			    pmd_t *pmdp)
+static vm_fault_t hmm_devmem_migrate_to_ram(struct vm_fault *vmf)
 {
-	struct hmm_devmem *devmem = page->pgmap->data;
+	struct hmm_devmem *devmem = vmf->page->pgmap->data;
 
-	return devmem->ops->fault(devmem, vma, addr, page, flags, pmdp);
+	return devmem->ops->fault(devmem, vmf->vma, vmf->address, vmf->page,
+			vmf->flags, vmf->pmd);
 }
 
 static void hmm_devmem_free(struct page *page, void *data)
@@ -1388,6 +1385,7 @@ static const struct dev_pagemap_ops hmm_pagemap_ops = {
 	.page_free		= hmm_devmem_free,
 	.kill			= hmm_devmem_ref_kill,
 	.cleanup		= hmm_devmem_ref_exit,
+	.migrate_to_ram		= hmm_devmem_migrate_to_ram,
 };
 
 /*
@@ -1438,7 +1436,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
 	devmem->pfn_last = devmem->pfn_first +
 			   (resource_size(devmem->resource) >> PAGE_SHIFT);
-	devmem->page_fault = hmm_devmem_fault;
 
 	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
 	devmem->pagemap.res = *devmem->resource;

mm/memory.c

@@ -2748,13 +2748,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 			migration_entry_wait(vma->vm_mm, vmf->pmd,
 					     vmf->address);
 		} else if (is_device_private_entry(entry)) {
-			/*
-			 * For un-addressable device memory we call the pgmap
-			 * fault handler callback. The callback must migrate
-			 * the page back to some CPU accessible page.
-			 */
-			ret = device_private_entry_fault(vma, vmf->address, entry,
-				vmf->flags, vmf->pmd);
+			vmf->page = device_private_entry_to_page(entry);
+			ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
 		} else if (is_hwpoison_entry(entry)) {
 			ret = VM_FAULT_HWPOISON;
 		} else {