mm, devm_memremap_pages: fix shutdown handling

The last step before devm_memremap_pages() returns success is to allocate
a release action, devm_memremap_pages_release(), to tear the entire setup
down.  However, the result from devm_add_action() is not checked.

Checking the error from devm_add_action() is not enough.  The API
currently relies on the fact that the percpu_ref it is using is killed by
the time devm_memremap_pages_release() is run.  Rather than continue
this awkward situation, offload the responsibility of killing the
percpu_ref to devm_memremap_pages_release() directly.  This allows
devm_memremap_pages() to do the right thing relative to init failures and
shutdown.

Without this change we could fail to register the teardown of
devm_memremap_pages().  The likelihood of hitting this failure is tiny as
small memory allocations almost always succeed.  However, the impact of
the failure is large, given that any future reconfiguration, or
disable/enable, of an nvdimm namespace will fail forever, as subsequent calls
to devm_memremap_pages() will fail to set up the pgmap_radix since there will
be stale entries for the physical address range.

An argument could be made to require that the ->kill() operation be set in
the @pgmap arg rather than passed in separately.  However, being able to
grep for the kill routine directly at the devm_memremap_pages() call site
helps code readability and makes it easier to track the lifetime of a given
instance.

Link: http://lkml.kernel.org/r/154275558526.76910.7535251937849268605.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Fixes: e8d5134833 ("memremap: change devm_memremap_pages interface...")
Reviewed-by: "Jérôme Glisse" <jglisse@redhat.com>
Reported-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Dan Williams 2018-12-28 00:34:57 -08:00 committed by Linus Torvalds
parent 06489cfbd9
commit a95c90f1e2
5 changed files with 38 additions and 36 deletions

View File

@ -48,9 +48,8 @@ static void dax_pmem_percpu_exit(void *data)
percpu_ref_exit(ref); percpu_ref_exit(ref);
} }
static void dax_pmem_percpu_kill(void *data) static void dax_pmem_percpu_kill(struct percpu_ref *ref)
{ {
struct percpu_ref *ref = data;
struct dax_pmem *dax_pmem = to_dax_pmem(ref); struct dax_pmem *dax_pmem = to_dax_pmem(ref);
dev_dbg(dax_pmem->dev, "trace\n"); dev_dbg(dax_pmem->dev, "trace\n");
@ -112,17 +111,10 @@ static int dax_pmem_probe(struct device *dev)
} }
dax_pmem->pgmap.ref = &dax_pmem->ref; dax_pmem->pgmap.ref = &dax_pmem->ref;
dax_pmem->pgmap.kill = dax_pmem_percpu_kill;
addr = devm_memremap_pages(dev, &dax_pmem->pgmap); addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
if (IS_ERR(addr)) { if (IS_ERR(addr))
devm_remove_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
percpu_ref_exit(&dax_pmem->ref);
return PTR_ERR(addr); return PTR_ERR(addr);
}
rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
&dax_pmem->ref);
if (rc)
return rc;
/* adjust the dax_region resource to the start of data */ /* adjust the dax_region resource to the start of data */
memcpy(&res, &dax_pmem->pgmap.res, sizeof(res)); memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));

View File

@ -309,8 +309,11 @@ static void pmem_release_queue(void *q)
blk_cleanup_queue(q); blk_cleanup_queue(q);
} }
static void pmem_freeze_queue(void *q) static void pmem_freeze_queue(struct percpu_ref *ref)
{ {
struct request_queue *q;
q = container_of(ref, typeof(*q), q_usage_counter);
blk_freeze_queue_start(q); blk_freeze_queue_start(q);
} }
@ -402,6 +405,7 @@ static int pmem_attach_disk(struct device *dev,
pmem->pfn_flags = PFN_DEV; pmem->pfn_flags = PFN_DEV;
pmem->pgmap.ref = &q->q_usage_counter; pmem->pgmap.ref = &q->q_usage_counter;
pmem->pgmap.kill = pmem_freeze_queue;
if (is_nd_pfn(dev)) { if (is_nd_pfn(dev)) {
if (setup_pagemap_fsdax(dev, &pmem->pgmap)) if (setup_pagemap_fsdax(dev, &pmem->pgmap))
return -ENOMEM; return -ENOMEM;
@ -427,13 +431,6 @@ static int pmem_attach_disk(struct device *dev,
memcpy(&bb_res, &nsio->res, sizeof(bb_res)); memcpy(&bb_res, &nsio->res, sizeof(bb_res));
} }
/*
* At release time the queue must be frozen before
* devm_memremap_pages is unwound
*/
if (devm_add_action_or_reset(dev, pmem_freeze_queue, q))
return -ENOMEM;
if (IS_ERR(addr)) if (IS_ERR(addr))
return PTR_ERR(addr); return PTR_ERR(addr);
pmem->virt_addr = addr; pmem->virt_addr = addr;

View File

@ -111,6 +111,7 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
* @altmap: pre-allocated/reserved memory for vmemmap allocations * @altmap: pre-allocated/reserved memory for vmemmap allocations
* @res: physical address range covered by @ref * @res: physical address range covered by @ref
* @ref: reference count that pins the devm_memremap_pages() mapping * @ref: reference count that pins the devm_memremap_pages() mapping
* @kill: callback to transition @ref to the dead state
* @dev: host device of the mapping for debug * @dev: host device of the mapping for debug
* @data: private data pointer for page_free() * @data: private data pointer for page_free()
* @type: memory type: see MEMORY_* in memory_hotplug.h * @type: memory type: see MEMORY_* in memory_hotplug.h
@ -122,6 +123,7 @@ struct dev_pagemap {
bool altmap_valid; bool altmap_valid;
struct resource res; struct resource res;
struct percpu_ref *ref; struct percpu_ref *ref;
void (*kill)(struct percpu_ref *ref);
struct device *dev; struct device *dev;
void *data; void *data;
enum memory_type type; enum memory_type type;

View File

@ -88,14 +88,10 @@ static void devm_memremap_pages_release(void *data)
resource_size_t align_start, align_size; resource_size_t align_start, align_size;
unsigned long pfn; unsigned long pfn;
pgmap->kill(pgmap->ref);
for_each_device_pfn(pfn, pgmap) for_each_device_pfn(pfn, pgmap)
put_page(pfn_to_page(pfn)); put_page(pfn_to_page(pfn));
if (percpu_ref_tryget_live(pgmap->ref)) {
dev_WARN(dev, "%s: page mapping is still live!\n", __func__);
percpu_ref_put(pgmap->ref);
}
/* pages are dead and unused, undo the arch mapping */ /* pages are dead and unused, undo the arch mapping */
align_start = res->start & ~(SECTION_SIZE - 1); align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@ -116,7 +112,7 @@ static void devm_memremap_pages_release(void *data)
/** /**
* devm_memremap_pages - remap and provide memmap backing for the given resource * devm_memremap_pages - remap and provide memmap backing for the given resource
* @dev: hosting device for @res * @dev: hosting device for @res
* @pgmap: pointer to a struct dev_pgmap * @pgmap: pointer to a struct dev_pagemap
* *
* Notes: * Notes:
* 1/ At a minimum the res, ref and type members of @pgmap must be initialized * 1/ At a minimum the res, ref and type members of @pgmap must be initialized
@ -125,11 +121,8 @@ static void devm_memremap_pages_release(void *data)
* 2/ The altmap field may optionally be initialized, in which case altmap_valid * 2/ The altmap field may optionally be initialized, in which case altmap_valid
* must be set to true * must be set to true
* *
* 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages() * 3/ pgmap->ref must be 'live' on entry and will be killed at
* time (or devm release event). The expected order of events is that ref has * devm_memremap_pages_release() time, or if this routine fails.
* been through percpu_ref_kill() before devm_memremap_pages_release(). The
* wait for the completion of all references being dropped and
* percpu_ref_exit() must occur after devm_memremap_pages_release().
* *
* 4/ res is expected to be a host memory range that could feasibly be * 4/ res is expected to be a host memory range that could feasibly be
* treated as a "System RAM" range, i.e. not a device mmio range, but * treated as a "System RAM" range, i.e. not a device mmio range, but
@ -145,6 +138,9 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
pgprot_t pgprot = PAGE_KERNEL; pgprot_t pgprot = PAGE_KERNEL;
int error, nid, is_ram; int error, nid, is_ram;
if (!pgmap->ref || !pgmap->kill)
return ERR_PTR(-EINVAL);
align_start = res->start & ~(SECTION_SIZE - 1); align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
- align_start; - align_start;
@ -170,12 +166,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
if (is_ram != REGION_DISJOINT) { if (is_ram != REGION_DISJOINT) {
WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__, WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__,
is_ram == REGION_MIXED ? "mixed" : "ram", res); is_ram == REGION_MIXED ? "mixed" : "ram", res);
return ERR_PTR(-ENXIO); error = -ENXIO;
goto err_array;
} }
if (!pgmap->ref)
return ERR_PTR(-EINVAL);
pgmap->dev = dev; pgmap->dev = dev;
error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start), error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
@ -217,7 +211,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
align_size >> PAGE_SHIFT, pgmap); align_size >> PAGE_SHIFT, pgmap);
percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap)); percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
devm_add_action(dev, devm_memremap_pages_release, pgmap); error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
pgmap);
if (error)
return ERR_PTR(error);
return __va(res->start); return __va(res->start);
@ -228,6 +225,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
err_pfn_remap: err_pfn_remap:
pgmap_array_delete(res); pgmap_array_delete(res);
err_array: err_array:
pgmap->kill(pgmap->ref);
return ERR_PTR(error); return ERR_PTR(error);
} }
EXPORT_SYMBOL_GPL(devm_memremap_pages); EXPORT_SYMBOL_GPL(devm_memremap_pages);

View File

@ -104,13 +104,26 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
} }
EXPORT_SYMBOL(__wrap_devm_memremap); EXPORT_SYMBOL(__wrap_devm_memremap);
static void nfit_test_kill(void *_pgmap)
{
struct dev_pagemap *pgmap = _pgmap;
pgmap->kill(pgmap->ref);
}
void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{ {
resource_size_t offset = pgmap->res.start; resource_size_t offset = pgmap->res.start;
struct nfit_test_resource *nfit_res = get_nfit_res(offset); struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res) if (nfit_res) {
int rc;
rc = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
if (rc)
return ERR_PTR(rc);
return nfit_res->buf + offset - nfit_res->res.start; return nfit_res->buf + offset - nfit_res->res.start;
}
return devm_memremap_pages(dev, pgmap); return devm_memremap_pages(dev, pgmap);
} }
EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages); EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);