libnvdimm fixes 4.20-rc6

* Unless and until the core mm handles memory hotplug units smaller than
   a section (128M), persistent memory namespaces must be padded to
   section alignment. The libnvdimm core already handled section
   collision with "System RAM", but some configurations overlap
   independent "Persistent Memory" ranges within a section, so additional
   padding injection is added for that case.
 
 * The recent reworks of the ARS (address range scrub) state machine to
   reduce the number of state flags inadvertently missed a conversion of
   acpi_nfit_ars_rescan() call sites. Fix the regression whereby
   user-requested ARS results in a "short" scrub rather than a "long"
   scrub.
 
 * Fixup the unit tests to handle / test the 128M section alignment of
   mocked test resources.
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJcDK/gAAoJEB7SkWpmfYgCCDsP/3VYrJfJwDVMGbvR4DX4P9WT
 vmft+ac++0aKId+Ei80GwENAQbMttmQ1woHTGAJpEt+A3AL8DWFSFQiqW0b+/eAg
 DW60dYN2cO0M6jkkJyIUMaWRy3iT7OOfEXLlJOubL7EnbivczjZFOAElvdpzPaK0
 +zIStgCdvmdcnZMi0BIst5XLoZ4/wWrR0Caq+ULpHoeDSO/oz3I2HV5gvxPE/7sJ
 WXSdB8zbLSS67fzFik1FJbbRRzEfBH7RJpgMoMpDLT3HtwcMvMrV8iJgNApccueV
 pFGhb5BpaJLaLc59RYxxVp/vLBEEaWDxq54RzU4twf4mbI1sc/NKJ08Nkwo9l8lH
 I6CW3FvegYzMkHD8PNd32GAr9HIxktvlH4Hc1GzaSSWwyeKx+5Et4llDpuuqW19o
 +wlybxRzZEoRNacwnxk1FPeOYUPPKLogkVOf14umh10tvi4UIuGkophoO1bxXc4d
 2gDPAHr3G1hAz+JV7PW/L+rO43uL8MWBYLdZgLiQ+90OAURu7e/f06j2SWzyHV1S
 9AajUqqLLV3whXHUfpl50Eymml9dEyw9NfbKOkry88Kde1NrwC/ccGB/90nKLoa+
 A2HxroRgxk8my8HXztnxLbhuxk0dnV4jK9v2mm9lGS6IUSWL6tePzneA8gGM2tYm
 rt2gaqY2bFOOSmG56Uac
 =85/B
 -----END PGP SIGNATURE-----

Merge tag 'libnvdimm-fixes-4.20-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm fixes from Dan Williams:
 "A regression fix for the Address Range Scrub implementation, yes
  another one, and support for platforms that misalign persistent memory
  relative to the Linux memory hotplug section constraint. Longer term,
  support for sub-section memory hotplug would alleviate alignment
  waste, but until then this hack allows a 'struct page' memmap to be
  established for these misaligned memory regions.

  These have all appeared in a -next release, and thanks to Patrick for
  reporting and testing the alignment padding fix.

  Summary:

   - Unless and until the core mm handles memory hotplug units smaller
     than a section (128M), persistent memory namespaces must be padded
     to section alignment.

     The libnvdimm core already handled section collision with "System
     RAM", but some configurations overlap independent "Persistent
     Memory" ranges within a section, so additional padding injection is
     added for that case.

   - The recent reworks of the ARS (address range scrub) state machine
     to reduce the number of state flags inadvertently missed a
     conversion of acpi_nfit_ars_rescan() call sites. Fix the regression
     whereby user-requested ARS results in a "short" scrub rather than a
     "long" scrub.

   - Fixup the unit tests to handle / test the 128M section alignment of
     mocked test resources.

* tag 'libnvdimm-fixes-4.20-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  acpi/nfit: Fix user-initiated ARS to be "ARS-long" rather than "ARS-short"
  libnvdimm, pfn: Pad pfn namespaces relative to other regions
  tools/testing/nvdimm: Align test resources to 128M
This commit is contained in:
Linus Torvalds 2018-12-09 09:46:54 -08:00
commit bd799eb63d
5 changed files with 114 additions and 30 deletions

View File

@ -1308,7 +1308,7 @@ static ssize_t scrub_store(struct device *dev,
if (nd_desc) { if (nd_desc) {
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
rc = acpi_nfit_ars_rescan(acpi_desc, 0); rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
} }
device_unlock(dev); device_unlock(dev);
if (rc) if (rc)

View File

@ -111,6 +111,8 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, resource_size_t *overlap); struct nd_mapping *nd_mapping, resource_size_t *overlap);
resource_size_t nd_blk_available_dpa(struct nd_region *nd_region); resource_size_t nd_blk_available_dpa(struct nd_region *nd_region);
resource_size_t nd_region_available_dpa(struct nd_region *nd_region); resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
resource_size_t size);
resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
struct nd_label_id *label_id); struct nd_label_id *label_id);
int alias_dpa_busy(struct device *dev, void *data); int alias_dpa_busy(struct device *dev, void *data);

View File

@ -649,14 +649,47 @@ static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
ALIGN_DOWN(phys, nd_pfn->align)); ALIGN_DOWN(phys, nd_pfn->align));
} }
/*
 * Check if pmem collides with 'System RAM', or other regions when
 * section aligned. Trim it accordingly.
 *
 * @nd_pfn:    pfn device whose backing namespace range is examined
 * @start_pad: out; bytes to skip at the front of the namespace so that the
 *             usable range begins on a section boundary (0 if no collision)
 * @end_trunc: out; bytes to drop from the tail of the namespace
 *             (0 if no collision and the end honours nd_pfn->align)
 *
 * Both outputs are always written, so callers need not pre-initialize them.
 */
static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trunc)
{
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent);
	const resource_size_t start = nsio->res.start;
	const resource_size_t end = start + resource_size(&nsio->res);
	resource_size_t adjust, size;

	*start_pad = 0;
	*end_trunc = 0;

	/*
	 * Extend the range downwards to the section boundary and see whether
	 * the extended range mixes with System RAM or touches another region.
	 */
	adjust = start - PHYS_SECTION_ALIGN_DOWN(start);
	size = resource_size(&nsio->res) + adjust;
	if (region_intersects(start - adjust, size, IORESOURCE_SYSTEM_RAM,
				IORES_DESC_NONE) == REGION_MIXED
			|| nd_region_conflict(nd_region, start - adjust, size))
		*start_pad = PHYS_SECTION_ALIGN_UP(start) - start;

	/* Now check that end of the range does not collide. */
	adjust = PHYS_SECTION_ALIGN_UP(end) - end;
	size = resource_size(&nsio->res) + adjust;
	/*
	 * 'size' already reaches the section-aligned end; do not add 'adjust'
	 * a second time or a neighbouring region that merely begins at the
	 * aligned end would be reported as a conflict.
	 */
	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
				IORES_DESC_NONE) == REGION_MIXED
			|| !IS_ALIGNED(end, nd_pfn->align)
			|| nd_region_conflict(nd_region, start, size))
		*end_trunc = end - phys_pmem_align_down(nd_pfn, end);
}
static int nd_pfn_init(struct nd_pfn *nd_pfn) static int nd_pfn_init(struct nd_pfn *nd_pfn)
{ {
u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_common *ndns = nd_pfn->ndns;
u32 start_pad = 0, end_trunc = 0; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
resource_size_t start, size; resource_size_t start, size;
struct nd_namespace_io *nsio;
struct nd_region *nd_region; struct nd_region *nd_region;
u32 start_pad, end_trunc;
struct nd_pfn_sb *pfn_sb; struct nd_pfn_sb *pfn_sb;
unsigned long npfns; unsigned long npfns;
phys_addr_t offset; phys_addr_t offset;
@ -688,30 +721,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
memset(pfn_sb, 0, sizeof(*pfn_sb)); memset(pfn_sb, 0, sizeof(*pfn_sb));
/* trim_pfn_device(nd_pfn, &start_pad, &end_trunc);
* Check if pmem collides with 'System RAM' when section aligned and
* trim it accordingly
*/
nsio = to_nd_namespace_io(&ndns->dev);
start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
size = resource_size(&nsio->res);
if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
IORES_DESC_NONE) == REGION_MIXED) {
start = nsio->res.start;
start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
}
start = nsio->res.start;
size = PHYS_SECTION_ALIGN_UP(start + size) - start;
if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
IORES_DESC_NONE) == REGION_MIXED
|| !IS_ALIGNED(start + resource_size(&nsio->res),
nd_pfn->align)) {
size = resource_size(&nsio->res);
end_trunc = start + size - phys_pmem_align_down(nd_pfn,
start + size);
}
if (start_pad + end_trunc) if (start_pad + end_trunc)
dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
dev_name(&ndns->dev), start_pad + end_trunc); dev_name(&ndns->dev), start_pad + end_trunc);
@ -722,7 +732,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
* implementation will limit the pfns advertised through * implementation will limit the pfns advertised through
* ->direct_access() to those that are included in the memmap. * ->direct_access() to those that are included in the memmap.
*/ */
start += start_pad; start = nsio->res.start + start_pad;
size = resource_size(&nsio->res); size = resource_size(&nsio->res);
npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K) npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K)
/ PAGE_SIZE); / PAGE_SIZE);

View File

@ -1184,6 +1184,47 @@ int nvdimm_has_cache(struct nd_region *nd_region)
} }
EXPORT_SYMBOL_GPL(nvdimm_has_cache); EXPORT_SYMBOL_GPL(nvdimm_has_cache);
/* Arguments handed to region_conflict() through device_for_each_child(). */
struct conflict_context {
	/* region on whose behalf the check runs; never reported as a conflict */
	struct nd_region *nd_region;
	/* first address of the range being probed */
	resource_size_t start;
	/* length of the range being probed */
	resource_size_t size;
};
/*
 * region_conflict() - per-device callback for nd_region_conflict()
 * @dev:  child device being visited
 * @data: pointer to the struct conflict_context describing the probe
 *
 * Devices for which is_memory() is false, and the region that requested the
 * check (ctx->nd_region), are ignored.
 *
 * Returns -EBUSY when the probed range [ctx->start, ctx->start + ctx->size)
 * overlaps the visited region's [ndr_start, ndr_start + ndr_size) range,
 * 0 otherwise.
 */
static int region_conflict(struct device *dev, void *data)
{
	struct nd_region *nd_region;
	struct conflict_context *ctx = data;
	resource_size_t res_end, region_end, region_start;

	if (!is_memory(dev))
		return 0;

	nd_region = to_nd_region(dev);
	if (nd_region == ctx->nd_region)
		return 0;

	res_end = ctx->start + ctx->size;
	region_start = nd_region->ndr_start;
	region_end = region_start + nd_region->ndr_size;

	/* probed range begins inside the other region */
	if (ctx->start >= region_start && ctx->start < region_end)
		return -EBUSY;
	/* probed range ends inside the other region */
	if (res_end > region_start && res_end <= region_end)
		return -EBUSY;
	/*
	 * Probed range completely encloses the other region: neither endpoint
	 * lies inside it, yet the two ranges still overlap.
	 */
	if (ctx->start < region_start && res_end > region_end)
		return -EBUSY;
	return 0;
}
/*
 * nd_region_conflict() - probe an address range against sibling regions
 * @nd_region: region making the request; excluded from the comparison
 * @start:     first address of the range to probe
 * @size:      length of the range to probe
 *
 * Runs region_conflict() over the children of the nvdimm bus that
 * @nd_region hangs off and hands back the result of that walk: non-zero
 * (-EBUSY from the callback) on a conflict, 0 when none is found.
 */
int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
		resource_size_t size)
{
	struct conflict_context probe;
	struct nvdimm_bus *bus;

	probe.nd_region = nd_region;
	probe.start = start;
	probe.size = size;

	bus = walk_to_nvdimm_bus(&nd_region->dev);

	return device_for_each_child(&bus->dev, &probe, region_conflict);
}
void __exit nd_region_devs_exit(void) void __exit nd_region_devs_exit(void)
{ {
ida_destroy(&region_ida); ida_destroy(&region_ida);

View File

@ -15,6 +15,7 @@
#include <linux/dma-mapping.h> #include <linux/dma-mapping.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/libnvdimm.h> #include <linux/libnvdimm.h>
#include <linux/genalloc.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/module.h> #include <linux/module.h>
@ -215,6 +216,8 @@ struct nfit_test {
static struct workqueue_struct *nfit_wq; static struct workqueue_struct *nfit_wq;
static struct gen_pool *nfit_pool;
static struct nfit_test *to_nfit_test(struct device *dev) static struct nfit_test *to_nfit_test(struct device *dev)
{ {
struct platform_device *pdev = to_platform_device(dev); struct platform_device *pdev = to_platform_device(dev);
@ -1132,6 +1135,9 @@ static void release_nfit_res(void *data)
list_del(&nfit_res->list); list_del(&nfit_res->list);
spin_unlock(&nfit_test_lock); spin_unlock(&nfit_test_lock);
if (resource_size(&nfit_res->res) >= DIMM_SIZE)
gen_pool_free(nfit_pool, nfit_res->res.start,
resource_size(&nfit_res->res));
vfree(nfit_res->buf); vfree(nfit_res->buf);
kfree(nfit_res); kfree(nfit_res);
} }
@ -1144,7 +1150,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
GFP_KERNEL); GFP_KERNEL);
int rc; int rc;
if (!buf || !nfit_res) if (!buf || !nfit_res || !*dma)
goto err; goto err;
rc = devm_add_action(dev, release_nfit_res, nfit_res); rc = devm_add_action(dev, release_nfit_res, nfit_res);
if (rc) if (rc)
@ -1164,6 +1170,8 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
return nfit_res->buf; return nfit_res->buf;
err: err:
if (*dma && size >= DIMM_SIZE)
gen_pool_free(nfit_pool, *dma, size);
if (buf) if (buf)
vfree(buf); vfree(buf);
kfree(nfit_res); kfree(nfit_res);
@ -1172,9 +1180,16 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma) static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
{ {
struct genpool_data_align data = {
.align = SZ_128M,
};
void *buf = vmalloc(size); void *buf = vmalloc(size);
*dma = (unsigned long) buf; if (size >= DIMM_SIZE)
*dma = gen_pool_alloc_algo(nfit_pool, size,
gen_pool_first_fit_align, &data);
else
*dma = (unsigned long) buf;
return __test_alloc(t, size, dma, buf); return __test_alloc(t, size, dma, buf);
} }
@ -2839,6 +2854,17 @@ static __init int nfit_test_init(void)
goto err_register; goto err_register;
} }
nfit_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE);
if (!nfit_pool) {
rc = -ENOMEM;
goto err_register;
}
if (gen_pool_add(nfit_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) {
rc = -ENOMEM;
goto err_register;
}
for (i = 0; i < NUM_NFITS; i++) { for (i = 0; i < NUM_NFITS; i++) {
struct nfit_test *nfit_test; struct nfit_test *nfit_test;
struct platform_device *pdev; struct platform_device *pdev;
@ -2894,6 +2920,9 @@ static __init int nfit_test_init(void)
return 0; return 0;
err_register: err_register:
if (nfit_pool)
gen_pool_destroy(nfit_pool);
destroy_workqueue(nfit_wq); destroy_workqueue(nfit_wq);
for (i = 0; i < NUM_NFITS; i++) for (i = 0; i < NUM_NFITS; i++)
if (instances[i]) if (instances[i])
@ -2917,6 +2946,8 @@ static __exit void nfit_test_exit(void)
platform_driver_unregister(&nfit_test_driver); platform_driver_unregister(&nfit_test_driver);
nfit_test_teardown(); nfit_test_teardown();
gen_pool_destroy(nfit_pool);
for (i = 0; i < NUM_NFITS; i++) for (i = 0; i < NUM_NFITS; i++)
put_device(&instances[i]->pdev.dev); put_device(&instances[i]->pdev.dev);
class_destroy(nfit_test_dimm); class_destroy(nfit_test_dimm);