diff --git a/drivers/Kconfig b/drivers/Kconfig index d2ac339de85f..8298eab84a6f 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -190,6 +190,8 @@ source "drivers/android/Kconfig" source "drivers/nvdimm/Kconfig" +source "drivers/dax/Kconfig" + source "drivers/nvmem/Kconfig" source "drivers/hwtracing/stm/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 8f5d076baeb0..0b6f3d60193d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_PARPORT) += parport/ obj-$(CONFIG_NVM) += lightnvm/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ +obj-$(CONFIG_DEV_DAX) += dax/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig new file mode 100644 index 000000000000..86ffbaa891ad --- /dev/null +++ b/drivers/dax/Kconfig @@ -0,0 +1,25 @@ +menuconfig DEV_DAX + tristate "DAX: direct access to differentiated memory" + default m if NVDIMM_DAX + help + Support raw access to differentiated (persistence, bandwidth, + latency...) memory via an mmap(2) capable character + device. Platform firmware or a device driver may identify a + platform memory resource that is differentiated from the + baseline memory pool. Mappings of a /dev/daxX.Y device impose + restrictions that make the mapping behavior deterministic. + +if DEV_DAX + +config DEV_DAX_PMEM + tristate "PMEM DAX: direct access to persistent memory" + depends on NVDIMM_DAX + default DEV_DAX + help + Support raw access to persistent memory. Note that this + driver consumes memory ranges allocated and exported by the + libnvdimm sub-system. + + Say Y if unsure + +endif diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile new file mode 100644 index 000000000000..27c54e38478a --- /dev/null +++ b/drivers/dax/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_DEV_DAX) += dax.o +obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o + +dax_pmem-y := pmem.o diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c new file mode 100644 index 000000000000..4c22a40f2335 --- /dev/null +++ b/drivers/dax/dax.c @@ -0,0 +1,253 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +static int dax_major; +static struct class *dax_class; +static DEFINE_IDA(dax_minor_ida); + +/** + * struct dax_region - mapping infrastructure for dax devices + * @id: kernel-wide unique region for a memory range + * @base: linear address corresponding to @res + * @kref: to pin while other agents have a need to do lookups + * @dev: parent device backing this region + * @align: allocation and mapping alignment for child dax devices + * @res: physical address range of the region + * @pfn_flags: identify whether the pfns are paged back or not + */ +struct dax_region { + int id; + struct ida ida; + void *base; + struct kref kref; + struct device *dev; + unsigned int align; + struct resource res; + unsigned long pfn_flags; +}; + +/** + * struct dax_dev - subdivision of a dax region + * @region - parent region + * @dev - device backing the character device + * @kref - enable this data to be tracked in filp->private_data + * @id - child id in the region + * @num_resources - number of physical address extents in this device + * @res - array of physical address ranges + */ +struct dax_dev { + struct dax_region *region; + struct device *dev; + struct kref kref; + int id; + int num_resources; + struct resource res[0]; +}; + +static void dax_region_free(struct kref *kref) +{ + struct dax_region *dax_region; + + dax_region = container_of(kref, struct dax_region, kref); + kfree(dax_region); +} + +void dax_region_put(struct dax_region *dax_region) +{ + kref_put(&dax_region->kref, dax_region_free); +} +EXPORT_SYMBOL_GPL(dax_region_put); + +static void dax_dev_free(struct kref *kref) +{ + struct dax_dev *dax_dev; + + dax_dev = container_of(kref, struct dax_dev, kref); + dax_region_put(dax_dev->region); + kfree(dax_dev); +} + +static void dax_dev_put(struct dax_dev *dax_dev) +{ + kref_put(&dax_dev->kref, dax_dev_free); +} + +struct dax_region *alloc_dax_region(struct device *parent, int region_id, + struct resource *res, unsigned int align, void *addr, + unsigned long pfn_flags) +{ + struct dax_region *dax_region; + + dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL); + + if (!dax_region) + return NULL; + + memcpy(&dax_region->res, res, sizeof(*res)); + dax_region->pfn_flags = pfn_flags; + kref_init(&dax_region->kref); + dax_region->id = region_id; + ida_init(&dax_region->ida); + dax_region->align = align; + dax_region->dev = parent; + dax_region->base = addr; + + return dax_region; +} +EXPORT_SYMBOL_GPL(alloc_dax_region); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dax_dev *dax_dev = dev_get_drvdata(dev); + unsigned long long size = 0; + int i; + + for (i = 0; i < dax_dev->num_resources; i++) + size += resource_size(&dax_dev->res[i]); + + return sprintf(buf, "%llu\n", size); +} +static DEVICE_ATTR_RO(size); + +static struct attribute *dax_device_attributes[] = { + &dev_attr_size.attr, + NULL, +}; + +static const struct attribute_group dax_device_attribute_group = { + .attrs = dax_device_attributes, +}; + +static const struct attribute_group *dax_attribute_groups[] = { + &dax_device_attribute_group, + NULL, +}; + +static void unregister_dax_dev(void *_dev) +{ + struct device *dev = _dev; + struct dax_dev *dax_dev = dev_get_drvdata(dev); + struct dax_region *dax_region = dax_dev->region; + + dev_dbg(dev, "%s\n", __func__); + + get_device(dev); + device_unregister(dev); + ida_simple_remove(&dax_region->ida, dax_dev->id); + ida_simple_remove(&dax_minor_ida, MINOR(dev->devt)); + put_device(dev); + dax_dev_put(dax_dev); +} + +int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res, + int count) +{ + struct device *parent = dax_region->dev; + struct dax_dev *dax_dev; + struct device *dev; + int rc, minor; + dev_t dev_t; + + dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL); + if (!dax_dev) + return -ENOMEM; + memcpy(dax_dev->res, res, sizeof(*res) * count); + dax_dev->num_resources = count; + kref_init(&dax_dev->kref); + dax_dev->region = dax_region; + kref_get(&dax_region->kref); + + dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL); + if (dax_dev->id < 0) { + rc = dax_dev->id; + goto err_id; + } + + minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL); + if (minor < 0) { + rc = minor; + goto err_minor; + } + + dev_t = MKDEV(dax_major, minor); + dev = device_create_with_groups(dax_class, parent, dev_t, dax_dev, + dax_attribute_groups, "dax%d.%d", dax_region->id, + dax_dev->id); + if (IS_ERR(dev)) { + rc = PTR_ERR(dev); + goto err_create; + } + dax_dev->dev = dev; + + rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev); + if (rc) { + unregister_dax_dev(dev); + return rc; + } + + return 0; + + err_create: + ida_simple_remove(&dax_minor_ida, minor); + err_minor: + ida_simple_remove(&dax_region->ida, dax_dev->id); + err_id: + dax_dev_put(dax_dev); + + return rc; +} +EXPORT_SYMBOL_GPL(devm_create_dax_dev); + +static const struct file_operations dax_fops = { + .llseek = noop_llseek, + .owner = THIS_MODULE, +}; + +static int __init dax_init(void) +{ + int rc; + + rc = register_chrdev(0, "dax", &dax_fops); + if (rc < 0) + return rc; + dax_major = rc; + + dax_class = class_create(THIS_MODULE, "dax"); + if (IS_ERR(dax_class)) { + unregister_chrdev(dax_major, "dax"); + return PTR_ERR(dax_class); + } + + return 0; +} + +static void __exit dax_exit(void) +{ + class_destroy(dax_class); + unregister_chrdev(dax_major, "dax"); + ida_destroy(&dax_minor_ida); +} + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL v2"); +subsys_initcall(dax_init); +module_exit(dax_exit); diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h new file mode 100644 index 000000000000..d8b8f1f25054 --- /dev/null +++ b/drivers/dax/dax.h @@ -0,0 +1,24 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __DAX_H__ +#define __DAX_H__ +struct device; +struct resource; +struct dax_region; +void dax_region_put(struct dax_region *dax_region); +struct dax_region *alloc_dax_region(struct device *parent, + int region_id, struct resource *res, unsigned int align, + void *addr, unsigned long flags); +int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res, + int count); +#endif /* __DAX_H__ */ diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c new file mode 100644 index 000000000000..55d510e36cd1 --- /dev/null +++ b/drivers/dax/pmem.c @@ -0,0 +1,158 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include "../nvdimm/pfn.h" +#include "../nvdimm/nd.h" +#include "dax.h" + +struct dax_pmem { + struct device *dev; + struct percpu_ref ref; + struct completion cmp; +}; + +struct dax_pmem *to_dax_pmem(struct percpu_ref *ref) +{ + return container_of(ref, struct dax_pmem, ref); +} + +static void dax_pmem_percpu_release(struct percpu_ref *ref) +{ + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + complete(&dax_pmem->cmp); +} + +static void dax_pmem_percpu_exit(void *data) +{ + struct percpu_ref *ref = data; + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + percpu_ref_exit(ref); + wait_for_completion(&dax_pmem->cmp); +} + +static void dax_pmem_percpu_kill(void *data) +{ + struct percpu_ref *ref = data; + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + percpu_ref_kill(ref); +} + +static int dax_pmem_probe(struct device *dev) +{ + int rc; + void *addr; + struct resource res; + struct nd_pfn_sb *pfn_sb; + struct dax_pmem *dax_pmem; + struct nd_region *nd_region; + struct nd_namespace_io *nsio; + struct dax_region *dax_region; + struct nd_namespace_common *ndns; + struct nd_dax *nd_dax = to_nd_dax(dev); + struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; + struct vmem_altmap __altmap, *altmap = NULL; + + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return PTR_ERR(ndns); + nsio = to_nd_namespace_io(&ndns->dev); + + /* parse the 'pfn' info block via ->rw_bytes */ + devm_nsio_enable(dev, nsio); + altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap); + if (IS_ERR(altmap)) + return PTR_ERR(altmap); + devm_nsio_disable(dev, nsio); + + pfn_sb = nd_pfn->pfn_sb; + + if (!devm_request_mem_region(dev, nsio->res.start, + resource_size(&nsio->res), dev_name(dev))) { + dev_warn(dev, "could not reserve region %pR\n", &nsio->res); + return -EBUSY; + } + + dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL); + if (!dax_pmem) + return -ENOMEM; + + dax_pmem->dev = dev; + init_completion(&dax_pmem->cmp); + rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0, + GFP_KERNEL); + if (rc) + return rc; + + rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref); + if (rc) { + dax_pmem_percpu_exit(&dax_pmem->ref); + return rc; + } + + addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap); + if (IS_ERR(addr)) + return PTR_ERR(addr); + + rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref); + if (rc) { + dax_pmem_percpu_kill(&dax_pmem->ref); + return rc; + } + + nd_region = to_nd_region(dev->parent); + dax_region = alloc_dax_region(dev, nd_region->id, &res, + le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP); + if (!dax_region) + return -ENOMEM; + + /* TODO: support for subdividing a dax region... */ + rc = devm_create_dax_dev(dax_region, &res, 1); + + /* child dax_dev instances now own the lifetime of the dax_region */ + dax_region_put(dax_region); + + return rc; +} + +static struct nd_device_driver dax_pmem_driver = { + .probe = dax_pmem_probe, + .drv = { + .name = "dax_pmem", + }, + .type = ND_DRIVER_DAX_PMEM, +}; + +static int __init dax_pmem_init(void) +{ + return nd_driver_register(&dax_pmem_driver); +} +module_init(dax_pmem_init); + +static void __exit dax_pmem_exit(void) +{ + driver_unregister(&dax_pmem_driver.drv); +} +module_exit(dax_pmem_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM); diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 5ff6d3c126a9..785985677159 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -16,6 +16,7 @@ ldflags-y += --wrap=phys_to_pfn_t DRIVERS := ../../../drivers NVDIMM_SRC := $(DRIVERS)/nvdimm ACPI_SRC := $(DRIVERS)/acpi +DAX_SRC := $(DRIVERS)/dax obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o @@ -23,6 +24,8 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o obj-$(CONFIG_ACPI_NFIT) += nfit.o +obj-$(CONFIG_DEV_DAX) += dax.o +obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o nfit-y := $(ACPI_SRC)/nfit.o nfit-y += config_check.o @@ -39,6 +42,12 @@ nd_blk-y += config_check.o nd_e820-y := $(NVDIMM_SRC)/e820.o nd_e820-y += config_check.o +dax-y := $(DAX_SRC)/dax.o +dax-y += config_check.o + +dax_pmem-y := $(DAX_SRC)/pmem.o +dax_pmem-y += config_check.o + libnvdimm-y := $(NVDIMM_SRC)/core.o libnvdimm-y += $(NVDIMM_SRC)/bus.o libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c index f2c7615554eb..adf18bfeca00 100644 --- a/tools/testing/nvdimm/config_check.c +++ b/tools/testing/nvdimm/config_check.c @@ -12,4 +12,6 @@ void check(void) BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM)); }