From 5f096b14d421ba23249b752e41989ecfaa6ae226 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 27 Oct 2015 14:53:04 -0600 Subject: [PATCH 01/17] vfio: Whitelist PCI bridges When determining whether a group is viable, we already allow devices bound to pcieport. Generalize this to include any PCI bridge device. Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 563c510f285c..1c0f98c2f634 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -438,16 +439,33 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group, } /* - * Whitelist some drivers that we know are safe (no dma) or just sit on - * a device. It's not always practical to leave a device within a group - * driverless as it could get re-bound to something unsafe. + * Some drivers, like pci-stub, are only used to prevent other drivers from + * claiming a device and are therefore perfectly legitimate for a user owned + * group. The pci-stub driver has no dependencies on DMA or the IOVA mapping + * of the device, but it does prevent the user from having direct access to + * the device, which is useful in some circumstances. + * + * We also assume that we can include PCI interconnect devices, ie. bridges. + * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge + * then all of the downstream devices will be part of the same IOMMU group as + * the bridge. Thus, if placing the bridge into the user owned IOVA space + * breaks anything, it only does so for user owned devices downstream. Note + * that error notification via MSI can be affected for platforms that handle + * MSI within the same IOVA space as DMA. */ -static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" }; +static const char * const vfio_driver_whitelist[] = { "pci-stub" }; -static bool vfio_whitelisted_driver(struct device_driver *drv) +static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) { int i; + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) + return true; + } + for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) { if (!strcmp(drv->name, vfio_driver_whitelist[i])) return true; @@ -462,6 +480,7 @@ static bool vfio_whitelisted_driver(struct device_driver *drv) * - driver-less * - bound to a vfio driver * - bound to a whitelisted driver + * - a PCI interconnect device * * We use two methods to determine whether a device is bound to a vfio * driver. The first is to test whether the device exists in the vfio @@ -486,7 +505,7 @@ static int vfio_dev_viable(struct device *dev, void *data) } mutex_unlock(&group->unbound_lock); - if (!ret || !drv || vfio_whitelisted_driver(drv)) + if (!ret || !drv || vfio_dev_whitelisted(dev, drv)) return 0; device = vfio_group_get_device(group, dev); From 4e1a635552d3df7bb743de8c2be156293c53839e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 27 Oct 2015 14:53:05 -0600 Subject: [PATCH 02/17] vfio/pci: Use kernel VPD access functions The PCI VPD capability operates on a set of window registers in PCI config space. Writing to the address register triggers either a read or write, depending on the setting of the PCI_VPD_ADDR_F bit within the address register. The data register provides either the source for writes or the target for reads. This model is susceptible to being broken by concurrent access, for which the kernel has adopted a set of access functions to serialize these registers. Additionally, commits like 932c435caba8 ("PCI: Add dev_flags bit to access VPD through function 0") and 7aa6ca4d39ed ("PCI: Add VPD function 0 quirk for Intel Ethernet devices") indicate that VPD registers can be shared between functions on multifunction devices creating dependencies between otherwise independent devices. Fortunately it's quite easy to emulate the VPD registers, simply storing copies of the address and data registers in memory and triggering a VPD read or write on writes to the address register. This allows vfio users to avoid seeing spurious register changes from accesses on other devices and enables the use of shared quirks in the host kernel. We can theoretically still race with access through sysfs, but the window of opportunity is much smaller. Signed-off-by: Alex Williamson Acked-by: Mark Rustad --- drivers/vfio/pci/vfio_pci_config.c | 70 +++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index ff75ca31a199..a8657ef6382f 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -671,6 +671,73 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm) return 0; } +static int vfio_vpd_config_write(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + struct pci_dev *pdev = vdev->pdev; + __le16 *paddr = (__le16 *)(vdev->vconfig + pos - offset + PCI_VPD_ADDR); + __le32 *pdata = (__le32 *)(vdev->vconfig + pos - offset + PCI_VPD_DATA); + u16 addr; + u32 data; + + /* + * Write through to emulation. If the write includes the upper byte + * of PCI_VPD_ADDR, then the PCI_VPD_ADDR_F bit is written and we + * have work to do. + */ + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); + if (count < 0 || offset > PCI_VPD_ADDR + 1 || + offset + count <= PCI_VPD_ADDR + 1) + return count; + + addr = le16_to_cpu(*paddr); + + if (addr & PCI_VPD_ADDR_F) { + data = le32_to_cpu(*pdata); + if (pci_write_vpd(pdev, addr & ~PCI_VPD_ADDR_F, 4, &data) != 4) + return count; + } else { + if (pci_read_vpd(pdev, addr, 4, &data) != 4) + return count; + *pdata = cpu_to_le32(data); + } + + /* + * Toggle PCI_VPD_ADDR_F in the emulated PCI_VPD_ADDR register to + * signal completion. If an error occurs above, we assume that not + * toggling this bit will induce a driver timeout. + */ + addr ^= PCI_VPD_ADDR_F; + *paddr = cpu_to_le16(addr); + + return count; +} + +/* Permissions for Vital Product Data capability */ +static int __init init_pci_cap_vpd_perm(struct perm_bits *perm) +{ + if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_VPD])) + return -ENOMEM; + + perm->writefn = vfio_vpd_config_write; + + /* + * We always virtualize the next field so we can remove + * capabilities from the chain if we want to. + */ + p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); + + /* + * Both the address and data registers are virtualized to + * enable access through the pci_vpd_read/write functions + */ + p_setw(perm, PCI_VPD_ADDR, (u16)ALL_VIRT, (u16)ALL_WRITE); + p_setd(perm, PCI_VPD_DATA, ALL_VIRT, ALL_WRITE); + + return 0; +} + /* Permissions for PCI-X capability */ static int __init init_pci_cap_pcix_perm(struct perm_bits *perm) { @@ -790,6 +857,7 @@ void vfio_pci_uninit_perm_bits(void) free_perm_bits(&cap_perms[PCI_CAP_ID_BASIC]); free_perm_bits(&cap_perms[PCI_CAP_ID_PM]); + free_perm_bits(&cap_perms[PCI_CAP_ID_VPD]); free_perm_bits(&cap_perms[PCI_CAP_ID_PCIX]); free_perm_bits(&cap_perms[PCI_CAP_ID_EXP]); free_perm_bits(&cap_perms[PCI_CAP_ID_AF]); @@ -807,7 +875,7 @@ int __init vfio_pci_init_perm_bits(void) /* Capabilities */ ret |= init_pci_cap_pm_perm(&cap_perms[PCI_CAP_ID_PM]); - cap_perms[PCI_CAP_ID_VPD].writefn = vfio_raw_config_write; + ret |= init_pci_cap_vpd_perm(&cap_perms[PCI_CAP_ID_VPD]); ret |= init_pci_cap_pcix_perm(&cap_perms[PCI_CAP_ID_PCIX]); cap_perms[PCI_CAP_ID_VNDR].writefn = vfio_raw_config_write; ret |= init_pci_cap_exp_perm(&cap_perms[PCI_CAP_ID_EXP]); From 1276ece32c5d18790e8bcff89e692fd3c1790bab Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Wed, 14 Oct 2015 15:32:56 +0000 Subject: [PATCH 03/17] VFIO: platform: clear IRQ_NOAUTOEN when de-assigning the IRQ The vfio platform driver currently sets the IRQ_NOAUTOEN before doing the request_irq to properly handle the user masking. However it does not clear it when de-assigning the IRQ. This brings issues when loading the native driver again which may not explicitly enable the IRQ. This problem was observed with xgbe driver. Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index 88bba57b30a8..46d4750f43a8 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -185,6 +185,7 @@ static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, int ret; if (irq->trigger) { + irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN); free_irq(irq->hwirq, irq); kfree(irq->name); eventfd_ctx_put(irq->trigger); From 4644321fd3c119a819ab24fd2bc2d1f9bca4a695 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 29 Oct 2015 17:49:42 +0000 Subject: [PATCH 04/17] vfio/type1: handle case where IOMMU does not support PAGE_SIZE size Current vfio_pgsize_bitmap code hides the supported IOMMU page sizes smaller than PAGE_SIZE. As a result, in case the IOMMU does not support PAGE_SIZE page, the alignment check on map/unmap is done with larger page sizes, if any. This can fail although mapping could be done with pages smaller than PAGE_SIZE. This patch modifies vfio_pgsize_bitmap implementation so that, in case the IOMMU supports page sizes smaller than PAGE_SIZE we pretend PAGE_SIZE is supported and hide sub-PAGE_SIZE sizes. That way the user will be able to map/unmap buffers whose size/ start address is aligned with PAGE_SIZE. Pinning code uses that granularity while iommu driver can use the sub-PAGE_SIZE size to map the buffer. Signed-off-by: Eric Auger Acked-by: Will Deacon Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 57d8c37a002b..59d47cb638d5 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -403,13 +403,26 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) { struct vfio_domain *domain; - unsigned long bitmap = PAGE_MASK; + unsigned long bitmap = ULONG_MAX; mutex_lock(&iommu->lock); list_for_each_entry(domain, &iommu->domain_list, next) bitmap &= domain->domain->ops->pgsize_bitmap; mutex_unlock(&iommu->lock); + /* + * In case the IOMMU supports page sizes smaller than PAGE_SIZE + * we pretend PAGE_SIZE is supported and hide sub-PAGE_SIZE sizes. + * That way the user will be able to map/unmap buffers whose size/ + * start address is aligned with PAGE_SIZE. Pinning code uses that + * granularity while iommu driver can use the sub-PAGE_SIZE size + * to map the buffer. + */ + if (bitmap & ~PAGE_MASK) { + bitmap &= PAGE_MASK; + bitmap |= PAGE_SIZE; + } + return bitmap; } From 1b4bb2eaa9b2583521611b4aa978f9f499c92cd4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 29 Oct 2015 16:50:43 +0000 Subject: [PATCH 05/17] vfio/platform: store mapped memory in region, instead of an on-stack copy vfio_platform_{read,write}_mmio() call ioremap_nocache() to map a region of io memory, which they store in struct vfio_platform_region to be eventually re-used, or unmapped by vfio_platform_regions_cleanup(). These functions receive a copy of their struct vfio_platform_region argument on the stack - so these mapped areas are always allocated, and always leaked. Pass this argument as a pointer instead. Fixes: 6e3f26456009 "vfio/platform: read and write support for the device fd" Signed-off-by: James Morse Acked-by: Baptiste Reynal Tested-by: Baptiste Reynal Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform_common.c | 36 ++++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index e43efb5e92bf..8c216de883e3 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -307,17 +307,17 @@ static long vfio_platform_ioctl(void *device_data, return -ENOTTY; } -static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg, +static ssize_t vfio_platform_read_mmio(struct vfio_platform_region *reg, char __user *buf, size_t count, loff_t off) { unsigned int done = 0; - if (!reg.ioaddr) { - reg.ioaddr = - ioremap_nocache(reg.addr, reg.size); + if (!reg->ioaddr) { + reg->ioaddr = + ioremap_nocache(reg->addr, reg->size); - if (!reg.ioaddr) + if (!reg->ioaddr) return -ENOMEM; } @@ -327,7 +327,7 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg, if (count >= 4 && !(off % 4)) { u32 val; - val = ioread32(reg.ioaddr + off); + val = ioread32(reg->ioaddr + off); if (copy_to_user(buf, &val, 4)) goto err; @@ -335,7 +335,7 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg, } else if (count >= 2 && !(off % 2)) { u16 val; - val = ioread16(reg.ioaddr + off); + val = ioread16(reg->ioaddr + off); if (copy_to_user(buf, &val, 2)) goto err; @@ -343,7 +343,7 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg, } else { u8 val; - val = ioread8(reg.ioaddr + off); + val = ioread8(reg->ioaddr + off); if (copy_to_user(buf, &val, 1)) goto err; @@ -376,7 +376,7 @@ static ssize_t vfio_platform_read(void *device_data, char __user *buf, return -EINVAL; if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO) - return vfio_platform_read_mmio(vdev->regions[index], + return vfio_platform_read_mmio(&vdev->regions[index], buf, count, off); else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO) return -EINVAL; /* not implemented */ @@ -384,17 +384,17 @@ static ssize_t vfio_platform_read(void *device_data, char __user *buf, return -EINVAL; } -static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg, +static ssize_t vfio_platform_write_mmio(struct vfio_platform_region *reg, const char __user *buf, size_t count, loff_t off) { unsigned int done = 0; - if (!reg.ioaddr) { - reg.ioaddr = - ioremap_nocache(reg.addr, reg.size); + if (!reg->ioaddr) { + reg->ioaddr = + ioremap_nocache(reg->addr, reg->size); - if (!reg.ioaddr) + if (!reg->ioaddr) return -ENOMEM; } @@ -406,7 +406,7 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg, if (copy_from_user(&val, buf, 4)) goto err; - iowrite32(val, reg.ioaddr + off); + iowrite32(val, reg->ioaddr + off); filled = 4; } else if (count >= 2 && !(off % 2)) { @@ -414,7 +414,7 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg, if (copy_from_user(&val, buf, 2)) goto err; - iowrite16(val, reg.ioaddr + off); + iowrite16(val, reg->ioaddr + off); filled = 2; } else { @@ -422,7 +422,7 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg, if (copy_from_user(&val, buf, 1)) goto err; - iowrite8(val, reg.ioaddr + off); + iowrite8(val, reg->ioaddr + off); filled = 1; } @@ -452,7 +452,7 @@ static ssize_t vfio_platform_write(void *device_data, const char __user *buf, return -EINVAL; if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO) - return vfio_platform_write_mmio(vdev->regions[index], + return vfio_platform_write_mmio(&vdev->regions[index], buf, count, off); else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO) return -EINVAL; /* not implemented */ From 32a2d71c4e808b5aa6c414d4422b5e6c594c8805 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Tue, 3 Nov 2015 18:12:12 +0000 Subject: [PATCH 06/17] vfio: platform: introduce vfio-platform-base module To prepare for vfio platform reset rework let's build vfio_platform_common.c and vfio_platform_irq.c in a separate module from vfio-platform and vfio-amba. This makes possible to have separate module inits and works around a race between platform driver init and vfio reset module init: that way we make sure symbols exported by base are available when vfio-platform driver gets probed. The open/release being implemented in the base module, the ref count is applied to the parent module instead. Signed-off-by: Eric Auger Suggested-by: Arnd Bergmann Reviewed-by: Arnd Bergmann Signed-off-by: Alex Williamson --- drivers/vfio/platform/Makefile | 6 ++++-- drivers/vfio/platform/vfio_amba.c | 1 + drivers/vfio/platform/vfio_platform.c | 1 + drivers/vfio/platform/vfio_platform_common.c | 13 +++++++++++-- drivers/vfio/platform/vfio_platform_private.h | 1 + 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile index 9ce8afe28450..41a6224f5e6b 100644 --- a/drivers/vfio/platform/Makefile +++ b/drivers/vfio/platform/Makefile @@ -1,10 +1,12 @@ - -vfio-platform-y := vfio_platform.o vfio_platform_common.o vfio_platform_irq.o +vfio-platform-base-y := vfio_platform_common.o vfio_platform_irq.o +vfio-platform-y := vfio_platform.o obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o +obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform-base.o obj-$(CONFIG_VFIO_PLATFORM) += reset/ vfio-amba-y := vfio_amba.o obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o +obj-$(CONFIG_VFIO_AMBA) += vfio-platform-base.o obj-$(CONFIG_VFIO_AMBA) += reset/ diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c index ff0331f72526..a66479bd0edf 100644 --- a/drivers/vfio/platform/vfio_amba.c +++ b/drivers/vfio/platform/vfio_amba.c @@ -67,6 +67,7 @@ static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id) vdev->flags = VFIO_DEVICE_FLAGS_AMBA; vdev->get_resource = get_amba_resource; vdev->get_irq = get_amba_irq; + vdev->parent_module = THIS_MODULE; ret = vfio_platform_probe_common(vdev, &adev->dev); if (ret) { diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c index cef645c83996..f1625dcfbb23 100644 --- a/drivers/vfio/platform/vfio_platform.c +++ b/drivers/vfio/platform/vfio_platform.c @@ -65,6 +65,7 @@ static int vfio_platform_probe(struct platform_device *pdev) vdev->flags = VFIO_DEVICE_FLAGS_PLATFORM; vdev->get_resource = get_platform_resource; vdev->get_irq = get_platform_irq; + vdev->parent_module = THIS_MODULE; ret = vfio_platform_probe_common(vdev, &pdev->dev); if (ret) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 8c216de883e3..7f69b852c7e2 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -23,6 +23,10 @@ #include "vfio_platform_private.h" +#define DRIVER_VERSION "0.10" +#define DRIVER_AUTHOR "Antonios Motakis " +#define DRIVER_DESC "VFIO platform base module" + static DEFINE_MUTEX(driver_lock); static const struct vfio_platform_reset_combo reset_lookup_table[] = { @@ -146,7 +150,7 @@ static void vfio_platform_release(void *device_data) mutex_unlock(&driver_lock); - module_put(THIS_MODULE); + module_put(vdev->parent_module); } static int vfio_platform_open(void *device_data) @@ -154,7 +158,7 @@ static int vfio_platform_open(void *device_data) struct vfio_platform_device *vdev = device_data; int ret; - if (!try_module_get(THIS_MODULE)) + if (!try_module_get(vdev->parent_module)) return -ENODEV; mutex_lock(&driver_lock); @@ -573,3 +577,8 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) return vdev; } EXPORT_SYMBOL_GPL(vfio_platform_remove_common); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index 1c9b3d59543c..71286903fb49 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -56,6 +56,7 @@ struct vfio_platform_device { u32 num_irqs; int refcnt; struct mutex igate; + struct module *parent_module; /* * These fields should be filled by the bus specific binder From e086497d313cbcffcdb5405a5a268961b53519b1 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Tue, 3 Nov 2015 18:12:13 +0000 Subject: [PATCH 07/17] vfio: platform: add capability to register a reset function In preparation for subsequent changes in reset function lookup, lets introduce a dynamic list of reset combos (compat string, reset module, reset function). The list can be populated/voided with vfio_platform_register/unregister_reset. Those are not yet used in this patch. Signed-off-by: Eric Auger Reviewed-by: Arnd Bergmann Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform_common.c | 27 +++++++++++++++++++ drivers/vfio/platform/vfio_platform_private.h | 20 ++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 7f69b852c7e2..7decc50ccf30 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -27,6 +27,7 @@ #define DRIVER_AUTHOR "Antonios Motakis " #define DRIVER_DESC "VFIO platform base module" +static LIST_HEAD(reset_list); static DEFINE_MUTEX(driver_lock); static const struct vfio_platform_reset_combo reset_lookup_table[] = { @@ -578,6 +579,32 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) } EXPORT_SYMBOL_GPL(vfio_platform_remove_common); +void __vfio_platform_register_reset(struct vfio_platform_reset_node *node) +{ + mutex_lock(&driver_lock); + list_add(&node->link, &reset_list); + mutex_unlock(&driver_lock); +} +EXPORT_SYMBOL_GPL(__vfio_platform_register_reset); + +void vfio_platform_unregister_reset(const char *compat, + vfio_platform_reset_fn_t fn) +{ + struct vfio_platform_reset_node *iter, *temp; + + mutex_lock(&driver_lock); + list_for_each_entry_safe(iter, temp, &reset_list, link) { + if (!strcmp(iter->compat, compat) && (iter->reset == fn)) { + list_del(&iter->link); + break; + } + } + + mutex_unlock(&driver_lock); + +} +EXPORT_SYMBOL_GPL(vfio_platform_unregister_reset); + MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index 71286903fb49..c563940e0f6b 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -71,6 +71,15 @@ struct vfio_platform_device { int (*reset)(struct vfio_platform_device *vdev); }; +typedef int (*vfio_platform_reset_fn_t)(struct vfio_platform_device *vdev); + +struct vfio_platform_reset_node { + struct list_head link; + char *compat; + struct module *owner; + vfio_platform_reset_fn_t reset; +}; + struct vfio_platform_reset_combo { const char *compat; const char *reset_function_name; @@ -90,4 +99,15 @@ extern int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, unsigned start, unsigned count, void *data); +extern void __vfio_platform_register_reset(struct vfio_platform_reset_node *n); +extern void vfio_platform_unregister_reset(const char *compat, + vfio_platform_reset_fn_t fn); +#define vfio_platform_register_reset(__compat, __reset) \ +static struct vfio_platform_reset_node __reset ## _node = { \ + .owner = THIS_MODULE, \ + .compat = __compat, \ + .reset = __reset, \ +}; \ +__vfio_platform_register_reset(&__reset ## _node) + #endif /* VFIO_PLATFORM_PRIVATE_H */ From 588646529f2d09b723584d44ef0a8ab6ff2a690d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Tue, 3 Nov 2015 18:12:14 +0000 Subject: [PATCH 08/17] vfio: platform: introduce module_vfio_reset_handler macro The module_vfio_reset_handler macro - define a module alias - implement module init/exit function which respectively registers and unregisters the reset function. Signed-off-by: Eric Auger Reviewed-by: Arnd Bergmann Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform_private.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index c563940e0f6b..fd262be2a443 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -110,4 +110,18 @@ static struct vfio_platform_reset_node __reset ## _node = { \ }; \ __vfio_platform_register_reset(&__reset ## _node) +#define module_vfio_reset_handler(compat, reset) \ +MODULE_ALIAS("vfio-reset:" compat); \ +static int __init reset ## _module_init(void) \ +{ \ + vfio_platform_register_reset(compat, reset); \ + return 0; \ +}; \ +static void __exit reset ## _module_exit(void) \ +{ \ + vfio_platform_unregister_reset(compat, reset); \ +}; \ +module_init(reset ## _module_init); \ +module_exit(reset ## _module_exit) + #endif /* VFIO_PLATFORM_PRIVATE_H */ From 680742f644be61657f82d43a2c2fff0489d4a579 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Tue, 3 Nov 2015 18:12:15 +0000 Subject: [PATCH 09/17] vfio: platform: reset: calxedaxgmac: add reset function registration This patch adds the reset function registration/unregistration. This is handled through the module_vfio_reset_handler macro. This latter also defines a MODULE_ALIAS which simplifies the load from vfio-platform. Signed-off-by: Eric Auger Reviewed-by: Arnd Bergmann Signed-off-by: Alex Williamson --- drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c index 619dc7d22082..80718f20d5d3 100644 --- a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c +++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c @@ -30,8 +30,6 @@ #define DRIVER_AUTHOR "Eric Auger " #define DRIVER_DESC "Reset support for Calxeda xgmac vfio platform device" -#define CALXEDAXGMAC_COMPAT "calxeda,hb-xgmac" - /* XGMAC Register definitions */ #define XGMAC_CONTROL 0x00000000 /* MAC Configuration */ @@ -80,6 +78,8 @@ int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev) } EXPORT_SYMBOL_GPL(vfio_platform_calxedaxgmac_reset); +module_vfio_reset_handler("calxeda,hb-xgmac", vfio_platform_calxedaxgmac_reset); + MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); From 0628c4dfd3a781c09aed983cc79b3c43c5c568bd Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Tue, 3 Nov 2015 18:12:16 +0000 Subject: [PATCH 10/17] vfio: platform: add compat in vfio_platform_device Let's retrieve the compatibility string on probe and store it in the vfio_platform_device struct Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform_common.c | 15 ++++++++------- drivers/vfio/platform/vfio_platform_private.h | 1 + 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 7decc50ccf30..d8df8a4318da 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -41,16 +41,11 @@ static const struct vfio_platform_reset_combo reset_lookup_table[] = { static void vfio_platform_get_reset(struct vfio_platform_device *vdev, struct device *dev) { - const char *compat; int (*reset)(struct vfio_platform_device *); - int ret, i; - - ret = device_property_read_string(dev, "compatible", &compat); - if (ret) - return; + int i; for (i = 0 ; i < ARRAY_SIZE(reset_lookup_table); i++) { - if (!strcmp(reset_lookup_table[i].compat, compat)) { + if (!strcmp(reset_lookup_table[i].compat, vdev->compat)) { request_module(reset_lookup_table[i].module_name); reset = __symbol_get( reset_lookup_table[i].reset_function_name); @@ -544,6 +539,12 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, if (!vdev) return -EINVAL; + ret = device_property_read_string(dev, "compatible", &vdev->compat); + if (ret) { + pr_err("VFIO: cannot retrieve compat for %s\n", vdev->name); + return -EINVAL; + } + group = iommu_group_get(dev); if (!group) { pr_err("VFIO: No IOMMU group for device %s\n", vdev->name); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index fd262be2a443..415310f62b06 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -57,6 +57,7 @@ struct vfio_platform_device { int refcnt; struct mutex igate; struct module *parent_module; + const char *compat; /* * These fields should be filled by the bus specific binder From e9e0506ee60dd79714c59457f4301c602786defc Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Tue, 3 Nov 2015 18:12:17 +0000 Subject: [PATCH 11/17] vfio: platform: use list of registered reset function Remove the static lookup table and use the dynamic list of registered reset functions instead. Also load the reset module through its alias. The reset struct module pointer is stored in vfio_platform_device. We also remove the useless struct device pointer parameter in vfio_platform_get_reset. This patch fixes the issue related to the usage of __symbol_get, which besides from being moot, prevented compilation with CONFIG_MODULES disabled. Also usage of MODULE_ALIAS makes possible to add a new reset module without needing to update the framework. This was suggested by Arnd. Signed-off-by: Eric Auger Reported-by: Arnd Bergmann Reviewed-by: Arnd Bergmann Signed-off-by: Alex Williamson --- .../reset/vfio_platform_calxedaxgmac.c | 1 - drivers/vfio/platform/vfio_platform_common.c | 52 +++++++++++-------- drivers/vfio/platform/vfio_platform_private.h | 7 +-- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c index 80718f20d5d3..640f5d87d422 100644 --- a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c +++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c @@ -76,7 +76,6 @@ int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev) return 0; } -EXPORT_SYMBOL_GPL(vfio_platform_calxedaxgmac_reset); module_vfio_reset_handler("calxeda,hb-xgmac", vfio_platform_calxedaxgmac_reset); diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index d8df8a4318da..ac0229622c1f 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -30,37 +30,43 @@ static LIST_HEAD(reset_list); static DEFINE_MUTEX(driver_lock); -static const struct vfio_platform_reset_combo reset_lookup_table[] = { - { - .compat = "calxeda,hb-xgmac", - .reset_function_name = "vfio_platform_calxedaxgmac_reset", - .module_name = "vfio-platform-calxedaxgmac", - }, -}; - -static void vfio_platform_get_reset(struct vfio_platform_device *vdev, - struct device *dev) +static vfio_platform_reset_fn_t vfio_platform_lookup_reset(const char *compat, + struct module **module) { - int (*reset)(struct vfio_platform_device *); - int i; + struct vfio_platform_reset_node *iter; + vfio_platform_reset_fn_t reset_fn = NULL; - for (i = 0 ; i < ARRAY_SIZE(reset_lookup_table); i++) { - if (!strcmp(reset_lookup_table[i].compat, vdev->compat)) { - request_module(reset_lookup_table[i].module_name); - reset = __symbol_get( - reset_lookup_table[i].reset_function_name); - if (reset) { - vdev->reset = reset; - return; - } + mutex_lock(&driver_lock); + list_for_each_entry(iter, &reset_list, link) { + if (!strcmp(iter->compat, compat) && + try_module_get(iter->owner)) { + *module = iter->owner; + reset_fn = iter->reset; + break; } } + mutex_unlock(&driver_lock); + return reset_fn; +} + +static void vfio_platform_get_reset(struct vfio_platform_device *vdev) +{ + char modname[256]; + + vdev->reset = vfio_platform_lookup_reset(vdev->compat, + &vdev->reset_module); + if (!vdev->reset) { + snprintf(modname, 256, "vfio-reset:%s", vdev->compat); + request_module(modname); + vdev->reset = vfio_platform_lookup_reset(vdev->compat, + &vdev->reset_module); + } } static void vfio_platform_put_reset(struct vfio_platform_device *vdev) { if (vdev->reset) - symbol_put_addr(vdev->reset); + module_put(vdev->reset_module); } static int vfio_platform_regions_init(struct vfio_platform_device *vdev) @@ -557,7 +563,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, return ret; } - vfio_platform_get_reset(vdev, dev); + vfio_platform_get_reset(vdev); mutex_init(&vdev->igate); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index 415310f62b06..d1b0668fe394 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -58,6 +58,7 @@ struct vfio_platform_device { struct mutex igate; struct module *parent_module; const char *compat; + struct module *reset_module; /* * These fields should be filled by the bus specific binder @@ -81,12 +82,6 @@ struct vfio_platform_reset_node { vfio_platform_reset_fn_t reset; }; -struct vfio_platform_reset_combo { - const char *compat; - const char *reset_function_name; - const char *module_name; -}; - extern int vfio_platform_probe_common(struct vfio_platform_device *vdev, struct device *dev); extern struct vfio_platform_device *vfio_platform_remove_common From 705e60bae3e09bedba0b2ec936bce3f799f46426 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Tue, 3 Nov 2015 18:12:18 +0000 Subject: [PATCH 12/17] vfio: platform: add dev_info on device reset It might be helpful for the end-user to check the device reset function was found by the vfio platform reset framework. Lets store a pointer to the struct device in vfio_platform_device and trace when the reset function is called or not found. Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform_common.c | 14 ++++++++++++-- drivers/vfio/platform/vfio_platform_private.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index ac0229622c1f..a1c50d630792 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -144,8 +144,12 @@ static void vfio_platform_release(void *device_data) mutex_lock(&driver_lock); if (!(--vdev->refcnt)) { - if (vdev->reset) + if (vdev->reset) { + dev_info(vdev->device, "reset\n"); vdev->reset(vdev); + } else { + dev_warn(vdev->device, "no reset function found!\n"); + } vfio_platform_regions_cleanup(vdev); vfio_platform_irq_cleanup(vdev); } @@ -174,8 +178,12 @@ static int vfio_platform_open(void *device_data) if (ret) goto err_irq; - if (vdev->reset) + if (vdev->reset) { + dev_info(vdev->device, "reset\n"); vdev->reset(vdev); + } else { + dev_warn(vdev->device, "no reset function found!\n"); + } } vdev->refcnt++; @@ -551,6 +559,8 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, return -EINVAL; } + vdev->device = dev; + group = iommu_group_get(dev); if (!group) { pr_err("VFIO: No IOMMU group for device %s\n", vdev->name); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index d1b0668fe394..42816dd280cb 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -59,6 +59,7 @@ struct vfio_platform_device { struct module *parent_module; const char *compat; struct module *reset_module; + struct device *device; /* * These fields should be filled by the bus specific binder From daac3bbedb8aba714a082d00e2292d462fa24397 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Tue, 3 Nov 2015 18:12:19 +0000 Subject: [PATCH 13/17] vfio: platform: reset: calxedaxgmac: fix ioaddr leak In the current code the vfio_platform_region is copied on the stack. As a consequence the ioaddr address is not iounmapped in the vfio platform driver (vfio_platform_regions_cleanup). The patch uses the pointer to the region instead. Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- .../platform/reset/vfio_platform_calxedaxgmac.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c index 640f5d87d422..e3d3d948e661 100644 --- a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c +++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c @@ -59,20 +59,20 @@ static inline void xgmac_mac_disable(void __iomem *ioaddr) int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev) { - struct vfio_platform_region reg = vdev->regions[0]; + struct vfio_platform_region *reg = &vdev->regions[0]; - if (!reg.ioaddr) { - reg.ioaddr = - ioremap_nocache(reg.addr, reg.size); - if (!reg.ioaddr) + if (!reg->ioaddr) { + reg->ioaddr = + ioremap_nocache(reg->addr, reg->size); + if (!reg->ioaddr) return -ENOMEM; } /* disable IRQ */ - writel(0, reg.ioaddr + XGMAC_DMA_INTR_ENA); + writel(0, reg->ioaddr + XGMAC_DMA_INTR_ENA); /* Disable the MAC core */ - xgmac_mac_disable(reg.ioaddr); + xgmac_mac_disable(reg->ioaddr); return 0; } From 0990822c98661bd625033f0d523b5c33566657ef Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Tue, 3 Nov 2015 18:20:57 +0000 Subject: [PATCH 14/17] VFIO: platform: reset: AMD xgbe reset module This patch introduces a module that registers and implements a low-level reset function for the AMD XGBE device. it performs the following actions: - reset the PHY - disable auto-negotiation - disable & clear auto-negotiation IRQ - soft-reset the MAC Those tiny pieces of code are inherited from the native xgbe driver. Signed-off-by: Eric Auger Reviewed-by: Arnd Bergmann Signed-off-by: Alex Williamson --- drivers/vfio/platform/reset/Kconfig | 8 ++ drivers/vfio/platform/reset/Makefile | 2 + .../platform/reset/vfio_platform_amdxgbe.c | 127 ++++++++++++++++++ 3 files changed, 137 insertions(+) create mode 100644 drivers/vfio/platform/reset/vfio_platform_amdxgbe.c diff --git a/drivers/vfio/platform/reset/Kconfig b/drivers/vfio/platform/reset/Kconfig index 746b96b0003b..70cccc582bee 100644 --- a/drivers/vfio/platform/reset/Kconfig +++ b/drivers/vfio/platform/reset/Kconfig @@ -5,3 +5,11 @@ config VFIO_PLATFORM_CALXEDAXGMAC_RESET Enables the VFIO platform driver to handle reset for Calxeda xgmac If you don't know what to do here, say N. + +config VFIO_PLATFORM_AMDXGBE_RESET + tristate "VFIO support for AMD XGBE reset" + depends on VFIO_PLATFORM + help + Enables the VFIO platform driver to handle reset for AMD XGBE + + If you don't know what to do here, say N. diff --git a/drivers/vfio/platform/reset/Makefile b/drivers/vfio/platform/reset/Makefile index 2a486af9f8fa..93f4e232697b 100644 --- a/drivers/vfio/platform/reset/Makefile +++ b/drivers/vfio/platform/reset/Makefile @@ -1,5 +1,7 @@ vfio-platform-calxedaxgmac-y := vfio_platform_calxedaxgmac.o +vfio-platform-amdxgbe-y := vfio_platform_amdxgbe.o ccflags-y += -Idrivers/vfio/platform obj-$(CONFIG_VFIO_PLATFORM_CALXEDAXGMAC_RESET) += vfio-platform-calxedaxgmac.o +obj-$(CONFIG_VFIO_PLATFORM_AMDXGBE_RESET) += vfio-platform-amdxgbe.o diff --git a/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c new file mode 100644 index 000000000000..da5356f48d0b --- /dev/null +++ b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c @@ -0,0 +1,127 @@ +/* + * VFIO platform driver specialized for AMD xgbe reset + * reset code is inherited from AMD xgbe native driver + * + * Copyright (c) 2015 Linaro Ltd. + * www.linaro.org + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include "vfio_platform_private.h" + +#define DMA_MR 0x3000 +#define MAC_VR 0x0110 +#define DMA_ISR 0x3008 +#define MAC_ISR 0x00b0 +#define PCS_MMD_SELECT 0xff +#define MDIO_AN_INT 0x8002 +#define MDIO_AN_INTMASK 0x8001 + +static unsigned int xmdio_read(void *ioaddr, unsigned int mmd, + unsigned int reg) +{ + unsigned int mmd_address, value; + + mmd_address = (mmd << 16) | ((reg) & 0xffff); + iowrite32(mmd_address >> 8, ioaddr + (PCS_MMD_SELECT << 2)); + value = ioread32(ioaddr + ((mmd_address & 0xff) << 2)); + return value; +} + +static void xmdio_write(void *ioaddr, unsigned int mmd, + unsigned int reg, unsigned int value) +{ + unsigned int mmd_address; + + mmd_address = (mmd << 16) | ((reg) & 0xffff); + iowrite32(mmd_address >> 8, ioaddr + (PCS_MMD_SELECT << 2)); + iowrite32(value, ioaddr + ((mmd_address & 0xff) << 2)); +} + +int vfio_platform_amdxgbe_reset(struct vfio_platform_device *vdev) +{ + struct vfio_platform_region *xgmac_regs = &vdev->regions[0]; + struct vfio_platform_region *xpcs_regs = &vdev->regions[1]; + u32 dma_mr_value, pcs_value, value; + unsigned int count; + + if (!xgmac_regs->ioaddr) { + xgmac_regs->ioaddr = + ioremap_nocache(xgmac_regs->addr, xgmac_regs->size); + if (!xgmac_regs->ioaddr) + return -ENOMEM; + } + if (!xpcs_regs->ioaddr) { + xpcs_regs->ioaddr = + ioremap_nocache(xpcs_regs->addr, xpcs_regs->size); + if (!xpcs_regs->ioaddr) + return -ENOMEM; + } + + /* reset the PHY through MDIO*/ + pcs_value = xmdio_read(xpcs_regs->ioaddr, MDIO_MMD_PCS, MDIO_CTRL1); + pcs_value |= MDIO_CTRL1_RESET; + xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_PCS, MDIO_CTRL1, pcs_value); + + count = 50; + do { + msleep(20); + pcs_value = xmdio_read(xpcs_regs->ioaddr, MDIO_MMD_PCS, + MDIO_CTRL1); + } while ((pcs_value & MDIO_CTRL1_RESET) && --count); + + if (pcs_value & MDIO_CTRL1_RESET) + pr_warn("%s XGBE PHY reset timeout\n", __func__); + + /* disable auto-negotiation */ + value = xmdio_read(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_CTRL1); + value &= ~MDIO_AN_CTRL1_ENABLE; + xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_CTRL1, value); + + /* disable AN IRQ */ + xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_AN_INTMASK, 0); + + /* clear AN IRQ */ + xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_AN_INT, 0); + + /* MAC software reset */ + dma_mr_value = ioread32(xgmac_regs->ioaddr + DMA_MR); + dma_mr_value |= 0x1; + iowrite32(dma_mr_value, xgmac_regs->ioaddr + DMA_MR); + + usleep_range(10, 15); + + count = 2000; + while (count-- && (ioread32(xgmac_regs->ioaddr + DMA_MR) & 1)) + usleep_range(500, 600); + + if (!count) + pr_warn("%s MAC SW reset failed\n", __func__); + + return 0; +} + +module_vfio_reset_handler("amd,xgbe-seattle-v1a", vfio_platform_amdxgbe_reset); + +MODULE_VERSION("0.1"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Auger "); +MODULE_DESCRIPTION("Reset support for AMD xgbe vfio platform device"); From e324fc82ea453fcbd3898ec7afb792f750c68979 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 4 Nov 2015 13:53:26 +0100 Subject: [PATCH 15/17] vfio: Fix bug in vfio_device_get_from_name() The vfio_device_get_from_name() function might return a non-NULL pointer, when called with a device name that is not found in the list. This causes undefined behavior, in my case calling an invalid function pointer later on: kernel tried to execute NX-protected page - exploit attempt? (uid: 0) BUG: unable to handle kernel paging request at ffff8800cb3ddc08 [...] Call Trace: [] ? vfio_group_fops_unl_ioctl+0x253/0x410 [vfio] [] do_vfs_ioctl+0x2cd/0x4c0 [] ? __fget+0x77/0xb0 [] SyS_ioctl+0x79/0x90 [] ? syscall_return_slowpath+0x50/0x130 [] entry_SYSCALL_64_fastpath+0x16/0x75 Fix the issue by returning NULL when there is no device with the requested name in the list. Cc: stable@vger.kernel.org # v4.2+ Fixes: 4bc94d5dc95d ("vfio: Fix lockdep issue") Signed-off-by: Joerg Roedel Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 1c0f98c2f634..ab056f7af54d 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -711,11 +711,12 @@ EXPORT_SYMBOL_GPL(vfio_device_get_from_dev); static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, char *buf) { - struct vfio_device *device; + struct vfio_device *it, *device = NULL; mutex_lock(&group->device_lock); - list_for_each_entry(device, &group->device_list, group_next) { - if (!strcmp(dev_name(device->dev), buf)) { + list_for_each_entry(it, &group->device_list, group_next) { + if (!strcmp(dev_name(it->dev), buf)) { + device = it; vfio_device_get(device); break; } From 033291eccbdb1b70ffc02641edae19ac825dc75d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 15 Oct 2015 15:08:48 -0600 Subject: [PATCH 16/17] vfio: Include No-IOMMU mode There is really no way to safely give a user full access to a DMA capable device without an IOMMU to protect the host system. There is also no way to provide DMA translation, for use cases such as device assignment to virtual machines. However, there are still those users that want userspace drivers even under those conditions. The UIO driver exists for this use case, but does not provide the degree of device access and programming that VFIO has. In an effort to avoid code duplication, this introduces a No-IOMMU mode for VFIO. This mode requires building VFIO with CONFIG_VFIO_NOIOMMU and enabling the "enable_unsafe_noiommu_mode" option on the vfio driver. This should make it very clear that this mode is not safe. Additionally, CAP_SYS_RAWIO privileges are necessary to work with groups and containers using this mode. Groups making use of this support are named /dev/vfio/noiommu-$GROUP and can only make use of the special VFIO_NOIOMMU_IOMMU for the container. Use of this mode, specifically binding a device without a native IOMMU group to a VFIO bus driver will taint the kernel and should therefore not be considered supported. This patch includes no-iommu support for the vfio-pci bus driver only. Signed-off-by: Alex Williamson Acked-by: Michael S. Tsirkin --- drivers/vfio/Kconfig | 15 +++ drivers/vfio/pci/vfio_pci.c | 8 +- drivers/vfio/vfio.c | 186 ++++++++++++++++++++++++++++++++++-- include/linux/vfio.h | 3 + include/uapi/linux/vfio.h | 7 ++ 5 files changed, 209 insertions(+), 10 deletions(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 454017928ed0..b6d3cdc2791b 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -31,5 +31,20 @@ menuconfig VFIO If you don't know what to do here, say N. +menuconfig VFIO_NOIOMMU + bool "VFIO No-IOMMU support" + depends on VFIO + help + VFIO is built on the ability to isolate devices using the IOMMU. + Only with an IOMMU can userspace access to DMA capable devices be + considered secure. VFIO No-IOMMU mode enables IOMMU groups for + devices without IOMMU backing for the purpose of re-using the VFIO + infrastructure in a non-secure mode. Use of this mode will result + in an unsupportable kernel and will therefore taint the kernel. + Device assignment to virtual machines is also not possible with + this mode since there is no IOMMU to provide DMA translation. + + If you don't know what to do here, say N. + source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 964ad572aaee..32b88bd2c82c 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -940,13 +940,13 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) return -EINVAL; - group = iommu_group_get(&pdev->dev); + group = vfio_iommu_group_get(&pdev->dev); if (!group) return -EINVAL; vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) { - iommu_group_put(group); + vfio_iommu_group_put(group, &pdev->dev); return -ENOMEM; } @@ -957,7 +957,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); if (ret) { - iommu_group_put(group); + vfio_iommu_group_put(group, &pdev->dev); kfree(vdev); return ret; } @@ -993,7 +993,7 @@ static void vfio_pci_remove(struct pci_dev *pdev) if (!vdev) return; - iommu_group_put(pdev->dev.iommu_group); + vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); kfree(vdev); if (vfio_pci_is_vga(pdev)) { diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index ab056f7af54d..de632da2e22f 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -62,6 +62,7 @@ struct vfio_container { struct rw_semaphore group_lock; struct vfio_iommu_driver *iommu_driver; void *iommu_data; + bool noiommu; }; struct vfio_unbound_dev { @@ -84,6 +85,7 @@ struct vfio_group { struct list_head unbound_list; struct mutex unbound_lock; atomic_t opened; + bool noiommu; }; struct vfio_device { @@ -95,6 +97,147 @@ struct vfio_device { void *device_data; }; +#ifdef CONFIG_VFIO_NOIOMMU +static bool noiommu __read_mostly; +module_param_named(enable_unsafe_noiommu_support, + noiommu, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); +#endif + +/* + * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe + * and remove functions, any use cases other than acquiring the first + * reference for the purpose of calling vfio_add_group_dev() or removing + * that symmetric reference after vfio_del_group_dev() should use the raw + * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put() + * removes the device from the dummy group and cannot be nested. + */ +struct iommu_group *vfio_iommu_group_get(struct device *dev) +{ + struct iommu_group *group; + int __maybe_unused ret; + + group = iommu_group_get(dev); + +#ifdef CONFIG_VFIO_NOIOMMU + /* + * With noiommu enabled, an IOMMU group will be created for a device + * that doesn't already have one and doesn't have an iommu_ops on their + * bus. We use iommu_present() again in the main code to detect these + * fake groups. + */ + if (group || !noiommu || iommu_present(dev->bus)) + return group; + + group = iommu_group_alloc(); + if (IS_ERR(group)) + return NULL; + + iommu_group_set_name(group, "vfio-noiommu"); + ret = iommu_group_add_device(group, dev); + iommu_group_put(group); + if (ret) + return NULL; + + /* + * Where to taint? At this point we've added an IOMMU group for a + * device that is not backed by iommu_ops, therefore any iommu_ + * callback using iommu_ops can legitimately Oops. So, while we may + * be about to give a DMA capable device to a user without IOMMU + * protection, which is clearly taint-worthy, let's go ahead and do + * it here. + */ + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); +#endif + + return group; +} +EXPORT_SYMBOL_GPL(vfio_iommu_group_get); + +void vfio_iommu_group_put(struct iommu_group *group, struct device *dev) +{ +#ifdef CONFIG_VFIO_NOIOMMU + if (!iommu_present(dev->bus)) + iommu_group_remove_device(dev); +#endif + + iommu_group_put(group); +} +EXPORT_SYMBOL_GPL(vfio_iommu_group_put); + +#ifdef CONFIG_VFIO_NOIOMMU +static void *vfio_noiommu_open(unsigned long arg) +{ + if (arg != VFIO_NOIOMMU_IOMMU) + return ERR_PTR(-EINVAL); + if (!capable(CAP_SYS_RAWIO)) + return ERR_PTR(-EPERM); + + return NULL; +} + +static void vfio_noiommu_release(void *iommu_data) +{ +} + +static long vfio_noiommu_ioctl(void *iommu_data, + unsigned int cmd, unsigned long arg) +{ + if (cmd == VFIO_CHECK_EXTENSION) + return arg == VFIO_NOIOMMU_IOMMU ? 1 : 0; + + return -ENOTTY; +} + +static int vfio_iommu_present(struct device *dev, void *unused) +{ + return iommu_present(dev->bus) ? 1 : 0; +} + +static int vfio_noiommu_attach_group(void *iommu_data, + struct iommu_group *iommu_group) +{ + return iommu_group_for_each_dev(iommu_group, NULL, + vfio_iommu_present) ? -EINVAL : 0; +} + +static void vfio_noiommu_detach_group(void *iommu_data, + struct iommu_group *iommu_group) +{ +} + +static struct vfio_iommu_driver_ops vfio_noiommu_ops = { + .name = "vfio-noiommu", + .owner = THIS_MODULE, + .open = vfio_noiommu_open, + .release = vfio_noiommu_release, + .ioctl = vfio_noiommu_ioctl, + .attach_group = vfio_noiommu_attach_group, + .detach_group = vfio_noiommu_detach_group, +}; + +static struct vfio_iommu_driver vfio_noiommu_driver = { + .ops = &vfio_noiommu_ops, +}; + +/* + * Wrap IOMMU drivers, the noiommu driver is the one and only driver for + * noiommu groups (and thus containers) and not available for normal groups. + */ +#define vfio_for_each_iommu_driver(con, pos) \ + for (pos = con->noiommu ? &vfio_noiommu_driver : \ + list_first_entry(&vfio.iommu_drivers_list, \ + struct vfio_iommu_driver, vfio_next); \ + (con->noiommu ? pos != NULL : \ + &pos->vfio_next != &vfio.iommu_drivers_list); \ + pos = con->noiommu ? NULL : list_next_entry(pos, vfio_next)) +#else +#define vfio_for_each_iommu_driver(con, pos) \ + list_for_each_entry(pos, &vfio.iommu_drivers_list, vfio_next) +#endif + + /** * IOMMU driver registration */ @@ -199,7 +342,8 @@ static void vfio_group_unlock_and_free(struct vfio_group *group) /** * Group objects - create, release, get, put, search */ -static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) +static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, + bool noiommu) { struct vfio_group *group, *tmp; struct device *dev; @@ -217,6 +361,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) atomic_set(&group->container_users, 0); atomic_set(&group->opened, 0); group->iommu_group = iommu_group; + group->noiommu = noiommu; group->nb.notifier_call = vfio_iommu_group_notifier; @@ -252,7 +397,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) dev = device_create(vfio.class, NULL, MKDEV(MAJOR(vfio.group_devt), minor), - group, "%d", iommu_group_id(iommu_group)); + group, "%s%d", noiommu ? "noiommu-" : "", + iommu_group_id(iommu_group)); if (IS_ERR(dev)) { vfio_free_group_minor(minor); vfio_group_unlock_and_free(group); @@ -640,7 +786,8 @@ int vfio_add_group_dev(struct device *dev, group = vfio_group_get_from_iommu(iommu_group); if (!group) { - group = vfio_create_group(iommu_group); + group = vfio_create_group(iommu_group, + !iommu_present(dev->bus)); if (IS_ERR(group)) { iommu_group_put(iommu_group); return PTR_ERR(group); @@ -852,8 +999,7 @@ static long vfio_ioctl_check_extension(struct vfio_container *container, */ if (!driver) { mutex_lock(&vfio.iommu_drivers_lock); - list_for_each_entry(driver, &vfio.iommu_drivers_list, - vfio_next) { + vfio_for_each_iommu_driver(container, driver) { if (!try_module_get(driver->ops->owner)) continue; @@ -922,7 +1068,7 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container, } mutex_lock(&vfio.iommu_drivers_lock); - list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { + vfio_for_each_iommu_driver(container, driver) { void *data; if (!try_module_get(driver->ops->owner)) @@ -1187,6 +1333,9 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) if (atomic_read(&group->container_users)) return -EINVAL; + if (group->noiommu && !capable(CAP_SYS_RAWIO)) + return -EPERM; + f = fdget(container_fd); if (!f.file) return -EBADF; @@ -1202,6 +1351,13 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) down_write(&container->group_lock); + /* Real groups and fake groups cannot mix */ + if (!list_empty(&container->group_list) && + container->noiommu != group->noiommu) { + ret = -EPERM; + goto unlock_out; + } + driver = container->iommu_driver; if (driver) { ret = driver->ops->attach_group(container->iommu_data, @@ -1211,6 +1367,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) } group->container = container; + container->noiommu = group->noiommu; list_add(&group->container_next, &container->group_list); /* Get a reference on the container and mark a user within the group */ @@ -1241,6 +1398,9 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) !group->container->iommu_driver || !vfio_group_viable(group)) return -EINVAL; + if (group->noiommu && !capable(CAP_SYS_RAWIO)) + return -EPERM; + device = vfio_device_get_from_name(group, buf); if (!device) return -ENODEV; @@ -1283,6 +1443,10 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) fd_install(ret, filep); + if (group->noiommu) + dev_warn(device->dev, "vfio-noiommu device opened by user " + "(%s:%d)\n", current->comm, task_pid_nr(current)); + return ret; } @@ -1371,6 +1535,11 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep) if (!group) return -ENODEV; + if (group->noiommu && !capable(CAP_SYS_RAWIO)) { + vfio_group_put(group); + return -EPERM; + } + /* Do we need multiple instances of the group open? Seems not. */ opened = atomic_cmpxchg(&group->opened, 0, 1); if (opened) { @@ -1533,6 +1702,11 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep) if (!atomic_inc_not_zero(&group->container_users)) return ERR_PTR(-EINVAL); + if (group->noiommu) { + atomic_dec(&group->container_users); + return ERR_PTR(-EPERM); + } + if (!group->container->iommu_driver || !vfio_group_viable(group)) { atomic_dec(&group->container_users); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ddb440975382..610a86a892b8 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -44,6 +44,9 @@ struct vfio_device_ops { void (*request)(void *device_data, unsigned int count); }; +extern struct iommu_group *vfio_iommu_group_get(struct device *dev); +extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); + extern int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops, void *device_data); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 9fd7b5d8df2f..751b69f858c8 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -38,6 +38,13 @@ #define VFIO_SPAPR_TCE_v2_IOMMU 7 +/* + * The No-IOMMU IOMMU offers no translation or isolation for devices and + * supports no ioctls outside of VFIO_CHECK_EXTENSION. Use of VFIO's No-IOMMU + * code will taint the host kernel and should be used with extreme caution. + */ +#define VFIO_NOIOMMU_IOMMU 8 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between From 222e684ca762e9288108fcf852eb5d08cbe10ae3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 9 Nov 2015 15:24:55 +0300 Subject: [PATCH 17/17] vfio/pci: make an array larger Smatch complains about a possible out of bounds error: drivers/vfio/pci/vfio_pci_config.c:1241 vfio_cap_init() error: buffer overflow 'pci_cap_length' 20 <= 20 The problem is that pci_cap_length[] was defined as large enough to hold "PCI_CAP_ID_AF + 1" elements. The code in vfio_cap_init() assumes it has PCI_CAP_ID_MAX + 1 elements. Originally, PCI_CAP_ID_AF and PCI_CAP_ID_MAX were the same but then we introduced PCI_CAP_ID_EA in commit f80b0ba95964 ("PCI: Add Enhanced Allocation register entries") so now the array is too small. Let's fix this by making the array size PCI_CAP_ID_MAX + 1. And let's make a similar change to pci_ext_cap_length[] for consistency. Also both these arrays can be made const. Signed-off-by: Dan Carpenter Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index a8657ef6382f..fe2b470d7ec6 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -46,7 +46,7 @@ * 0: Removed from the user visible capability list * FF: Variable length */ -static u8 pci_cap_length[] = { +static const u8 pci_cap_length[PCI_CAP_ID_MAX + 1] = { [PCI_CAP_ID_BASIC] = PCI_STD_HEADER_SIZEOF, /* pci config header */ [PCI_CAP_ID_PM] = PCI_PM_SIZEOF, [PCI_CAP_ID_AGP] = PCI_AGP_SIZEOF, @@ -74,7 +74,7 @@ static u8 pci_cap_length[] = { * 0: Removed or masked from the user visible capabilty list * FF: Variable length */ -static u16 pci_ext_cap_length[] = { +static const u16 pci_ext_cap_length[PCI_EXT_CAP_ID_MAX + 1] = { [PCI_EXT_CAP_ID_ERR] = PCI_ERR_ROOT_COMMAND, [PCI_EXT_CAP_ID_VC] = 0xFF, [PCI_EXT_CAP_ID_DSN] = PCI_EXT_CAP_DSN_SIZEOF,