From 51be7a9a261ce18c520fb3928b168feb77522745 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 12 Jan 2017 23:36:32 +0200 Subject: [PATCH 01/13] virtio_mmio: expose header to userspace It's handy for userspace emulators like QEMU. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mmio.c | 2 +- include/uapi/linux/Kbuild | 1 + include/{ => uapi}/linux/virtio_mmio.h | 0 3 files changed, 2 insertions(+), 1 deletion(-) rename include/{ => uapi}/linux/virtio_mmio.h (100%) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index c71fde5fe835..08357d70a891 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -70,7 +70,7 @@ #include #include #include -#include +#include #include diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index f330ba4547cf..718fa73310e1 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -458,6 +458,7 @@ header-y += virtio_console.h header-y += virtio_gpu.h header-y += virtio_ids.h header-y += virtio_input.h +header-y += virtio_mmio.h header-y += virtio_net.h header-y += virtio_pci.h header-y += virtio_ring.h diff --git a/include/linux/virtio_mmio.h b/include/uapi/linux/virtio_mmio.h similarity index 100% rename from include/linux/virtio_mmio.h rename to include/uapi/linux/virtio_mmio.h From e3b56cdd4351f0e227d4d847eeadff4c82aef1b9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 7 Feb 2017 15:49:50 +0800 Subject: [PATCH 02/13] vhost: try avoiding avail index access when getting descriptor If last avail idx is not equal to cached avail idx, we're sure there's still available buffers in the virtqueue so there's no need to re-read avail idx. So let's skip this to avoid unnecessary userspace memory access and memory barrier. Pktgen test show about 3% improvement on rx pps. Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 8f99fe08de02..1f7e4e4e6f8e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1930,25 +1930,32 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* Check it isn't doing very strange things with descriptor numbers. */ last_avail_idx = vq->last_avail_idx; - if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { - vq_err(vq, "Failed to access avail idx at %p\n", - &vq->avail->idx); - return -EFAULT; + + if (vq->avail_idx == vq->last_avail_idx) { + if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { + vq_err(vq, "Failed to access avail idx at %p\n", + &vq->avail->idx); + return -EFAULT; + } + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); + + if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { + vq_err(vq, "Guest moved used index from %u to %u", + last_avail_idx, vq->avail_idx); + return -EFAULT; + } + + /* If there's nothing new since last we looked, return + * invalid. + */ + if (vq->avail_idx == last_avail_idx) + return vq->num; + + /* Only get avail ring entries after they have been + * exposed by guest. + */ + smp_rmb(); } - vq->avail_idx = vhost16_to_cpu(vq, avail_idx); - - if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { - vq_err(vq, "Guest moved used index from %u to %u", - last_avail_idx, vq->avail_idx); - return -EFAULT; - } - - /* If there's nothing new since last we looked, return invalid. */ - if (vq->avail_idx == last_avail_idx) - return vq->num; - - /* Only get avail ring entries after they have been exposed by guest. */ - smp_rmb(); /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ From 5c34d002dcc7a6dd665a19d098b4f4cd5501ba1a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:18 +0100 Subject: [PATCH 03/13] virtio_pci: remove struct virtio_pci_vq_info We don't really need struct virtio_pci_vq_info, as most field in there are redundant: - the vq backpointer is not strictly neede to start with - the entry in the vqs list is not needed - the generic virtqueue already has list, we only need to check if it has a callback to get the same semantics - we can use a simple array to look up the MSI-X vec if needed. - That simple array now also duoble serves to replace the per_vq_vectors flag Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 117 +++++++++-------------------- drivers/virtio/virtio_pci_common.h | 25 +----- drivers/virtio/virtio_pci_legacy.c | 6 +- drivers/virtio/virtio_pci_modern.c | 6 +- 4 files changed, 39 insertions(+), 115 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 186cbab327b8..a33767318cbf 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -62,16 +62,13 @@ static irqreturn_t vp_config_changed(int irq, void *opaque) static irqreturn_t vp_vring_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; - struct virtio_pci_vq_info *info; irqreturn_t ret = IRQ_NONE; - unsigned long flags; + struct virtqueue *vq; - spin_lock_irqsave(&vp_dev->lock, flags); - list_for_each_entry(info, &vp_dev->virtqueues, node) { - if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) + list_for_each_entry(vq, &vp_dev->vdev.vqs, list) { + if (vq->callback && vring_interrupt(irq, vq) == IRQ_HANDLED) ret = IRQ_HANDLED; } - spin_unlock_irqrestore(&vp_dev->lock, flags); return ret; } @@ -167,55 +164,6 @@ error: return err; } -static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *vq), - const char *name, - u16 msix_vec) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); - struct virtqueue *vq; - unsigned long flags; - - /* fill out our structure that represents an active queue */ - if (!info) - return ERR_PTR(-ENOMEM); - - vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, msix_vec); - if (IS_ERR(vq)) - goto out_info; - - info->vq = vq; - if (callback) { - spin_lock_irqsave(&vp_dev->lock, flags); - list_add(&info->node, &vp_dev->virtqueues); - spin_unlock_irqrestore(&vp_dev->lock, flags); - } else { - INIT_LIST_HEAD(&info->node); - } - - vp_dev->vqs[index] = info; - return vq; - -out_info: - kfree(info); - return vq; -} - -static void vp_del_vq(struct virtqueue *vq) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; - unsigned long flags; - - spin_lock_irqsave(&vp_dev->lock, flags); - list_del(&info->node); - spin_unlock_irqrestore(&vp_dev->lock, flags); - - vp_dev->del_vq(info); - kfree(info); -} - /* the config->del_vqs() implementation */ void vp_del_vqs(struct virtio_device *vdev) { @@ -224,16 +172,15 @@ void vp_del_vqs(struct virtio_device *vdev) int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - if (vp_dev->per_vq_vectors) { - int v = vp_dev->vqs[vq->index]->msix_vector; + if (vp_dev->msix_vector_map) { + int v = vp_dev->msix_vector_map[vq->index]; if (v != VIRTIO_MSI_NO_VECTOR) free_irq(pci_irq_vector(vp_dev->pci_dev, v), vq); } - vp_del_vq(vq); + vp_dev->del_vq(vq); } - vp_dev->per_vq_vectors = false; if (vp_dev->intx_enabled) { free_irq(vp_dev->pci_dev->irq, vp_dev); @@ -261,8 +208,8 @@ void vp_del_vqs(struct virtio_device *vdev) vp_dev->msix_names = NULL; kfree(vp_dev->msix_affinity_masks); vp_dev->msix_affinity_masks = NULL; - kfree(vp_dev->vqs); - vp_dev->vqs = NULL; + kfree(vp_dev->msix_vector_map); + vp_dev->msix_vector_map = NULL; } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, @@ -275,10 +222,6 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, u16 msix_vec; int i, err, nvectors, allocated_vectors; - vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); - if (!vp_dev->vqs) - return -ENOMEM; - if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; @@ -294,7 +237,13 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (err) goto error_find; - vp_dev->per_vq_vectors = per_vq_vectors; + if (per_vq_vectors) { + vp_dev->msix_vector_map = kmalloc_array(nvqs, + sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); + if (!vp_dev->msix_vector_map) + goto error_find; + } + allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { if (!names[i]) { @@ -304,19 +253,25 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (!callbacks[i]) msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (vp_dev->per_vq_vectors) + else if (per_vq_vectors) msix_vec = allocated_vectors++; else msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec); + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], + msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) + if (!per_vq_vectors) continue; + if (msix_vec == VIRTIO_MSI_NO_VECTOR) { + vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; + continue; + } + /* allocate per-vq irq if available and necessary */ snprintf(vp_dev->msix_names[msix_vec], sizeof *vp_dev->msix_names, @@ -326,8 +281,12 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, vring_interrupt, 0, vp_dev->msix_names[msix_vec], vqs[i]); - if (err) + if (err) { + /* don't free this irq on error */ + vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; goto error_find; + } + vp_dev->msix_vector_map[i] = msix_vec; } return 0; @@ -343,23 +302,18 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i, err; - vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); - if (!vp_dev->vqs) - return -ENOMEM; - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) goto out_del_vqs; vp_dev->intx_enabled = 1; - vp_dev->per_vq_vectors = false; for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; continue; } - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); @@ -409,16 +363,15 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) { struct virtio_device *vdev = vq->vdev; struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; - struct cpumask *mask; - unsigned int irq; if (!vq->callback) return -EINVAL; if (vp_dev->msix_enabled) { - mask = vp_dev->msix_affinity_masks[info->msix_vector]; - irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); + int vec = vp_dev->msix_vector_map[vq->index]; + struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; + unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec); + if (cpu == -1) irq_set_affinity_hint(irq, NULL); else { @@ -498,8 +451,6 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, vp_dev->vdev.dev.parent = &pci_dev->dev; vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; - INIT_LIST_HEAD(&vp_dev->virtqueues); - spin_lock_init(&vp_dev->lock); /* enable the device */ rc = pci_enable_device(pci_dev); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index b2f666250ae0..2038887bdf23 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -31,17 +31,6 @@ #include #include -struct virtio_pci_vq_info { - /* the actual virtqueue */ - struct virtqueue *vq; - - /* the list node for the virtqueues list */ - struct list_head node; - - /* MSI-X vector (or none) */ - unsigned msix_vector; -}; - /* Our device structure */ struct virtio_pci_device { struct virtio_device vdev; @@ -75,13 +64,6 @@ struct virtio_pci_device { /* the IO mapping for the PCI config space */ void __iomem *ioaddr; - /* a list of queues so we can dispatch IRQs */ - spinlock_t lock; - struct list_head virtqueues; - - /* array of all queues for house-keeping */ - struct virtio_pci_vq_info **vqs; - /* MSI-X support */ int msix_enabled; int intx_enabled; @@ -94,16 +76,15 @@ struct virtio_pci_device { /* Vectors allocated, excluding per-vq vectors if any */ unsigned msix_used_vectors; - /* Whether we have vector per vq */ - bool per_vq_vectors; + /* Map of per-VQ MSI-X vectors, may be NULL */ + unsigned *msix_vector_map; struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned idx, void (*callback)(struct virtqueue *vq), const char *name, u16 msix_vec); - void (*del_vq)(struct virtio_pci_vq_info *info); + void (*del_vq)(struct virtqueue *vq); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); }; diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 6d9e5173d5fa..47292dad0ff9 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -112,7 +112,6 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, @@ -130,8 +129,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) return ERR_PTR(-ENOENT); - info->msix_vector = msix_vec; - /* create the vring */ vq = vring_create_virtqueue(index, num, VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, @@ -162,9 +159,8 @@ out_deactivate: return ERR_PTR(err); } -static void del_vq(struct virtio_pci_vq_info *info) +static void del_vq(struct virtqueue *vq) { - struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 4bf7ab375894..00e6fc1df407 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -293,7 +293,6 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, @@ -323,8 +322,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, /* get offset of notification word for this vq */ off = vp_ioread16(&cfg->queue_notify_off); - info->msix_vector = msix_vec; - /* create the vring */ vq = vring_create_virtqueue(index, num, SMP_CACHE_BYTES, &vp_dev->vdev, @@ -409,9 +406,8 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, return 0; } -static void del_vq(struct virtio_pci_vq_info *info) +static void del_vq(struct virtqueue *vq) { - struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); vp_iowrite16(vq->index, &vp_dev->common->queue_select); From 07ec51480b5eb1233f8c1b0f5d7a7c8d1247c507 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:19 +0100 Subject: [PATCH 04/13] virtio_pci: use shared interrupts for virtqueues This lets IRQ layer handle dispatching IRQs to separate handlers for the case where we don't have per-VQ MSI-X vectors, and allows us to greatly simplify the code based on the assumption that we always have interrupt vector 0 (legacy INTx or config interrupt for MSI-X) available, and any other interrupt is request/freed throught the VQ, even if the actual interrupt line might be shared in some cases. This allows removing a great deal of variables keeping track of the interrupt state in struct virtio_pci_device, as we can now simply walk the list of VQs and deal with per-VQ interrupt handlers there, and only treat vector 0 special. Additionally clean up the VQ allocation code to properly unwind on error instead of having a single global cleanup label, which is error prone, and in this case also leads to more code. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 235 +++++++++++++---------------- drivers/virtio/virtio_pci_common.h | 16 +- 2 files changed, 106 insertions(+), 145 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index a33767318cbf..274dc1ff09c0 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -33,10 +33,8 @@ void vp_synchronize_vectors(struct virtio_device *vdev) struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) - synchronize_irq(vp_dev->pci_dev->irq); - - for (i = 0; i < vp_dev->msix_vectors; ++i) + synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0)); + for (i = 1; i < vp_dev->msix_vectors; i++) synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } @@ -99,77 +97,10 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) return vp_vring_interrupt(irq, opaque); } -static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, - bool per_vq_vectors) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - const char *name = dev_name(&vp_dev->vdev.dev); - unsigned i, v; - int err = -ENOMEM; - - vp_dev->msix_vectors = nvectors; - - vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, - GFP_KERNEL); - if (!vp_dev->msix_names) - goto error; - vp_dev->msix_affinity_masks - = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, - GFP_KERNEL); - if (!vp_dev->msix_affinity_masks) - goto error; - for (i = 0; i < nvectors; ++i) - if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], - GFP_KERNEL)) - goto error; - - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); - if (err < 0) - goto error; - vp_dev->msix_enabled = 1; - - /* Set the vector used for configuration */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-config", name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_config_changed, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - - v = vp_dev->config_vector(vp_dev, v); - /* Verify we had enough resources to assign the vector */ - if (v == VIRTIO_MSI_NO_VECTOR) { - err = -EBUSY; - goto error; - } - - if (!per_vq_vectors) { - /* Shared vector for all VQs */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-virtqueues", name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_vring_interrupt, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - } - return 0; -error: - return err; -} - -/* the config->del_vqs() implementation */ -void vp_del_vqs(struct virtio_device *vdev) +static void vp_remove_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq, *n; - int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { if (vp_dev->msix_vector_map) { @@ -181,35 +112,33 @@ void vp_del_vqs(struct virtio_device *vdev) } vp_dev->del_vq(vq); } +} - if (vp_dev->intx_enabled) { - free_irq(vp_dev->pci_dev->irq, vp_dev); - vp_dev->intx_enabled = 0; - } +/* the config->del_vqs() implementation */ +void vp_del_vqs(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; - for (i = 0; i < vp_dev->msix_used_vectors; ++i) - free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); + if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs))) + return; - for (i = 0; i < vp_dev->msix_vectors; i++) - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); + vp_remove_vqs(vdev); if (vp_dev->msix_enabled) { + for (i = 0; i < vp_dev->msix_vectors; i++) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + /* Disable the vector used for configuration */ vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); - pci_free_irq_vectors(vp_dev->pci_dev); - vp_dev->msix_enabled = 0; + kfree(vp_dev->msix_affinity_masks); + kfree(vp_dev->msix_names); + kfree(vp_dev->msix_vector_map); } - vp_dev->msix_vectors = 0; - vp_dev->msix_used_vectors = 0; - kfree(vp_dev->msix_names); - vp_dev->msix_names = NULL; - kfree(vp_dev->msix_affinity_masks); - vp_dev->msix_affinity_masks = NULL; - kfree(vp_dev->msix_vector_map); - vp_dev->msix_vector_map = NULL; + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); + pci_free_irq_vectors(vp_dev->pci_dev); } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, @@ -219,79 +148,122 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, bool per_vq_vectors) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + const char *name = dev_name(&vp_dev->vdev.dev); + int i, err = -ENOMEM, allocated_vectors, nvectors; u16 msix_vec; - int i, err, nvectors, allocated_vectors; + + nvectors = 1; + for (i = 0; i < nvqs; i++) + if (callbacks[i]) + nvectors++; if (per_vq_vectors) { - /* Best option: one for change interrupt, one per vq. */ - nvectors = 1; - for (i = 0; i < nvqs; ++i) - if (callbacks[i]) - ++nvectors; + err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, + PCI_IRQ_MSIX); } else { - /* Second best: one for change, shared for all vqs. */ - nvectors = 2; + err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2, + PCI_IRQ_MSIX); + } + if (err < 0) + return err; + + vp_dev->msix_vectors = nvectors; + vp_dev->msix_names = kmalloc_array(nvectors, + sizeof(*vp_dev->msix_names), GFP_KERNEL); + if (!vp_dev->msix_names) + goto out_free_irq_vectors; + + vp_dev->msix_affinity_masks = kcalloc(nvectors, + sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL); + if (!vp_dev->msix_affinity_masks) + goto out_free_msix_names; + + for (i = 0; i < nvectors; ++i) { + if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], + GFP_KERNEL)) + goto out_free_msix_affinity_masks; } - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); + /* Set the vector used for configuration */ + snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names), + "%s-config", name); + err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed, + 0, vp_dev->msix_names[0], vp_dev); if (err) - goto error_find; + goto out_free_irq_vectors; - if (per_vq_vectors) { - vp_dev->msix_vector_map = kmalloc_array(nvqs, - sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); - if (!vp_dev->msix_vector_map) - goto error_find; + /* Verify we had enough resources to assign the vector */ + if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto out_free_config_irq; } - allocated_vectors = vp_dev->msix_used_vectors; + vp_dev->msix_vector_map = kmalloc_array(nvqs, + sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); + if (!vp_dev->msix_vector_map) + goto out_disable_config_irq; + + allocated_vectors = 1; /* vector 0 is the config interrupt */ for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; continue; } - if (!callbacks[i]) - msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (per_vq_vectors) - msix_vec = allocated_vectors++; + if (callbacks[i]) + msix_vec = allocated_vectors; else - msix_vec = VP_MSIX_VQ_VECTOR; + msix_vec = VIRTIO_MSI_NO_VECTOR; + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); - goto error_find; + goto out_remove_vqs; } - if (!per_vq_vectors) - continue; - if (msix_vec == VIRTIO_MSI_NO_VECTOR) { vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; continue; } - /* allocate per-vq irq if available and necessary */ - snprintf(vp_dev->msix_names[msix_vec], - sizeof *vp_dev->msix_names, - "%s-%s", + snprintf(vp_dev->msix_names[i + 1], + sizeof(*vp_dev->msix_names), "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, 0, - vp_dev->msix_names[msix_vec], - vqs[i]); + vring_interrupt, IRQF_SHARED, + vp_dev->msix_names[i + 1], vqs[i]); if (err) { /* don't free this irq on error */ vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; - goto error_find; + goto out_remove_vqs; } vp_dev->msix_vector_map[i] = msix_vec; + + if (per_vq_vectors) + allocated_vectors++; } + + vp_dev->msix_enabled = 1; return 0; -error_find: - vp_del_vqs(vdev); +out_remove_vqs: + vp_remove_vqs(vdev); + kfree(vp_dev->msix_vector_map); +out_disable_config_irq: + vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); +out_free_config_irq: + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); +out_free_msix_affinity_masks: + for (i = 0; i < nvectors; i++) { + if (vp_dev->msix_affinity_masks[i]) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + } + kfree(vp_dev->msix_affinity_masks); +out_free_msix_names: + kfree(vp_dev->msix_names); +out_free_irq_vectors: + pci_free_irq_vectors(vp_dev->pci_dev); return err; } @@ -305,9 +277,8 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) - goto out_del_vqs; + return err; - vp_dev->intx_enabled = 1; for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; @@ -317,13 +288,15 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); - goto out_del_vqs; + goto out_remove_vqs; } } return 0; -out_del_vqs: - vp_del_vqs(vdev); + +out_remove_vqs: + vp_remove_vqs(vdev); + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); return err; } diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 2038887bdf23..85593867e712 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -66,16 +66,12 @@ struct virtio_pci_device { /* MSI-X support */ int msix_enabled; - int intx_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ char (*msix_names)[256]; - /* Number of available vectors */ - unsigned msix_vectors; - /* Vectors allocated, excluding per-vq vectors if any */ - unsigned msix_used_vectors; - + /* Total Number of MSI-X vectors (including per-VQ ones). */ + int msix_vectors; /* Map of per-VQ MSI-X vectors, may be NULL */ unsigned *msix_vector_map; @@ -89,14 +85,6 @@ struct virtio_pci_device { u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); }; -/* Constants for MSI-X */ -/* Use first vector for configuration changes, second and the rest for - * virtqueues Thus, we need at least 2 vectors for MSI. */ -enum { - VP_MSIX_CONFIG_VECTOR = 0, - VP_MSIX_VQ_VECTOR = 1, -}; - /* Convert a generic virtio device to our structure */ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) { From 53a020c661741f3b87ad3ac6fa545088aaebac9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:20 +0100 Subject: [PATCH 05/13] virtio_pci: don't duplicate the msix_enable flag in struct pci_dev Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 5 ++--- drivers/virtio/virtio_pci_common.h | 2 -- drivers/virtio/virtio_pci_legacy.c | 2 +- drivers/virtio/virtio_pci_modern.c | 2 +- include/uapi/linux/virtio_pci.h | 2 +- 5 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 274dc1ff09c0..b83053082875 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -125,7 +125,7 @@ void vp_del_vqs(struct virtio_device *vdev) vp_remove_vqs(vdev); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { for (i = 0; i < vp_dev->msix_vectors; i++) free_cpumask_var(vp_dev->msix_affinity_masks[i]); @@ -244,7 +244,6 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, allocated_vectors++; } - vp_dev->msix_enabled = 1; return 0; out_remove_vqs: @@ -340,7 +339,7 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) if (!vq->callback) return -EINVAL; - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { int vec = vp_dev->msix_vector_map[vq->index]; struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 85593867e712..217ca876eed7 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -64,8 +64,6 @@ struct virtio_pci_device { /* the IO mapping for the PCI config space */ void __iomem *ioaddr; - /* MSI-X support */ - int msix_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 47292dad0ff9..2ab6aee51bf6 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -165,7 +165,7 @@ static void del_vq(struct virtqueue *vq) iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); /* Flush the write out to device */ diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 00e6fc1df407..e5ce31091953 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -412,7 +412,7 @@ static void del_vq(struct virtqueue *vq) vp_iowrite16(vq->index, &vp_dev->common->queue_select); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { vp_iowrite16(VIRTIO_MSI_NO_VECTOR, &vp_dev->common->queue_msix_vector); /* Flush the write out to device */ diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 90007a1abcab..15b4385a2be1 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -79,7 +79,7 @@ * configuration space */ #define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) /* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ -#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled) +#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->pci_dev->msix_enabled) /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 From 52a61516125fa9a21b3bdf4f90928308e2e5573f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:21 +0100 Subject: [PATCH 06/13] virtio_pci: simplify MSI-X setup Try to grab the MSI-X vectors early and fall back to the shared one before doing lots of allocations. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 35 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index b83053082875..822f8e5dcee4 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -142,14 +142,13 @@ void vp_del_vqs(struct virtio_device *vdev) } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[], - bool per_vq_vectors) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); int i, err = -ENOMEM, allocated_vectors, nvectors; + bool shared = false; u16 msix_vec; nvectors = 1; @@ -157,12 +156,16 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (callbacks[i]) nvectors++; - if (per_vq_vectors) { - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); - } else { + /* Try one vector per queue first. */ + err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, + PCI_IRQ_MSIX); + if (err < 0) { + /* Fallback to one vector for config, one shared for queues. */ + shared = true; err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2, PCI_IRQ_MSIX); + if (err < 0) + return err; } if (err < 0) return err; @@ -190,7 +193,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed, 0, vp_dev->msix_names[0], vp_dev); if (err) - goto out_free_irq_vectors; + goto out_free_msix_affinity_masks; /* Verify we had enough resources to assign the vector */ if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { @@ -240,7 +243,11 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, } vp_dev->msix_vector_map[i] = msix_vec; - if (per_vq_vectors) + /* + * Use a different vector for each queue if they are available, + * else share the same vector for all VQs. + */ + if (!shared) allocated_vectors++; } @@ -307,15 +314,9 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, { int err; - /* Try MSI-X with one vector per queue. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names); if (!err) return 0; - /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false); - if (!err) - return 0; - /* Finally fall back to regular interrupts. */ return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); } From fb5e31d970ce8b4941f03ed765d7dbefc39f22d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:22 +0100 Subject: [PATCH 07/13] virtio: allow drivers to request IRQ affinity when creating VQs Add a struct irq_affinity pointer to the find_vqs methods, which if set is used to tell the PCI layer to create the MSI-X vectors for our I/O virtqueues with the proper affinity from the start. Compared to after the fact affinity hints this gives us an instantly working setup and allows to allocate the irq descritors node-local and avoid interconnect traffic. Last but not least this will allow blk-mq queues are created based on the interrupt affinity for storage drivers. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 3 ++- drivers/char/virtio_console.c | 2 +- drivers/crypto/virtio/virtio_crypto_core.c | 2 +- drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +- drivers/misc/mic/vop/vop_main.c | 2 +- drivers/net/caif/caif_virtio.c | 3 ++- drivers/net/virtio_net.c | 2 +- drivers/remoteproc/remoteproc_virtio.c | 3 ++- drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- drivers/s390/virtio/kvm_virtio.c | 3 ++- drivers/s390/virtio/virtio_ccw.c | 3 ++- drivers/scsi/virtio_scsi.c | 3 ++- drivers/virtio/virtio_balloon.c | 3 ++- drivers/virtio/virtio_input.c | 3 ++- drivers/virtio/virtio_mmio.c | 3 ++- drivers/virtio/virtio_pci_common.c | 19 ++++++++++++------- drivers/virtio/virtio_pci_common.h | 5 ++--- drivers/virtio/virtio_pci_modern.c | 7 +++---- include/linux/virtio_config.h | 9 +++++---- net/vmw_vsock/virtio_transport.c | 3 ++- 20 files changed, 48 insertions(+), 34 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 10332c24f961..c54118bdc67d 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -411,7 +411,8 @@ static int init_vq(struct virtio_blk *vblk) } /* Discover virtqueues and write information to configuration. */ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, + NULL); if (err) goto out; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 17857beb4892..6266c0568e1d 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1939,7 +1939,7 @@ static int init_vqs(struct ports_device *portdev) /* Find the queues. */ err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs, io_callbacks, - (const char **)io_names); + (const char **)io_names, NULL); if (err) goto free; diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index fe70ec823b27..0aa2f045543b 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -119,7 +119,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) } ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, - names); + names, NULL); if (ret) goto err_find; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 1235519853f4..e975fa5b0a32 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -172,7 +172,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) vgdev->has_virgl_3d ? "enabled" : "not available"); ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs, - callbacks, names); + callbacks, names, NULL); if (ret) { DRM_ERROR("failed to find virt queues\n"); goto err_vqs; diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index 1a2b67f3183d..c2e29d7f0de8 100644 --- a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -374,7 +374,7 @@ unmap: static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], struct irq_affinity *desc) { struct _vop_vdev *vdev = to_vopvdev(dev); struct vop_device *vpdev = vdev->vpdev; diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index b306210b02b7..bc0eb47eccee 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -679,7 +679,8 @@ static int cfv_probe(struct virtio_device *vdev) goto err; /* Get the TX virtio ring. This is a "guest side vring". */ - err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names); + err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names, + NULL); if (err) goto err; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 765c2d6358da..9be74c2dfb22 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2003,7 +2003,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) } ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, - names); + names, NULL); if (ret) goto err_find; diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 364411fb7734..0142cc3f0c91 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -137,7 +137,8 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev) static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { int i, ret; diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 3090b0d3072f..5e66e081027e 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -869,7 +869,7 @@ static int rpmsg_probe(struct virtio_device *vdev) init_waitqueue_head(&vrp->sendq); /* We expect two virtqueues, rx and tx (and in this order) */ - err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names); + err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names, NULL); if (err) goto free_vrp; diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c index 5e5c11f37b24..2ce0b3eb2efe 100644 --- a/drivers/s390/virtio/kvm_virtio.c +++ b/drivers/s390/virtio/kvm_virtio.c @@ -255,7 +255,8 @@ static void kvm_del_vqs(struct virtio_device *vdev) static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct kvm_device *kdev = to_kvmdev(vdev); int i; diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 070c4da95f48..304d3b3cbfd3 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -628,7 +628,8 @@ out: static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); unsigned long *indicatorp = NULL; diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index c680d7641311..c9c5ea0611e9 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -941,7 +941,8 @@ static int virtscsi_init(struct virtio_device *vdev, } /* Discover virtqueues and write information to configuration. */ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, + NULL); if (err) goto out; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 181793f07852..36c9c8fcb7f8 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -413,7 +413,8 @@ static int init_vqs(struct virtio_balloon *vb) * optionally stat. */ nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; - err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names); + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names, + NULL); if (err) return err; diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index 350a2a5a49db..79f1293cda93 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -173,7 +173,8 @@ static int virtinput_init_vqs(struct virtio_input *vi) static const char * const names[] = { "events", "status" }; int err; - err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); + err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names, + NULL); if (err) return err; vi->evt = vqs[0]; diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 08357d70a891..78343b8f9034 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -446,7 +446,8 @@ error_available: static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); unsigned int irq = platform_get_irq(vm_dev->pdev, 0); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 822f8e5dcee4..7902e920fc73 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -143,22 +143,28 @@ void vp_del_vqs(struct virtio_device *vdev) static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); int i, err = -ENOMEM, allocated_vectors, nvectors; + unsigned flags = PCI_IRQ_MSIX; bool shared = false; u16 msix_vec; + if (desc) { + flags |= PCI_IRQ_AFFINITY; + desc->pre_vectors++; /* virtio config vector */ + } + nvectors = 1; for (i = 0; i < nvqs; i++) if (callbacks[i]) nvectors++; /* Try one vector per queue first. */ - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); + err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, + nvectors, flags, desc); if (err < 0) { /* Fallback to one vector for config, one shared for queues. */ shared = true; @@ -308,13 +314,12 @@ out_remove_vqs: /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc) { int err; - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, desc); if (!err) return 0; return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 217ca876eed7..a6ad9ec6baef 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -97,9 +97,8 @@ bool vp_notify(struct virtqueue *vq); void vp_del_vqs(struct virtio_device *vdev); /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]); + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc); const char *vp_bus_name(struct virtio_device *vdev); /* Setup the affinity for a virtqueue: diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index e5ce31091953..a7a0981e441c 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -384,13 +384,12 @@ err_map_notify: } static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; - int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names); + int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc); if (rc) return rc; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 26c155bb639b..2ebe506fe41a 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -7,6 +7,8 @@ #include #include +struct irq_affinity; + /** * virtio_config_ops - operations for configuring a virtio device * @get: read the value of a configuration field @@ -68,9 +70,8 @@ struct virtio_config_ops { void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]); + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); @@ -169,7 +170,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *callbacks[] = { c }; const char *names[] = { n }; struct virtqueue *vq; - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names); + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL); if (err < 0) return ERR_PTR(err); return vq; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 6788264acc63..9d24c0e958b1 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -532,7 +532,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names); + vsock->vqs, callbacks, names, + NULL); if (ret < 0) goto out; From bbaba479563910aaa51e59bb9027a09e396d3a3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:23 +0100 Subject: [PATCH 08/13] virtio: provide a method to get the IRQ affinity mask for a virtqueue This basically passed up the pci_irq_get_affinity information through virtio through an optional get_vq_affinity method. It is only implemented by the PCI backend for now, and only when we use per-virtqueue IRQs. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 11 +++++++++++ drivers/virtio/virtio_pci_common.h | 2 ++ drivers/virtio/virtio_pci_legacy.c | 1 + drivers/virtio/virtio_pci_modern.c | 2 ++ include/linux/virtio_config.h | 3 +++ 5 files changed, 19 insertions(+) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 7902e920fc73..df548a6fb844 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -361,6 +361,17 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) return 0; } +const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + unsigned int *map = vp_dev->msix_vector_map; + + if (!map || map[index] == VIRTIO_MSI_NO_VECTOR) + return NULL; + + return pci_irq_get_affinity(vp_dev->pci_dev, map[index]); +} + #ifdef CONFIG_PM_SLEEP static int virtio_pci_freeze(struct device *dev) { diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index a6ad9ec6baef..ac8c9d788964 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -108,6 +108,8 @@ const char *vp_bus_name(struct virtio_device *vdev); */ int vp_set_vq_affinity(struct virtqueue *vq, int cpu); +const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index); + #if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY) int virtio_pci_legacy_probe(struct virtio_pci_device *); void virtio_pci_legacy_remove(struct virtio_pci_device *); diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 2ab6aee51bf6..f7362c5fe18a 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -190,6 +190,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = vp_get_vq_affinity, }; /* the PCI probing function */ diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index a7a0981e441c..7bc3004b840e 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -437,6 +437,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = vp_get_vq_affinity, }; static const struct virtio_config_ops virtio_pci_config_ops = { @@ -452,6 +453,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = vp_get_vq_affinity, }; /** diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 2ebe506fe41a..8355bab175e1 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -58,6 +58,7 @@ struct irq_affinity; * This returns a pointer to the bus name a la pci_name from which * the caller can then copy. * @set_vq_affinity: set the affinity for a virtqueue. + * @get_vq_affinity: get the affinity for a virtqueue (optional). */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { @@ -77,6 +78,8 @@ struct virtio_config_ops { int (*finalize_features)(struct virtio_device *vdev); const char *(*bus_name)(struct virtio_device *vdev); int (*set_vq_affinity)(struct virtqueue *vq, int cpu); + const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev, + int index); }; /* If driver didn't advertise the feature, it will never appear. */ From 73473427bb551686e4b68ecd99bfd27e6635286a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:24 +0100 Subject: [PATCH 09/13] blk-mq: provide a default queue mapping for virtio device Similar to the PCI version, just calling into virtio instead. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- block/Kconfig | 5 ++++ block/Makefile | 1 + block/blk-mq-virtio.c | 54 +++++++++++++++++++++++++++++++++++ include/linux/blk-mq-virtio.h | 10 +++++++ 4 files changed, 70 insertions(+) create mode 100644 block/blk-mq-virtio.c create mode 100644 include/linux/blk-mq-virtio.h diff --git a/block/Kconfig b/block/Kconfig index 8bf114a3858a..3523b4f0cd8b 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -165,4 +165,9 @@ config BLK_MQ_PCI depends on BLOCK && PCI default y +config BLK_MQ_VIRTIO + bool + depends on BLOCK && VIRTIO + default y + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index a827f988c4e6..60691949d28d 100644 --- a/block/Makefile +++ b/block/Makefile @@ -23,5 +23,6 @@ obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o +obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c new file mode 100644 index 000000000000..c3afbca11299 --- /dev/null +++ b/block/blk-mq-virtio.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016 Christoph Hellwig. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include +#include +#include +#include +#include +#include "blk-mq.h" + +/** + * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device + * @set: tagset to provide the mapping for + * @vdev: virtio device associated with @set. + * @first_vec: first interrupt vectors to use for queues (usually 0) + * + * This function assumes the virtio device @vdev has at least as many available + * interrupt vetors as @set has queues. It will then queuery the vector + * corresponding to each queue for it's affinity mask and built queue mapping + * that maps a queue to the CPUs that have irq affinity for the corresponding + * vector. + */ +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + if (!vdev->config->get_vq_affinity) + goto fallback; + + for (queue = 0; queue < set->nr_hw_queues; queue++) { + mask = vdev->config->get_vq_affinity(vdev, first_vec + queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + set->mq_map[cpu] = queue; + } + + return 0; +fallback: + return blk_mq_map_queues(set); +} +EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues); diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h new file mode 100644 index 000000000000..b1ef6e14744f --- /dev/null +++ b/include/linux/blk-mq-virtio.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BLK_MQ_VIRTIO_H +#define _LINUX_BLK_MQ_VIRTIO_H + +struct blk_mq_tag_set; +struct virtio_device; + +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec); + +#endif /* _LINUX_BLK_MQ_VIRTIO_H */ From ad71473d9c43725c917fc5a86d54ceb7001ee28c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:25 +0100 Subject: [PATCH 10/13] virtio_blk: use virtio IRQ affinity Use automatic IRQ affinity assignment in the virtio layer if available, and build the blk-mq queues based on it. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c54118bdc67d..1028dfeb5a7f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include #include #define PART_BITS 4 @@ -385,6 +387,7 @@ static int init_vq(struct virtio_blk *vblk) struct virtqueue **vqs; unsigned short num_vqs; struct virtio_device *vdev = vblk->vdev; + struct irq_affinity desc = { 0, }; err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, struct virtio_blk_config, num_queues, @@ -412,7 +415,7 @@ static int init_vq(struct virtio_blk *vblk) /* Discover virtqueues and write information to configuration. */ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, - NULL); + &desc); if (err) goto out; @@ -543,10 +546,18 @@ static int virtblk_init_request(void *data, struct request *rq, return 0; } +static int virtblk_map_queues(struct blk_mq_tag_set *set) +{ + struct virtio_blk *vblk = set->driver_data; + + return blk_mq_virtio_map_queues(set, vblk->vdev, 0); +} + static struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .complete = virtblk_request_done, .init_request = virtblk_init_request, + .map_queues = virtblk_map_queues, }; static unsigned int virtblk_queue_depth; From 0d9f0a52c8b9f7a003fe1650b7d5fb8518efabe0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:26 +0100 Subject: [PATCH 11/13] virtio_scsi: use virtio IRQ affinity Use automatic IRQ affinity assignment in the virtio layer if available, and build the blk-mq queues based on it. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- drivers/scsi/virtio_scsi.c | 126 ++++--------------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 12 insertions(+), 115 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index c9c5ea0611e9..939c47df73fa 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #define VIRTIO_SCSI_MEMPOOL_SZ 64 #define VIRTIO_SCSI_EVENT_LEN 8 @@ -108,7 +110,6 @@ struct virtio_scsi { bool affinity_hint_set; struct hlist_node node; - struct hlist_node node_dead; /* Protected by event_vq lock */ bool stop_events; @@ -118,7 +119,6 @@ struct virtio_scsi { struct virtio_scsi_vq req_vqs[]; }; -static enum cpuhp_state virtioscsi_online; static struct kmem_cache *virtscsi_cmd_cache; static mempool_t *virtscsi_cmd_pool; @@ -766,6 +766,13 @@ static void virtscsi_target_destroy(struct scsi_target *starget) kfree(tgt); } +static int virtscsi_map_queues(struct Scsi_Host *shost) +{ + struct virtio_scsi *vscsi = shost_priv(shost); + + return blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev, 2); +} + static struct scsi_host_template virtscsi_host_template_single = { .module = THIS_MODULE, .name = "Virtio SCSI HBA", @@ -801,6 +808,7 @@ static struct scsi_host_template virtscsi_host_template_multi = { .use_clustering = ENABLE_CLUSTERING, .target_alloc = virtscsi_target_alloc, .target_destroy = virtscsi_target_destroy, + .map_queues = virtscsi_map_queues, .track_queue_depth = 1, }; @@ -817,80 +825,6 @@ static struct scsi_host_template virtscsi_host_template_multi = { virtio_cwrite(vdev, struct virtio_scsi_config, fld, &__val); \ } while(0) -static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) -{ - int i; - int cpu; - - /* In multiqueue mode, when the number of cpu is equal - * to the number of request queues, we let the qeueues - * to be private to one cpu by setting the affinity hint - * to eliminate the contention. - */ - if ((vscsi->num_queues == 1 || - vscsi->num_queues != num_online_cpus()) && affinity) { - if (vscsi->affinity_hint_set) - affinity = false; - else - return; - } - - if (affinity) { - i = 0; - for_each_online_cpu(cpu) { - virtqueue_set_affinity(vscsi->req_vqs[i].vq, cpu); - i++; - } - - vscsi->affinity_hint_set = true; - } else { - for (i = 0; i < vscsi->num_queues; i++) { - if (!vscsi->req_vqs[i].vq) - continue; - - virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1); - } - - vscsi->affinity_hint_set = false; - } -} - -static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) -{ - get_online_cpus(); - __virtscsi_set_affinity(vscsi, affinity); - put_online_cpus(); -} - -static int virtscsi_cpu_online(unsigned int cpu, struct hlist_node *node) -{ - struct virtio_scsi *vscsi = hlist_entry_safe(node, struct virtio_scsi, - node); - __virtscsi_set_affinity(vscsi, true); - return 0; -} - -static int virtscsi_cpu_notif_add(struct virtio_scsi *vi) -{ - int ret; - - ret = cpuhp_state_add_instance(virtioscsi_online, &vi->node); - if (ret) - return ret; - - ret = cpuhp_state_add_instance(CPUHP_VIRT_SCSI_DEAD, &vi->node_dead); - if (ret) - cpuhp_state_remove_instance(virtioscsi_online, &vi->node); - return ret; -} - -static void virtscsi_cpu_notif_remove(struct virtio_scsi *vi) -{ - cpuhp_state_remove_instance_nocalls(virtioscsi_online, &vi->node); - cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_SCSI_DEAD, - &vi->node_dead); -} - static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -900,14 +834,8 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, static void virtscsi_remove_vqs(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - - virtscsi_set_affinity(vscsi, false); - /* Stop all the virtqueues. */ vdev->config->reset(vdev); - vdev->config->del_vqs(vdev); } @@ -920,6 +848,7 @@ static int virtscsi_init(struct virtio_device *vdev, vq_callback_t **callbacks; const char **names; struct virtqueue **vqs; + struct irq_affinity desc = { .pre_vectors = 2 }; num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE; vqs = kmalloc(num_vqs * sizeof(struct virtqueue *), GFP_KERNEL); @@ -942,7 +871,7 @@ static int virtscsi_init(struct virtio_device *vdev, /* Discover virtqueues and write information to configuration. */ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, - NULL); + &desc); if (err) goto out; @@ -1008,10 +937,6 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; - err = virtscsi_cpu_notif_add(vscsi); - if (err) - goto scsi_add_host_failed; - cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue); shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF; @@ -1066,9 +991,6 @@ static void virtscsi_remove(struct virtio_device *vdev) virtscsi_cancel_event_work(vscsi); scsi_remove_host(shost); - - virtscsi_cpu_notif_remove(vscsi); - virtscsi_remove_vqs(vdev); scsi_host_put(shost); } @@ -1076,10 +998,6 @@ static void virtscsi_remove(struct virtio_device *vdev) #ifdef CONFIG_PM_SLEEP static int virtscsi_freeze(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - - virtscsi_cpu_notif_remove(vscsi); virtscsi_remove_vqs(vdev); return 0; } @@ -1094,11 +1012,6 @@ static int virtscsi_restore(struct virtio_device *vdev) if (err) return err; - err = virtscsi_cpu_notif_add(vscsi); - if (err) { - vdev->config->del_vqs(vdev); - return err; - } virtio_device_ready(vdev); if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) @@ -1153,16 +1066,6 @@ static int __init init(void) pr_err("mempool_create() for virtscsi_cmd_pool failed\n"); goto error; } - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, - "scsi/virtio:online", - virtscsi_cpu_online, NULL); - if (ret < 0) - goto error; - virtioscsi_online = ret; - ret = cpuhp_setup_state_multi(CPUHP_VIRT_SCSI_DEAD, "scsi/virtio:dead", - NULL, virtscsi_cpu_online); - if (ret) - goto error; ret = register_virtio_driver(&virtio_scsi_driver); if (ret < 0) goto error; @@ -1178,17 +1081,12 @@ error: kmem_cache_destroy(virtscsi_cmd_cache); virtscsi_cmd_cache = NULL; } - if (virtioscsi_online) - cpuhp_remove_multi_state(virtioscsi_online); - cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD); return ret; } static void __exit fini(void) { unregister_virtio_driver(&virtio_scsi_driver); - cpuhp_remove_multi_state(virtioscsi_online); - cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD); mempool_destroy(virtscsi_cmd_pool); kmem_cache_destroy(virtscsi_cmd_cache); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 921acaaa1601..01aea80a503e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -26,7 +26,6 @@ enum cpuhp_state { CPUHP_ARM_OMAP_WAKE_DEAD, CPUHP_IRQ_POLL_DEAD, CPUHP_BLOCK_SOFTIRQ_DEAD, - CPUHP_VIRT_SCSI_DEAD, CPUHP_ACPI_CPUDRV_DEAD, CPUHP_S390_PFAULT_DEAD, CPUHP_BLK_MQ_DEAD, From f889491380582b4ba2981cf0b0d7d6a40fb30ab7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 28 Feb 2017 17:56:02 +0800 Subject: [PATCH 12/13] vhost: introduce O(1) vq metadata cache When device IOTLB is enabled, all address translations were stored in interval tree. O(lgN) searching time could be slow for virtqueue metadata (avail, used and descriptors) since they were accessed much often than other addresses. So this patch introduces an O(1) array which points to the interval tree nodes that store the translations of vq metadata. Those array were update during vq IOTLB prefetching and were reset during each invalidation and tlb update. Each time we want to access vq metadata, this small array were queried before interval tree. This would be sufficient for static mappings but not dynamic mappings, we could do optimizations on top. Test were done with l2fwd in guest (2M hugepage): noiommu | before | after tx 1.32Mpps | 1.06Mpps(82%) | 1.30Mpps(98%) rx 2.33Mpps | 1.46Mpps(63%) | 2.29Mpps(98%) We can almost reach the same performance as noiommu mode. Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 136 ++++++++++++++++++++++++++++++++++-------- drivers/vhost/vhost.h | 8 +++ 2 files changed, 118 insertions(+), 26 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1f7e4e4e6f8e..998bed505530 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -282,6 +282,22 @@ void vhost_poll_queue(struct vhost_poll *poll) } EXPORT_SYMBOL_GPL(vhost_poll_queue); +static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq) +{ + int j; + + for (j = 0; j < VHOST_NUM_ADDRS; j++) + vq->meta_iotlb[j] = NULL; +} + +static void vhost_vq_meta_reset(struct vhost_dev *d) +{ + int i; + + for (i = 0; i < d->nvqs; ++i) + __vhost_vq_meta_reset(d->vqs[i]); +} + static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -312,6 +328,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; + __vhost_vq_meta_reset(vq); } static int vhost_worker(void *data) @@ -691,6 +708,18 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, return 1; } +static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, + u64 addr, unsigned int size, + int type) +{ + const struct vhost_umem_node *node = vq->meta_iotlb[type]; + + if (!node) + return NULL; + + return (void *)(uintptr_t)(node->userspace_addr + addr - node->start); +} + /* Can we switch to this memory table? */ /* Caller should have device mutex but not vq mutex */ static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, @@ -733,8 +762,14 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to, * could be access through iotlb. So -EAGAIN should * not happen in this case. */ - /* TODO: more fast path */ struct iov_iter t; + void __user *uaddr = vhost_vq_meta_fetch(vq, + (u64)(uintptr_t)to, size, + VHOST_ADDR_DESC); + + if (uaddr) + return __copy_to_user(uaddr, from, size); + ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_WO); @@ -762,8 +797,14 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, * could be access through iotlb. So -EAGAIN should * not happen in this case. */ - /* TODO: more fast path */ + void __user *uaddr = vhost_vq_meta_fetch(vq, + (u64)(uintptr_t)from, size, + VHOST_ADDR_DESC); struct iov_iter f; + + if (uaddr) + return __copy_from_user(to, uaddr, size); + ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); @@ -783,17 +824,12 @@ out: return ret; } -static void __user *__vhost_get_user(struct vhost_virtqueue *vq, - void __user *addr, unsigned size) +static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq, + void __user *addr, unsigned int size, + int type) { int ret; - /* This function should be called after iotlb - * prefetch, which means we're sure that vq - * could be access through iotlb. So -EAGAIN should - * not happen in this case. - */ - /* TODO: more fast path */ ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); @@ -814,14 +850,32 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq, return vq->iotlb_iov[0].iov_base; } -#define vhost_put_user(vq, x, ptr) \ +/* This function should be called after iotlb + * prefetch, which means we're sure that vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ +static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, + void *addr, unsigned int size, + int type) +{ + void __user *uaddr = vhost_vq_meta_fetch(vq, + (u64)(uintptr_t)addr, size, type); + if (uaddr) + return uaddr; + + return __vhost_get_user_slow(vq, addr, size, type); +} + +#define vhost_put_user(vq, x, ptr) \ ({ \ int ret = -EFAULT; \ if (!vq->iotlb) { \ ret = __put_user(x, ptr); \ } else { \ __typeof__(ptr) to = \ - (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ + sizeof(*ptr), VHOST_ADDR_USED); \ if (to != NULL) \ ret = __put_user(x, to); \ else \ @@ -830,14 +884,16 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq, ret; \ }) -#define vhost_get_user(vq, x, ptr) \ +#define vhost_get_user(vq, x, ptr, type) \ ({ \ int ret; \ if (!vq->iotlb) { \ ret = __get_user(x, ptr); \ } else { \ __typeof__(ptr) from = \ - (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ + sizeof(*ptr), \ + type); \ if (from != NULL) \ ret = __get_user(x, from); \ else \ @@ -846,6 +902,12 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq, ret; \ }) +#define vhost_get_avail(vq, x, ptr) \ + vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL) + +#define vhost_get_used(vq, x, ptr) \ + vhost_get_user(vq, x, ptr, VHOST_ADDR_USED) + static void vhost_dev_lock_vqs(struct vhost_dev *d) { int i = 0; @@ -951,6 +1013,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, ret = -EFAULT; break; } + vhost_vq_meta_reset(dev); if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size, msg->iova + msg->size - 1, msg->uaddr, msg->perm)) { @@ -960,6 +1023,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, vhost_iotlb_notify_vq(dev, msg); break; case VHOST_IOTLB_INVALIDATE: + vhost_vq_meta_reset(dev); vhost_del_umem_range(dev->iotlb, msg->iova, msg->iova + msg->size - 1); break; @@ -1103,12 +1167,26 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, sizeof *used + num * sizeof *used->ring + s); } +static void vhost_vq_meta_update(struct vhost_virtqueue *vq, + const struct vhost_umem_node *node, + int type) +{ + int access = (type == VHOST_ADDR_USED) ? + VHOST_ACCESS_WO : VHOST_ACCESS_RO; + + if (likely(node->perm & access)) + vq->meta_iotlb[type] = node; +} + static int iotlb_access_ok(struct vhost_virtqueue *vq, - int access, u64 addr, u64 len) + int access, u64 addr, u64 len, int type) { const struct vhost_umem_node *node; struct vhost_umem *umem = vq->iotlb; - u64 s = 0, size; + u64 s = 0, size, orig_addr = addr; + + if (vhost_vq_meta_fetch(vq, addr, len, type)) + return true; while (len > s) { node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, @@ -1125,6 +1203,10 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq, } size = node->size - addr + node->start; + + if (orig_addr == addr && size >= len) + vhost_vq_meta_update(vq, node, type); + s += size; addr += size; } @@ -1141,13 +1223,15 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq) return 1; return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, - num * sizeof *vq->desc) && + num * sizeof(*vq->desc), VHOST_ADDR_DESC) && iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, sizeof *vq->avail + - num * sizeof *vq->avail->ring + s) && + num * sizeof(*vq->avail->ring) + s, + VHOST_ADDR_AVAIL) && iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, sizeof *vq->used + - num * sizeof *vq->used->ring + s); + num * sizeof(*vq->used->ring) + s, + VHOST_ADDR_USED); } EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); @@ -1728,7 +1812,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq) r = -EFAULT; goto err; } - r = vhost_get_user(vq, last_used_idx, &vq->used->idx); + r = vhost_get_used(vq, last_used_idx, &vq->used->idx); if (r) { vq_err(vq, "Can't access used idx at %p\n", &vq->used->idx); @@ -1932,7 +2016,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, last_avail_idx = vq->last_avail_idx; if (vq->avail_idx == vq->last_avail_idx) { - if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { + if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); return -EFAULT; @@ -1959,7 +2043,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - if (unlikely(vhost_get_user(vq, ring_head, + if (unlikely(vhost_get_avail(vq, ring_head, &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, @@ -2175,7 +2259,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) * with the barrier that the Guest executes when enabling * interrupts. */ smp_mb(); - if (vhost_get_user(vq, flags, &vq->avail->flags)) { + if (vhost_get_avail(vq, flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); return true; } @@ -2202,7 +2286,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) * interrupts. */ smp_mb(); - if (vhost_get_user(vq, event, vhost_used_event(vq))) { + if (vhost_get_avail(vq, event, vhost_used_event(vq))) { vq_err(vq, "Failed to get used event idx"); return true; } @@ -2246,7 +2330,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) __virtio16 avail_idx; int r; - r = vhost_get_user(vq, avail_idx, &vq->avail->idx); + r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); if (r) return false; @@ -2281,7 +2365,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ smp_mb(); - r = vhost_get_user(vq, avail_idx, &vq->avail->idx); + r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); if (r) { vq_err(vq, "Failed to check avail idx at %p: %d\n", &vq->avail->idx, r); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index a9cbbb148f46..f55671d53f28 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -76,6 +76,13 @@ struct vhost_umem { int numem; }; +enum vhost_uaddr_type { + VHOST_ADDR_DESC = 0, + VHOST_ADDR_AVAIL = 1, + VHOST_ADDR_USED = 2, + VHOST_NUM_ADDRS = 3, +}; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -86,6 +93,7 @@ struct vhost_virtqueue { struct vring_desc __user *desc; struct vring_avail __user *avail; struct vring_used __user *used; + const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; struct file *kick; struct file *call; struct file *error; From c4baad50297d84bde1a7ad45e50c73adae4a2192 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Feb 2017 00:02:27 -0800 Subject: [PATCH 13/13] virtio-console: avoid DMA from stack put_chars() stuffs the buffer it gets into an sg, but that buffer may be on the stack. This breaks with CONFIG_VMAP_STACK=y (for me, it manifested as printks getting turned into NUL bytes). Signed-off-by: Omar Sandoval Signed-off-by: Michael S. Tsirkin Reviewed-by: Amit Shah --- drivers/char/virtio_console.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 6266c0568e1d..e9b7e0b3cabe 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1136,6 +1136,8 @@ static int put_chars(u32 vtermno, const char *buf, int count) { struct port *port; struct scatterlist sg[1]; + void *data; + int ret; if (unlikely(early_put_chars)) return early_put_chars(vtermno, buf, count); @@ -1144,8 +1146,14 @@ static int put_chars(u32 vtermno, const char *buf, int count) if (!port) return -EPIPE; - sg_init_one(sg, buf, count); - return __send_to_port(port, sg, 1, count, (void *)buf, false); + data = kmemdup(buf, count, GFP_ATOMIC); + if (!data) + return -ENOMEM; + + sg_init_one(sg, data, count); + ret = __send_to_port(port, sg, 1, count, data, false); + kfree(data); + return ret; } /*