From c1081002bfeeba54204d37000a8c43b2015b6eda Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 3 Aug 2023 22:30:41 +0800 Subject: [PATCH 01/19] vdpa/mlx5: Remove unused function declarations Commit 29064bfdabd5 ("vdpa/mlx5: Add support library for mlx5 VDPA implementation") declared but never implemented these. Signed-off-by: Yue Haibing Message-Id: <20230803143041.23388-1-yuehaibing@huawei.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index b53420e874ac..ca56242972b3 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -100,9 +100,6 @@ struct mlx5_vdpa_dev { bool suspended; }; -int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid); -int mlx5_vdpa_dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid); -int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey); int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn); void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn); int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn); From 8b59b4da9b56ce2ec2dc7dc3ae544405553c2de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 9 Jun 2023 11:21:24 +0200 Subject: [PATCH 02/19] vdpa: add VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This feature flag allows the driver enabling virtqueues both before and after DRIVER_OK. This is needed for software assisted live migration, so userland can restore the device status in devices with control virtqueue before the dataplane is enabled. Signed-off-by: Eugenio Pérez Acked-by: Shannon Nelson Message-Id: <20230609092127.170673-2-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost_types.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index d3aad12ad1fa..2d827d22cd99 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -181,5 +181,9 @@ struct vhost_vdpa_iova_range { #define VHOST_BACKEND_F_SUSPEND 0x4 /* Device can be resumed */ #define VHOST_BACKEND_F_RESUME 0x5 +/* Device supports the driver enabling virtqueues both before and after + * DRIVER_OK + */ +#define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK 0x6 #endif From 9f09fd6171fe8badef7875ab1642e67360bc5483 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 9 Jun 2023 11:21:25 +0200 Subject: [PATCH 03/19] vdpa: accept VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK backend feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accepting VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK backend feature if userland sets it. Signed-off-by: Eugenio Pérez Acked-by: Shannon Nelson Message-Id: <20230609092127.170673-3-eperezma@redhat.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vdpa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b43e8680eee8..bb5734323d7d 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -680,7 +680,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, return -EFAULT; if (features & ~(VHOST_VDPA_BACKEND_FEATURES | BIT_ULL(VHOST_BACKEND_F_SUSPEND) | - BIT_ULL(VHOST_BACKEND_F_RESUME))) + BIT_ULL(VHOST_BACKEND_F_RESUME) | + BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK))) return -EOPNOTSUPP; if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && !vhost_vdpa_can_suspend(v)) From b63e5c70c39349ea5b9e7dbb604551902fc753fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 9 Jun 2023 11:21:26 +0200 Subject: [PATCH 04/19] vdpa: add get_backend_features vdpa operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This operation allow vdpa parent to expose its own backend feature bits. Next patches introduce a feature not compatible with all parent drivers: the ability to enable vq after driver_ok. Each parent must declare if it allows it or not. Signed-off-by: Eugenio Pérez Acked-by: Shannon Nelson Message-Id: <20230609092127.170673-4-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 12 ++++++++++++ include/linux/vdpa.h | 4 ++++ 2 files changed, 16 insertions(+) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index bb5734323d7d..78379ffd2336 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -403,6 +403,17 @@ static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) return 0; } +static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!ops->get_backend_features) + return 0; + else + return ops->get_backend_features(vdpa); +} + static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -742,6 +753,7 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); if (vhost_vdpa_can_resume(v)) features |= BIT_ULL(VHOST_BACKEND_F_RESUME); + features |= vhost_vdpa_get_backend_features(v); if (copy_to_user(featurep, &features, sizeof(features))) r = -EFAULT; break; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index db1b0eaef4eb..0e652026b776 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -208,6 +208,9 @@ struct vdpa_map_file { * @vdev: vdpa device * Returns the virtio features support by the * device + * @get_backend_features: Get parent-specific backend features (optional) + * Returns the vdpa features supported by the + * device. 
* @set_driver_features: Set virtio features supported by the driver * @vdev: vdpa device * @features: feature support by the driver @@ -358,6 +361,7 @@ struct vdpa_config_ops { u32 (*get_vq_align)(struct vdpa_device *vdev); u32 (*get_vq_group)(struct vdpa_device *vdev, u16 idx); u64 (*get_device_features)(struct vdpa_device *vdev); + u64 (*get_backend_features)(const struct vdpa_device *vdev); int (*set_driver_features)(struct vdpa_device *vdev, u64 features); u64 (*get_driver_features)(struct vdpa_device *vdev); void (*set_config_cb)(struct vdpa_device *vdev, From 2c9c63711607e3742029f433f130320a9fd7a6a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 9 Jun 2023 11:21:27 +0200 Subject: [PATCH 05/19] vdpa_sim: offer VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start offering the feature in the simulator. Other parent drivers can follow this code to offer it too. Signed-off-by: Eugenio Pérez Acked-by: Shannon Nelson Message-Id: <20230609092127.170673-5-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index d343af4fa60e..76d41058add9 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "vdpa_sim.h" @@ -410,6 +411,11 @@ static u64 vdpasim_get_device_features(struct vdpa_device *vdpa) return vdpasim->dev_attr.supported_features; } +static u64 vdpasim_get_backend_features(const struct vdpa_device *vdpa) +{ + return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK); +} + static int vdpasim_set_driver_features(struct vdpa_device *vdpa, u64 features) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); @@ -733,6 +739,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { .get_vq_align = vdpasim_get_vq_align, .get_vq_group = vdpasim_get_vq_group, .get_device_features = vdpasim_get_device_features, + .get_backend_features = vdpasim_get_backend_features, .set_driver_features = vdpasim_set_driver_features, .get_driver_features = vdpasim_get_driver_features, .set_config_cb = vdpasim_set_config_cb, @@ -770,6 +777,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .get_vq_align = vdpasim_get_vq_align, .get_vq_group = vdpasim_get_vq_group, .get_device_features = vdpasim_get_device_features, + .get_backend_features = vdpasim_get_backend_features, .set_driver_features = vdpasim_set_driver_features, .get_driver_features = vdpasim_get_driver_features, .set_config_cb = vdpasim_set_config_cb, From 610c708bf872fa575f33f74a1650a7011055b7aa Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:46 +0800 Subject: [PATCH 06/19] virtio_ring: check use_dma_api before unmap desc for indirect Inside detach_buf_split(), if use_dma_api is false, vring_unmap_one_split_indirect will be called many times, but actually nothing is done. So this patch check use_dma_api firstly. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20230810123057.43407-2-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_ring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index c5310eaf8b46..f8754f1d64d3 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -774,8 +774,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, VRING_DESC_F_INDIRECT)); BUG_ON(len == 0 || len % sizeof(struct vring_desc)); - for (j = 0; j < len / sizeof(struct vring_desc); j++) - vring_unmap_one_split_indirect(vq, &indir_desc[j]); + if (vq->use_dma_api) { + for (j = 0; j < len / sizeof(struct vring_desc); j++) + vring_unmap_one_split_indirect(vq, &indir_desc[j]); + } kfree(indir_desc); vq->split.desc_state[head].indir_desc = NULL; From 0e27fa6ddeb0e070398692e369ce52fa91f9442d Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:47 +0800 Subject: [PATCH 07/19] virtio_ring: put mapping error check in vring_map_one_sg This patch put the dma addr error check in vring_map_one_sg(). The benefits of doing this: 1. reduce one judgment of vq->use_dma_api. 2. make vring_map_one_sg more simple, without calling vring_mapping_error to check the return value. simplifies subsequent code Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20230810123057.43407-3-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 37 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index f8754f1d64d3..87d7ceeecdbd 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -355,9 +355,8 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq) } /* Map one sg entry. */ -static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, - struct scatterlist *sg, - enum dma_data_direction direction) +static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, + enum dma_data_direction direction, dma_addr_t *addr) { if (!vq->use_dma_api) { /* @@ -366,7 +365,8 @@ static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, * depending on the direction. */ kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction); - return (dma_addr_t)sg_phys(sg); + *addr = (dma_addr_t)sg_phys(sg); + return 0; } /* @@ -374,9 +374,14 @@ static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, * the way it expects (we don't guarantee that the scatterlist * will exist for the lifetime of the mapping). 
*/ - return dma_map_page(vring_dma_dev(vq), + *addr = dma_map_page(vring_dma_dev(vq), sg_page(sg), sg->offset, sg->length, direction); + + if (dma_mapping_error(vring_dma_dev(vq), *addr)) + return -ENOMEM; + + return 0; } static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, @@ -588,8 +593,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr)) goto unmap_release; prev = i; @@ -603,8 +609,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr)) goto unmap_release; prev = i; @@ -1281,9 +1288,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - addr = vring_map_one_sg(vq, sg, n < out_sgs ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + if (vring_map_one_sg(vq, sg, n < out_sgs ? + DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) goto unmap_release; desc[i].flags = cpu_to_le16(n < out_sgs ? @@ -1428,9 +1434,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, c = 0; for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, n < out_sgs ? + DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) goto unmap_release; flags = cpu_to_le16(vq->packed.avail_used_flags | From 8daafe9ebbd21a54bf91f9ff81decf215c203edd Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:48 +0800 Subject: [PATCH 08/19] virtio_ring: introduce virtqueue_set_dma_premapped() This helper allows the driver change the dma mode to premapped mode. Under the premapped mode, the virtio core do not do dma mapping internally. This just work when the use_dma_api is true. If the use_dma_api is false, the dma options is not through the DMA APIs, that is not the standard way of the linux kernel. Signed-off-by: Xuan Zhuo Message-Id: <20230810123057.43407-4-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 2 ++ 2 files changed, 55 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 87d7ceeecdbd..8e81b01e0735 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -172,6 +172,9 @@ struct vring_virtqueue { /* Host publishes avail event idx */ bool event; + /* Do DMA mapping by driver */ + bool premapped; + /* Head of free buffer list. */ unsigned int free_head; /* Number we've added since last sync. 
*/ @@ -2061,6 +2064,7 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->packed_ring = true; vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); + vq->premapped = false; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2550,6 +2554,7 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index, #endif vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); + vq->premapped = false; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2693,6 +2698,54 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, } EXPORT_SYMBOL_GPL(virtqueue_resize); +/** + * virtqueue_set_dma_premapped - set the vring premapped mode + * @_vq: the struct virtqueue we're talking about. + * + * Enable the premapped mode of the vq. + * + * The vring in premapped mode does not do dma internally, so the driver must + * do dma mapping in advance. The driver must pass the dma_address through + * dma_address of scatterlist. When the driver got a used buffer from + * the vring, it has to unmap the dma address. + * + * This function must be called immediately after creating the vq, or after vq + * reset, and before adding any buffers to it. + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error. + * 0: success. + * -EINVAL: vring does not use the dma api, so we can not enable premapped mode. + */ +int virtqueue_set_dma_premapped(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u32 num; + + START_USE(vq); + + num = vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; + + if (num != vq->vq.num_free) { + END_USE(vq); + return -EINVAL; + } + + if (!vq->use_dma_api) { + END_USE(vq); + return -EINVAL; + } + + vq->premapped = true; + + END_USE(vq); + + return 0; +} +EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped); + /* Only available for split ring */ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int num, diff --git a/include/linux/virtio.h b/include/linux/virtio.h index de6041deee37..8add38038877 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -78,6 +78,8 @@ bool virtqueue_enable_cb(struct virtqueue *vq); unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq); +int virtqueue_set_dma_premapped(struct virtqueue *_vq); + bool virtqueue_poll(struct virtqueue *vq, unsigned); bool virtqueue_enable_cb_delayed(struct virtqueue *vq); From d7344a2f71e38eb04fc16e3fef4f0580f42cbbd5 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:49 +0800 Subject: [PATCH 09/19] virtio_ring: support add premapped buf If the vq is the premapped mode, use the sg_dma_address() directly. Signed-off-by: Xuan Zhuo Message-Id: <20230810123057.43407-5-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_ring.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 8e81b01e0735..f9f772e85a38 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -361,6 +361,11 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq) static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, enum dma_data_direction direction, dma_addr_t *addr) { + if (vq->premapped) { + *addr = sg_dma_address(sg); + return 0; + } + if (!vq->use_dma_api) { /* * If DMA is not used, KMSAN doesn't know that the scatterlist @@ -639,8 +644,12 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, dma_addr_t addr = vring_map_single( vq, desc, total_sg * sizeof(struct vring_desc), DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) + if (vring_mapping_error(vq, addr)) { + if (vq->premapped) + goto free_indirect; + goto unmap_release; + } virtqueue_add_desc_split(_vq, vq->split.vring.desc, head, addr, @@ -706,6 +715,7 @@ unmap_release: i = vring_unmap_one_split(vq, i); } +free_indirect: if (indirect) kfree(desc); @@ -1307,8 +1317,12 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, addr = vring_map_single(vq, desc, total_sg * sizeof(struct vring_packed_desc), DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) + if (vring_mapping_error(vq, addr)) { + if (vq->premapped) + goto free_desc; + goto unmap_release; + } vq->packed.vring.desc[head].addr = cpu_to_le64(addr); vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * @@ -1366,6 +1380,7 @@ unmap_release: for (i = 0; i < err_idx; i++) vring_unmap_desc_packed(vq, &desc[i]); +free_desc: kfree(desc); END_USE(vq); From 2df64759071bdca2a12edb9af4f071e4419ad745 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:50 +0800 Subject: [PATCH 10/19] virtio_ring: introduce virtqueue_dma_dev() Added virtqueue_dma_dev() to get DMA device for virtio. Then the caller can do dma operation in advance. The purpose is to keep memory mapped across multiple add/get buf operations. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20230810123057.43407-6-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 17 +++++++++++++++++ include/linux/virtio.h | 2 ++ 2 files changed, 19 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index f9f772e85a38..bb3d73d221cd 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2265,6 +2265,23 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); +/** + * virtqueue_dma_dev - get the dma dev + * @_vq: the struct virtqueue we're talking about. + * + * Returns the dma dev. That can been used for dma api. + */ +struct device *virtqueue_dma_dev(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (vq->use_dma_api) + return vring_dma_dev(vq); + else + return NULL; +} +EXPORT_SYMBOL_GPL(virtqueue_dma_dev); + /** * virtqueue_kick_prepare - first half of split virtqueue_kick call. 
* @_vq: the struct virtqueue diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 8add38038877..bd55a05eec04 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -61,6 +61,8 @@ int virtqueue_add_sgs(struct virtqueue *vq, void *data, gfp_t gfp); +struct device *virtqueue_dma_dev(struct virtqueue *vq); + bool virtqueue_kick(struct virtqueue *vq); bool virtqueue_kick_prepare(struct virtqueue *vq); From b319940f83c21bb4c1fabffe68a862be879a6193 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:51 +0800 Subject: [PATCH 11/19] virtio_ring: skip unmap for premapped Now we add a case where we skip dma unmap, the vq->premapped is true. We can't just rely on use_dma_api to determine whether to skip the dma operation. For convenience, I introduced the "do_unmap". By default, it is the same as use_dma_api. If the driver is configured with premapped, then do_unmap is false. So as long as do_unmap is false, for addr of desc, we should skip dma unmap operation. Signed-off-by: Xuan Zhuo Message-Id: <20230810123057.43407-7-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 42 ++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index bb3d73d221cd..7973814b6e31 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -175,6 +175,11 @@ struct vring_virtqueue { /* Do DMA mapping by driver */ bool premapped; + /* Do unmap or not for desc. Just when premapped is False and + * use_dma_api is true, this is true. + */ + bool do_unmap; + /* Head of free buffer list. */ unsigned int free_head; /* Number we've added since last sync. */ @@ -440,7 +445,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, { u16 flags; - if (!vq->use_dma_api) + if (!vq->do_unmap) return; flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); @@ -458,18 +463,21 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, struct vring_desc_extra *extra = vq->split.desc_extra; u16 flags; - if (!vq->use_dma_api) - goto out; - flags = extra[i].flags; if (flags & VRING_DESC_F_INDIRECT) { + if (!vq->use_dma_api) + goto out; + dma_unmap_single(vring_dma_dev(vq), extra[i].addr, extra[i].len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { + if (!vq->do_unmap) + goto out; + dma_unmap_page(vring_dma_dev(vq), extra[i].addr, extra[i].len, @@ -635,7 +643,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, } /* Last one doesn't continue. */ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); - if (!indirect && vq->use_dma_api) + if (!indirect && vq->do_unmap) vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= ~VRING_DESC_F_NEXT; @@ -794,7 +802,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, VRING_DESC_F_INDIRECT)); BUG_ON(len == 0 || len % sizeof(struct vring_desc)); - if (vq->use_dma_api) { + if (vq->do_unmap) { for (j = 0; j < len / sizeof(struct vring_desc); j++) vring_unmap_one_split_indirect(vq, &indir_desc[j]); } @@ -1217,17 +1225,20 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, { u16 flags; - if (!vq->use_dma_api) - return; - flags = extra->flags; if (flags & VRING_DESC_F_INDIRECT) { + if (!vq->use_dma_api) + return; + dma_unmap_single(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { + if (!vq->do_unmap) + return; + dma_unmap_page(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? @@ -1240,7 +1251,7 @@ static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, { u16 flags; - if (!vq->use_dma_api) + if (!vq->do_unmap) return; flags = le16_to_cpu(desc->flags); @@ -1329,7 +1340,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, sizeof(struct vring_packed_desc)); vq->packed.vring.desc[head].id = cpu_to_le16(id); - if (vq->use_dma_api) { + if (vq->do_unmap) { vq->packed.desc_extra[id].addr = addr; vq->packed.desc_extra[id].len = total_sg * sizeof(struct vring_packed_desc); @@ -1470,7 +1481,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, desc[i].len = cpu_to_le32(sg->length); desc[i].id = cpu_to_le16(id); - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->do_unmap)) { vq->packed.desc_extra[curr].addr = addr; vq->packed.desc_extra[curr].len = sg->length; vq->packed.desc_extra[curr].flags = @@ -1604,7 +1615,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, vq->free_head = id; vq->vq.num_free += state->num; - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->do_unmap)) { curr = id; for (i = 0; i < state->num; i++) { vring_unmap_extra_packed(vq, @@ -1621,7 +1632,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, if (!desc) return; - if (vq->use_dma_api) { + if (vq->do_unmap) { len = vq->packed.desc_extra[id].len; for (i = 0; i < len / sizeof(struct vring_packed_desc); i++) @@ -2080,6 +2091,7 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); vq->premapped = false; + vq->do_unmap = vq->use_dma_api; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2587,6 +2599,7 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); vq->premapped = false; + vq->do_unmap = vq->use_dma_api; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2771,6 +2784,7 @@ int virtqueue_set_dma_premapped(struct virtqueue *_vq) } vq->premapped = true; + vq->do_unmap = false; END_USE(vq); From 4d09f24080dd301083a70d5a2c25fb59b149fec1 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:52 +0800 Subject: [PATCH 12/19] virtio_ring: correct the expression of the description of virtqueue_resize() Modify the "useless" to a more accurate "unused". Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20230810123057.43407-8-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 7973814b6e31..fd9ae020e0a3 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2678,7 +2678,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); * virtqueue_resize - resize the vring of vq * @_vq: the struct virtqueue we're talking about. * @num: new ring num - * @recycle: callback for recycle the useless buffer + * @recycle: callback to recycle unused buffers * * When it is really necessary to create a new vring, it will set the current vq * into the reset state. 
Then call the passed callback to recycle the buffer From ad48d53b5b3fbcc10ea89070709724ad589e9223 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:53 +0800 Subject: [PATCH 13/19] virtio_ring: separate the logic of reset/enable from virtqueue_resize The subsequent reset function will reuse these logic. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20230810123057.43407-9-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 58 ++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fd9ae020e0a3..23172d98e48e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2152,6 +2152,43 @@ err_ring: return -ENOMEM; } +static int virtqueue_disable_and_recycle(struct virtqueue *_vq, + void (*recycle)(struct virtqueue *vq, void *buf)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = vq->vq.vdev; + void *buf; + int err; + + if (!vq->we_own_ring) + return -EPERM; + + if (!vdev->config->disable_vq_and_reset) + return -ENOENT; + + if (!vdev->config->enable_vq_after_reset) + return -ENOENT; + + err = vdev->config->disable_vq_and_reset(_vq); + if (err) + return err; + + while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) + recycle(_vq, buf); + + return 0; +} + +static int virtqueue_enable_after_reset(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = vq->vq.vdev; + + if (vdev->config->enable_vq_after_reset(_vq)) + return -EBUSY; + + return 0; +} /* * Generic functions and exported symbols. @@ -2702,13 +2739,8 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)) { struct vring_virtqueue *vq = to_vvq(_vq); - struct virtio_device *vdev = vq->vq.vdev; - void *buf; int err; - if (!vq->we_own_ring) - return -EPERM; - if (num > vq->vq.num_max) return -E2BIG; @@ -2718,28 +2750,16 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num) return 0; - if (!vdev->config->disable_vq_and_reset) - return -ENOENT; - - if (!vdev->config->enable_vq_after_reset) - return -ENOENT; - - err = vdev->config->disable_vq_and_reset(_vq); + err = virtqueue_disable_and_recycle(_vq, recycle); if (err) return err; - while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) - recycle(_vq, buf); - if (vq->packed_ring) err = virtqueue_resize_packed(_vq, num); else err = virtqueue_resize_split(_vq, num); - if (vdev->config->enable_vq_after_reset(_vq)) - return -EBUSY; - - return err; + return virtqueue_enable_after_reset(_vq); } EXPORT_SYMBOL_GPL(virtqueue_resize); From ba3e0c47c070c4cf010be9fb1e4eb669c744af11 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:54 +0800 Subject: [PATCH 14/19] virtio_ring: introduce virtqueue_reset() Introduce virtqueue_reset() to release all buffer inside vq. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20230810123057.43407-10-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_ring.c | 33 +++++++++++++++++++++++++++++++++ include/linux/virtio.h | 2 ++ 2 files changed, 35 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 23172d98e48e..639c20b19e06 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2812,6 +2812,39 @@ int virtqueue_set_dma_premapped(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped); +/** + * virtqueue_reset - detach and recycle all unused buffers + * @_vq: the struct virtqueue we're talking about. + * @recycle: callback to recycle unused buffers + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error. + * 0: success. + * -EBUSY: Failed to sync with device, vq may not work properly + * -ENOENT: Transport or device not supported + * -EPERM: Operation not permitted + */ +int virtqueue_reset(struct virtqueue *_vq, + void (*recycle)(struct virtqueue *vq, void *buf)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + int err; + + err = virtqueue_disable_and_recycle(_vq, recycle); + if (err) + return err; + + if (vq->packed_ring) + virtqueue_reinit_packed(vq); + else + virtqueue_reinit_split(vq); + + return virtqueue_enable_after_reset(_vq); +} +EXPORT_SYMBOL_GPL(virtqueue_reset); + /* Only available for split ring */ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int num, diff --git a/include/linux/virtio.h b/include/linux/virtio.h index bd55a05eec04..49a640e0a6f4 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -99,6 +99,8 @@ dma_addr_t virtqueue_get_used_addr(const struct virtqueue *vq); int virtqueue_resize(struct virtqueue *vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)); +int virtqueue_reset(struct virtqueue *vq, + void (*recycle)(struct virtqueue *vq, void *buf)); /** * struct virtio_device - representation of a device using virtio From b6253b4e21939f1bb54e8fdb84c23af9c3fb834a Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:55 +0800 Subject: [PATCH 15/19] virtio_ring: introduce dma map api for virtqueue Added virtqueue_dma_map_api* to map DMA addresses for virtual memory in advance. The purpose is to keep memory mapped across multiple add/get buf operations. Signed-off-by: Xuan Zhuo Message-Id: <20230810123057.43407-11-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 8 +++++ 2 files changed, 77 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 639c20b19e06..916479c9c72c 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -3106,4 +3106,73 @@ const struct vring *virtqueue_get_vring(const struct virtqueue *vq) } EXPORT_SYMBOL_GPL(virtqueue_get_vring); +/** + * virtqueue_dma_map_single_attrs - map DMA for _vq + * @_vq: the struct virtqueue we're talking about. + * @ptr: the pointer of the buffer to do dma + * @size: the size of the buffer to do dma + * @dir: DMA direction + * @attrs: DMA Attrs + * + * The caller calls this to do dma mapping in advance. The DMA address can be + * passed to this _vq when it is in pre-mapped mode. + * + * return DMA address. Caller should check that by virtqueue_dma_mapping_error(). 
+ */ +dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, + size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_dma_api) + return (dma_addr_t)virt_to_phys(ptr); + + return dma_map_single_attrs(vring_dma_dev(vq), ptr, size, dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs); + +/** + * virtqueue_dma_unmap_single_attrs - unmap DMA for _vq + * @_vq: the struct virtqueue we're talking about. + * @addr: the dma address to unmap + * @size: the size of the buffer + * @dir: DMA direction + * @attrs: DMA Attrs + * + * Unmap the address that is mapped by the virtqueue_dma_map_* APIs. + * + */ +void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_dma_api) + return; + + dma_unmap_single_attrs(vring_dma_dev(vq), addr, size, dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs); + +/** + * virtqueue_dma_mapping_error - check dma address + * @_vq: the struct virtqueue we're talking about. + * @addr: DMA address + * + * Returns 0 means dma valid. Other means invalid dma address. + */ +int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_dma_api) + return 0; + + return dma_mapping_error(vring_dma_dev(vq), addr); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error); + MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 49a640e0a6f4..79e3c74391e0 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -9,6 +9,7 @@ #include #include #include +#include /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -212,4 +213,11 @@ void unregister_virtio_driver(struct virtio_driver *drv); #define module_virtio_driver(__virtio_driver) \ module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) + +dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs); +int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr); #endif /* _LINUX_VIRTIO_H */ From 8bd2f71054bd0bc997833e9825143672eb7e2801 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:56 +0800 Subject: [PATCH 16/19] virtio_ring: introduce dma sync api for virtqueue These API has been introduced: * virtqueue_dma_need_sync * virtqueue_dma_sync_single_range_for_cpu * virtqueue_dma_sync_single_range_for_device These APIs can be used together with the premapped mechanism to sync the DMA address. Signed-off-by: Xuan Zhuo Message-Id: <20230810123057.43407-12-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_ring.c | 76 ++++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 8 ++++ 2 files changed, 84 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 916479c9c72c..81ecb29c88f1 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -3175,4 +3175,80 @@ int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr) } EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error); +/** + * virtqueue_dma_need_sync - check a dma address needs sync + * @_vq: the struct virtqueue we're talking about. + * @addr: DMA address + * + * Check if the dma address mapped by the virtqueue_dma_map_* APIs needs to be + * synchronized + * + * return bool + */ +bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_dma_api) + return false; + + return dma_need_sync(vring_dma_dev(vq), addr); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync); + +/** + * virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu + * @_vq: the struct virtqueue we're talking about. + * @addr: DMA address + * @offset: DMA address offset + * @size: buf size for sync + * @dir: DMA direction + * + * Before calling this function, use virtqueue_dma_need_sync() to confirm that + * the DMA address really needs to be synchronized + * + */ +void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, + dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct device *dev = vring_dma_dev(vq); + + if (!vq->use_dma_api) + return; + + dma_sync_single_range_for_cpu(dev, addr, offset, size, + DMA_BIDIRECTIONAL); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu); + +/** + * virtqueue_dma_sync_single_range_for_device - dma sync for device + * @_vq: the struct virtqueue we're talking about. 
+ * @addr: DMA address + * @offset: DMA address offset + * @size: buf size for sync + * @dir: DMA direction + * + * Before calling this function, use virtqueue_dma_need_sync() to confirm that + * the DMA address really needs to be synchronized + */ +void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, + dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct device *dev = vring_dma_dev(vq); + + if (!vq->use_dma_api) + return; + + dma_sync_single_range_for_device(dev, addr, offset, size, + DMA_BIDIRECTIONAL); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device); + MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 79e3c74391e0..1311a7fbe675 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -220,4 +220,12 @@ void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr); + +bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); +void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); #endif /* _LINUX_VIRTIO_H */ From 295525e29a5b5694a6e96864f0c1365f79639863 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 10 Aug 2023 20:30:57 +0800 Subject: [PATCH 17/19] virtio_net: merge dma operations when filling mergeable buffers Currently, the virtio core will perform a dma operation for each buffer. Although, the same page may be operated multiple times. This patch, the driver does the dma operation and manages the dma address based the feature premapped of virtio core. This way, we can perform only one dma operation for the pages of the alloc frag. This is beneficial for the iommu device. kernel command line: intel_iommu=on iommu.passthrough=0 | strict=0 | strict=1 Before | 775496pps | 428614pps After | 1109316pps | 742853pps Signed-off-by: Xuan Zhuo Message-Id: <20230810123057.43407-13-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 230 ++++++++++++++++++++++++++++++++++----- 1 file changed, 203 insertions(+), 27 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8e9f4cfe941f..98dc9b49d56b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -126,6 +126,14 @@ static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc) +/* The dma information of pages allocated at a time. */ +struct virtnet_rq_dma { + dma_addr_t addr; + u32 ref; + u16 len; + u16 need_sync; +}; + /* Internal representation of a send virtqueue */ struct send_queue { /* Virtqueue associated with this send _queue */ @@ -175,6 +183,12 @@ struct receive_queue { char name[16]; struct xdp_rxq_info xdp_rxq; + + /* Record the last dma info to free after new pages is allocated. 
*/ + struct virtnet_rq_dma *last_dma; + + /* Do dma by self */ + bool do_dma; }; /* This structure can contain rss message with maximum settings for indirection table and keysize @@ -562,6 +576,156 @@ ok: return skb; } +static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) +{ + struct page *page = virt_to_head_page(buf); + struct virtnet_rq_dma *dma; + void *head; + int offset; + + head = page_address(page); + + dma = head; + + --dma->ref; + + if (dma->ref) { + if (dma->need_sync && len) { + offset = buf - (head + sizeof(*dma)); + + virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, offset, + len, DMA_FROM_DEVICE); + } + + return; + } + + virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + put_page(page); +} + +static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) +{ + void *buf; + + buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); + if (buf && rq->do_dma) + virtnet_rq_unmap(rq, buf, *len); + + return buf; +} + +static void *virtnet_rq_detach_unused_buf(struct receive_queue *rq) +{ + void *buf; + + buf = virtqueue_detach_unused_buf(rq->vq); + if (buf && rq->do_dma) + virtnet_rq_unmap(rq, buf, 0); + + return buf; +} + +static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) +{ + struct virtnet_rq_dma *dma; + dma_addr_t addr; + u32 offset; + void *head; + + if (!rq->do_dma) { + sg_init_one(rq->sg, buf, len); + return; + } + + head = page_address(rq->alloc_frag.page); + + offset = buf - head; + + dma = head; + + addr = dma->addr - sizeof(*dma) + offset; + + sg_init_table(rq->sg, 1); + rq->sg[0].dma_address = addr; + rq->sg[0].length = len; +} + +static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) +{ + struct page_frag *alloc_frag = &rq->alloc_frag; + struct virtnet_rq_dma *dma; + void *buf, *head; + dma_addr_t addr; + + if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp))) + return NULL; + + head = page_address(alloc_frag->page); + + if (rq->do_dma) { + dma = head; + + /* new pages */ + if (!alloc_frag->offset) { + if (rq->last_dma) { + /* Now, the new page is allocated, the last dma + * will not be used. So the dma can be unmapped + * if the ref is 0. + */ + virtnet_rq_unmap(rq, rq->last_dma, 0); + rq->last_dma = NULL; + } + + dma->len = alloc_frag->size - sizeof(*dma); + + addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, + dma->len, DMA_FROM_DEVICE, 0); + if (virtqueue_dma_mapping_error(rq->vq, addr)) + return NULL; + + dma->addr = addr; + dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); + + /* Add a reference to dma to prevent the entire dma from + * being released during error handling. This reference + * will be freed after the pages are no longer used. 
+ */ + get_page(alloc_frag->page); + dma->ref = 1; + alloc_frag->offset = sizeof(*dma); + + rq->last_dma = dma; + } + + ++dma->ref; + } + + buf = head + alloc_frag->offset; + + get_page(alloc_frag->page); + alloc_frag->offset += size; + + return buf; +} + +static void virtnet_rq_set_premapped(struct virtnet_info *vi) +{ + int i; + + /* disable for big mode */ + if (!vi->mergeable_rx_bufs && vi->big_packets) + return; + + for (i = 0; i < vi->max_queue_pairs; i++) { + if (virtqueue_set_dma_premapped(vi->rq[i].vq)) + continue; + + vi->rq[i].do_dma = true; + } +} + static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) { unsigned int len; @@ -917,7 +1081,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, void *buf; int off; - buf = virtqueue_get_buf(rq->vq, &buflen); + buf = virtnet_rq_get_buf(rq, &buflen, NULL); if (unlikely(!buf)) goto err_buf; @@ -1137,7 +1301,7 @@ static void mergeable_buf_free(struct receive_queue *rq, int num_buf, int len; while (num_buf-- > 1) { - buf = virtqueue_get_buf(rq->vq, &len); + buf = virtnet_rq_get_buf(rq, &len, NULL); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers missing\n", dev->name, num_buf); @@ -1245,7 +1409,7 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, return -EINVAL; while (--*num_buf > 0) { - buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); + buf = virtnet_rq_get_buf(rq, &len, &ctx); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", dev->name, *num_buf, @@ -1474,7 +1638,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, while (--num_buf) { int num_skb_frags; - buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); + buf = virtnet_rq_get_buf(rq, &len, &ctx); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", dev->name, num_buf, @@ -1633,7 +1797,6 @@ frame_err: static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { - struct page_frag *alloc_frag = &rq->alloc_frag; char *buf; unsigned int xdp_headroom = virtnet_get_headroom(vi); void *ctx = (void *)(unsigned long)xdp_headroom; @@ -1642,17 +1805,21 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, len = SKB_DATA_ALIGN(len) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp))) + + buf = virtnet_rq_alloc(rq, len, gfp); + if (unlikely(!buf)) return -ENOMEM; - buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; - get_page(alloc_frag->page); - alloc_frag->offset += len; - sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom, - vi->hdr_len + GOOD_PACKET_LEN); + virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom, + vi->hdr_len + GOOD_PACKET_LEN); + err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); - if (err < 0) + if (err < 0) { + if (rq->do_dma) + virtnet_rq_unmap(rq, buf, 0); put_page(virt_to_head_page(buf)); + } + return err; } @@ -1729,23 +1896,22 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, unsigned int headroom = virtnet_get_headroom(vi); unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); - char *buf; - void *ctx; - int err; unsigned int len, hole; + void *ctx; + char *buf; + int err; /* Extra tailroom is needed to satisfy XDP's assumption. This * means rx frags coalescing won't work, but consider we've * disabled GSO for XDP, it won't be a big issue. 
*/ len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); - if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) + + buf = virtnet_rq_alloc(rq, len + room, gfp); + if (unlikely(!buf)) return -ENOMEM; - buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; buf += headroom; /* advance address leaving hole at front of pkt */ - get_page(alloc_frag->page); - alloc_frag->offset += len + room; hole = alloc_frag->size - alloc_frag->offset; if (hole < len + room) { /* To avoid internal fragmentation, if there is very likely not @@ -1759,11 +1925,15 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, alloc_frag->offset += hole; } - sg_init_one(rq->sg, buf, len); + virtnet_rq_init_one_sg(rq, buf, len); + ctx = mergeable_len_to_ctx(len + room, headroom); err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); - if (err < 0) + if (err < 0) { + if (rq->do_dma) + virtnet_rq_unmap(rq, buf, 0); put_page(virt_to_head_page(buf)); + } return err; } @@ -1884,13 +2054,13 @@ static int virtnet_receive(struct receive_queue *rq, int budget, void *ctx; while (stats.packets < budget && - (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) { + (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats); stats.packets++; } } else { while (stats.packets < budget && - (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { + (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) { receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats); stats.packets++; } @@ -3662,8 +3832,11 @@ static void free_receive_page_frags(struct virtnet_info *vi) { int i; for (i = 0; i < vi->max_queue_pairs; i++) - if (vi->rq[i].alloc_frag.page) + if (vi->rq[i].alloc_frag.page) { + if (vi->rq[i].do_dma && vi->rq[i].last_dma) + virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); put_page(vi->rq[i].alloc_frag.page); + } } static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) @@ -3700,9 +3873,10 @@ static void free_unused_bufs(struct virtnet_info *vi) } for (i = 0; i < vi->max_queue_pairs; i++) { - struct virtqueue *vq = vi->rq[i].vq; - while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) - virtnet_rq_free_unused_buf(vq, buf); + struct receive_queue *rq = &vi->rq[i]; + + while ((buf = virtnet_rq_detach_unused_buf(rq)) != NULL) + virtnet_rq_free_unused_buf(rq->vq, buf); cond_resched(); } } @@ -3876,6 +4050,8 @@ static int init_vqs(struct virtnet_info *vi) if (ret) goto err_free; + virtnet_rq_set_premapped(vi); + cpus_read_lock(); virtnet_set_affinity(vi); cpus_read_unlock(); From ae15aceaa98ad9499763923f7890e345d9f46b60 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 11 Aug 2023 05:15:39 -0400 Subject: [PATCH 18/19] virtio_vdpa: build affinity masks conditionally We try to build affinity mask via create_affinity_masks() unconditionally which may lead several issues: - the affinity mask is not used for parent without affinity support (only VDUSE support the affinity now) - the logic of create_affinity_masks() might not work for devices other than block. For example it's not rare in the networking device where the number of queues could exceed the number of CPUs. Such case breaks the current affinity logic which is based on group_cpus_evenly() who assumes the number of CPUs are not less than the number of groups. 
This can trigger a warning[1]: if (ret >= 0) WARN_ON(nr_present + nr_others < numgrps); Fixing this by only build the affinity masks only when - Driver passes affinity descriptor, driver like virtio-blk can make sure to limit the number of queues when it exceeds the number of CPUs - Parent support affinity setting config ops This help to avoid the warning. More optimizations could be done on top. [1] [ 682.146655] WARNING: CPU: 6 PID: 1550 at lib/group_cpus.c:400 group_cpus_evenly+0x1aa/0x1c0 [ 682.146668] CPU: 6 PID: 1550 Comm: vdpa Not tainted 6.5.0-rc5jason+ #79 [ 682.146671] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 [ 682.146673] RIP: 0010:group_cpus_evenly+0x1aa/0x1c0 [ 682.146676] Code: 4c 89 e0 5b 5d 41 5c 41 5d 41 5e c3 cc cc cc cc e8 1b c4 74 ff 48 89 ef e8 13 ac 98 ff 4c 89 e7 45 31 e4 e8 08 ac 98 ff eb c2 <0f> 0b eb b6 e8 fd 05 c3 00 45 31 e4 eb e5 cc cc cc cc cc cc cc cc [ 682.146679] RSP: 0018:ffffc9000215f498 EFLAGS: 00010293 [ 682.146682] RAX: 000000000001f1e0 RBX: 0000000000000041 RCX: 0000000000000000 [ 682.146684] RDX: ffff888109922058 RSI: 0000000000000041 RDI: 0000000000000030 [ 682.146686] RBP: ffff888109922058 R08: ffffc9000215f498 R09: ffffc9000215f4a0 [ 682.146687] R10: 00000000000198d0 R11: 0000000000000030 R12: ffff888107e02800 [ 682.146689] R13: 0000000000000030 R14: 0000000000000030 R15: 0000000000000041 [ 682.146692] FS: 00007fef52315740(0000) GS:ffff888237380000(0000) knlGS:0000000000000000 [ 682.146695] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 682.146696] CR2: 00007fef52509000 CR3: 0000000110dbc004 CR4: 0000000000370ee0 [ 682.146698] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 682.146700] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 682.146701] Call Trace: [ 682.146703] [ 682.146705] ? __warn+0x7b/0x130 [ 682.146709] ? group_cpus_evenly+0x1aa/0x1c0 [ 682.146712] ? report_bug+0x1c8/0x1e0 [ 682.146717] ? handle_bug+0x3c/0x70 [ 682.146721] ? exc_invalid_op+0x14/0x70 [ 682.146723] ? asm_exc_invalid_op+0x16/0x20 [ 682.146727] ? group_cpus_evenly+0x1aa/0x1c0 [ 682.146729] ? group_cpus_evenly+0x15c/0x1c0 [ 682.146731] create_affinity_masks+0xaf/0x1a0 [ 682.146735] virtio_vdpa_find_vqs+0x83/0x1d0 [ 682.146738] ? __pfx_default_calc_sets+0x10/0x10 [ 682.146742] virtnet_find_vqs+0x1f0/0x370 [ 682.146747] virtnet_probe+0x501/0xcd0 [ 682.146749] ? vp_modern_get_status+0x12/0x20 [ 682.146751] ? get_cap_addr.isra.0+0x10/0xc0 [ 682.146754] virtio_dev_probe+0x1af/0x260 [ 682.146759] really_probe+0x1a5/0x410 Fixes: 3dad56823b53 ("virtio-vdpa: Support interrupt affinity spreading mechanism") Signed-off-by: Jason Wang Message-Id: <20230811091539.1359865-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_vdpa.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 961161da5900..06ce6d8c2e00 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -366,11 +366,14 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct irq_affinity default_affd = { 0 }; struct cpumask *masks; struct vdpa_callback cb; + bool has_affinity = desc && ops->set_vq_affinity; int i, err, queue_idx = 0; - masks = create_affinity_masks(nvqs, desc ? desc : &default_affd); - if (!masks) - return -ENOMEM; + if (has_affinity) { + masks = create_affinity_masks(nvqs, desc ? 
desc : &default_affd); + if (!masks) + return -ENOMEM; + } for (i = 0; i < nvqs; ++i) { if (!names[i]) { @@ -386,20 +389,22 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, goto err_setup_vq; } - if (ops->set_vq_affinity) + if (has_affinity) ops->set_vq_affinity(vdpa, i, &masks[i]); } cb.callback = virtio_vdpa_config_cb; cb.private = vd_dev; ops->set_config_cb(vdpa, &cb); - kfree(masks); + if (has_affinity) + kfree(masks); return 0; err_setup_vq: virtio_vdpa_del_vqs(vdev); - kfree(masks); + if (has_affinity) + kfree(masks); return err; } From 1acfe2c1225899eab5ab724c91b7e1eb2881b9ab Mon Sep 17 00:00:00 2001 From: Yuan Yao Date: Tue, 8 Aug 2023 05:10:59 +0000 Subject: [PATCH 19/19] virtio_ring: fix avail_wrap_counter in virtqueue_add_packed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In current packed virtqueue implementation, the avail_wrap_counter won't flip, in the case when the driver supplies a descriptor chain with a length equals to the queue size; total_sg == vq->packed.vring.num. Let’s assume the following situation: vq->packed.vring.num=4 vq->packed.next_avail_idx: 1 vq->packed.avail_wrap_counter: 0 Then the driver adds a descriptor chain containing 4 descriptors. We expect the following result with avail_wrap_counter flipped: vq->packed.next_avail_idx: 1 vq->packed.avail_wrap_counter: 1 But, the current implementation gives the following result: vq->packed.next_avail_idx: 1 vq->packed.avail_wrap_counter: 0 To reproduce the bug, you can set a packed queue size as small as possible, so that the driver is more likely to provide a descriptor chain with a length equal to the packed queue size. For example, in qemu run following commands: sudo qemu-system-x86_64 \ -enable-kvm \ -nographic \ -kernel "path/to/kernel_image" \ -m 1G \ -drive file="path/to/rootfs",if=none,id=disk \ -device virtio-blk,drive=disk \ -drive file="path/to/disk_image",if=none,id=rwdisk \ -device virtio-blk,drive=rwdisk,packed=on,queue-size=4,\ indirect_desc=off \ -append "console=ttyS0 root=/dev/vda rw init=/bin/bash" Inside the VM, create a directory and mount the rwdisk device on it. The rwdisk will hang and mount operation will not complete. This commit fixes the wrap counter error by flipping the packed.avail_wrap_counter, when start of descriptor chain equals to the end of descriptor chain (head == i). Fixes: 1ce9e6055fa0 ("virtio_ring: introduce packed ring support") Signed-off-by: Yuan Yao Message-Id: <20230808051110.3492693-1-yuanyaogoog@chromium.org> Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 81ecb29c88f1..51d8f3299c10 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1499,7 +1499,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, } } - if (i < head) + if (i <= head) vq->packed.avail_wrap_counter ^= 1; /* We're using some buffers from the free list. */
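
A minimal userspace sketch of negotiating VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK over a vhost-vdpa fd (not taken from the series; it assumes the existing VHOST_GET_BACKEND_FEATURES/VHOST_SET_BACKEND_FEATURES ioctls, the helper name is illustrative and error handling is reduced):

/* Hypothetical helper: accept the flag only if the parent driver offers it. */
#include <linux/vhost.h>
#include <sys/ioctl.h>

static int negotiate_enable_after_driver_ok(int vdpa_fd)
{
	__u64 features;

	if (ioctl(vdpa_fd, VHOST_GET_BACKEND_FEATURES, &features) < 0)
		return -1;

	if (!(features & (1ULL << VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
		return -1;	/* parent does not offer the feature */

	features = 1ULL << VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK;
	return ioctl(vdpa_fd, VHOST_SET_BACKEND_FEATURES, &features);
}

With the flag acked, userland can restore device state through the control virtqueue before the data virtqueues are enabled, which is the software-assisted live migration flow described in patch 02.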
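
A sketch of driver-side use of virtqueue_reset() from patch 14 (again illustrative, not from the series; a real driver frees buffers according to how it allocated them, and the transport must provide disable_vq_and_reset/enable_vq_after_reset):

#include <linux/virtio.h>
#include <linux/slab.h>

static void my_recycle(struct virtqueue *vq, void *buf)
{
	/* Called once for every buffer still queued when the vq is drained. */
	kfree(buf);
}

static int my_requeue_after_reset(struct virtqueue *vq)
{
	int err;

	/* Detach and recycle all unused buffers, then re-enable the ring. */
	err = virtqueue_reset(vq, my_recycle);
	if (err)
		return err;

	/* The vq is now empty and enabled again; per patch 08, this is also a
	 * point where virtqueue_set_dma_premapped() may be called before new
	 * buffers are added.
	 */
	return 0;
}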