virtio, vhost: fixes, features, performance

new iommu device
 vhost guest memory access using vmap (just meta-data for now)
 minor fixes
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 
 Note: due to driver core changes in the driver-core tree, the following
 patch is needed when merging this tree with commit 92ce7e83b4
 ("driver_find_device: Unify the match function with
 class_find_device()") in the driver-core tree:
 
 From: Nathan Chancellor <natechancellor@gmail.com>
 Subject: [PATCH] iommu/virtio: Constify data parameter in viommu_match_node
 
 After commit 92ce7e83b4 ("driver_find_device: Unify the match
 function with class_find_device()") in the driver-core tree, the data
 parameter of viommu_match_node() needs to be const.
 
 Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 
 ---
  drivers/iommu/virtio-iommu.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
 index 4620dd221ffd..433f4d2ee956 100644
 --- a/drivers/iommu/virtio-iommu.c
 +++ b/drivers/iommu/virtio-iommu.c
 @@ -839,7 +839,7 @@ static void viommu_put_resv_regions(struct device *dev, struct list_head *head)
  static struct iommu_ops viommu_ops;
  static struct virtio_driver virtio_iommu_drv;
 
 -static int viommu_match_node(struct device *dev, void *data)
 +static int viommu_match_node(struct device *dev, const void *data)
  {
  	return dev->parent->fwnode == data;
  }
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJdJ5qUAAoJECgfDbjSjVRpQs0H/2qWcIG1zjGKyh9KWrfgOusG
 /QIqeP50d7SC6oqdyd00tzmExqO1xdGLPFzYixdOsU817te1gHBP4Rfmzo01jZRd
 CUzZNnZQ2JRsDshiA6G2ui+wn1/a/cB3RPN4rT1mquDYS53QmsRGDQDnpp84TXMV
 aocB8TS6halbRzKMq3VmaWHIvzNXnt4dwQR542+PyeLLn9bUx2QwWj2ON3QwxixK
 dVRZow3GwLGBhKTA/Z1Z/Bta4fEfOKjUGP2XWgvL6zOr+nZR4eQ8w5WXVJYzR+d6
 1JCfqTxleweT2k6Tu5VwtTNlQkxn/XvQAeisppOiEE6NnPjubyI9wMQIvL7bkpo=
 =uJbC
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio, vhost updates from Michael Tsirkin:
 "Fixes, features, performance:

   - new iommu device

   - vhost guest memory access using vmap (just meta-data for now)

   - minor fixes"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  virtio-mmio: add error check for platform_get_irq
  scsi: virtio_scsi: Use struct_size() helper
  iommu/virtio: Add event queue
  iommu/virtio: Add probe request
  iommu: Add virtio-iommu driver
  PCI: OF: Initialize dev->fwnode appropriately
  of: Allow the iommu-map property to omit untranslated devices
  dt-bindings: virtio: Add virtio-pci-iommu node
  dt-bindings: virtio-mmio: Add IOMMU description
  vhost: fix clang build warning
  vhost: access vq metadata through kernel virtual address
  vhost: factor out setting vring addr and num
  vhost: introduce helpers to get the size of metadata area
  vhost: rename vq_iotlb_prefetch() to vq_meta_prefetch()
  vhost: fine grain userspace memory accessors
  vhost: generalize adding used elem
This commit is contained in:
Linus Torvalds 2019-07-17 11:26:09 -07:00
commit 3a1d5384b7
15 changed files with 2228 additions and 131 deletions

View File

@ -0,0 +1,66 @@
* virtio IOMMU PCI device
When virtio-iommu uses the PCI transport, its programming interface is
discovered dynamically by the PCI probing infrastructure. However the
device tree statically describes the relation between IOMMU and DMA
masters. Therefore, the PCI root complex that hosts the virtio-iommu
contains a child node representing the IOMMU device explicitly.
Required properties:
- compatible: Should be "virtio,pci-iommu"
- reg: PCI address of the IOMMU. As defined in the PCI Bus
Binding reference [1], the reg property is a five-cell
address encoded as (phys.hi phys.mid phys.lo size.hi
size.lo). phys.hi should contain the device's BDF as
0b00000000 bbbbbbbb dddddfff 00000000. The other cells
should be zero.
- #iommu-cells: Each platform DMA master managed by the IOMMU is assigned
an endpoint ID, described by the "iommus" property [2].
For virtio-iommu, #iommu-cells must be 1.
Notes:
- DMA from the IOMMU device isn't managed by another IOMMU. Therefore the
virtio-iommu node doesn't have an "iommus" property, and is omitted from
the iommu-map property of the root complex.
Example:
pcie@10000000 {
compatible = "pci-host-ecam-generic";
...
/* The IOMMU programming interface uses slot 00:01.0 */
iommu0: iommu@0008 {
compatible = "virtio,pci-iommu";
reg = <0x00000800 0 0 0 0>;
#iommu-cells = <1>;
};
/*
* The IOMMU manages all functions in this PCI domain except
* itself. Omit BDF 00:01.0.
*/
iommu-map = <0x0 &iommu0 0x0 0x8>,
<0x9 &iommu0 0x9 0xfff7>;
};
pcie@20000000 {
compatible = "pci-host-ecam-generic";
...
/*
* The IOMMU also manages all functions from this domain,
* with endpoint IDs 0x10000 - 0x1ffff
*/
iommu-map = <0x0 &iommu0 0x10000 0x10000>;
};
ethernet@fe001000 {
...
/* The IOMMU manages this platform device with endpoint ID 0x20000 */
iommus = <&iommu0 0x20000>;
};
[1] Documentation/devicetree/bindings/pci/pci.txt
[2] Documentation/devicetree/bindings/iommu/iommu.txt

View File

@ -8,10 +8,40 @@ Required properties:
- reg: control registers base address and size including configuration space
- interrupts: interrupt generated by the device
Required properties for virtio-iommu:
- #iommu-cells: When the node corresponds to a virtio-iommu device, it is
linked to DMA masters using the "iommus" or "iommu-map"
properties [1][2]. #iommu-cells specifies the size of the
"iommus" property. For virtio-iommu #iommu-cells must be
1, each cell describing a single endpoint ID.
Optional properties:
- iommus: If the device accesses memory through an IOMMU, it should
have an "iommus" property [1]. Since virtio-iommu itself
does not access memory through an IOMMU, the "virtio,mmio"
node cannot have both an "#iommu-cells" and an "iommus"
property.
Example:
virtio_block@3000 {
compatible = "virtio,mmio";
reg = <0x3000 0x100>;
interrupts = <41>;
/* Device has endpoint ID 23 */
iommus = <&viommu 23>;
};
viommu: iommu@3100 {
compatible = "virtio,mmio";
reg = <0x3100 0x100>;
interrupts = <42>;
#iommu-cells = <1>;
};
[1] Documentation/devicetree/bindings/iommu/iommu.txt
[2] Documentation/devicetree/bindings/pci/pci-iommu.txt

View File

@ -17107,6 +17107,13 @@ S: Maintained
F: drivers/virtio/virtio_input.c
F: include/uapi/linux/virtio_input.h
VIRTIO IOMMU DRIVER
M: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
L: virtualization@lists.linux-foundation.org
S: Maintained
F: drivers/iommu/virtio-iommu.c
F: include/uapi/linux/virtio_iommu.h
VIRTUAL BOX GUEST DEVICE DRIVER
M: Hans de Goede <hdegoede@redhat.com>
M: Arnd Bergmann <arnd@arndb.de>

View File

@ -473,4 +473,15 @@ config HYPERV_IOMMU
Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux
guests to run with x2APIC mode enabled.
config VIRTIO_IOMMU
bool "Virtio IOMMU driver"
depends on VIRTIO=y
depends on ARM64
select IOMMU_API
select INTERVAL_TREE
help
Para-virtualised IOMMU driver with virtio.
Say Y here if you intend to run this kernel as a guest.
endif # IOMMU_SUPPORT

View File

@ -33,3 +33,4 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o

1158
drivers/iommu/virtio-iommu.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -2294,8 +2294,12 @@ int of_map_rid(struct device_node *np, u32 rid,
return 0;
}
pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n",
np, map_name, rid, target && *target ? *target : NULL);
return -EFAULT;
pr_info("%pOF: no %s translation for rid 0x%x on %pOF\n", np, map_name,
rid, target && *target ? *target : NULL);
/* Bypasses translation */
if (id_out)
*id_out = rid;
return 0;
}
EXPORT_SYMBOL_GPL(of_map_rid);

View File

@ -22,12 +22,15 @@ void pci_set_of_node(struct pci_dev *dev)
return;
dev->dev.of_node = of_pci_find_child_device(dev->bus->dev.of_node,
dev->devfn);
if (dev->dev.of_node)
dev->dev.fwnode = &dev->dev.of_node->fwnode;
}
/*
 * Undo pci_set_of_node(): drop the reference on the device's OF node with
 * of_node_put() and clear both the of_node and fwnode pointers of the
 * embedded struct device. fwnode is cleared as well because
 * pci_set_of_node() points it at the fwnode embedded in that same OF node.
 */
void pci_release_of_node(struct pci_dev *dev)
{
of_node_put(dev->dev.of_node);
dev->dev.of_node = NULL;
dev->dev.fwnode = NULL;
}
void pci_set_bus_of_node(struct pci_bus *bus)
@ -41,13 +44,18 @@ void pci_set_bus_of_node(struct pci_bus *bus)
if (node && of_property_read_bool(node, "external-facing"))
bus->self->untrusted = true;
}
bus->dev.of_node = node;
if (bus->dev.of_node)
bus->dev.fwnode = &bus->dev.of_node->fwnode;
}
/*
 * Undo pci_set_bus_of_node(): drop the reference on the bus's OF node with
 * of_node_put() and clear both the of_node and fwnode pointers of the
 * embedded struct device. fwnode is cleared as well because
 * pci_set_bus_of_node() points it at the fwnode embedded in that same OF
 * node.
 */
void pci_release_bus_of_node(struct pci_bus *bus)
{
of_node_put(bus->dev.of_node);
bus->dev.of_node = NULL;
bus->dev.fwnode = NULL;
}
struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus)

View File

@ -792,7 +792,7 @@ static int virtscsi_probe(struct virtio_device *vdev)
num_targets = virtscsi_config_get(vdev, max_target) + 1;
shost = scsi_host_alloc(&virtscsi_host_template,
sizeof(*vscsi) + sizeof(vscsi->req_vqs[0]) * num_queues);
struct_size(vscsi, req_vqs, num_queues));
if (!shost)
return -ENOMEM;

View File

@ -956,7 +956,7 @@ static void handle_tx(struct vhost_net *net)
if (!sock)
goto out;
if (!vq_iotlb_prefetch(vq))
if (!vq_meta_prefetch(vq))
goto out;
vhost_disable_notify(&net->dev, vq);
@ -1125,7 +1125,7 @@ static void handle_rx(struct vhost_net *net)
if (!sock)
goto out;
if (!vq_iotlb_prefetch(vq))
if (!vq_meta_prefetch(vq))
goto out;
vhost_disable_notify(&net->dev, vq);

File diff suppressed because it is too large Load Diff

View File

@ -12,6 +12,9 @@
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/atomic.h>
#include <linux/pagemap.h>
#include <linux/mmu_notifier.h>
#include <asm/cacheflush.h>
struct vhost_work;
typedef void (*vhost_work_fn_t)(struct vhost_work *work);
@ -80,6 +83,24 @@ enum vhost_uaddr_type {
VHOST_NUM_ADDRS = 3,
};
/*
 * One mapping tracked per vring metadata area (struct vhost_virtqueue keeps
 * an array of VHOST_NUM_ADDRS RCU-protected pointers to these).
 * NOTE(review): presumably @pages holds @npages pinned pages and @addr is
 * the kernel virtual address they are mapped at - confirm in vhost.c.
 */
struct vhost_map {
int npages;
void *addr;
struct page **pages;
};
/*
 * Description of one userspace address range (struct vhost_virtqueue keeps
 * VHOST_NUM_ADDRS of these, read by the MMU notifier and modified by the
 * vring ioctl()).
 * NOTE(review): presumably @uaddr/@size give the start and byte length of
 * the range and @write tells whether it is written to - confirm in vhost.c.
 */
struct vhost_uaddr {
unsigned long uaddr;
size_t size;
bool write;
};
/*
 * VHOST_ARCH_CAN_ACCEL_UACCESS is 1 only when CONFIG_MMU_NOTIFIER is defined
 * and ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE evaluates to 0; in every other
 * configuration it is 0.
 */
#if !defined(CONFIG_MMU_NOTIFIER) || ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE != 0
#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
#else
#define VHOST_ARCH_CAN_ACCEL_UACCESS 1
#endif
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
@ -90,7 +111,22 @@ struct vhost_virtqueue {
struct vring_desc __user *desc;
struct vring_avail __user *avail;
struct vring_used __user *used;
#if VHOST_ARCH_CAN_ACCEL_UACCESS
/* Read by memory accessors, modified by meta data
* prefetching, MMU notifier and vring ioctl().
* Synchronized through mmu_lock (writers) and RCU (writers
* and readers).
*/
struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
/* Read by MMU notifier, modified by vring ioctl(),
* synchronized through MMU notifier
* registering/unregistering.
*/
struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
#endif
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
struct eventfd_ctx *call_ctx;
struct eventfd_ctx *error_ctx;
@ -145,6 +181,8 @@ struct vhost_virtqueue {
bool user_be;
#endif
u32 busyloop_timeout;
spinlock_t mmu_lock;
int invalidate_count;
};
struct vhost_msg_node {
@ -158,6 +196,9 @@ struct vhost_msg_node {
struct vhost_dev {
struct mm_struct *mm;
#ifdef CONFIG_MMU_NOTIFIER
struct mmu_notifier mmu_notifier;
#endif
struct mutex mutex;
struct vhost_virtqueue **vqs;
int nvqs;
@ -212,7 +253,7 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
unsigned int log_num, u64 len,
struct iovec *iov, int count);
int vq_iotlb_prefetch(struct vhost_virtqueue *vq);
int vq_meta_prefetch(struct vhost_virtqueue *vq);
struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);
void vhost_enqueue_msg(struct vhost_dev *dev,

View File

@ -463,9 +463,14 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct irq_affinity *desc)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
int irq = platform_get_irq(vm_dev->pdev, 0);
int i, err, queue_idx = 0;
if (irq < 0) {
dev_err(&vdev->dev, "Cannot get IRQ resource\n");
return irq;
}
err = request_irq(irq, vm_interrupt, IRQF_SHARED,
dev_name(&vdev->dev), vm_dev);
if (err)

View File

@ -43,5 +43,6 @@
#define VIRTIO_ID_INPUT 18 /* virtio input */
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
#endif /* _LINUX_VIRTIO_IDS_H */

View File

@ -0,0 +1,161 @@
/* SPDX-License-Identifier: BSD-3-Clause */
/*
* Virtio-iommu definition v0.9
*
* Copyright (C) 2018 Arm Ltd.
*/
#ifndef _UAPI_LINUX_VIRTIO_IOMMU_H
#define _UAPI_LINUX_VIRTIO_IOMMU_H
#include <linux/types.h>
/*
 * Feature bits. The values are bit numbers (0-4), not masks.
 * NOTE(review): the meaning of each feature is not visible in this header;
 * see the virtio-iommu specification.
 */
#define VIRTIO_IOMMU_F_INPUT_RANGE 0
#define VIRTIO_IOMMU_F_DOMAIN_BITS 1
#define VIRTIO_IOMMU_F_MAP_UNMAP 2
#define VIRTIO_IOMMU_F_BYPASS 3
#define VIRTIO_IOMMU_F_PROBE 4
/*
 * A range described by two 64-bit bounds; used for the input_range field
 * of struct virtio_iommu_config.
 * NOTE(review): whether @end is inclusive is not stated here - confirm
 * against the virtio-iommu specification.
 */
struct virtio_iommu_range {
__u64 start;
__u64 end;
};
/*
 * Device configuration layout. The three padding bytes after domain_bits
 * keep probe_size on a 4-byte boundary.
 * NOTE(review): multi-byte fields are declared __u64/__u32 here, while the
 * request structures in this header use __le64/__le32 - confirm the
 * intended endianness of config space accesses.
 */
struct virtio_iommu_config {
/* Supported page sizes */
__u64 page_size_mask;
/* Supported IOVA range */
struct virtio_iommu_range input_range;
/* Max domain ID size */
__u8 domain_bits;
__u8 padding[3];
/* Probe buffer size */
__u32 probe_size;
};
/*
 * Request types. Each one has a matching struct virtio_iommu_req_*
 * definition in this header.
 * NOTE(review): presumably the values carried in
 * virtio_iommu_req_head.type - confirm against the driver.
 */
#define VIRTIO_IOMMU_T_ATTACH 0x01
#define VIRTIO_IOMMU_T_DETACH 0x02
#define VIRTIO_IOMMU_T_MAP 0x03
#define VIRTIO_IOMMU_T_UNMAP 0x04
#define VIRTIO_IOMMU_T_PROBE 0x05
/*
 * Status types. VIRTIO_IOMMU_S_OK is zero; every other value names an
 * error condition.
 * NOTE(review): presumably the values reported in
 * virtio_iommu_req_tail.status - confirm against the driver.
 */
#define VIRTIO_IOMMU_S_OK 0x00
#define VIRTIO_IOMMU_S_IOERR 0x01
#define VIRTIO_IOMMU_S_UNSUPP 0x02
#define VIRTIO_IOMMU_S_DEVERR 0x03
#define VIRTIO_IOMMU_S_INVAL 0x04
#define VIRTIO_IOMMU_S_RANGE 0x05
#define VIRTIO_IOMMU_S_NOENT 0x06
#define VIRTIO_IOMMU_S_FAULT 0x07
/*
 * Common 4-byte header that is the first member of every
 * virtio_iommu_req_* structure in this header: a one-byte type followed by
 * three reserved bytes.
 */
struct virtio_iommu_req_head {
__u8 type;
__u8 reserved[3];
};
/*
 * Common 4-byte trailer: a one-byte status followed by three reserved
 * bytes. It is the last member of the attach, detach, map and unmap
 * requests; for probe requests it follows the variable-length properties
 * array instead of being a struct member.
 */
struct virtio_iommu_req_tail {
__u8 status;
__u8 reserved[3];
};
/*
 * ATTACH request: common header, little-endian 32-bit domain and endpoint
 * IDs, eight reserved bytes and the common tail.
 * NOTE(review): presumably sent with head.type = VIRTIO_IOMMU_T_ATTACH.
 */
struct virtio_iommu_req_attach {
struct virtio_iommu_req_head head;
__le32 domain;
__le32 endpoint;
__u8 reserved[8];
struct virtio_iommu_req_tail tail;
};
/*
 * DETACH request: same layout as struct virtio_iommu_req_attach (common
 * header, little-endian 32-bit domain and endpoint IDs, eight reserved
 * bytes, common tail).
 * NOTE(review): presumably sent with head.type = VIRTIO_IOMMU_T_DETACH.
 */
struct virtio_iommu_req_detach {
struct virtio_iommu_req_head head;
__le32 domain;
__le32 endpoint;
__u8 reserved[8];
struct virtio_iommu_req_tail tail;
};
/*
 * Mapping flag bits. VIRTIO_IOMMU_MAP_F_MASK is the OR of all four defined
 * flags.
 * NOTE(review): presumably used in virtio_iommu_req_map.flags.
 */
#define VIRTIO_IOMMU_MAP_F_READ (1 << 0)
#define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1)
#define VIRTIO_IOMMU_MAP_F_EXEC (1 << 2)
#define VIRTIO_IOMMU_MAP_F_MMIO (1 << 3)
#define VIRTIO_IOMMU_MAP_F_MASK (VIRTIO_IOMMU_MAP_F_READ | \
VIRTIO_IOMMU_MAP_F_WRITE | \
VIRTIO_IOMMU_MAP_F_EXEC | \
VIRTIO_IOMMU_MAP_F_MMIO)
/*
 * MAP request: common header, little-endian domain ID, the bounds of the
 * virtual range, the start of the physical range, flags and the common
 * tail. Only the start of the physical range is carried; there is no
 * phys_end field.
 * NOTE(review): whether virt_end is inclusive is not stated here - confirm
 * against the virtio-iommu specification.
 */
struct virtio_iommu_req_map {
struct virtio_iommu_req_head head;
__le32 domain;
__le64 virt_start;
__le64 virt_end;
__le64 phys_start;
__le32 flags;
struct virtio_iommu_req_tail tail;
};
/*
 * UNMAP request: common header, little-endian domain ID, the bounds of the
 * virtual range, four reserved bytes and the common tail.
 * NOTE(review): whether virt_end is inclusive is not stated here - confirm
 * against the virtio-iommu specification.
 */
struct virtio_iommu_req_unmap {
struct virtio_iommu_req_head head;
__le32 domain;
__le64 virt_start;
__le64 virt_end;
__u8 reserved[4];
struct virtio_iommu_req_tail tail;
};
/*
 * Probe property types.
 * NOTE(review): VIRTIO_IOMMU_PROBE_T_MASK (0xfff) presumably selects the
 * type bits of virtio_iommu_probe_property.type - confirm against the
 * driver.
 */
#define VIRTIO_IOMMU_PROBE_T_NONE 0
#define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
#define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
/*
 * Header of one probe property: little-endian 16-bit type and length.
 * NOTE(review): presumably @length counts the bytes that follow this
 * header - confirm against the virtio-iommu specification.
 */
struct virtio_iommu_probe_property {
__le16 type;
__le16 length;
};
/*
 * Reserved memory region subtypes.
 * NOTE(review): presumably the values of
 * virtio_iommu_probe_resv_mem.subtype.
 */
#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
#define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
/*
 * Probe property describing a memory region: property header, one-byte
 * subtype, three reserved bytes and the little-endian 64-bit bounds of the
 * region.
 * NOTE(review): whether @end is inclusive is not stated here - confirm
 * against the virtio-iommu specification.
 */
struct virtio_iommu_probe_resv_mem {
struct virtio_iommu_probe_property head;
__u8 subtype;
__u8 reserved[3];
__le64 start;
__le64 end;
};
/*
 * PROBE request: common header, little-endian endpoint ID and 64 reserved
 * bytes, followed by a variable-length properties buffer declared as a
 * flexible array member. Unlike the other requests, the tail is not a
 * struct member.
 */
struct virtio_iommu_req_probe {
struct virtio_iommu_req_head head;
__le32 endpoint;
__u8 reserved[64];
__u8 properties[];
/*
* Tail follows the variable-length properties array. No padding,
* property lengths are all aligned on 8 bytes.
*/
};
/*
 * Fault types: the _R_ values are reason codes, the _F_ values are flag
 * bits.
 * NOTE(review): presumably used in virtio_iommu_fault.reason and
 * virtio_iommu_fault.flags respectively.
 */
#define VIRTIO_IOMMU_FAULT_R_UNKNOWN 0
#define VIRTIO_IOMMU_FAULT_R_DOMAIN 1
#define VIRTIO_IOMMU_FAULT_R_MAPPING 2
#define VIRTIO_IOMMU_FAULT_F_READ (1 << 0)
#define VIRTIO_IOMMU_FAULT_F_WRITE (1 << 1)
#define VIRTIO_IOMMU_FAULT_F_EXEC (1 << 2)
#define VIRTIO_IOMMU_FAULT_F_ADDRESS (1 << 8)
/*
 * Fault record: one-byte reason, three reserved bytes, little-endian flags
 * and endpoint ID, four more reserved bytes and a little-endian 64-bit
 * address.
 * NOTE(review): presumably @address is only meaningful when
 * VIRTIO_IOMMU_FAULT_F_ADDRESS is set in @flags - confirm against the
 * virtio-iommu specification.
 */
struct virtio_iommu_fault {
__u8 reason;
__u8 reserved[3];
__le32 flags;
__le32 endpoint;
__u8 reserved2[4];
__le64 address;
};
#endif