From f21916ec4826766463fe9fb55a5f43d2a365811d Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 22 Dec 2020 20:20:13 -0500 Subject: [PATCH 1/3] s390/vfio-ap: clean up vfio_ap resources when KVM pointer invalidated The vfio_ap device driver registers a group notifier with VFIO when the file descriptor for a VFIO mediated device for a KVM guest is opened to receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM event). When the KVM pointer is set, the vfio_ap driver takes the following actions: 1. Stashes the KVM pointer in the vfio_ap_mdev struct that holds the state of the mediated device. 2. Calls the kvm_get_kvm() function to increment its reference counter. 3. Sets the function pointer to the function that handles interception of the instruction that enables/disables interrupt processing. 4. Sets the masks in the KVM guest's CRYCB to pass AP resources through to the guest. In order to avoid memory leaks, when the notifier is called to receive notification that the KVM pointer has been set to NULL, the vfio_ap device driver should reverse the actions taken when the KVM pointer was set. Fixes: 258287c994de ("s390: vfio-ap: implement mediated device open callback") Signed-off-by: Tony Krowiak Reviewed-by: Halil Pasic Reviewed-by: Cornelia Huck Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201223012013.5418-1-akrowiak@linux.ibm.com Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/vfio_ap_ops.c | 49 ++++++++++++++++++------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index e0bde8518745..7339043906cf 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1037,19 +1037,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, { struct ap_matrix_mdev *m; - mutex_lock(&matrix_dev->lock); - list_for_each_entry(m, &matrix_dev->mdev_list, node) { - if ((m != matrix_mdev) && (m->kvm == kvm)) { - mutex_unlock(&matrix_dev->lock); + if ((m != matrix_mdev) && (m->kvm == kvm)) return -EPERM; - } } matrix_mdev->kvm = kvm; kvm_get_kvm(kvm); kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook; - mutex_unlock(&matrix_dev->lock); return 0; } @@ -1083,35 +1078,52 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb, return NOTIFY_DONE; } +static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) +{ + kvm_arch_crypto_clear_masks(matrix_mdev->kvm); + matrix_mdev->kvm->arch.crypto.pqap_hook = NULL; + vfio_ap_mdev_reset_queues(matrix_mdev->mdev); + kvm_put_kvm(matrix_mdev->kvm); + matrix_mdev->kvm = NULL; +} + static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, unsigned long action, void *data) { - int ret; + int ret, notify_rc = NOTIFY_OK; struct ap_matrix_mdev *matrix_mdev; if (action != VFIO_GROUP_NOTIFY_SET_KVM) return NOTIFY_OK; matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier); + mutex_lock(&matrix_dev->lock); if (!data) { - matrix_mdev->kvm = NULL; - return NOTIFY_OK; + if (matrix_mdev->kvm) + vfio_ap_mdev_unset_kvm(matrix_mdev); + goto notify_done; } ret = vfio_ap_mdev_set_kvm(matrix_mdev, data); - if (ret) - return NOTIFY_DONE; + if (ret) { + notify_rc = NOTIFY_DONE; + goto notify_done; + } /* If there is no CRYCB pointer, then we can't copy the masks */ - if (!matrix_mdev->kvm->arch.crypto.crycbd) - return NOTIFY_DONE; + if (!matrix_mdev->kvm->arch.crypto.crycbd) { + notify_rc = NOTIFY_DONE; + goto notify_done; + } kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm, matrix_mdev->matrix.aqm, matrix_mdev->matrix.adm); - return NOTIFY_OK; +notify_done: + mutex_unlock(&matrix_dev->lock); + return notify_rc; } static void vfio_ap_irq_disable_apqn(int apqn) @@ -1222,13 +1234,8 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev) struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); mutex_lock(&matrix_dev->lock); - if (matrix_mdev->kvm) { - kvm_arch_crypto_clear_masks(matrix_mdev->kvm); - matrix_mdev->kvm->arch.crypto.pqap_hook = NULL; - vfio_ap_mdev_reset_queues(mdev); - kvm_put_kvm(matrix_mdev->kvm); - matrix_mdev->kvm = NULL; - } + if (matrix_mdev->kvm) + vfio_ap_mdev_unset_kvm(matrix_mdev); mutex_unlock(&matrix_dev->lock); vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, From 6c12a6384e0c0b96debd88b24028e58f2ebd417b Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 22 Dec 2020 20:15:53 -0500 Subject: [PATCH 2/3] s390/vfio-ap: No need to disable IRQ after queue reset The queues assigned to a matrix mediated device are currently reset when: * The VFIO_DEVICE_RESET ioctl is invoked * The mdev fd is closed by userspace (QEMU) * The mdev is removed from sysfs. Immediately after the reset of a queue, a call is made to disable interrupts for the queue. This is entirely unnecessary because the reset of a queue disables interrupts, so this will be removed. Furthermore, vfio_ap_irq_disable() does an unconditional PQAP/AQIC which can result in a specification exception (when the corresponding facility is not available), so this is actually a bugfix. Signed-off-by: Tony Krowiak [pasic@linux.ibm.com: minor rework before merging] Signed-off-by: Halil Pasic Fixes: ec89b55e3bce ("s390: ap: implement PAPQ AQIC interception in kernel") Cc: Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/vfio_ap_drv.c | 6 +- drivers/s390/crypto/vfio_ap_ops.c | 100 ++++++++++++++++---------- drivers/s390/crypto/vfio_ap_private.h | 12 ++-- 3 files changed, 69 insertions(+), 49 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index be2520cc010b..7dc72cb718b0 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -71,15 +71,11 @@ static int vfio_ap_queue_dev_probe(struct ap_device *apdev) static void vfio_ap_queue_dev_remove(struct ap_device *apdev) { struct vfio_ap_queue *q; - int apid, apqi; mutex_lock(&matrix_dev->lock); q = dev_get_drvdata(&apdev->device); + vfio_ap_mdev_reset_queue(q, 1); dev_set_drvdata(&apdev->device, NULL); - apid = AP_QID_CARD(q->apqn); - apqi = AP_QID_QUEUE(q->apqn); - vfio_ap_mdev_reset_queue(apid, apqi, 1); - vfio_ap_irq_disable(q); kfree(q); mutex_unlock(&matrix_dev->lock); } diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 7339043906cf..41fc2e4135fe 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -25,6 +25,7 @@ #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev); +static struct vfio_ap_queue *vfio_ap_find_queue(int apqn); static int match_apqn(struct device *dev, const void *data) { @@ -49,20 +50,15 @@ static struct vfio_ap_queue *vfio_ap_get_queue( int apqn) { struct vfio_ap_queue *q; - struct device *dev; if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm)) return NULL; if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm)) return NULL; - dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL, - &apqn, match_apqn); - if (!dev) - return NULL; - q = dev_get_drvdata(dev); - q->matrix_mdev = matrix_mdev; - put_device(dev); + q = vfio_ap_find_queue(apqn); + if (q) + q->matrix_mdev = matrix_mdev; return q; } @@ -119,13 +115,18 @@ static void vfio_ap_wait_for_irqclear(int apqn) */ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) { - if (q->saved_isc != VFIO_AP_ISC_INVALID && q->matrix_mdev) + if (!q) + return; + if (q->saved_isc != VFIO_AP_ISC_INVALID && + !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) { kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc); - if (q->saved_pfn && q->matrix_mdev) + q->saved_isc = VFIO_AP_ISC_INVALID; + } + if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) { vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &q->saved_pfn, 1); - q->saved_pfn = 0; - q->saved_isc = VFIO_AP_ISC_INVALID; + q->saved_pfn = 0; + } } /** @@ -144,7 +145,7 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) * Returns if ap_aqic function failed with invalid, deconfigured or * checkstopped AP. */ -struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) +static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) { struct ap_qirq_ctrl aqic_gisa = {}; struct ap_queue_status status; @@ -1126,48 +1127,70 @@ notify_done: return notify_rc; } -static void vfio_ap_irq_disable_apqn(int apqn) +static struct vfio_ap_queue *vfio_ap_find_queue(int apqn) { struct device *dev; - struct vfio_ap_queue *q; + struct vfio_ap_queue *q = NULL; dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL, &apqn, match_apqn); if (dev) { q = dev_get_drvdata(dev); - vfio_ap_irq_disable(q); put_device(dev); } + + return q; } -int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi, +int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, unsigned int retry) { struct ap_queue_status status; + int ret; int retry2 = 2; - int apqn = AP_MKQID(apid, apqi); - do { - status = ap_zapq(apqn); - switch (status.response_code) { - case AP_RESPONSE_NORMAL: - while (!status.queue_empty && retry2--) { - msleep(20); - status = ap_tapq(apqn, NULL); - } - WARN_ON_ONCE(retry2 <= 0); - return 0; - case AP_RESPONSE_RESET_IN_PROGRESS: - case AP_RESPONSE_BUSY: + if (!q) + return 0; + +retry_zapq: + status = ap_zapq(q->apqn); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + ret = 0; + break; + case AP_RESPONSE_RESET_IN_PROGRESS: + if (retry--) { msleep(20); - break; - default: - /* things are really broken, give up */ - return -EIO; + goto retry_zapq; } - } while (retry--); + ret = -EBUSY; + break; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + WARN_ON_ONCE(status.irq_enabled); + ret = -EBUSY; + goto free_resources; + default: + /* things are really broken, give up */ + WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n", + status.response_code); + return -EIO; + } - return -EBUSY; + /* wait for the reset to take effect */ + while (retry2--) { + if (status.queue_empty && !status.irq_enabled) + break; + msleep(20); + status = ap_tapq(q->apqn, NULL); + } + WARN_ON_ONCE(retry2 <= 0); + +free_resources: + vfio_ap_free_aqic_resources(q); + + return ret; } static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) @@ -1175,13 +1198,15 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) int ret; int rc = 0; unsigned long apid, apqi; + struct vfio_ap_queue *q; struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, matrix_mdev->matrix.apm_max + 1) { for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, matrix_mdev->matrix.aqm_max + 1) { - ret = vfio_ap_mdev_reset_queue(apid, apqi, 1); + q = vfio_ap_find_queue(AP_MKQID(apid, apqi)); + ret = vfio_ap_mdev_reset_queue(q, 1); /* * Regardless whether a queue turns out to be busy, or * is not operational, we need to continue resetting @@ -1189,7 +1214,6 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) */ if (ret) rc = ret; - vfio_ap_irq_disable_apqn(AP_MKQID(apid, apqi)); } } diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index f46dde56b464..28e9d9989768 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -88,11 +88,6 @@ struct ap_matrix_mdev { struct mdev_device *mdev; }; -extern int vfio_ap_mdev_register(void); -extern void vfio_ap_mdev_unregister(void); -int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi, - unsigned int retry); - struct vfio_ap_queue { struct ap_matrix_mdev *matrix_mdev; unsigned long saved_pfn; @@ -100,5 +95,10 @@ struct vfio_ap_queue { #define VFIO_AP_ISC_INVALID 0xff unsigned char saved_isc; }; -struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q); + +int vfio_ap_mdev_register(void); +void vfio_ap_mdev_unregister(void); +int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, + unsigned int retry); + #endif /* _VFIO_AP_PRIVATE_H_ */ From e82080e1f456467cc185fe65ee69fe9f9bd0b576 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 13 Jan 2021 11:56:26 -0500 Subject: [PATCH 3/3] s390: uv: Fix sysfs max number of VCPUs reporting The number reported by the query is N-1 and I think people reading the sysfs file would expect N instead. For users creating VMs there's no actual difference because KVM's limit is currently below the UV's limit. Signed-off-by: Janosch Frank Fixes: a0f60f8431999 ("s390/protvirt: Add sysfs firmware interface for Ultravisor information") Cc: stable@vger.kernel.org Reviewed-by: Claudio Imbrenda Acked-by: Cornelia Huck Signed-off-by: Vasily Gorbik --- arch/s390/boot/uv.c | 2 +- arch/s390/include/asm/uv.h | 4 ++-- arch/s390/kernel/uv.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index a15c033f53ca..87641dd65ccf 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -35,7 +35,7 @@ void uv_query_info(void) uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len; uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE); uv_info.max_num_sec_conf = uvcb.max_num_sec_conf; - uv_info.max_guest_cpus = uvcb.max_guest_cpus; + uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id; } #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 0325fc0469b7..7b98d4caee77 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -96,7 +96,7 @@ struct uv_cb_qui { u32 max_num_sec_conf; u64 max_guest_stor_addr; u8 reserved88[158 - 136]; - u16 max_guest_cpus; + u16 max_guest_cpu_id; u8 reserveda0[200 - 160]; } __packed __aligned(8); @@ -273,7 +273,7 @@ struct uv_info { unsigned long guest_cpu_stor_len; unsigned long max_sec_stor_addr; unsigned int max_num_sec_conf; - unsigned short max_guest_cpus; + unsigned short max_guest_cpu_id; }; extern struct uv_info uv_info; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 883bfed9f5c2..b2d2ad153067 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -368,7 +368,7 @@ static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, struct kobj_attribute *attr, char *page) { return scnprintf(page, PAGE_SIZE, "%d\n", - uv_info.max_guest_cpus); + uv_info.max_guest_cpu_id + 1); } static struct kobj_attribute uv_query_max_guest_cpus_attr =