From bcad588dea538a4fc173d16a90a005536ec8dbf2 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit
Date: Wed, 8 Apr 2020 16:42:01 -0700
Subject: [PATCH 1/4] drm/i915/perf: Do not clear pollin for small user read buffers

It is wrong to block the user thread in the next poll when OA data is
already available which could not fit in the user buffer provided in
the previous read. In several cases the exact user buffer size is not
known. Blocking user space in poll can lead to data loss when the
buffer size used is smaller than the available data.

This change fixes this issue and allows user space to read all OA data
even when using a buffer size smaller than the available data using
multiple non-blocking reads rather than staying blocked in poll till
the next timer interrupt.

v2: Fix ret value for blocking reads (Umesh)
v3: Mistake during patch send (Ashutosh)
v4: Remove -EAGAIN from comment (Umesh)
v5: Improve condition for clearing pollin and return (Lionel)
v6: Improve blocking read loop and other cleanups (Lionel)
v7: Added Cc stable

Testcase: igt/perf/polling-small-buf
Reviewed-by: Lionel Landwerlin
Signed-off-by: Ashutosh Dixit
Cc: Umesh Nerlige Ramappa
Cc:
Signed-off-by: Chris Wilson
Link: https://patchwork.freedesktop.org/patch/msgid/20200403010120.3067-1-ashutosh.dixit@intel.com
(cherry-picked from commit 6352219c39c04ed3f9a8d1cf93f87c21753a213e)
Signed-off-by: Rodrigo Vivi
---
 drivers/gpu/drm/i915/i915_perf.c | 65 ++++++--------------------------
 1 file changed, 11 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 551be589d6f4..66a46e41d5ef 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2940,49 +2940,6 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
 	gen8_update_reg_state_unlocked(ce, stream);
 }
 
-/**
- * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
- * @stream: An i915 perf stream
- * @file: An i915 perf stream file
- * @buf: destination buffer given by userspace
- * @count: the number of bytes userspace wants to read
- * @ppos: (inout) file seek position (unused)
- *
- * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
- * ensure that if we've successfully copied any data then reporting that takes
- * precedence over any internal error status, so the data isn't lost.
- *
- * For example ret will be -ENOSPC whenever there is more buffered data than
- * can be copied to userspace, but that's only interesting if we weren't able
- * to copy some data because it implies the userspace buffer is too small to
- * receive a single record (and we never split records).
- *
- * Another case with ret == -EFAULT is more of a grey area since it would seem
- * like bad form for userspace to ask us to overrun its buffer, but the user
- * knows best:
- *
- *   http://yarchive.net/comp/linux/partial_reads_writes.html
- *
- * Returns: The number of bytes copied or a negative error code on failure.
- */
-static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
-				     struct file *file,
-				     char __user *buf,
-				     size_t count,
-				     loff_t *ppos)
-{
-	/* Note we keep the offset (aka bytes read) separate from any
-	 * error status so that the final check for whether we return
-	 * the bytes read with a higher precedence than any error (see
-	 * comment below) doesn't need to be handled/duplicated in
-	 * stream->ops->read() implementations.
-	 */
-	size_t offset = 0;
-	int ret = stream->ops->read(stream, buf, count, &offset);
-
-	return offset ?: (ret ?: -EAGAIN);
-}
-
 /**
  * i915_perf_read - handles read() FOP for i915 perf stream FDs
  * @file: An i915 perf stream file
@@ -3008,7 +2965,8 @@ static ssize_t i915_perf_read(struct file *file,
 {
 	struct i915_perf_stream *stream = file->private_data;
 	struct i915_perf *perf = stream->perf;
-	ssize_t ret;
+	size_t offset = 0;
+	int ret;
 
 	/* To ensure it's handled consistently we simply treat all reads of a
 	 * disabled stream as an error. In particular it might otherwise lead
@@ -3031,13 +2989,12 @@ static ssize_t i915_perf_read(struct file *file,
 				return ret;
 
 			mutex_lock(&perf->lock);
-			ret = i915_perf_read_locked(stream, file,
-						    buf, count, ppos);
+			ret = stream->ops->read(stream, buf, count, &offset);
 			mutex_unlock(&perf->lock);
-		} while (ret == -EAGAIN);
+		} while (!offset && !ret);
 	} else {
 		mutex_lock(&perf->lock);
-		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
+		ret = stream->ops->read(stream, buf, count, &offset);
 		mutex_unlock(&perf->lock);
 	}
 
@@ -3048,15 +3005,15 @@ static ssize_t i915_perf_read(struct file *file,
 	 * and read() returning -EAGAIN. Clearing the oa.pollin state here
 	 * effectively ensures we back off until the next hrtimer callback
 	 * before reporting another EPOLLIN event.
+	 * The exception to this is if ops->read() returned -ENOSPC which means
+	 * that more OA data is available than could fit in the user provided
+	 * buffer. In this case we want the next poll() call to not block.
 	 */
-	if (ret >= 0 || ret == -EAGAIN) {
-		/* Maybe make ->pollin per-stream state if we support multiple
-		 * concurrent streams in the future.
-		 */
+	if (ret != -ENOSPC)
 		stream->pollin = false;
-	}
 
-	return ret;
+	/* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */
+	return offset ?: (ret ?: -EAGAIN);
 }
 
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
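With the pollin change above, a consumer whose buffer is smaller than the data
buffered by the OA unit can drain the stream with repeated non-blocking reads
instead of sleeping in poll() until the next hrtimer tick. The following is a
minimal userspace sketch of that pattern, not part of the patch: stream_fd is
assumed to be an i915 perf stream fd opened in non-blocking mode, and
drain_oa() is a hypothetical helper name.

/* Sketch only: drain pending OA data using a deliberately small buffer.
 * Assumes stream_fd is an i915 perf stream fd opened non-blocking;
 * drain_oa() is a hypothetical helper, not kernel code.
 */
#include <errno.h>
#include <poll.h>
#include <unistd.h>

void drain_oa(int stream_fd)
{
	char buf[512];	/* intentionally smaller than the buffered OA data */
	struct pollfd pfd = { .fd = stream_fd, .events = POLLIN };

	if (poll(&pfd, 1, -1) <= 0 || !(pfd.revents & POLLIN))
		return;

	/* pollin now stays set while data remains, so keep reading until
	 * the stream reports EAGAIN instead of blocking in poll() again.
	 */
	for (;;) {
		ssize_t n = read(stream_fd, buf, sizeof(buf));

		if (n > 0)
			continue;	/* parse the copied OA records here */
		if (n < 0 && errno == EAGAIN)
			break;		/* all currently available data consumed */
		break;			/* end of stream or a real error */
	}
}

Before the fix, the same loop would stall in the next poll() after the first
short read even though data was still buffered, which is the data-loss
scenario described in the commit message above.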
From 776d95b768e664efdc9f5cc078b981a006d3bff4 Mon Sep 17 00:00:00 2001
From: Yan Zhao
Date: Thu, 12 Mar 2020 23:10:25 -0400
Subject: [PATCH 2/4] drm/i915/gvt: hold reference of VFIO group during opening of vgpu

Hold a reference to the VFIO group for each vgpu at vgpu opening and
release the reference at vgpu releasing.

Signed-off-by: Yan Zhao
Reviewed-by: Zhenyu Wang
Signed-off-by: Zhenyu Wang
Link: http://patchwork.freedesktop.org/patch/msgid/20200313031025.7936-1-yan.y.zhao@intel.com
---
 drivers/gpu/drm/i915/gvt/kvmgt.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 074c4efb58eb..811cee28ae06 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -131,6 +131,7 @@ struct kvmgt_vdev {
 	struct work_struct release_work;
 	atomic_t released;
 	struct vfio_device *vfio_device;
+	struct vfio_group *vfio_group;
 };
 
 static inline struct kvmgt_vdev *kvmgt_vdev(struct intel_vgpu *vgpu)
@@ -792,6 +793,7 @@ static int intel_vgpu_open(struct mdev_device *mdev)
 	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
 	unsigned long events;
 	int ret;
+	struct vfio_group *vfio_group;
 
 	vdev->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
 	vdev->group_notifier.notifier_call = intel_vgpu_group_notifier;
@@ -814,6 +816,14 @@ static int intel_vgpu_open(struct mdev_device *mdev)
 		goto undo_iommu;
 	}
 
+	vfio_group = vfio_group_get_external_user_from_dev(mdev_dev(mdev));
+	if (IS_ERR_OR_NULL(vfio_group)) {
+		ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group);
+		gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n");
+		goto undo_register;
+	}
+	vdev->vfio_group = vfio_group;
+
 	/* Take a module reference as mdev core doesn't take
 	 * a reference for vendor driver.
 	 */
@@ -830,6 +840,10 @@ static int intel_vgpu_open(struct mdev_device *mdev)
 	return ret;
 
 undo_group:
+	vfio_group_put_external_user(vdev->vfio_group);
+	vdev->vfio_group = NULL;
+
+undo_register:
 	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
 					&vdev->group_notifier);
 
@@ -884,6 +898,7 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
 		kvmgt_guest_exit(info);
 
 	intel_vgpu_release_msi_eventfd_ctx(vgpu);
+	vfio_group_put_external_user(vdev->vfio_group);
 
 	vdev->kvm = NULL;
 	vgpu->handle = 0;
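The group reference taken in intel_vgpu_open() above is what the next two
patches build on: vfio_dma_rw() and vfio_group_pin/unpin_pages() operate on a
struct vfio_group * rather than on the mdev device. Condensed, the
acquire/release pairing this patch introduces looks like the sketch below;
vgpu_group_get() and vgpu_group_put() are illustrative names only, and the
sketch assumes the kvmgt.c context (struct kvmgt_vdev, linux/err.h helpers).

/* Illustrative condensation of the lifetime pattern added above;
 * not a literal copy of the kvmgt.c code.
 */
static int vgpu_group_get(struct kvmgt_vdev *vdev, struct device *dev)
{
	struct vfio_group *group = vfio_group_get_external_user_from_dev(dev);

	if (IS_ERR_OR_NULL(group))
		return group ? PTR_ERR(group) : -EFAULT;

	vdev->vfio_group = group;	/* held from open until release */
	return 0;
}

static void vgpu_group_put(struct kvmgt_vdev *vdev)
{
	vfio_group_put_external_user(vdev->vfio_group);
	vdev->vfio_group = NULL;
}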
From b59b2a3ee567e5a30688e148556ae33a3196bc9d Mon Sep 17 00:00:00 2001
From: Yan Zhao
Date: Thu, 12 Mar 2020 23:11:09 -0400
Subject: [PATCH 3/4] drm/i915/gvt: substitute kvm_read/write_guest with vfio_dma_rw

As a device model, it is better to read/write guest memory using the
vfio interface, so that vfio is able to maintain dirty info of device
IOVAs.

Cc: Kevin Tian
Signed-off-by: Yan Zhao
Reviewed-by: Zhenyu Wang
Signed-off-by: Zhenyu Wang
Link: http://patchwork.freedesktop.org/patch/msgid/20200313031109.7989-1-yan.y.zhao@intel.com
---
 drivers/gpu/drm/i915/gvt/kvmgt.c | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 811cee28ae06..cee7376ba39d 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -2050,33 +2050,14 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
 		void *buf, unsigned long len, bool write)
 {
 	struct kvmgt_guest_info *info;
-	struct kvm *kvm;
-	int idx, ret;
-	bool kthread = current->mm == NULL;
 
 	if (!handle_valid(handle))
 		return -ESRCH;
 
 	info = (struct kvmgt_guest_info *)handle;
-	kvm = info->kvm;
 
-	if (kthread) {
-		if (!mmget_not_zero(kvm->mm))
-			return -EFAULT;
-		use_mm(kvm->mm);
-	}
-
-	idx = srcu_read_lock(&kvm->srcu);
-	ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
-			kvm_read_guest(kvm, gpa, buf, len);
-	srcu_read_unlock(&kvm->srcu, idx);
-
-	if (kthread) {
-		unuse_mm(kvm->mm);
-		mmput(kvm->mm);
-	}
-
-	return ret;
+	return vfio_dma_rw(kvmgt_vdev(info->vgpu)->vfio_group,
+			gpa, buf, len, write);
 }
 
 static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
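After this change every guest-physical access made by the device model goes
through the same VFIO container that pins the pages, which is what lets VFIO
maintain dirty info for device IOVAs as the commit message notes. In the
kvmgt.c context, a hypothetical caller of the reworked path would look like
the sketch below; read_guest_desc() is made up for illustration, while
vfio_dma_rw() and kvmgt_vdev() are the real helpers used in the diff.

/* Hypothetical caller (sketch): read guest memory at gpa into desc
 * through the VFIO group held by the vgpu.  write=false selects a read.
 */
static int read_guest_desc(struct intel_vgpu *vgpu, unsigned long gpa,
			   void *desc, size_t len)
{
	return vfio_dma_rw(kvmgt_vdev(vgpu)->vfio_group, gpa, desc, len, false);
}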
From ec7301d5146c9abe8aaf6e16e420ea3951018503 Mon Sep 17 00:00:00 2001
From: Yan Zhao
Date: Thu, 12 Mar 2020 23:11:51 -0400
Subject: [PATCH 4/4] drm/i915/gvt: switch to use vfio_group_pin/unpin_pages

Substitute vfio_pin_pages() and vfio_unpin_pages() with
vfio_group_pin_pages() and vfio_group_unpin_pages(), so that each call
does not have to look up, check, reference and dereference the VFIO
group.

Signed-off-by: Yan Zhao
Reviewed-by: Zhenyu Wang
Signed-off-by: Zhenyu Wang
Link: http://patchwork.freedesktop.org/patch/msgid/20200313031151.8042-1-yan.y.zhao@intel.com
---
 drivers/gpu/drm/i915/gvt/kvmgt.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index cee7376ba39d..eee530453aa6 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -152,6 +152,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 		unsigned long size)
 {
 	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
 	int total_pages;
 	int npage;
 	int ret;
@@ -161,7 +162,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 	for (npage = 0; npage < total_pages; npage++) {
 		unsigned long cur_gfn = gfn + npage;
 
-		ret = vfio_unpin_pages(mdev_dev(kvmgt_vdev(vgpu)->mdev), &cur_gfn, 1);
+		ret = vfio_group_unpin_pages(vdev->vfio_group, &cur_gfn, 1);
 		drm_WARN_ON(&i915->drm, ret != 1);
 	}
 }
@@ -170,6 +171,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 		unsigned long size, struct page **page)
 {
+	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
 	unsigned long base_pfn = 0;
 	int total_pages;
 	int npage;
@@ -184,8 +186,8 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 		unsigned long cur_gfn = gfn + npage;
 		unsigned long pfn;
 
-		ret = vfio_pin_pages(mdev_dev(kvmgt_vdev(vgpu)->mdev), &cur_gfn, 1,
-				     IOMMU_READ | IOMMU_WRITE, &pfn);
+		ret = vfio_group_pin_pages(vdev->vfio_group, &cur_gfn, 1,
+					   IOMMU_READ | IOMMU_WRITE, &pfn);
 		if (ret != 1) {
 			gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
 				     cur_gfn, ret);
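Taken together, the series keys every VFIO call in kvmgt.c off the
struct vfio_group * acquired in patch 2/4 instead of resolving the group from
the mdev device on each call. The group-based external-user API relied on is
approximately the following (declarations from the v5.6/v5.7 time frame;
parameter names may differ slightly, so treat include/linux/vfio.h of the
target tree as authoritative):

/* Approximate prototypes of the group-based VFIO external-user API
 * used by this series; see include/linux/vfio.h for the exact forms.
 */
struct vfio_group *vfio_group_get_external_user_from_dev(struct device *dev);
void vfio_group_put_external_user(struct vfio_group *group);
int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova,
		void *data, size_t len, bool write);
int vfio_group_pin_pages(struct vfio_group *group,
			 unsigned long *user_iova_pfn, int npage,
			 int prot, unsigned long *phys_pfn);
int vfio_group_unpin_pages(struct vfio_group *group,
			   unsigned long *user_iova_pfn, int npage);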