drm/i915: Support explicit fencing for execbuf

Now that the user can opt-out of implicit fencing, we need to give them
back control over the fencing. We employ sync_file to wrap our
drm_i915_gem_request and provide an fd that userspace can merge with
other sync_file fds and pass back to the kernel to wait upon before
future execution.

Testcase: igt/gem_exec_fence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Acked-by: Chad Versace <chadversary@chromium.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20170127094008.27489-2-chris@chris-wilson.co.uk
This commit is contained in:
Chris Wilson 2017-01-27 09:40:08 +00:00
parent 77ae995789
commit fec0445caa
4 changed files with 87 additions and 7 deletions

View File

@ -19,6 +19,7 @@ config DRM_I915
select INPUT if ACPI select INPUT if ACPI
select ACPI_VIDEO if ACPI select ACPI_VIDEO if ACPI
select ACPI_BUTTON if ACPI select ACPI_BUTTON if ACPI
select SYNC_FILE
help help
Choose this option if you have a system that has "Intel Graphics Choose this option if you have a system that has "Intel Graphics
Media Accelerator" or "HD Graphics" integrated graphics, Media Accelerator" or "HD Graphics" integrated graphics,

View File

@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_COHERENT_PHYS_GTT: case I915_PARAM_HAS_COHERENT_PHYS_GTT:
case I915_PARAM_HAS_EXEC_SOFTPIN: case I915_PARAM_HAS_EXEC_SOFTPIN:
case I915_PARAM_HAS_EXEC_ASYNC: case I915_PARAM_HAS_EXEC_ASYNC:
case I915_PARAM_HAS_EXEC_FENCE:
/* For the time being all of these are always true; /* For the time being all of these are always true;
* if some supported hardware does not have one of these * if some supported hardware does not have one of these
* features this value needs to be provided from * features this value needs to be provided from
@ -2550,7 +2551,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),

View File

@ -28,6 +28,7 @@
#include <linux/dma_remapping.h> #include <linux/dma_remapping.h>
#include <linux/reservation.h> #include <linux/reservation.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <drm/drmP.h> #include <drm/drmP.h>
@ -1595,6 +1596,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct i915_execbuffer_params *params = &params_master; struct i915_execbuffer_params *params = &params_master;
const u32 ctx_id = i915_execbuffer2_get_context_id(*args); const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
u32 dispatch_flags; u32 dispatch_flags;
struct dma_fence *in_fence = NULL;
struct sync_file *out_fence = NULL;
int out_fence_fd = -1;
int ret; int ret;
bool need_relocs; bool need_relocs;
@ -1638,6 +1642,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
dispatch_flags |= I915_DISPATCH_RS; dispatch_flags |= I915_DISPATCH_RS;
} }
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence) {
ret = -EINVAL;
goto pre_mutex_err;
}
}
if (args->flags & I915_EXEC_FENCE_OUT) {
out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
if (out_fence_fd < 0) {
ret = out_fence_fd;
out_fence_fd = -1;
goto pre_mutex_err;
}
}
/* Take a local wakeref for preparing to dispatch the execbuf as /* Take a local wakeref for preparing to dispatch the execbuf as
* we expect to access the hardware fairly frequently in the * we expect to access the hardware fairly frequently in the
* process. Upon first dispatch, we acquire another prolonged * process. Upon first dispatch, we acquire another prolonged
@ -1782,6 +1803,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err_batch_unpin; goto err_batch_unpin;
} }
if (in_fence) {
ret = i915_gem_request_await_dma_fence(params->request,
in_fence);
if (ret < 0)
goto err_request;
}
if (out_fence_fd != -1) {
out_fence = sync_file_create(&params->request->fence);
if (!out_fence) {
ret = -ENOMEM;
goto err_request;
}
}
/* Whilst this request exists, batch_obj will be on the /* Whilst this request exists, batch_obj will be on the
* active_list, and so will hold the active reference. Only when this * active_list, and so will hold the active reference. Only when this
* request is retired will the the batch_obj be moved onto the * request is retired will the the batch_obj be moved onto the
@ -1809,6 +1845,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
ret = execbuf_submit(params, args, &eb->vmas); ret = execbuf_submit(params, args, &eb->vmas);
err_request: err_request:
__i915_add_request(params->request, ret == 0); __i915_add_request(params->request, ret == 0);
if (out_fence) {
if (ret == 0) {
fd_install(out_fence_fd, out_fence->file);
args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */
args->rsvd2 |= (u64)out_fence_fd << 32;
out_fence_fd = -1;
} else {
fput(out_fence->file);
}
}
err_batch_unpin: err_batch_unpin:
/* /*
@ -1830,6 +1876,9 @@ pre_mutex_err:
/* intel_gpu_busy should also get a ref, so it will free when the device /* intel_gpu_busy should also get a ref, so it will free when the device
* is really idle. */ * is really idle. */
intel_runtime_pm_put(dev_priv); intel_runtime_pm_put(dev_priv);
if (out_fence_fd != -1)
put_unused_fd(out_fence_fd);
dma_fence_put(in_fence);
return ret; return ret;
} }
@ -1937,11 +1986,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
return -EINVAL; return -EINVAL;
} }
if (args->rsvd2 != 0) {
DRM_DEBUG("dirty rvsd2 field\n");
return -EINVAL;
}
exec2_list = drm_malloc_gfp(args->buffer_count, exec2_list = drm_malloc_gfp(args->buffer_count,
sizeof(*exec2_list), sizeof(*exec2_list),
GFP_TEMPORARY); GFP_TEMPORARY);

View File

@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_OVERLAY_PUT_IMAGE 0x27 #define DRM_I915_OVERLAY_PUT_IMAGE 0x27
#define DRM_I915_OVERLAY_ATTRS 0x28 #define DRM_I915_OVERLAY_ATTRS 0x28
#define DRM_I915_GEM_EXECBUFFER2 0x29 #define DRM_I915_GEM_EXECBUFFER2 0x29
#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2
#define DRM_I915_GET_SPRITE_COLORKEY 0x2a #define DRM_I915_GET_SPRITE_COLORKEY 0x2a
#define DRM_I915_SET_SPRITE_COLORKEY 0x2b #define DRM_I915_SET_SPRITE_COLORKEY 0x2b
#define DRM_I915_GEM_WAIT 0x2c #define DRM_I915_GEM_WAIT 0x2c
@ -280,6 +281,7 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) #define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
#define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
#define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@ -403,6 +405,13 @@ typedef struct drm_i915_irq_wait {
*/ */
#define I915_PARAM_HAS_EXEC_ASYNC 43 #define I915_PARAM_HAS_EXEC_ASYNC 43
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support -
* both being able to pass in a sync_file fd to wait upon before executing,
* and being able to return a new sync_file fd that is signaled when the
* current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT.
*/
#define I915_PARAM_HAS_EXEC_FENCE 44
typedef struct drm_i915_getparam { typedef struct drm_i915_getparam {
__s32 param; __s32 param;
/* /*
@ -855,7 +864,32 @@ struct drm_i915_gem_execbuffer2 {
*/ */
#define I915_EXEC_RESOURCE_STREAMER (1<<15) #define I915_EXEC_RESOURCE_STREAMER (1<<15)
#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) /* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent
* a sync_file fd to wait upon (in a nonblocking manner) prior to executing
* the batch.
*
* Returns -EINVAL if the sync_file fd cannot be found.
*/
#define I915_EXEC_FENCE_IN (1<<16)
/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd
* in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given
* to the caller, and it should be close() after use. (The fd is a regular
* file descriptor and will be cleaned up on process termination. It holds
* a reference to the request, but nothing else.)
*
* The sync_file fd can be combined with other sync_file and passed either
* to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip
* will only occur after this request completes), or to other devices.
*
* Using I915_EXEC_FENCE_OUT requires use of
* DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written
* back to userspace. Failure to do so will cause the out-fence to always
* be reported as zero, and the real fence fd to be leaked.
*/
#define I915_EXEC_FENCE_OUT (1<<17)
#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \ #define i915_execbuffer2_set_context_id(eb2, context) \