diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 6a1661643d3d..06d1267e733d 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -355,7 +355,35 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (req && i915_gem_request_completed(req)) i915_gem_request_retire(req); - req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); + /* Beware: Dragons be flying overhead. + * + * We use RCU to look up requests in flight. The lookups may + * race with the request being allocated from the slab freelist. + * That is the request we are writing to here, may be in the process + * of being read by __i915_gem_active_get_request_rcu(). As such, + * we have to be very careful when overwriting the contents. During + * the RCU lookup, we change chase the request->engine pointer, + * read the request->fence.seqno and increment the reference count. + * + * The reference count is incremented atomically. If it is zero, + * the lookup knows the request is unallocated and complete. Otherwise, + * it is either still in use, or has been reallocated and reset + * with fence_init(). This increment is safe for release as we check + * that the request we have a reference to and matches the active + * request. + * + * Before we increment the refcount, we chase the request->engine + * pointer. We must not call kmem_cache_zalloc() or else we set + * that pointer to NULL and cause a crash during the lookup. If + * we see the request is completed (based on the value of the + * old engine and seqno), the lookup is complete and reports NULL. + * If we decide the request is not completed (new engine or seqno), + * then we grab a reference and double check that it is still the + * active request - which it won't be and restart the lookup. + * + * Do not use kmem_cache_zalloc() here! + */ + req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); if (!req) return ERR_PTR(-ENOMEM); @@ -375,6 +403,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->engine = engine; req->ctx = i915_gem_context_get(ctx); + /* No zalloc, must clear what we need by hand */ + req->previous_context = NULL; + req->file_priv = NULL; + req->batch_obj = NULL; + req->pid = NULL; + req->elsp_submitted = 0; + /* * Reserve space in the ring buffer for all the commands required to * eventually emit this request. This is to guarantee that the diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 6dd83b172c7a..4e91d4912443 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -51,6 +51,13 @@ struct intel_signal_node { * emission time to be associated with the request for tracking how far ahead * of the GPU the submission is. * + * When modifying this structure be very aware that we perform a lockless + * RCU lookup of it that may race against reallocation of the struct + * from the slab freelist. We intentionally do not zero the structure on + * allocation so that the lookup can use the dangling pointers (and is + * cogniscent that those pointers may be wrong). Instead, everything that + * needs to be initialised must be done so explicitly. + * * The requests are reference counted. */ struct drm_i915_gem_request { @@ -458,6 +465,10 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) * just report the active tracker is idle. If the new request is * incomplete, then we acquire a reference on it and check that * it remained the active request. + * + * It is then imperative that we do not zero the request on + * reallocation, so that we can chase the dangling pointers! + * See i915_gem_request_alloc(). */ do { struct drm_i915_gem_request *request;