drm/i915/gem: Excise the per-batch whitelist from the context

One does not lightly add a new hidden struct_mutex dependency deep within
the execbuf bowels! The immediate suspicion on seeing the whitelist
cached on the context is that it is intended to be preserved between
batches, as the kernel is quite adept at caching small allocations
itself. But no, its sole purpose is to serialise command submission in
order to save a kmalloc on a slow, slow path!

By removing the whitelist dependency from the context, we greatly
augment our freedom to chop up the big struct_mutex.

v2: s/set_bit/__set_bit/ as the whitelist shall never be accessed
concurrently.
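
In outline, the replacement is a per-parse bitmap rather than per-context
state. A minimal sketch of the pattern follows (the example_* helpers are
hypothetical and this is not the driver code itself): allocate the bitmap
for the lifetime of a single parse, record each validated command with the
non-atomic __set_bit() since the bitmap is never shared, consult it with
test_bit() when a BB_START target must be checked, and free it on the way
out.

  #include <linux/bitmap.h>
  #include <linux/bitops.h>
  #include <linux/err.h>
  #include <linux/gfp.h>
  #include <linux/kernel.h>

  /* Illustrative sketch only; the example_* helpers are not part of the patch. */

  /* One bit per dword of the batch; lives only for the duration of one parse. */
  static unsigned long *example_alloc_whitelist(u32 batch_len)
  {
          unsigned long *jmp;

          /* Prefer reporting a transient allocation failure to hitting oom. */
          jmp = bitmap_zalloc(DIV_ROUND_UP(batch_len, sizeof(u32)),
                              GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
          return jmp ?: ERR_PTR(-ENOMEM);
  }

  static int example_parse_one_batch(u32 batch_len, u32 offset, u32 jump_target)
  {
          unsigned long *jump_whitelist = example_alloc_whitelist(batch_len);
          int ret = 0;

          /*
           * Record a validated command; the non-atomic __set_bit() suffices
           * because only this thread ever touches the bitmap.
           */
          if (!IS_ERR_OR_NULL(jump_whitelist))
                  __set_bit(offset, jump_whitelist);

          /* Only a BB_START that needs the whitelist turns -ENOMEM into an error. */
          if (IS_ERR(jump_whitelist))
                  ret = PTR_ERR(jump_whitelist);
          else if (!test_bit(jump_target, jump_whitelist))
                  ret = -EINVAL;

          if (!IS_ERR_OR_NULL(jump_whitelist))
                  bitmap_free(jump_whitelist);
          return ret;
  }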

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191128113424.3885958-1-chris@chris-wilson.co.uk
commit cd30a50317
parent e3f3a0f269
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   2019-11-28 11:34:24 +00:00

3 changed files with 32 additions and 51 deletions

--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -275,8 +275,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	free_engines(rcu_access_pointer(ctx->engines));
 	mutex_destroy(&ctx->engines_mutex);
 
-	kfree(ctx->jump_whitelist);
-
 	if (ctx->timeline)
 		intel_timeline_put(ctx->timeline);
 
@@ -584,9 +582,6 @@ __create_context(struct drm_i915_private *i915)
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
-	ctx->jump_whitelist = NULL;
-	ctx->jump_whitelist_cmds = 0;
-
 	spin_lock(&i915->gem.contexts.lock);
 	list_add_tail(&ctx->link, &i915->gem.contexts.list);
 	spin_unlock(&i915->gem.contexts.lock);

--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -168,13 +168,6 @@ struct i915_gem_context {
 	 */
 	struct radix_tree_root handles_vma;
 
-	/** jump_whitelist: Bit array for tracking cmds during cmdparsing
-	 *  Guarded by struct_mutex
-	 */
-	unsigned long *jump_whitelist;
-	/** jump_whitelist_cmds: No of cmd slots available */
-	u32 jump_whitelist_cmds;
-
 	/**
 	 * @name: arbitrary name, used for user debug
 	 *

--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1310,13 +1310,14 @@ static int check_bbstart(const struct i915_gem_context *ctx,
 			 u32 *cmd, u32 offset, u32 length,
 			 u32 batch_len,
 			 u64 batch_start,
-			 u64 shadow_batch_start)
+			 u64 shadow_batch_start,
+			 const unsigned long *jump_whitelist)
 {
 	u64 jump_offset, jump_target;
 	u32 target_cmd_offset, target_cmd_index;
 
 	/* For igt compatibility on older platforms */
-	if (CMDPARSER_USES_GGTT(ctx->i915)) {
+	if (!jump_whitelist) {
 		DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
 		return -EACCES;
 	}
@@ -1352,10 +1353,10 @@ static int check_bbstart(const struct i915_gem_context *ctx,
 	if (target_cmd_index == offset)
 		return 0;
 
-	if (ctx->jump_whitelist_cmds <= target_cmd_index) {
-		DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
-		return -EINVAL;
-	} else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
+	if (IS_ERR(jump_whitelist))
+		return PTR_ERR(jump_whitelist);
+
+	if (!test_bit(target_cmd_index, jump_whitelist)) {
 		DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
 			  jump_target);
 		return -EINVAL;
@@ -1364,40 +1365,27 @@ static int check_bbstart(const struct i915_gem_context *ctx,
 	return 0;
 }
 
-static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len)
+static unsigned long *
+alloc_whitelist(struct drm_i915_private *i915, u32 batch_len)
 {
-	const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
-	const u32 exact_size = BITS_TO_LONGS(batch_cmds);
-	u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
-	unsigned long *next_whitelist;
+	unsigned long *jmp;
 
-	if (CMDPARSER_USES_GGTT(ctx->i915))
-		return;
+	/*
+	 * We expect batch_len to be less than 256KiB for known users,
+	 * i.e. we need at most an 8KiB bitmap allocation which should be
+	 * reasonably cheap due to kmalloc caches.
+	 */
 
-	if (batch_cmds <= ctx->jump_whitelist_cmds) {
-		bitmap_zero(ctx->jump_whitelist, batch_cmds);
-		return;
-	}
+	if (CMDPARSER_USES_GGTT(i915))
+		return NULL;
 
-again:
-	next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
-	if (next_whitelist) {
-		kfree(ctx->jump_whitelist);
-		ctx->jump_whitelist = next_whitelist;
-		ctx->jump_whitelist_cmds =
-			next_size * BITS_PER_BYTE * sizeof(long);
-		return;
-	}
+	/* Prefer to report transient allocation failure rather than hit oom */
+	jmp = bitmap_zalloc(DIV_ROUND_UP(batch_len, sizeof(u32)),
+			    GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+	if (!jmp)
+		return ERR_PTR(-ENOMEM);
 
-	if (next_size > exact_size) {
-		next_size = exact_size;
-		goto again;
-	}
-
-	DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
-	bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);
-
-	return;
+	return jmp;
 }
 
 #define LENGTH_BIAS 2
@@ -1433,6 +1421,7 @@ int intel_engine_cmd_parser(struct i915_gem_context *ctx,
 	struct drm_i915_cmd_descriptor default_desc = noop_desc;
 	const struct drm_i915_cmd_descriptor *desc = &default_desc;
 	bool needs_clflush_after = false;
+	unsigned long *jump_whitelist;
 	int ret = 0;
 
 	cmd = copy_batch(shadow_batch_obj, batch_obj,
@@ -1443,7 +1432,8 @@ int intel_engine_cmd_parser(struct i915_gem_context *ctx,
 		return PTR_ERR(cmd);
 	}
 
-	init_whitelist(ctx, batch_len);
+	/* Defer failure until attempted use */
+	jump_whitelist = alloc_whitelist(ctx->i915, batch_len);
 
 	/*
 	 * We use the batch length as size because the shadow object is as
@@ -1487,15 +1477,16 @@ int intel_engine_cmd_parser(struct i915_gem_context *ctx,
 		if (desc->cmd.value == MI_BATCH_BUFFER_START) {
 			ret = check_bbstart(ctx, cmd, offset, length,
 					    batch_len, batch_start,
-					    shadow_batch_start);
+					    shadow_batch_start,
+					    jump_whitelist);
 
 			if (ret)
 				goto err;
 			break;
 		}
 
-		if (ctx->jump_whitelist_cmds > offset)
-			set_bit(offset, ctx->jump_whitelist);
+		if (!IS_ERR_OR_NULL(jump_whitelist))
+			__set_bit(offset, jump_whitelist);
 
 		cmd += length;
 		offset += length;
@@ -1513,6 +1504,8 @@ int intel_engine_cmd_parser(struct i915_gem_context *ctx,
 	}
 
 err:
+	if (!IS_ERR_OR_NULL(jump_whitelist))
+		kfree(jump_whitelist);
 	i915_gem_object_unpin_map(shadow_batch_obj);
 	return ret;
 }
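
A note on the error-handling convention above: alloc_whitelist() is
effectively tristate, and the deferral named by the "Defer failure until
attempted use" comment falls out of how check_bbstart() interprets each
case. The following is only a summary of the hunks above, not new driver
code:

  /*
   *   NULL             - CMDPARSER_USES_GGTT() platform; no whitelist is
   *                      kept and check_bbstart() rejects any BB_START
   *                      with -EACCES.
   *   ERR_PTR(-ENOMEM) - allocation failed; the error is only reported,
   *                      via PTR_ERR(), if a BB_START actually needs the
   *                      whitelist.
   *   valid bitmap     - one bit per dword, set with __set_bit() for each
   *                      validated command and consulted with test_bit().
   */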