drm/i915: Ratelimit i915_globals_park
When doing our global park, we like to be a good citizen and shrink our slab caches (of which we have quite a few now), but each kmem_cache_shrink() incurs a stop_machine() and so ends up being quite expensive, causing machine-wide stalls. While ideally we would like to throw away unused pages in our slab caches whenever it appears that we are idling, doing so will require a much cheaper mechanism. In the meantime use a delayed work to impose a rate-limit that means we have to have been idle for more than 2 seconds before we start shrinking. References: https://gitlab.freedesktop.org/drm/intel/issues/848 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20191218094057.3510459-1-chris@chris-wilson.co.uk
This commit is contained in:
parent
54400257ae
commit
01f624f018
|
@ -20,7 +20,10 @@ static LIST_HEAD(globals);
|
|||
static atomic_t active;
|
||||
static atomic_t epoch;
|
||||
static struct park_work {
|
||||
struct rcu_work work;
|
||||
struct delayed_work work;
|
||||
struct rcu_head rcu;
|
||||
unsigned long flags;
|
||||
#define PENDING 0
|
||||
int epoch;
|
||||
} park;
|
||||
|
||||
|
@ -37,11 +40,33 @@ static void i915_globals_shrink(void)
|
|||
global->shrink();
|
||||
}
|
||||
|
||||
/*
 * RCU callback for park.rcu (registered with call_rcu() in
 * __i915_globals_queue_rcu()). When it runs, it schedules the park.work
 * delayed work with a delay of 2 * HZ jiffies, rounded by
 * round_jiffies_up_relative().
 *
 * @rcu: rcu_head handed back by the RCU core; not used in the body.
 */
static void __i915_globals_grace(struct rcu_head *rcu)
|
||||
{
|
||||
/* Ratelimit parking as shrinking is quite slow */
|
||||
schedule_delayed_work(&park.work, round_jiffies_up_relative(2 * HZ));
|
||||
}
|
||||
|
||||
/*
 * Record a new epoch and, if nothing is currently active, queue the RCU
 * callback that will eventually schedule the delayed park work.
 *
 * park.epoch is set to the freshly incremented value of the global epoch
 * counter so that the worker can later compare the two and detect whether
 * the epoch moved in the meantime.
 *
 * NOTE(review): when 'active' is non-zero nothing is queued here, and this
 * function does not touch the PENDING bit in park.flags -- confirm that the
 * callers cope with PENDING staying set on that path.
 */
static void __i915_globals_queue_rcu(void)
|
||||
{
|
||||
park.epoch = atomic_inc_return(&epoch);
|
||||
if (!atomic_read(&active)) {
|
||||
init_rcu_head(&park.rcu);
|
||||
call_rcu(&park.rcu, __i915_globals_grace);
|
||||
}
|
||||
}
|
||||
|
||||
static void __i915_globals_park(struct work_struct *work)
|
||||
{
|
||||
destroy_rcu_head(&park.rcu);
|
||||
|
||||
/* Confirm nothing woke up in the last grace period */
|
||||
if (park.epoch == atomic_read(&epoch))
|
||||
i915_globals_shrink();
|
||||
if (park.epoch != atomic_read(&epoch)) {
|
||||
__i915_globals_queue_rcu();
|
||||
return;
|
||||
}
|
||||
|
||||
clear_bit(PENDING, &park.flags);
|
||||
i915_globals_shrink();
|
||||
}
|
||||
|
||||
void __init i915_global_register(struct i915_global *global)
|
||||
|
@ -85,7 +110,7 @@ int __init i915_globals_init(void)
|
|||
}
|
||||
}
|
||||
|
||||
INIT_RCU_WORK(&park.work, __i915_globals_park);
|
||||
INIT_DELAYED_WORK(&park.work, __i915_globals_park);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -103,8 +128,9 @@ void i915_globals_park(void)
|
|||
if (!atomic_dec_and_test(&active))
|
||||
return;
|
||||
|
||||
park.epoch = atomic_inc_return(&epoch);
|
||||
queue_rcu_work(system_wq, &park.work);
|
||||
/* Queue cleanup after the next RCU grace period has freed slabs */
|
||||
if (!test_and_set_bit(PENDING, &park.flags))
|
||||
__i915_globals_queue_rcu();
|
||||
}
|
||||
|
||||
void i915_globals_unpark(void)
|
||||
|
@ -113,12 +139,21 @@ void i915_globals_unpark(void)
|
|||
atomic_inc(&active);
|
||||
}
|
||||
|
||||
/*
 * Exit-time helper that drains any outstanding park work.
 *
 * 'active' is raised for the duration of the flush and dropped again
 * afterwards. In between, rcu_barrier() waits for pending RCU callbacks
 * (such as the one that schedules park.work) and flush_delayed_work()
 * then flushes park.work itself.
 */
static void __exit __i915_globals_flush(void)
|
||||
{
|
||||
atomic_inc(&active); /* skip shrinking */
|
||||
|
||||
rcu_barrier(); /* wait for the work to be queued */
|
||||
flush_delayed_work(&park.work);
|
||||
|
||||
atomic_dec(&active);
|
||||
}
|
||||
|
||||
void __exit i915_globals_exit(void)
|
||||
{
|
||||
/* Flush any residual park_work */
|
||||
atomic_inc(&epoch);
|
||||
flush_rcu_work(&park.work);
|
||||
GEM_BUG_ON(atomic_read(&active));
|
||||
|
||||
__i915_globals_flush();
|
||||
__i915_globals_cleanup();
|
||||
|
||||
/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
|
||||
|
|
Loading…
Reference in New Issue