workqueue: implement "workqueue.debug_force_rr_cpu" debug feature
Workqueue used to guarantee local execution for work items queued
without explicit target CPU. The guarantee is gone now which can
break some usages in subtle ways. To flush out those cases, this
patch implements a debug feature which forces round-robin CPU
selection for all such work items.
The debug feature defaults to off and can be enabled with a kernel
parameter. The default can be flipped with a debug config option.
If you hit this commit during bisection, please refer to 041bd12e27
("Revert "workqueue: make sure delayed work run in local cpu"") for
more information and ping me.
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent
ef55718044
commit
f303fccb82
|
@ -4230,6 +4230,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
The default value of this parameter is determined by
|
||||
the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
|
||||
|
||||
workqueue.debug_force_rr_cpu
|
||||
Workqueue used to implicitly guarantee that work
|
||||
items queued without explicit CPU specified are put
|
||||
on the local CPU. This guarantee is no longer true
|
||||
and while local CPU is still preferred work items
|
||||
may be put on foreign CPUs. This debug option
|
||||
forces round-robin CPU selection to flush out
|
||||
usages which depend on the now broken guarantee.
|
||||
When enabled, memory and cache locality will be
|
||||
impacted.
|
||||
|
||||
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
|
||||
default x2apic cluster mode on platforms
|
||||
supporting x2apic.
|
||||
|
|
|
@ -307,6 +307,18 @@ static cpumask_var_t wq_unbound_cpumask;
|
|||
/* CPU where unbound work was last round robin scheduled from this CPU */
|
||||
static DEFINE_PER_CPU(int, wq_rr_cpu_last);
|
||||
|
||||
/*
|
||||
* Local execution of unbound work items is no longer guaranteed. The
|
||||
* following always forces round-robin CPU selection on unbound work items
|
||||
* to uncover usages which depend on it.
|
||||
*/
|
||||
#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
|
||||
static bool wq_debug_force_rr_cpu = true;
|
||||
#else
|
||||
static bool wq_debug_force_rr_cpu = false;
|
||||
#endif
|
||||
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
|
||||
|
||||
/* the per-cpu worker pools */
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
|
||||
cpu_worker_pools);
|
||||
|
@ -1309,10 +1321,17 @@ static bool is_chained_work(struct workqueue_struct *wq)
|
|||
*/
|
||||
static int wq_select_unbound_cpu(int cpu)
|
||||
{
|
||||
static bool printed_dbg_warning;
|
||||
int new_cpu;
|
||||
|
||||
if (likely(!wq_debug_force_rr_cpu)) {
|
||||
if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
|
||||
return cpu;
|
||||
} else if (!printed_dbg_warning) {
|
||||
pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
|
||||
printed_dbg_warning = true;
|
||||
}
|
||||
|
||||
if (cpumask_empty(wq_unbound_cpumask))
|
||||
return cpu;
|
||||
|
||||
|
|
|
@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG
|
|||
|
||||
endmenu # "RCU Debugging"
|
||||
|
||||
config DEBUG_WQ_FORCE_RR_CPU
|
||||
bool "Force round-robin CPU selection for unbound work items"
|
||||
depends on DEBUG_KERNEL
|
||||
default n
|
||||
help
|
||||
Workqueue used to implicitly guarantee that work items queued
|
||||
without explicit CPU specified are put on the local CPU. This
|
||||
guarantee is no longer true and while local CPU is still
|
||||
preferred work items may be put on foreign CPUs. Kernel
|
||||
parameter "workqueue.debug_force_rr_cpu" is added to force
|
||||
round-robin CPU selection to flush out usages which depend on the
|
||||
now broken guarantee. This config option enables the debug
|
||||
feature by default. When enabled, memory and cache locality will
|
||||
be impacted.
|
||||
|
||||
config DEBUG_BLOCK_EXT_DEVT
|
||||
bool "Force extended block device numbers and spread them"
|
||||
depends on DEBUG_KERNEL
|
||||
|
|
Loading…
Reference in New Issue