sched: avoid large irq-latencies in smp-balancing
SMP balancing is done with IRQs disabled and can iterate the full rq.
When rqs are large this can cause large irq-latencies. Limit the nr of
iterations on each run.

This fixes a scheduling latency regression reported by the -rt folks.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Tested-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 3c90e6e99b
commit b82d9fdd84
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1466,6 +1466,7 @@ extern unsigned int sysctl_sched_batch_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
+extern unsigned int sysctl_sched_nr_migrate;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -471,6 +471,12 @@ const_debug unsigned int sysctl_sched_features =
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 
+/*
+ * Number of tasks to iterate in a single balance run.
+ * Limited because this is done with IRQs disabled.
+ */
+const_debug unsigned int sysctl_sched_nr_migrate = 32;
+
 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
  * clock constructed from sched_clock():
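The new knob is declared const_debug, so whether it is writable at run time
depends on the build configuration. For context (this macro is defined
elsewhere in kernel/sched.c of this era and is not part of the patch; verify
against your tree), it is believed to expand as:

        /* Scheduler tunables are true constants unless debugging is on. */
        #ifdef CONFIG_SCHED_DEBUG
        # define const_debug __read_mostly
        #else
        # define const_debug const
        #endif

With CONFIG_SCHED_DEBUG disabled the cap is a compile-time constant of 32,
which the compiler can fold directly into balance_tasks().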
@@ -2235,7 +2241,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	      enum cpu_idle_type idle, int *all_pinned,
 	      int *this_best_prio, struct rq_iterator *iterator)
 {
-	int pulled = 0, pinned = 0, skip_for_load;
+	int loops = 0, pulled = 0, pinned = 0, skip_for_load;
 	struct task_struct *p;
 	long rem_load_move = max_load_move;
 
@@ -2249,10 +2255,10 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	 */
 	p = iterator->start(iterator->arg);
 next:
-	if (!p)
+	if (!p || loops++ > sysctl_sched_nr_migrate)
 		goto out;
 	/*
-	 * To help distribute high priority tasks accross CPUs we don't
+	 * To help distribute high priority tasks across CPUs we don't
 	 * skip a task if it will be the highest priority task (i.e. smallest
 	 * prio value) on its new queue regardless of its load weight
 	 */
@@ -2269,8 +2275,7 @@ next:
 	rem_load_move -= p->se.load.weight;
 
 	/*
-	 * We only want to steal up to the prescribed number of tasks
-	 * and the prescribed amount of weighted load.
+	 * We only want to steal up to the prescribed amount of weighted load.
 	 */
 	if (rem_load_move > 0) {
 		if (p->prio < *this_best_prio)
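The effect of the new bound is easiest to see in isolation. Below is a
minimal standalone sketch (plain userspace C, not kernel code; NR_MIGRATE,
struct queue and balance_pass are illustrative names only) of the pattern
the patch applies: cap the work done per pass through the IRQs-off section
and let subsequent balance runs make further progress.

        #include <stdio.h>

        #define NR_MIGRATE 32   /* stands in for sysctl_sched_nr_migrate */

        struct queue {
                int nr_items;   /* total items queued */
                int next;       /* first item not yet examined */
        };

        /* One balance pass: examines no more than NR_MIGRATE items. */
        static int balance_pass(struct queue *q)
        {
                int loops = 0, pulled = 0;

                /*
                 * In the kernel the analogous loop runs with IRQs
                 * disabled, so this cap directly bounds the worst-case
                 * IRQ-off time no matter how long the queue is.
                 */
                while (q->next < q->nr_items && loops++ < NR_MIGRATE) {
                        q->next++;      /* "migrate" one item */
                        pulled++;
                }
                return pulled;
        }

        int main(void)
        {
                struct queue q = { .nr_items = 100, .next = 0 };
                int pass = 0;

                /* 100 items take four short passes, not one long scan. */
                while (q.next < q.nr_items)
                        printf("pass %d: pulled %d\n", ++pass,
                               balance_pass(&q));
                return 0;
        }

The trade-off is that a heavily imbalanced runqueue may need several balance
runs to converge, but each run now holds IRQs off for a bounded time, which
is what the -rt latency report called for. The knob itself is exposed via
sysctl: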
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -301,6 +301,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_nr_migrate",
+		.data		= &sysctl_sched_nr_migrate,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{
 		.ctl_name	= CTL_UNNUMBERED,
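With CONFIG_SCHED_DEBUG enabled, the cap can then be inspected and tuned
through /proc/sys/kernel/sched_nr_migrate. A hypothetical userspace helper,
for illustration only:

        #include <stdio.h>

        int main(void)
        {
                const char *path = "/proc/sys/kernel/sched_nr_migrate";
                unsigned int nr_migrate;
                FILE *f = fopen(path, "r");

                if (!f) {
                        perror(path);   /* knob needs CONFIG_SCHED_DEBUG */
                        return 1;
                }
                if (fscanf(f, "%u", &nr_migrate) != 1) {
                        fprintf(stderr, "%s: unexpected contents\n", path);
                        fclose(f);
                        return 1;
                }
                fclose(f);
                printf("sched_nr_migrate = %u\n", nr_migrate);
                return 0;
        }

Note that `.mode = 644` in the new entry is missing the leading 0 of an
octal constant (decimal 644 is octal 01204); later kernels carry this entry
with mode 0644.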