Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:
 "Four bugfixes and one performance fix"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Avoid integer overflow
  sched: Optimize task_sched_runtime()
  sched/numa: Cure update_numa_stats() vs. hotplug
  sched/numa: Fix NULL pointer dereference in task_numa_migrate()
  sched: Fix endless sync_sched/rcu() loop inside _cpu_down()
This commit is contained in:
commit
fe8a45df36
|
@ -306,7 +306,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
||||||
__func__, cpu);
|
__func__, cpu);
|
||||||
goto out_release;
|
goto out_release;
|
||||||
}
|
}
|
||||||
smpboot_park_threads(cpu);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* By now we've cleared cpu_active_mask, wait for all preempt-disabled
|
* By now we've cleared cpu_active_mask, wait for all preempt-disabled
|
||||||
|
@ -315,12 +314,16 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
||||||
*
|
*
|
||||||
* For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
|
* For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
|
||||||
* not imply sync_sched(), so explicitly call both.
|
* not imply sync_sched(), so explicitly call both.
|
||||||
|
*
|
||||||
|
* Do sync before parking smpboot threads to take care of the rcu boost case.
|
||||||
*/
|
*/
|
||||||
#ifdef CONFIG_PREEMPT
|
#ifdef CONFIG_PREEMPT
|
||||||
synchronize_sched();
|
synchronize_sched();
|
||||||
#endif
|
#endif
|
||||||
synchronize_rcu();
|
synchronize_rcu();
|
||||||
|
|
||||||
|
smpboot_park_threads(cpu);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* So now all preempt/rcu users must observe !cpu_active().
|
* So now all preempt/rcu users must observe !cpu_active().
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -2253,6 +2253,20 @@ unsigned long long task_sched_runtime(struct task_struct *p)
|
||||||
struct rq *rq;
|
struct rq *rq;
|
||||||
u64 ns = 0;
|
u64 ns = 0;
|
||||||
|
|
||||||
|
#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
|
||||||
|
/*
|
||||||
|
* 64-bit doesn't need locks to atomically read a 64bit value.
|
||||||
|
* So we have an optimization chance when the task's delta_exec is 0.
|
||||||
|
* Reading ->on_cpu is racy, but this is ok.
|
||||||
|
*
|
||||||
|
* If we race with it leaving cpu, we'll take a lock. So we're correct.
|
||||||
|
* If we race with it entering cpu, unaccounted time is 0. This is
|
||||||
|
* indistinguishable from the read occurring a few cycles earlier.
|
||||||
|
*/
|
||||||
|
if (!p->on_cpu)
|
||||||
|
return p->se.sum_exec_runtime;
|
||||||
|
#endif
|
||||||
|
|
||||||
rq = task_rq_lock(p, &flags);
|
rq = task_rq_lock(p, &flags);
|
||||||
ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
|
ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
|
||||||
task_rq_unlock(rq, p, &flags);
|
task_rq_unlock(rq, p, &flags);
|
||||||
|
|
|
@ -1000,7 +1000,7 @@ struct numa_stats {
|
||||||
*/
|
*/
|
||||||
static void update_numa_stats(struct numa_stats *ns, int nid)
|
static void update_numa_stats(struct numa_stats *ns, int nid)
|
||||||
{
|
{
|
||||||
int cpu;
|
int cpu, cpus = 0;
|
||||||
|
|
||||||
memset(ns, 0, sizeof(*ns));
|
memset(ns, 0, sizeof(*ns));
|
||||||
for_each_cpu(cpu, cpumask_of_node(nid)) {
|
for_each_cpu(cpu, cpumask_of_node(nid)) {
|
||||||
|
@ -1009,8 +1009,21 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
|
||||||
ns->nr_running += rq->nr_running;
|
ns->nr_running += rq->nr_running;
|
||||||
ns->load += weighted_cpuload(cpu);
|
ns->load += weighted_cpuload(cpu);
|
||||||
ns->power += power_of(cpu);
|
ns->power += power_of(cpu);
|
||||||
|
|
||||||
|
cpus++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we raced with hotplug and there are no CPUs left in our mask
|
||||||
|
* the @ns structure is NULL'ed and task_numa_compare() will
|
||||||
|
* not find this node attractive.
|
||||||
|
*
|
||||||
|
* We'll either bail at !has_capacity, or we'll detect a huge imbalance
|
||||||
|
* and bail there.
|
||||||
|
*/
|
||||||
|
if (!cpus)
|
||||||
|
return;
|
||||||
|
|
||||||
ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power;
|
ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power;
|
||||||
ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE);
|
ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE);
|
||||||
ns->has_capacity = (ns->nr_running < ns->capacity);
|
ns->has_capacity = (ns->nr_running < ns->capacity);
|
||||||
|
@ -1201,9 +1214,21 @@ static int task_numa_migrate(struct task_struct *p)
|
||||||
*/
|
*/
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
|
sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
|
||||||
env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
|
if (sd)
|
||||||
|
env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Cpusets can break the scheduler domain tree into smaller
|
||||||
|
* balance domains, some of which do not cross NUMA boundaries.
|
||||||
|
* Tasks that are "trapped" in such domains cannot be migrated
|
||||||
|
* elsewhere, so there is no point in (re)trying.
|
||||||
|
*/
|
||||||
|
if (unlikely(!sd)) {
|
||||||
|
p->numa_preferred_nid = cpu_to_node(task_cpu(p));
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
taskweight = task_weight(p, env.src_nid);
|
taskweight = task_weight(p, env.src_nid);
|
||||||
groupweight = group_weight(p, env.src_nid);
|
groupweight = group_weight(p, env.src_nid);
|
||||||
update_numa_stats(&env.src_stats, env.src_nid);
|
update_numa_stats(&env.src_stats, env.src_nid);
|
||||||
|
@ -2153,7 +2178,7 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa,
|
||||||
long contrib;
|
long contrib;
|
||||||
|
|
||||||
/* The fraction of a cpu used by this cfs_rq */
|
/* The fraction of a cpu used by this cfs_rq */
|
||||||
contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT,
|
contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT,
|
||||||
sa->runnable_avg_period + 1);
|
sa->runnable_avg_period + 1);
|
||||||
contrib -= cfs_rq->tg_runnable_contrib;
|
contrib -= cfs_rq->tg_runnable_contrib;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue