Merge branch 'sched/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (76 commits)
  sched_clock: and multiplier for TSC to gtod drift
  sched_clock: record TSC after gtod
  sched_clock: only update deltas with local reads.
  sched_clock: fix calculation of other CPU
  sched_clock: stop maximum check on NO HZ
  sched_clock: widen the max and min time
  sched_clock: record from last tick
  sched: fix accounting in task delay accounting & migration
  sched: add avg-overlap support to RT tasks
  sched: terminate newidle balancing once at least one task has moved over
  sched: fix warning
  sched: build fix
  sched: sched_clock_cpu() based cpu_clock(), lockdep fix
  sched: export cpu_clock
  sched: make sched_{rt,fair}.c ifdefs more readable
  sched: bias effective_load() error towards failing wake_affine().
  sched: incremental effective_load()
  sched: correct wakeup weight calculations
  sched: fix mult overflow
  sched: update shares on wakeup
  ...

commit 17489c058e
@@ -61,10 +61,7 @@ builder by #define'ing ARCH_HASH_SCHED_DOMAIN, and exporting your
 arch_init_sched_domains function. This function will attach domains to all
 CPUs using cpu_attach_domain.
 
-Implementors should change the line
-#undef SCHED_DOMAIN_DEBUG
-to
-#define SCHED_DOMAIN_DEBUG
-in kernel/sched.c as this enables an error checking parse of the sched domains
+The sched-domains debugging infrastructure can be enabled by enabling
+CONFIG_SCHED_DEBUG. This enables an error checking parse of the sched domains
 which should catch most possible errors (described above). It also prints out
 the domain structure in a visual format.
@@ -51,9 +51,9 @@ needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s =
 0.00015s. So this group can be scheduled with a period of 0.005s and a run time
 of 0.00015s.
 
-The remaining CPU time will be used for user input and other tass. Because
+The remaining CPU time will be used for user input and other tasks. Because
 realtime tasks have explicitly allocated the CPU time they need to perform
-their tasks, buffer underruns in the graphocs or audio can be eliminated.
+their tasks, buffer underruns in the graphics or audio can be eliminated.
 
 NOTE: the above example is not fully implemented as of yet (2.6.25). We still
 lack an EDF scheduler to make non-uniform periods usable.
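A minimal user-space sketch of the arithmetic in the example above (illustrative only, not part of this patch): utilization times period gives the group's run time, so 0.03 * 0.005 s = 150 us. The helper name and the microsecond units are assumptions made for the sketch.

#include <stdio.h>

/* Illustrative only: run time = utilization * period, in microseconds. */
static unsigned long long rt_runtime_us(double utilization,
					unsigned long long period_us)
{
	return (unsigned long long)(utilization * (double)period_us);
}

int main(void)
{
	unsigned long long period_us = 5000;	/* 0.005 s */

	printf("period=%lluus runtime=%lluus\n",
	       period_us, rt_runtime_us(0.03, period_us));	/* 5000us, 150us */
	return 0;
}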
@@ -134,7 +134,6 @@ extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
-extern unsigned long weighted_cpuload(const int cpu);
 
 struct seq_file;
 struct cfs_rq;
@@ -784,6 +783,8 @@ struct sched_domain {
 	unsigned int balance_interval;	/* initialise to 1. units in ms. */
 	unsigned int nr_balance_failed; /* initialise to 0 */
 
+	u64 last_update;
+
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
 	unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -823,23 +824,6 @@ extern int arch_reinit_sched_domains(void);
 
 #endif	/* CONFIG_SMP */
 
-/*
- * A runqueue laden with a single nice 0 task scores a weighted_cpuload of
- * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a
- * task of nice 0 or enough lower priority tasks to bring up the
- * weighted_cpuload
- */
-static inline int above_background_load(void)
-{
-	unsigned long cpu;
-
-	for_each_online_cpu(cpu) {
-		if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE)
-			return 1;
-	}
-	return 0;
-}
-
 struct io_context;			/* See blkdev.h */
 #define NGROUPS_SMALL		32
 #define NGROUPS_PER_BLOCK	((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
@@ -921,8 +905,8 @@ struct sched_class {
 	void (*set_cpus_allowed)(struct task_struct *p,
 				 const cpumask_t *newmask);
 
-	void (*join_domain)(struct rq *rq);
-	void (*leave_domain)(struct rq *rq);
+	void (*rq_online)(struct rq *rq);
+	void (*rq_offline)(struct rq *rq);
 
 	void (*switched_from) (struct rq *this_rq, struct task_struct *task,
 			       int running);
@@ -1039,6 +1023,7 @@ struct task_struct {
 #endif
 
 	int prio, static_prio, normal_prio;
+	unsigned int rt_priority;
 	const struct sched_class *sched_class;
 	struct sched_entity se;
 	struct sched_rt_entity rt;
@@ -1122,7 +1107,6 @@ struct task_struct {
 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
 	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
 
-	unsigned int rt_priority;
 	cputime_t utime, stime, utimescaled, stimescaled;
 	cputime_t gtime;
 	cputime_t prev_utime, prev_stime;
@@ -1141,12 +1125,12 @@ struct task_struct {
 	gid_t gid,egid,sgid,fsgid;
 	struct group_info *group_info;
 	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted, cap_bset;
-	unsigned securebits;
 	struct user_struct *user;
+	unsigned securebits;
 #ifdef CONFIG_KEYS
+	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
 	struct key *request_key_auth;	/* assumed request_key authority */
 	struct key *thread_keyring;	/* keyring private to this thread */
-	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
 #endif
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
@@ -1233,8 +1217,8 @@ struct task_struct {
 # define MAX_LOCK_DEPTH 48UL
 	u64 curr_chain_key;
 	int lockdep_depth;
-	struct held_lock held_locks[MAX_LOCK_DEPTH];
 	unsigned int lockdep_recursion;
+	struct held_lock held_locks[MAX_LOCK_DEPTH];
 #endif
 
 /* journalling filesystem info */
@@ -1262,10 +1246,6 @@ struct task_struct {
 	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
 	cputime_t acct_stimexpd;/* stime since last update */
 #endif
-#ifdef CONFIG_NUMA
-	struct mempolicy *mempolicy;
-	short il_next;
-#endif
 #ifdef CONFIG_CPUSETS
 	nodemask_t mems_allowed;
 	int cpuset_mems_generation;
@@ -1284,6 +1264,10 @@ struct task_struct {
 #endif
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
+#endif
+#ifdef CONFIG_NUMA
+	struct mempolicy *mempolicy;
+	short il_next;
 #endif
 	atomic_t fs_excl;	/* holding fs exclusive resources */
 	struct rcu_head rcu;
@@ -1504,6 +1488,7 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
 #define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
 #define PF_SPREAD_SLAB	0x02000000	/* Spread some slab caches over cpuset */
+#define PF_THREAD_BOUND	0x04000000	/* Thread bound to specific cpu */
 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezeable */
@@ -1573,13 +1558,28 @@ static inline void sched_clock_idle_sleep_event(void)
 static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
 {
 }
-#else
+
+#ifdef CONFIG_NO_HZ
+static inline void sched_clock_tick_stop(int cpu)
+{
+}
+
+static inline void sched_clock_tick_start(int cpu)
+{
+}
+#endif
+
+#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 extern void sched_clock_init(void);
 extern u64 sched_clock_cpu(int cpu);
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+
+#ifdef CONFIG_NO_HZ
+extern void sched_clock_tick_stop(int cpu);
+extern void sched_clock_tick_start(int cpu);
 #endif
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
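The hunk above pairs extern declarations with inline no-op stubs so callers of sched_clock_tick_stop()/sched_clock_tick_start() compile whether or not CONFIG_NO_HZ and CONFIG_HAVE_UNSTABLE_SCHED_CLOCK are enabled. A stand-alone sketch of that compile-out idiom, with a made-up FEATURE_X standing in for the real config options:

#include <stdio.h>

/*
 * Illustrative only: when the feature is compiled out, inline no-op stubs
 * replace the real functions so call sites build unchanged.
 */
#ifdef FEATURE_X
extern void feature_tick_stop(int cpu);
extern void feature_tick_start(int cpu);
#else
static inline void feature_tick_stop(int cpu)  { (void)cpu; }
static inline void feature_tick_start(int cpu) { (void)cpu; }
#endif

int main(void)
{
	feature_tick_stop(0);	/* compiles either way; a no-op here */
	feature_tick_start(0);
	printf("stubs compiled out cleanly\n");
	return 0;
}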
@@ -1622,6 +1622,7 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_shares_ratelimit;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
@@ -3,7 +3,7 @@
 #
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
-	    exit.o itimer.o time.o softirq.o resource.o \
+	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
@@ -27,7 +27,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
-obj-$(CONFIG_SMP) += cpu.o spinlock.o
+obj-$(CONFIG_SMP) += spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
@@ -69,6 +69,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_MARKERS) += marker.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
+obj-$(CONFIG_SMP) += sched_cpupri.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
kernel/cpu.c | 24
@@ -15,6 +15,28 @@
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
 
+/*
+ * Represents all cpu's present in the system
+ * In systems capable of hotplug, this map could dynamically grow
+ * as new cpu's are detected in the system via any platform specific
+ * method, such as ACPI for e.g.
+ */
+cpumask_t cpu_present_map __read_mostly;
+EXPORT_SYMBOL(cpu_present_map);
+
+#ifndef CONFIG_SMP
+
+/*
+ * Represents all cpu's that are currently online.
+ */
+cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_online_map);
+
+cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_possible_map);
+
+#else /* CONFIG_SMP */
+
 /* Serializes the updates to cpu_online_map, cpu_present_map */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 
@@ -403,3 +425,5 @@ out:
 	cpu_maps_update_done();
 }
 #endif /* CONFIG_PM_SLEEP_SMP */
+
+#endif /* CONFIG_SMP */
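The comment block added above distinguishes three masks: possible CPUs (could ever exist), present CPUs (physically there), and online CPUs (usable by the scheduler). A user-space analogue, with plain bitmasks standing in for the kernel's cpumask_t and a made-up for_each_cpu_in() in place of the real iterators:

#include <stdio.h>

#define MAX_CPUS	8

static unsigned long cpu_possible_map = 0xffUL;	/* could ever exist        */
static unsigned long cpu_present_map  = 0x0fUL;	/* physically present      */
static unsigned long cpu_online_map   = 0x07UL;	/* currently schedulable   */

/* invented helper: iterate over the set bits of a mask */
#define for_each_cpu_in(map, cpu) \
	for ((cpu) = 0; (cpu) < MAX_CPUS; (cpu)++) \
		if ((map) & (1UL << (cpu)))

int main(void)
{
	int cpu;

	for_each_cpu_in(cpu_online_map, cpu)
		printf("cpu%d is online\n", cpu);

	/* a hot-added CPU becomes present first, online only after bring-up */
	cpu_present_map |= 1UL << 4;
	printf("present=%#lx online=%#lx possible=%#lx\n",
	       cpu_present_map, cpu_online_map, cpu_possible_map);
	return 0;
}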
@@ -1194,6 +1194,15 @@ static int cpuset_can_attach(struct cgroup_subsys *ss,
 
 	if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
 		return -ENOSPC;
+	if (tsk->flags & PF_THREAD_BOUND) {
+		cpumask_t mask;
+
+		mutex_lock(&callback_mutex);
+		mask = cs->cpus_allowed;
+		mutex_unlock(&callback_mutex);
+		if (!cpus_equal(tsk->cpus_allowed, mask))
+			return -EINVAL;
+	}
 
 	return security_task_setscheduler(tsk, 0, NULL);
 }
@@ -1207,11 +1216,14 @@ static void cpuset_attach(struct cgroup_subsys *ss,
 	struct mm_struct *mm;
 	struct cpuset *cs = cgroup_cs(cont);
 	struct cpuset *oldcs = cgroup_cs(oldcont);
+	int err;
 
 	mutex_lock(&callback_mutex);
 	guarantee_online_cpus(cs, &cpus);
-	set_cpus_allowed_ptr(tsk, &cpus);
+	err = set_cpus_allowed_ptr(tsk, &cpus);
 	mutex_unlock(&callback_mutex);
+	if (err)
+		return;
 
 	from = oldcs->mems_allowed;
 	to = cs->mems_allowed;
@@ -180,6 +180,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu)
 	set_task_cpu(k, cpu);
 	k->cpus_allowed = cpumask_of_cpu(cpu);
 	k->rt.nr_cpus_allowed = 1;
+	k->flags |= PF_THREAD_BOUND;
 }
 EXPORT_SYMBOL(kthread_bind);
 
kernel/sched.c | 723 (diff suppressed because it is too large)
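The sched_clock changes that follow keep a per-CPU multiplier in Q15 fixed point (MULTI_SHIFT == 15), tracking how fast the raw clock runs relative to gtod and clamped between half and double (MIN_MULTI/MAX_MULTI), then scale each raw delta by it. A stand-alone sketch of that fixed-point arithmetic; only the three macros come from the patch, everything else is invented for illustration:

#include <stdio.h>
#include <stdint.h>

#define MULTI_SHIFT	15
#define MAX_MULTI	(2LL << MULTI_SHIFT)		/* at most double   */
#define MIN_MULTI	(1LL << (MULTI_SHIFT - 1))	/* at least half    */

/* ratio of gtod progress to raw-clock progress over the last tick, in Q15 */
static int64_t compute_multi(int64_t delta_gtod, int64_t delta_raw)
{
	int64_t mult;

	if (delta_raw <= 0)
		return 1LL << MULTI_SHIFT;	/* identity multiplier */

	mult = (delta_gtod << MULTI_SHIFT) / delta_raw;
	if (mult > MAX_MULTI)
		mult = MAX_MULTI;
	else if (mult < MIN_MULTI)
		mult = MIN_MULTI;
	return mult;
}

int main(void)
{
	/* the raw clock ran about 4% fast relative to gtod over the tick */
	int64_t multi = compute_multi(1000000, 1040000);
	int64_t delta = 52000;			/* raw ns since the tick */

	/* scale the raw delta back toward gtod time */
	printf("multi=%lld scaled delta=%lld\n",
	       (long long)multi, (long long)((delta * multi) >> MULTI_SHIFT));
	return 0;
}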
@ -3,6 +3,9 @@
|
||||||
*
|
*
|
||||||
* Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
|
* Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
|
||||||
*
|
*
|
||||||
|
* Updates and enhancements:
|
||||||
|
* Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
|
||||||
|
*
|
||||||
* Based on code by:
|
* Based on code by:
|
||||||
* Ingo Molnar <mingo@redhat.com>
|
* Ingo Molnar <mingo@redhat.com>
|
||||||
* Guillaume Chazarain <guichaz@gmail.com>
|
* Guillaume Chazarain <guichaz@gmail.com>
|
||||||
|
@ -32,6 +35,11 @@
|
||||||
|
|
||||||
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
|
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
|
||||||
|
|
||||||
|
#define MULTI_SHIFT 15
|
||||||
|
/* Max is double, Min is 1/2 */
|
||||||
|
#define MAX_MULTI (2LL << MULTI_SHIFT)
|
||||||
|
#define MIN_MULTI (1LL << (MULTI_SHIFT-1))
|
||||||
|
|
||||||
struct sched_clock_data {
|
struct sched_clock_data {
|
||||||
/*
|
/*
|
||||||
* Raw spinlock - this is a special case: this might be called
|
* Raw spinlock - this is a special case: this might be called
|
||||||
|
@ -40,11 +48,15 @@ struct sched_clock_data {
|
||||||
*/
|
*/
|
||||||
raw_spinlock_t lock;
|
raw_spinlock_t lock;
|
||||||
|
|
||||||
unsigned long prev_jiffies;
|
unsigned long tick_jiffies;
|
||||||
u64 prev_raw;
|
u64 prev_raw;
|
||||||
u64 tick_raw;
|
u64 tick_raw;
|
||||||
u64 tick_gtod;
|
u64 tick_gtod;
|
||||||
u64 clock;
|
u64 clock;
|
||||||
|
s64 multi;
|
||||||
|
#ifdef CONFIG_NO_HZ
|
||||||
|
int check_max;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
|
static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
|
||||||
|
@ -71,41 +83,91 @@ void sched_clock_init(void)
|
||||||
struct sched_clock_data *scd = cpu_sdc(cpu);
|
struct sched_clock_data *scd = cpu_sdc(cpu);
|
||||||
|
|
||||||
scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
|
scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
|
||||||
scd->prev_jiffies = now_jiffies;
|
scd->tick_jiffies = now_jiffies;
|
||||||
scd->prev_raw = 0;
|
scd->prev_raw = 0;
|
||||||
scd->tick_raw = 0;
|
scd->tick_raw = 0;
|
||||||
scd->tick_gtod = ktime_now;
|
scd->tick_gtod = ktime_now;
|
||||||
scd->clock = ktime_now;
|
scd->clock = ktime_now;
|
||||||
|
scd->multi = 1 << MULTI_SHIFT;
|
||||||
|
#ifdef CONFIG_NO_HZ
|
||||||
|
scd->check_max = 1;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
sched_clock_running = 1;
|
sched_clock_running = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_NO_HZ
|
||||||
|
/*
|
||||||
|
* The dynamic ticks makes the delta jiffies inaccurate. This
|
||||||
|
* prevents us from checking the maximum time update.
|
||||||
|
* Disable the maximum check during stopped ticks.
|
||||||
|
*/
|
||||||
|
void sched_clock_tick_stop(int cpu)
|
||||||
|
{
|
||||||
|
struct sched_clock_data *scd = cpu_sdc(cpu);
|
||||||
|
|
||||||
|
scd->check_max = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void sched_clock_tick_start(int cpu)
|
||||||
|
{
|
||||||
|
struct sched_clock_data *scd = cpu_sdc(cpu);
|
||||||
|
|
||||||
|
scd->check_max = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int check_max(struct sched_clock_data *scd)
|
||||||
|
{
|
||||||
|
return scd->check_max;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static int check_max(struct sched_clock_data *scd)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_NO_HZ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* update the percpu scd from the raw @now value
|
* update the percpu scd from the raw @now value
|
||||||
*
|
*
|
||||||
* - filter out backward motion
|
* - filter out backward motion
|
||||||
* - use jiffies to generate a min,max window to clip the raw values
|
* - use jiffies to generate a min,max window to clip the raw values
|
||||||
*/
|
*/
|
||||||
static void __update_sched_clock(struct sched_clock_data *scd, u64 now)
|
static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *time)
|
||||||
{
|
{
|
||||||
unsigned long now_jiffies = jiffies;
|
unsigned long now_jiffies = jiffies;
|
||||||
long delta_jiffies = now_jiffies - scd->prev_jiffies;
|
long delta_jiffies = now_jiffies - scd->tick_jiffies;
|
||||||
u64 clock = scd->clock;
|
u64 clock = scd->clock;
|
||||||
u64 min_clock, max_clock;
|
u64 min_clock, max_clock;
|
||||||
s64 delta = now - scd->prev_raw;
|
s64 delta = now - scd->prev_raw;
|
||||||
|
|
||||||
WARN_ON_ONCE(!irqs_disabled());
|
WARN_ON_ONCE(!irqs_disabled());
|
||||||
min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
|
|
||||||
|
/*
|
||||||
|
* At schedule tick the clock can be just under the gtod. We don't
|
||||||
|
* want to push it too prematurely.
|
||||||
|
*/
|
||||||
|
min_clock = scd->tick_gtod + (delta_jiffies * TICK_NSEC);
|
||||||
|
if (min_clock > TICK_NSEC)
|
||||||
|
min_clock -= TICK_NSEC / 2;
|
||||||
|
|
||||||
if (unlikely(delta < 0)) {
|
if (unlikely(delta < 0)) {
|
||||||
clock++;
|
clock++;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
max_clock = min_clock + TICK_NSEC;
|
/*
|
||||||
|
* The clock must stay within a jiffie of the gtod.
|
||||||
|
* But since we may be at the start of a jiffy or the end of one
|
||||||
|
* we add another jiffy buffer.
|
||||||
|
*/
|
||||||
|
max_clock = scd->tick_gtod + (2 + delta_jiffies) * TICK_NSEC;
|
||||||
|
|
||||||
if (unlikely(clock + delta > max_clock)) {
|
delta *= scd->multi;
|
||||||
|
delta >>= MULTI_SHIFT;
|
||||||
|
|
||||||
|
if (unlikely(clock + delta > max_clock) && check_max(scd)) {
|
||||||
if (clock < max_clock)
|
if (clock < max_clock)
|
||||||
clock = max_clock;
|
clock = max_clock;
|
||||||
else
|
else
|
||||||
|
@ -118,9 +180,12 @@ static void __update_sched_clock(struct sched_clock_data *scd, u64 now)
|
||||||
if (unlikely(clock < min_clock))
|
if (unlikely(clock < min_clock))
|
||||||
clock = min_clock;
|
clock = min_clock;
|
||||||
|
|
||||||
scd->prev_raw = now;
|
if (time)
|
||||||
scd->prev_jiffies = now_jiffies;
|
*time = clock;
|
||||||
scd->clock = clock;
|
else {
|
||||||
|
scd->prev_raw = now;
|
||||||
|
scd->clock = clock;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void lock_double_clock(struct sched_clock_data *data1,
|
static void lock_double_clock(struct sched_clock_data *data1,
|
||||||
|
@ -160,25 +225,30 @@ u64 sched_clock_cpu(int cpu)
|
||||||
now -= my_scd->tick_raw;
|
now -= my_scd->tick_raw;
|
||||||
now += scd->tick_raw;
|
now += scd->tick_raw;
|
||||||
|
|
||||||
now -= my_scd->tick_gtod;
|
now += my_scd->tick_gtod;
|
||||||
now += scd->tick_gtod;
|
now -= scd->tick_gtod;
|
||||||
|
|
||||||
__raw_spin_unlock(&my_scd->lock);
|
__raw_spin_unlock(&my_scd->lock);
|
||||||
|
|
||||||
|
__update_sched_clock(scd, now, &clock);
|
||||||
|
|
||||||
|
__raw_spin_unlock(&scd->lock);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
__raw_spin_lock(&scd->lock);
|
__raw_spin_lock(&scd->lock);
|
||||||
|
__update_sched_clock(scd, now, NULL);
|
||||||
|
clock = scd->clock;
|
||||||
|
__raw_spin_unlock(&scd->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
__update_sched_clock(scd, now);
|
|
||||||
clock = scd->clock;
|
|
||||||
|
|
||||||
__raw_spin_unlock(&scd->lock);
|
|
||||||
|
|
||||||
return clock;
|
return clock;
|
||||||
}
|
}
|
||||||
|
|
||||||
void sched_clock_tick(void)
|
void sched_clock_tick(void)
|
||||||
{
|
{
|
||||||
struct sched_clock_data *scd = this_scd();
|
struct sched_clock_data *scd = this_scd();
|
||||||
|
unsigned long now_jiffies = jiffies;
|
||||||
|
s64 mult, delta_gtod, delta_raw;
|
||||||
u64 now, now_gtod;
|
u64 now, now_gtod;
|
||||||
|
|
||||||
if (unlikely(!sched_clock_running))
|
if (unlikely(!sched_clock_running))
|
||||||
|
@ -186,18 +256,33 @@ void sched_clock_tick(void)
|
||||||
|
|
||||||
WARN_ON_ONCE(!irqs_disabled());
|
WARN_ON_ONCE(!irqs_disabled());
|
||||||
|
|
||||||
now = sched_clock();
|
|
||||||
now_gtod = ktime_to_ns(ktime_get());
|
now_gtod = ktime_to_ns(ktime_get());
|
||||||
|
now = sched_clock();
|
||||||
|
|
||||||
__raw_spin_lock(&scd->lock);
|
__raw_spin_lock(&scd->lock);
|
||||||
__update_sched_clock(scd, now);
|
__update_sched_clock(scd, now, NULL);
|
||||||
/*
|
/*
|
||||||
* update tick_gtod after __update_sched_clock() because that will
|
* update tick_gtod after __update_sched_clock() because that will
|
||||||
* already observe 1 new jiffy; adding a new tick_gtod to that would
|
* already observe 1 new jiffy; adding a new tick_gtod to that would
|
||||||
* increase the clock 2 jiffies.
|
* increase the clock 2 jiffies.
|
||||||
*/
|
*/
|
||||||
|
delta_gtod = now_gtod - scd->tick_gtod;
|
||||||
|
delta_raw = now - scd->tick_raw;
|
||||||
|
|
||||||
|
if ((long)delta_raw > 0) {
|
||||||
|
mult = delta_gtod << MULTI_SHIFT;
|
||||||
|
do_div(mult, delta_raw);
|
||||||
|
scd->multi = mult;
|
||||||
|
if (scd->multi > MAX_MULTI)
|
||||||
|
scd->multi = MAX_MULTI;
|
||||||
|
else if (scd->multi < MIN_MULTI)
|
||||||
|
scd->multi = MIN_MULTI;
|
||||||
|
} else
|
||||||
|
scd->multi = 1 << MULTI_SHIFT;
|
||||||
|
|
||||||
scd->tick_raw = now;
|
scd->tick_raw = now;
|
||||||
scd->tick_gtod = now_gtod;
|
scd->tick_gtod = now_gtod;
|
||||||
|
scd->tick_jiffies = now_jiffies;
|
||||||
__raw_spin_unlock(&scd->lock);
|
__raw_spin_unlock(&scd->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -227,6 +312,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
|
||||||
__raw_spin_lock(&scd->lock);
|
__raw_spin_lock(&scd->lock);
|
||||||
scd->prev_raw = now;
|
scd->prev_raw = now;
|
||||||
scd->clock += delta_ns;
|
scd->clock += delta_ns;
|
||||||
|
scd->multi = 1 << MULTI_SHIFT;
|
||||||
__raw_spin_unlock(&scd->lock);
|
__raw_spin_unlock(&scd->lock);
|
||||||
|
|
||||||
touch_softlockup_watchdog();
|
touch_softlockup_watchdog();
|
||||||
|
@ -244,3 +330,16 @@ unsigned long long __attribute__((weak)) sched_clock(void)
|
||||||
{
|
{
|
||||||
return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
|
return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned long long cpu_clock(int cpu)
|
||||||
|
{
|
||||||
|
unsigned long long clock;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
local_irq_save(flags);
|
||||||
|
clock = sched_clock_cpu(cpu);
|
||||||
|
local_irq_restore(flags);
|
||||||
|
|
||||||
|
return clock;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(cpu_clock);
|
||||||
|
|
|
@@ -0,0 +1,174 @@
+/*
+ *  kernel/sched_cpupri.c
+ *
+ *  CPU priority management
+ *
+ *  Copyright (C) 2007-2008 Novell
+ *
+ *  Author: Gregory Haskins <ghaskins@novell.com>
+ *
+ *  This code tracks the priority of each CPU so that global migration
+ *  decisions are easy to calculate.  Each CPU can be in a state as follows:
+ *
+ *                 (INVALID), IDLE, NORMAL, RT1, ... RT99
+ *
+ *  going from the lowest priority to the highest.  CPUs in the INVALID state
+ *  are not eligible for routing.  The system maintains this state with
+ *  a 2 dimensional bitmap (the first for priority class, the second for cpus
+ *  in that class).  Therefore a typical application without affinity
+ *  restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
+ *  searches).  For tasks with affinity restrictions, the algorithm has a
+ *  worst case complexity of O(min(102, nr_domcpus)), though the scenario that
+ *  yields the worst case search is fairly contrived.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; version 2
+ *  of the License.
+ */
+
+#include "sched_cpupri.h"
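A toy user-space version of the two-level lookup described in the comment above: a non-empty flag per priority class (the kernel keeps a bitmap, pri_active) plus a per-class CPU mask (the kernel uses cpumask_t vectors). The class numbering follows the header added below: 0 is IDLE, 1 is NORMAL, 2-101 are RT priorities 0-99. All names and the scenario in main() are invented for illustration:

#include <stdio.h>

#define NR_CLASSES	102			/* IDLE, NORMAL, RT0..RT99 */

static unsigned long class_cpus[NR_CLASSES];	/* class -> mask of CPUs in it */
static unsigned char class_nonempty[NR_CLASSES];/* kernel keeps a bitmap here  */

static void toy_cpupri_set(int cpu, int oldpri, int newpri)
{
	if (oldpri >= 0) {
		class_cpus[oldpri] &= ~(1UL << cpu);
		class_nonempty[oldpri] = class_cpus[oldpri] != 0;
	}
	class_cpus[newpri] |= 1UL << cpu;
	class_nonempty[newpri] = 1;
}

/* allowed CPUs whose class is lower than task_pri, lowest class first */
static unsigned long toy_cpupri_find(int task_pri, unsigned long allowed)
{
	int pri;

	for (pri = 0; pri < task_pri && pri < NR_CLASSES; pri++) {
		if (!class_nonempty[pri])
			continue;		/* skip empty classes cheaply */
		if (class_cpus[pri] & allowed)
			return class_cpus[pri] & allowed;
	}
	return 0;
}

int main(void)
{
	toy_cpupri_set(0, -1, 0);		/* cpu0 is idle             */
	toy_cpupri_set(1, -1, 1);		/* cpu1 runs a CFS task     */
	toy_cpupri_set(2, -1, 2 + 50);		/* cpu2 runs RT priority 50 */

	/* where could an RT priority 70 task (class 72) be pushed? */
	printf("candidate mask: %#lx\n", toy_cpupri_find(2 + 70, 0x7UL));
	return 0;
}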
|
||||||
|
|
||||||
|
/* Convert between a 140 based task->prio, and our 102 based cpupri */
|
||||||
|
static int convert_prio(int prio)
|
||||||
|
{
|
||||||
|
int cpupri;
|
||||||
|
|
||||||
|
if (prio == CPUPRI_INVALID)
|
||||||
|
cpupri = CPUPRI_INVALID;
|
||||||
|
else if (prio == MAX_PRIO)
|
||||||
|
cpupri = CPUPRI_IDLE;
|
||||||
|
else if (prio >= MAX_RT_PRIO)
|
||||||
|
cpupri = CPUPRI_NORMAL;
|
||||||
|
else
|
||||||
|
cpupri = MAX_RT_PRIO - prio + 1;
|
||||||
|
|
||||||
|
return cpupri;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define for_each_cpupri_active(array, idx) \
|
||||||
|
for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES); \
|
||||||
|
idx < CPUPRI_NR_PRIORITIES; \
|
||||||
|
idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1))
|
||||||
|
|
||||||
|
/**
|
||||||
|
* cpupri_find - find the best (lowest-pri) CPU in the system
|
||||||
|
* @cp: The cpupri context
|
||||||
|
* @p: The task
|
||||||
|
* @lowest_mask: A mask to fill in with selected CPUs
|
||||||
|
*
|
||||||
|
* Note: This function returns the recommended CPUs as calculated during the
|
||||||
|
* current invokation. By the time the call returns, the CPUs may have in
|
||||||
|
* fact changed priorities any number of times. While not ideal, it is not
|
||||||
|
* an issue of correctness since the normal rebalancer logic will correct
|
||||||
|
* any discrepancies created by racing against the uncertainty of the current
|
||||||
|
* priority configuration.
|
||||||
|
*
|
||||||
|
* Returns: (int)bool - CPUs were found
|
||||||
|
*/
|
||||||
|
int cpupri_find(struct cpupri *cp, struct task_struct *p,
|
||||||
|
cpumask_t *lowest_mask)
|
||||||
|
{
|
||||||
|
int idx = 0;
|
||||||
|
int task_pri = convert_prio(p->prio);
|
||||||
|
|
||||||
|
for_each_cpupri_active(cp->pri_active, idx) {
|
||||||
|
struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
|
||||||
|
cpumask_t mask;
|
||||||
|
|
||||||
|
if (idx >= task_pri)
|
||||||
|
break;
|
||||||
|
|
||||||
|
cpus_and(mask, p->cpus_allowed, vec->mask);
|
||||||
|
|
||||||
|
if (cpus_empty(mask))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
*lowest_mask = mask;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* cpupri_set - update the cpu priority setting
|
||||||
|
* @cp: The cpupri context
|
||||||
|
* @cpu: The target cpu
|
||||||
|
* @pri: The priority (INVALID-RT99) to assign to this CPU
|
||||||
|
*
|
||||||
|
* Note: Assumes cpu_rq(cpu)->lock is locked
|
||||||
|
*
|
||||||
|
* Returns: (void)
|
||||||
|
*/
|
||||||
|
void cpupri_set(struct cpupri *cp, int cpu, int newpri)
|
||||||
|
{
|
||||||
|
int *currpri = &cp->cpu_to_pri[cpu];
|
||||||
|
int oldpri = *currpri;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
newpri = convert_prio(newpri);
|
||||||
|
|
||||||
|
BUG_ON(newpri >= CPUPRI_NR_PRIORITIES);
|
||||||
|
|
||||||
|
if (newpri == oldpri)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the cpu was currently mapped to a different value, we
|
||||||
|
* first need to unmap the old value
|
||||||
|
*/
|
||||||
|
if (likely(oldpri != CPUPRI_INVALID)) {
|
||||||
|
struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
|
||||||
|
|
||||||
|
spin_lock_irqsave(&vec->lock, flags);
|
||||||
|
|
||||||
|
vec->count--;
|
||||||
|
if (!vec->count)
|
||||||
|
clear_bit(oldpri, cp->pri_active);
|
||||||
|
cpu_clear(cpu, vec->mask);
|
||||||
|
|
||||||
|
spin_unlock_irqrestore(&vec->lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (likely(newpri != CPUPRI_INVALID)) {
|
||||||
|
struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
|
||||||
|
|
||||||
|
spin_lock_irqsave(&vec->lock, flags);
|
||||||
|
|
||||||
|
cpu_set(cpu, vec->mask);
|
||||||
|
vec->count++;
|
||||||
|
if (vec->count == 1)
|
||||||
|
set_bit(newpri, cp->pri_active);
|
||||||
|
|
||||||
|
spin_unlock_irqrestore(&vec->lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
*currpri = newpri;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* cpupri_init - initialize the cpupri structure
|
||||||
|
* @cp: The cpupri context
|
||||||
|
*
|
||||||
|
* Returns: (void)
|
||||||
|
*/
|
||||||
|
void cpupri_init(struct cpupri *cp)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
memset(cp, 0, sizeof(*cp));
|
||||||
|
|
||||||
|
for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
|
||||||
|
struct cpupri_vec *vec = &cp->pri_to_cpu[i];
|
||||||
|
|
||||||
|
spin_lock_init(&vec->lock);
|
||||||
|
vec->count = 0;
|
||||||
|
cpus_clear(vec->mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
for_each_possible_cpu(i)
|
||||||
|
cp->cpu_to_pri[i] = CPUPRI_INVALID;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
#ifndef _LINUX_CPUPRI_H
|
||||||
|
#define _LINUX_CPUPRI_H
|
||||||
|
|
||||||
|
#include <linux/sched.h>
|
||||||
|
|
||||||
|
#define CPUPRI_NR_PRIORITIES (MAX_RT_PRIO + 2)
|
||||||
|
#define CPUPRI_NR_PRI_WORDS BITS_TO_LONGS(CPUPRI_NR_PRIORITIES)
|
||||||
|
|
||||||
|
#define CPUPRI_INVALID -1
|
||||||
|
#define CPUPRI_IDLE 0
|
||||||
|
#define CPUPRI_NORMAL 1
|
||||||
|
/* values 2-101 are RT priorities 0-99 */
|
||||||
|
|
||||||
|
struct cpupri_vec {
|
||||||
|
spinlock_t lock;
|
||||||
|
int count;
|
||||||
|
cpumask_t mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct cpupri {
|
||||||
|
struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
|
||||||
|
long pri_active[CPUPRI_NR_PRI_WORDS];
|
||||||
|
int cpu_to_pri[NR_CPUS];
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
int cpupri_find(struct cpupri *cp,
|
||||||
|
struct task_struct *p, cpumask_t *lowest_mask);
|
||||||
|
void cpupri_set(struct cpupri *cp, int cpu, int pri);
|
||||||
|
void cpupri_init(struct cpupri *cp);
|
||||||
|
#else
|
||||||
|
#define cpupri_set(cp, cpu, pri) do { } while (0)
|
||||||
|
#define cpupri_init() do { } while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* _LINUX_CPUPRI_H */
|
|
@ -119,9 +119,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||||
struct sched_entity *last;
|
struct sched_entity *last;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
#if !defined(CONFIG_CGROUP_SCHED) || !defined(CONFIG_USER_SCHED)
|
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
|
||||||
SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
|
|
||||||
#else
|
|
||||||
char path[128] = "";
|
char path[128] = "";
|
||||||
struct cgroup *cgroup = NULL;
|
struct cgroup *cgroup = NULL;
|
||||||
struct task_group *tg = cfs_rq->tg;
|
struct task_group *tg = cfs_rq->tg;
|
||||||
|
@ -133,6 +131,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||||
cgroup_path(cgroup, path, sizeof(path));
|
cgroup_path(cgroup, path, sizeof(path));
|
||||||
|
|
||||||
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
|
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
|
||||||
|
#else
|
||||||
|
SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
|
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
|
||||||
|
@ -162,11 +162,64 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||||
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
|
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
|
||||||
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
|
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
|
||||||
#ifdef CONFIG_SCHEDSTATS
|
#ifdef CONFIG_SCHEDSTATS
|
||||||
SEQ_printf(m, " .%-30s: %d\n", "bkl_count",
|
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
|
||||||
rq->bkl_count);
|
|
||||||
|
P(yld_exp_empty);
|
||||||
|
P(yld_act_empty);
|
||||||
|
P(yld_both_empty);
|
||||||
|
P(yld_count);
|
||||||
|
|
||||||
|
P(sched_switch);
|
||||||
|
P(sched_count);
|
||||||
|
P(sched_goidle);
|
||||||
|
|
||||||
|
P(ttwu_count);
|
||||||
|
P(ttwu_local);
|
||||||
|
|
||||||
|
P(bkl_count);
|
||||||
|
|
||||||
|
#undef P
|
||||||
#endif
|
#endif
|
||||||
SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
|
SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
|
||||||
cfs_rq->nr_spread_over);
|
cfs_rq->nr_spread_over);
|
||||||
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
|
||||||
|
{
|
||||||
|
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
|
||||||
|
char path[128] = "";
|
||||||
|
struct cgroup *cgroup = NULL;
|
||||||
|
struct task_group *tg = rt_rq->tg;
|
||||||
|
|
||||||
|
if (tg)
|
||||||
|
cgroup = tg->css.cgroup;
|
||||||
|
|
||||||
|
if (cgroup)
|
||||||
|
cgroup_path(cgroup, path, sizeof(path));
|
||||||
|
|
||||||
|
SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
|
||||||
|
#else
|
||||||
|
SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#define P(x) \
|
||||||
|
SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
|
||||||
|
#define PN(x) \
|
||||||
|
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
|
||||||
|
|
||||||
|
P(rt_nr_running);
|
||||||
|
P(rt_throttled);
|
||||||
|
PN(rt_time);
|
||||||
|
PN(rt_runtime);
|
||||||
|
|
||||||
|
#undef PN
|
||||||
|
#undef P
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_cpu(struct seq_file *m, int cpu)
|
static void print_cpu(struct seq_file *m, int cpu)
|
||||||
|
@ -208,6 +261,7 @@ static void print_cpu(struct seq_file *m, int cpu)
|
||||||
#undef PN
|
#undef PN
|
||||||
|
|
||||||
print_cfs_stats(m, cpu);
|
print_cfs_stats(m, cpu);
|
||||||
|
print_rt_stats(m, cpu);
|
||||||
|
|
||||||
print_rq(m, rq, cpu);
|
print_rq(m, rq, cpu);
|
||||||
}
|
}
|
||||||
|
|
|
@ -63,13 +63,13 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* SCHED_OTHER wake-up granularity.
|
* SCHED_OTHER wake-up granularity.
|
||||||
* (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
* (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
||||||
*
|
*
|
||||||
* This option delays the preemption effects of decoupled workloads
|
* This option delays the preemption effects of decoupled workloads
|
||||||
* and reduces their over-scheduling. Synchronous workloads will still
|
* and reduces their over-scheduling. Synchronous workloads will still
|
||||||
* have immediate wakeup/sleep latencies.
|
* have immediate wakeup/sleep latencies.
|
||||||
*/
|
*/
|
||||||
unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
|
unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
|
||||||
|
|
||||||
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
|
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
|
||||||
|
|
||||||
|
@ -333,6 +333,34 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* delta *= w / rw
|
||||||
|
*/
|
||||||
|
static inline unsigned long
|
||||||
|
calc_delta_weight(unsigned long delta, struct sched_entity *se)
|
||||||
|
{
|
||||||
|
for_each_sched_entity(se) {
|
||||||
|
delta = calc_delta_mine(delta,
|
||||||
|
se->load.weight, &cfs_rq_of(se)->load);
|
||||||
|
}
|
||||||
|
|
||||||
|
return delta;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* delta *= rw / w
|
||||||
|
*/
|
||||||
|
static inline unsigned long
|
||||||
|
calc_delta_fair(unsigned long delta, struct sched_entity *se)
|
||||||
|
{
|
||||||
|
for_each_sched_entity(se) {
|
||||||
|
delta = calc_delta_mine(delta,
|
||||||
|
cfs_rq_of(se)->load.weight, &se->load);
|
||||||
|
}
|
||||||
|
|
||||||
|
return delta;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The idea is to set a period in which each task runs once.
|
* The idea is to set a period in which each task runs once.
|
||||||
*
|
*
|
||||||
|
@ -362,47 +390,80 @@ static u64 __sched_period(unsigned long nr_running)
|
||||||
*/
|
*/
|
||||||
static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||||
{
|
{
|
||||||
u64 slice = __sched_period(cfs_rq->nr_running);
|
return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
|
||||||
|
|
||||||
for_each_sched_entity(se) {
|
|
||||||
cfs_rq = cfs_rq_of(se);
|
|
||||||
|
|
||||||
slice *= se->load.weight;
|
|
||||||
do_div(slice, cfs_rq->load.weight);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return slice;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We calculate the vruntime slice of a to be inserted task
|
* We calculate the vruntime slice of a to be inserted task
|
||||||
*
|
*
|
||||||
* vs = s/w = p/rw
|
* vs = s*rw/w = p
|
||||||
*/
|
*/
|
||||||
static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||||
{
|
{
|
||||||
unsigned long nr_running = cfs_rq->nr_running;
|
unsigned long nr_running = cfs_rq->nr_running;
|
||||||
unsigned long weight;
|
|
||||||
u64 vslice;
|
|
||||||
|
|
||||||
if (!se->on_rq)
|
if (!se->on_rq)
|
||||||
nr_running++;
|
nr_running++;
|
||||||
|
|
||||||
vslice = __sched_period(nr_running);
|
return __sched_period(nr_running);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
|
||||||
|
* that it favours >=0 over <0.
|
||||||
|
*
|
||||||
|
* -20 |
|
||||||
|
* |
|
||||||
|
* 0 --------+-------
|
||||||
|
* .'
|
||||||
|
* 19 .'
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static unsigned long
|
||||||
|
calc_delta_asym(unsigned long delta, struct sched_entity *se)
|
||||||
|
{
|
||||||
|
struct load_weight lw = {
|
||||||
|
.weight = NICE_0_LOAD,
|
||||||
|
.inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
|
||||||
|
};
|
||||||
|
|
||||||
for_each_sched_entity(se) {
|
for_each_sched_entity(se) {
|
||||||
cfs_rq = cfs_rq_of(se);
|
struct load_weight *se_lw = &se->load;
|
||||||
|
unsigned long rw = cfs_rq_of(se)->load.weight;
|
||||||
|
|
||||||
weight = cfs_rq->load.weight;
|
#ifdef CONFIG_FAIR_SCHED_GROUP
|
||||||
if (!se->on_rq)
|
struct cfs_rq *cfs_rq = se->my_q;
|
||||||
weight += se->load.weight;
|
struct task_group *tg = NULL
|
||||||
|
|
||||||
vslice *= NICE_0_LOAD;
|
if (cfs_rq)
|
||||||
do_div(vslice, weight);
|
tg = cfs_rq->tg;
|
||||||
|
|
||||||
|
if (tg && tg->shares < NICE_0_LOAD) {
|
||||||
|
/*
|
||||||
|
* scale shares to what it would have been had
|
||||||
|
* tg->weight been NICE_0_LOAD:
|
||||||
|
*
|
||||||
|
* weight = 1024 * shares / tg->weight
|
||||||
|
*/
|
||||||
|
lw.weight *= se->load.weight;
|
||||||
|
lw.weight /= tg->shares;
|
||||||
|
|
||||||
|
lw.inv_weight = 0;
|
||||||
|
|
||||||
|
se_lw = &lw;
|
||||||
|
rw += lw.weight - se->load.weight;
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (se->load.weight < NICE_0_LOAD) {
|
||||||
|
se_lw = &lw;
|
||||||
|
rw += NICE_0_LOAD - se->load.weight;
|
||||||
|
}
|
||||||
|
|
||||||
|
delta = calc_delta_mine(delta, rw, se_lw);
|
||||||
}
|
}
|
||||||
|
|
||||||
return vslice;
|
return delta;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -419,11 +480,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
|
||||||
|
|
||||||
curr->sum_exec_runtime += delta_exec;
|
curr->sum_exec_runtime += delta_exec;
|
||||||
schedstat_add(cfs_rq, exec_clock, delta_exec);
|
schedstat_add(cfs_rq, exec_clock, delta_exec);
|
||||||
delta_exec_weighted = delta_exec;
|
delta_exec_weighted = calc_delta_fair(delta_exec, curr);
|
||||||
if (unlikely(curr->load.weight != NICE_0_LOAD)) {
|
|
||||||
delta_exec_weighted = calc_delta_fair(delta_exec_weighted,
|
|
||||||
&curr->load);
|
|
||||||
}
|
|
||||||
curr->vruntime += delta_exec_weighted;
|
curr->vruntime += delta_exec_weighted;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -510,10 +567,27 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||||
* Scheduling class queueing methods:
|
* Scheduling class queueing methods:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
|
||||||
|
static void
|
||||||
|
add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
|
||||||
|
{
|
||||||
|
cfs_rq->task_weight += weight;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline void
|
||||||
|
add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static void
|
static void
|
||||||
account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||||
{
|
{
|
||||||
update_load_add(&cfs_rq->load, se->load.weight);
|
update_load_add(&cfs_rq->load, se->load.weight);
|
||||||
|
if (!parent_entity(se))
|
||||||
|
inc_cpu_load(rq_of(cfs_rq), se->load.weight);
|
||||||
|
if (entity_is_task(se))
|
||||||
|
add_cfs_task_weight(cfs_rq, se->load.weight);
|
||||||
cfs_rq->nr_running++;
|
cfs_rq->nr_running++;
|
||||||
se->on_rq = 1;
|
se->on_rq = 1;
|
||||||
list_add(&se->group_node, &cfs_rq->tasks);
|
list_add(&se->group_node, &cfs_rq->tasks);
|
||||||
|
@ -523,6 +597,10 @@ static void
|
||||||
account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||||
{
|
{
|
||||||
update_load_sub(&cfs_rq->load, se->load.weight);
|
update_load_sub(&cfs_rq->load, se->load.weight);
|
||||||
|
if (!parent_entity(se))
|
||||||
|
dec_cpu_load(rq_of(cfs_rq), se->load.weight);
|
||||||
|
if (entity_is_task(se))
|
||||||
|
add_cfs_task_weight(cfs_rq, -se->load.weight);
|
||||||
cfs_rq->nr_running--;
|
cfs_rq->nr_running--;
|
||||||
se->on_rq = 0;
|
se->on_rq = 0;
|
||||||
list_del_init(&se->group_node);
|
list_del_init(&se->group_node);
|
||||||
|
@ -609,8 +687,17 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
|
||||||
|
|
||||||
if (!initial) {
|
if (!initial) {
|
||||||
/* sleeps upto a single latency don't count. */
|
/* sleeps upto a single latency don't count. */
|
||||||
if (sched_feat(NEW_FAIR_SLEEPERS))
|
if (sched_feat(NEW_FAIR_SLEEPERS)) {
|
||||||
vruntime -= sysctl_sched_latency;
|
unsigned long thresh = sysctl_sched_latency;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* convert the sleeper threshold into virtual time
|
||||||
|
*/
|
||||||
|
if (sched_feat(NORMALIZED_SLEEPER))
|
||||||
|
thresh = calc_delta_fair(thresh, se);
|
||||||
|
|
||||||
|
vruntime -= thresh;
|
||||||
|
}
|
||||||
|
|
||||||
/* ensure we never gain time by being placed backwards. */
|
/* ensure we never gain time by being placed backwards. */
|
||||||
vruntime = max_vruntime(se->vruntime, vruntime);
|
vruntime = max_vruntime(se->vruntime, vruntime);
|
||||||
|
@ -639,21 +726,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
|
||||||
__enqueue_entity(cfs_rq, se);
|
__enqueue_entity(cfs_rq, se);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void update_avg(u64 *avg, u64 sample)
|
|
||||||
{
|
|
||||||
s64 diff = sample - *avg;
|
|
||||||
*avg += diff >> 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|
||||||
{
|
|
||||||
if (!se->last_wakeup)
|
|
||||||
return;
|
|
||||||
|
|
||||||
update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
|
|
||||||
se->last_wakeup = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
|
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
|
||||||
{
|
{
|
||||||
|
@ -664,7 +736,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
|
||||||
|
|
||||||
update_stats_dequeue(cfs_rq, se);
|
update_stats_dequeue(cfs_rq, se);
|
||||||
if (sleep) {
|
if (sleep) {
|
||||||
update_avg_stats(cfs_rq, se);
|
|
||||||
#ifdef CONFIG_SCHEDSTATS
|
#ifdef CONFIG_SCHEDSTATS
|
||||||
if (entity_is_task(se)) {
|
if (entity_is_task(se)) {
|
||||||
struct task_struct *tsk = task_of(se);
|
struct task_struct *tsk = task_of(se);
|
||||||
|
@ -726,17 +797,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||||
se->prev_sum_exec_runtime = se->sum_exec_runtime;
|
se->prev_sum_exec_runtime = se->sum_exec_runtime;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
|
|
||||||
|
|
||||||
static struct sched_entity *
|
static struct sched_entity *
|
||||||
pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||||
{
|
{
|
||||||
if (!cfs_rq->next)
|
struct rq *rq = rq_of(cfs_rq);
|
||||||
return se;
|
u64 pair_slice = rq->clock - cfs_rq->pair_start;
|
||||||
|
|
||||||
if (wakeup_preempt_entity(cfs_rq->next, se) != 0)
|
if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) {
|
||||||
|
cfs_rq->pair_start = rq->clock;
|
||||||
return se;
|
return se;
|
||||||
|
}
|
||||||
|
|
||||||
return cfs_rq->next;
|
return cfs_rq->next;
|
||||||
}
|
}
|
||||||
|
@ -835,7 +905,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
|
||||||
hrtick_start(rq, delta, requeue);
|
hrtick_start(rq, delta, requeue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else /* !CONFIG_SCHED_HRTICK */
|
||||||
static inline void
|
static inline void
|
||||||
hrtick_start_fair(struct rq *rq, struct task_struct *p)
|
hrtick_start_fair(struct rq *rq, struct task_struct *p)
|
||||||
{
|
{
|
||||||
|
@ -976,7 +1046,7 @@ static int wake_idle(int cpu, struct task_struct *p)
|
||||||
}
|
}
|
||||||
return cpu;
|
return cpu;
|
||||||
}
|
}
|
||||||
#else
|
#else /* !ARCH_HAS_SCHED_WAKE_IDLE*/
|
||||||
static inline int wake_idle(int cpu, struct task_struct *p)
|
static inline int wake_idle(int cpu, struct task_struct *p)
|
||||||
{
|
{
|
||||||
return cpu;
|
return cpu;
|
||||||
|
@ -987,6 +1057,89 @@ static inline int wake_idle(int cpu, struct task_struct *p)
|
||||||
|
|
||||||
static const struct sched_class fair_sched_class;
|
static const struct sched_class fair_sched_class;
|
||||||
|
|
||||||
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||||
|
/*
|
||||||
|
* effective_load() calculates the load change as seen from the root_task_group
|
||||||
|
*
|
||||||
|
* Adding load to a group doesn't make a group heavier, but can cause movement
|
||||||
|
* of group shares between cpus. Assuming the shares were perfectly aligned one
|
||||||
|
* can calculate the shift in shares.
|
||||||
|
*
|
||||||
|
* The problem is that perfectly aligning the shares is rather expensive, hence
|
||||||
|
* we try to avoid doing that too often - see update_shares(), which ratelimits
|
||||||
|
* this change.
|
||||||
|
*
|
||||||
|
* We compensate this by not only taking the current delta into account, but
|
||||||
|
* also considering the delta between when the shares were last adjusted and
|
||||||
|
* now.
|
||||||
|
*
|
||||||
|
* We still saw a performance dip, some tracing learned us that between
|
||||||
|
* cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased
|
||||||
|
* significantly. Therefore try to bias the error in direction of failing
|
||||||
|
* the affine wakeup.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static long effective_load(struct task_group *tg, int cpu,
|
||||||
|
long wl, long wg)
|
||||||
|
{
|
||||||
|
struct sched_entity *se = tg->se[cpu];
|
||||||
|
long more_w;
|
||||||
|
|
||||||
|
if (!tg->parent)
|
||||||
|
return wl;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* By not taking the decrease of shares on the other cpu into
|
||||||
|
* account our error leans towards reducing the affine wakeups.
|
||||||
|
*/
|
||||||
|
if (!wl && sched_feat(ASYM_EFF_LOAD))
|
||||||
|
return wl;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Instead of using this increment, also add the difference
|
||||||
|
* between when the shares were last updated and now.
|
||||||
|
*/
|
||||||
|
more_w = se->my_q->load.weight - se->my_q->rq_weight;
|
||||||
|
wl += more_w;
|
||||||
|
wg += more_w;
|
||||||
|
|
||||||
|
for_each_sched_entity(se) {
|
||||||
|
#define D(n) (likely(n) ? (n) : 1)
|
||||||
|
|
||||||
|
long S, rw, s, a, b;
|
||||||
|
|
||||||
|
S = se->my_q->tg->shares;
|
||||||
|
s = se->my_q->shares;
|
||||||
|
rw = se->my_q->rq_weight;
|
||||||
|
|
||||||
|
a = S*(rw + wl);
|
||||||
|
b = S*rw + s*wg;
|
||||||
|
|
||||||
|
wl = s*(a-b)/D(b);
|
||||||
|
/*
|
||||||
|
* Assume the group is already running and will
|
||||||
|
* thus already be accounted for in the weight.
|
||||||
|
*
|
||||||
|
* That is, moving shares between CPUs, does not
|
||||||
|
* alter the group weight.
|
||||||
|
*/
|
||||||
|
wg = 0;
|
||||||
|
#undef D
|
||||||
|
}
|
||||||
|
|
||||||
|
return wl;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
static inline unsigned long effective_load(struct task_group *tg, int cpu,
|
||||||
|
unsigned long wl, unsigned long wg)
|
||||||
|
{
|
||||||
|
return wl;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
 static int
 wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
 	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,

@@ -994,8 +1147,10 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
 	    unsigned int imbalance)
 {
 	struct task_struct *curr = this_rq->curr;
+	struct task_group *tg;
 	unsigned long tl = this_load;
 	unsigned long tl_per_task;
+	unsigned long weight;
 	int balanced;

 	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))

@@ -1006,19 +1161,28 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
 	 * effect of the currently running task from the load
 	 * of the current CPU:
 	 */
-	if (sync)
-		tl -= current->se.load.weight;
-
-	balanced = 100*(tl + p->se.load.weight) <= imbalance*load;
+	if (sync) {
+		tg = task_group(current);
+		weight = current->se.load.weight;
+
+		tl += effective_load(tg, this_cpu, -weight, -weight);
+		load += effective_load(tg, prev_cpu, 0, -weight);
+	}
+
+	tg = task_group(p);
+	weight = p->se.load.weight;
+
+	balanced = 100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
+		imbalance*(load + effective_load(tg, prev_cpu, 0, weight));

 	/*
 	 * If the currently running task will sleep within
 	 * a reasonable amount of time then attract this newly
 	 * woken task:
 	 */
-	if (sync && balanced && curr->sched_class == &fair_sched_class) {
+	if (sync && balanced) {
 		if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
 		    p->se.avg_overlap < sysctl_sched_migration_cost)
 			return 1;
 	}

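Reading the rewritten test as an inequality (again a sketch, not wording from the patch): the affine wakeup is considered balanced when

\[ 100 \cdot \bigl(tl + \Delta_{\text{this}}\bigr) \le \text{imbalance} \cdot \bigl(load + \Delta_{\text{prev}}\bigr) \]

where Delta_this = effective_load(tg, this_cpu, weight, weight) is the change in this CPU's hierarchical load once p's weight lands both on its queue and on the group, and Delta_prev = effective_load(tg, prev_cpu, 0, weight) is the corresponding change seen from prev_cpu when p's weight is charged to the group but not to that queue; for sync wakeups the waker's weight is first backed out of both sides through the same helper.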
@@ -1111,11 +1275,13 @@ static unsigned long wakeup_gran(struct sched_entity *se)
 	unsigned long gran = sysctl_sched_wakeup_granularity;

 	/*
-	 * More easily preempt - nice tasks, while not making
-	 * it harder for + nice tasks.
+	 * More easily preempt - nice tasks, while not making it harder for
+	 * + nice tasks.
 	 */
-	if (unlikely(se->load.weight > NICE_0_LOAD))
-		gran = calc_delta_fair(gran, &se->load);
+	if (sched_feat(ASYM_GRAN))
+		gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
+	else
+		gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);

 	return gran;
 }

@@ -1177,7 +1343,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		return;
 	}

-	se->last_wakeup = se->sum_exec_runtime;
 	if (unlikely(se == pse))
 		return;

@@ -1275,23 +1440,18 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
 	struct task_struct *p = NULL;
 	struct sched_entity *se;

-	if (next == &cfs_rq->tasks)
-		return NULL;
-
-	/* Skip over entities that are not tasks */
-	do {
+	while (next != &cfs_rq->tasks) {
 		se = list_entry(next, struct sched_entity, group_node);
 		next = next->next;
-	} while (next != &cfs_rq->tasks && !entity_is_task(se));

-	if (next == &cfs_rq->tasks)
-		return NULL;
+		/* Skip over entities that are not tasks */
+		if (entity_is_task(se)) {
+			p = task_of(se);
+			break;
+		}
+	}

 	cfs_rq->balance_iterator = next;
-
-	if (entity_is_task(se))
-		p = task_of(se);
-
 	return p;
 }

@@ -1309,75 +1469,82 @@ static struct task_struct *load_balance_next_fair(void *arg)
 	return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);
 }

-#ifdef CONFIG_FAIR_GROUP_SCHED
-static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
+static unsigned long
+__load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		unsigned long max_load_move, struct sched_domain *sd,
+		enum cpu_idle_type idle, int *all_pinned, int *this_best_prio,
+		struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *curr;
-	struct task_struct *p;
-
-	if (!cfs_rq->nr_running || !first_fair(cfs_rq))
-		return MAX_PRIO;
-
-	curr = cfs_rq->curr;
-	if (!curr)
-		curr = __pick_next_entity(cfs_rq);
-
-	p = task_of(curr);
-
-	return p->prio;
+	struct rq_iterator cfs_rq_iterator;
+
+	cfs_rq_iterator.start = load_balance_start_fair;
+	cfs_rq_iterator.next = load_balance_next_fair;
+	cfs_rq_iterator.arg = cfs_rq;
+
+	return balance_tasks(this_rq, this_cpu, busiest,
+			max_load_move, sd, idle, all_pinned,
+			this_best_prio, &cfs_rq_iterator);
 }
-#endif

+#ifdef CONFIG_FAIR_GROUP_SCHED
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
 		  int *all_pinned, int *this_best_prio)
 {
-	struct cfs_rq *busy_cfs_rq;
 	long rem_load_move = max_load_move;
-	struct rq_iterator cfs_rq_iterator;
+	int busiest_cpu = cpu_of(busiest);
+	struct task_group *tg;

-	cfs_rq_iterator.start = load_balance_start_fair;
-	cfs_rq_iterator.next = load_balance_next_fair;
+	rcu_read_lock();
+	update_h_load(busiest_cpu);

-	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
-#ifdef CONFIG_FAIR_GROUP_SCHED
-		struct cfs_rq *this_cfs_rq;
-		long imbalance;
-		unsigned long maxload;
+	list_for_each_entry(tg, &task_groups, list) {
+		struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
+		unsigned long busiest_h_load = busiest_cfs_rq->h_load;
+		unsigned long busiest_weight = busiest_cfs_rq->load.weight;
+		u64 rem_load, moved_load;

-		this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
-
-		imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
-		/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
-		if (imbalance <= 0)
+		/*
+		 * empty group
+		 */
+		if (!busiest_cfs_rq->task_weight)
 			continue;

-		/* Don't pull more than imbalance/2 */
-		imbalance /= 2;
-		maxload = min(rem_load_move, imbalance);
+		rem_load = (u64)rem_load_move * busiest_weight;
+		rem_load = div_u64(rem_load, busiest_h_load + 1);

-		*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
-#else
-# define maxload rem_load_move
-#endif
-		/*
-		 * pass busy_cfs_rq argument into
-		 * load_balance_[start|next]_fair iterators
-		 */
-		cfs_rq_iterator.arg = busy_cfs_rq;
-		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
-					       maxload, sd, idle, all_pinned,
-					       this_best_prio,
-					       &cfs_rq_iterator);
+		moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
+				rem_load, sd, idle, all_pinned, this_best_prio,
+				tg->cfs_rq[busiest_cpu]);

-		if (rem_load_move <= 0)
+		if (!moved_load)
+			continue;
+
+		moved_load *= busiest_h_load;
+		moved_load = div_u64(moved_load, busiest_weight + 1);
+
+		rem_load_move -= moved_load;
+		if (rem_load_move < 0)
 			break;
 	}
+	rcu_read_unlock();

 	return max_load_move - rem_load_move;
 }
+#else
+static unsigned long
+load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		  unsigned long max_load_move,
+		  struct sched_domain *sd, enum cpu_idle_type idle,
+		  int *all_pinned, int *this_best_prio)
+{
+	return __load_balance_fair(this_rq, this_cpu, busiest,
+			max_load_move, sd, idle, all_pinned,
+			this_best_prio, &busiest->cfs);
+}
+#endif

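The unit conversion in the new load_balance_fair() is worth spelling out (sketch, not from the patch):

\[ \text{rem\_load} = \frac{\text{rem\_load\_move}\cdot w}{h+1}, \qquad \text{moved\_load} \leftarrow \frac{\text{moved\_load}\cdot h}{w+1} \]

where w = busiest_cfs_rq->load.weight and h = busiest_cfs_rq->h_load. The amount still to be moved is translated from root-hierarchy load units into the group's own weight units before its cfs_rq is walked, and whatever balance_tasks() actually moved is translated back before being subtracted from rem_load_move; the +1 in both divisors only avoids dividing by zero for a (nearly) empty group.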
 static int
 move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,

@@ -1402,7 +1569,7 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,

 	return 0;
 }
-#endif
+#endif /* CONFIG_SMP */

 /*
  * scheduler tick hitting a task of our scheduling class:

@@ -1,4 +1,5 @@
 SCHED_FEAT(NEW_FAIR_SLEEPERS, 1)
+SCHED_FEAT(NORMALIZED_SLEEPER, 1)
 SCHED_FEAT(WAKEUP_PREEMPT, 1)
 SCHED_FEAT(START_DEBIT, 1)
 SCHED_FEAT(AFFINE_WAKEUPS, 1)

@@ -6,5 +7,7 @@ SCHED_FEAT(CACHE_HOT_BUDDY, 1)
 SCHED_FEAT(SYNC_WAKEUPS, 1)
 SCHED_FEAT(HRTICK, 1)
 SCHED_FEAT(DOUBLE_TICK, 0)
-SCHED_FEAT(NORMALIZED_SLEEPER, 1)
-SCHED_FEAT(DEADLINE, 1)
+SCHED_FEAT(ASYM_GRAN, 1)
+SCHED_FEAT(LB_BIAS, 0)
+SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
+SCHED_FEAT(ASYM_EFF_LOAD, 1)

@@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq)

 static inline void rt_set_overload(struct rq *rq)
 {
+	if (!rq->online)
+		return;
+
 	cpu_set(rq->cpu, rq->rd->rto_mask);
 	/*
 	 * Make sure the mask is visible before we set

@@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq)

 static inline void rt_clear_overload(struct rq *rq)
 {
+	if (!rq->online)
+		return;
+
 	/* the order here really doesn't matter */
 	atomic_dec(&rq->rd->rto_count);
 	cpu_clear(rq->cpu, rq->rd->rto_mask);

@@ -155,7 +161,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 	return &rt_rq->tg->rt_bandwidth;
 }

-#else
+#else /* !CONFIG_RT_GROUP_SCHED */

 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {

@@ -220,7 +226,160 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 	return &def_rt_bandwidth;
 }

-#endif
+#endif /* CONFIG_RT_GROUP_SCHED */
+
+#ifdef CONFIG_SMP
+static int do_balance_runtime(struct rt_rq *rt_rq)
+{
+	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+	int i, weight, more = 0;
+	u64 rt_period;
+
+	weight = cpus_weight(rd->span);
+
+	spin_lock(&rt_b->rt_runtime_lock);
+	rt_period = ktime_to_ns(rt_b->rt_period);
+	for_each_cpu_mask(i, rd->span) {
+		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+		s64 diff;
+
+		if (iter == rt_rq)
+			continue;
+
+		spin_lock(&iter->rt_runtime_lock);
+		if (iter->rt_runtime == RUNTIME_INF)
+			goto next;
+
+		diff = iter->rt_runtime - iter->rt_time;
+		if (diff > 0) {
+			do_div(diff, weight);
+			if (rt_rq->rt_runtime + diff > rt_period)
+				diff = rt_period - rt_rq->rt_runtime;
+			iter->rt_runtime -= diff;
+			rt_rq->rt_runtime += diff;
+			more = 1;
+			if (rt_rq->rt_runtime == rt_period) {
+				spin_unlock(&iter->rt_runtime_lock);
+				break;
+			}
+		}
+next:
+		spin_unlock(&iter->rt_runtime_lock);
+	}
+	spin_unlock(&rt_b->rt_runtime_lock);
+
+	return more;
+}
+
+static void __disable_runtime(struct rq *rq)
+{
+	struct root_domain *rd = rq->rd;
+	struct rt_rq *rt_rq;
+
+	if (unlikely(!scheduler_running))
+		return;
+
+	for_each_leaf_rt_rq(rt_rq, rq) {
+		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+		s64 want;
+		int i;
+
+		spin_lock(&rt_b->rt_runtime_lock);
+		spin_lock(&rt_rq->rt_runtime_lock);
+		if (rt_rq->rt_runtime == RUNTIME_INF ||
+				rt_rq->rt_runtime == rt_b->rt_runtime)
+			goto balanced;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+
+		want = rt_b->rt_runtime - rt_rq->rt_runtime;
+
+		for_each_cpu_mask(i, rd->span) {
+			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+			s64 diff;
+
+			if (iter == rt_rq)
+				continue;
+
+			spin_lock(&iter->rt_runtime_lock);
+			if (want > 0) {
+				diff = min_t(s64, iter->rt_runtime, want);
+				iter->rt_runtime -= diff;
+				want -= diff;
+			} else {
+				iter->rt_runtime -= want;
+				want -= want;
+			}
+			spin_unlock(&iter->rt_runtime_lock);
+
+			if (!want)
+				break;
+		}
+
+		spin_lock(&rt_rq->rt_runtime_lock);
+		BUG_ON(want);
+balanced:
+		rt_rq->rt_runtime = RUNTIME_INF;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		spin_unlock(&rt_b->rt_runtime_lock);
+	}
+}
+
+static void disable_runtime(struct rq *rq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	__disable_runtime(rq);
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void __enable_runtime(struct rq *rq)
+{
+	struct rt_rq *rt_rq;
+
+	if (unlikely(!scheduler_running))
+		return;
+
+	for_each_leaf_rt_rq(rt_rq, rq) {
+		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+
+		spin_lock(&rt_b->rt_runtime_lock);
+		spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_runtime = rt_b->rt_runtime;
+		rt_rq->rt_time = 0;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		spin_unlock(&rt_b->rt_runtime_lock);
+	}
+}
+
+static void enable_runtime(struct rq *rq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	__enable_runtime(rq);
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static int balance_runtime(struct rt_rq *rt_rq)
+{
+	int more = 0;
+
+	if (rt_rq->rt_time > rt_rq->rt_runtime) {
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		more = do_balance_runtime(rt_rq);
+		spin_lock(&rt_rq->rt_runtime_lock);
+	}
+
+	return more;
+}
+#else /* !CONFIG_SMP */
+static inline int balance_runtime(struct rt_rq *rt_rq)
+{
+	return 0;
+}
+#endif /* CONFIG_SMP */

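A worked example for do_balance_runtime(), with numbers chosen purely for illustration (they are not in the patch): on a 4-CPU root domain running the default 950000us of RT runtime per 1s period, a throttled rt_rq asks each peer for diff = (iter->rt_runtime - iter->rt_time) / weight, so a peer that has consumed nothing hands over roughly 0.95s / 4 = 0.2375s, and borrowing stops as soon as the borrower's rt_runtime reaches the full period. __disable_runtime() and __enable_runtime() settle this bookkeeping when a runqueue leaves or rejoins the root domain; they are wired up through rq_offline_rt()/rq_online_rt() further down in this diff.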
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 {

@@ -241,6 +400,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 		u64 runtime;

 		spin_lock(&rt_rq->rt_runtime_lock);
+		if (rt_rq->rt_throttled)
+			balance_runtime(rt_rq);
 		runtime = rt_rq->rt_runtime;
 		rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
 		if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {

@@ -261,47 +422,6 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	return idle;
 }

-#ifdef CONFIG_SMP
-static int balance_runtime(struct rt_rq *rt_rq)
-{
-	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
-	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
-	int i, weight, more = 0;
-	u64 rt_period;
-
-	weight = cpus_weight(rd->span);
-
-	spin_lock(&rt_b->rt_runtime_lock);
-	rt_period = ktime_to_ns(rt_b->rt_period);
-	for_each_cpu_mask(i, rd->span) {
-		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
-		s64 diff;
-
-		if (iter == rt_rq)
-			continue;
-
-		spin_lock(&iter->rt_runtime_lock);
-		diff = iter->rt_runtime - iter->rt_time;
-		if (diff > 0) {
-			do_div(diff, weight);
-			if (rt_rq->rt_runtime + diff > rt_period)
-				diff = rt_period - rt_rq->rt_runtime;
-			iter->rt_runtime -= diff;
-			rt_rq->rt_runtime += diff;
-			more = 1;
-			if (rt_rq->rt_runtime == rt_period) {
-				spin_unlock(&iter->rt_runtime_lock);
-				break;
-			}
-		}
-		spin_unlock(&iter->rt_runtime_lock);
-	}
-	spin_unlock(&rt_b->rt_runtime_lock);
-
-	return more;
-}
-#endif
-
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 {
 #ifdef CONFIG_RT_GROUP_SCHED

@@ -327,18 +447,10 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
 		return 0;

-#ifdef CONFIG_SMP
-	if (rt_rq->rt_time > runtime) {
-		int more;
-
-		spin_unlock(&rt_rq->rt_runtime_lock);
-		more = balance_runtime(rt_rq);
-		spin_lock(&rt_rq->rt_runtime_lock);
-
-		if (more)
-			runtime = sched_rt_runtime(rt_rq);
-	}
-#endif
+	balance_runtime(rt_rq);
+	runtime = sched_rt_runtime(rt_rq);
+	if (runtime == RUNTIME_INF)
+		return 0;

 	if (rt_rq->rt_time > runtime) {
 		rt_rq->rt_throttled = 1;
@@ -392,12 +504,21 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	rt_rq->rt_nr_running++;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	if (rt_se_prio(rt_se) < rt_rq->highest_prio)
+	if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+
 		rt_rq->highest_prio = rt_se_prio(rt_se);
+#ifdef CONFIG_SMP
+		if (rq->online)
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_se_prio(rt_se));
+#endif
+	}
 #endif
 #ifdef CONFIG_SMP
 	if (rt_se->nr_cpus_allowed > 1) {
 		struct rq *rq = rq_of_rt_rq(rt_rq);

 		rq->rt.rt_nr_migratory++;
 	}

@@ -417,6 +538,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 static inline
 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+#ifdef CONFIG_SMP
+	int highest_prio = rt_rq->highest_prio;
+#endif
+
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running--;

@@ -440,6 +565,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 			rq->rt.rt_nr_migratory--;
 	}

+	if (rt_rq->highest_prio != highest_prio) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+
+		if (rq->online)
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_rq->highest_prio);
+	}
+
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -455,6 +588,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
 	struct rt_rq *group_rq = group_rt_rq(rt_se);
+	struct list_head *queue = array->queue + rt_se_prio(rt_se);

 	/*
 	 * Don't enqueue the group if its throttled, or when empty.

@@ -465,7 +599,11 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
 		return;

-	list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+	if (rt_se->nr_cpus_allowed == 1)
+		list_add(&rt_se->run_list, queue);
+	else
+		list_add_tail(&rt_se->run_list, queue);
+
 	__set_bit(rt_se_prio(rt_se), array->bitmap);

 	inc_rt_tasks(rt_se, rt_rq);

@@ -532,6 +670,8 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 	rt_se->timeout = 0;

 	enqueue_rt_entity(rt_se);
+
+	inc_cpu_load(rq, p->se.load.weight);
 }

 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)

@@ -540,6 +680,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)

 	update_curr_rt(rq);
 	dequeue_rt_entity(rt_se);
+
+	dec_cpu_load(rq, p->se.load.weight);
 }

 /*

@@ -550,10 +692,12 @@ static
 void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
 {
 	struct rt_prio_array *array = &rt_rq->active;
-	struct list_head *queue = array->queue + rt_se_prio(rt_se);

-	if (on_rt_rq(rt_se))
-		list_move_tail(&rt_se->run_list, queue);
+	if (on_rt_rq(rt_se)) {
+		list_del_init(&rt_se->run_list);
+		list_add_tail(&rt_se->run_list,
+			      array->queue + rt_se_prio(rt_se));
+	}
 }

 static void requeue_task_rt(struct rq *rq, struct task_struct *p)
@@ -616,8 +760,37 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
  */
 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 {
-	if (p->prio < rq->curr->prio)
+	if (p->prio < rq->curr->prio) {
 		resched_task(rq->curr);
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * If:
+	 *
+	 * - the newly woken task is of equal priority to the current task
+	 * - the newly woken task is non-migratable while current is migratable
+	 * - current will be preempted on the next reschedule
+	 *
+	 * we should check to see if current can readily move to a different
+	 * cpu. If so, we will reschedule to allow the push logic to try
+	 * to move current somewhere else, making room for our non-migratable
+	 * task.
+	 */
+	if((p->prio == rq->curr->prio)
+	   && p->rt.nr_cpus_allowed == 1
+	   && rq->curr->rt.nr_cpus_allowed != 1) {
+		cpumask_t mask;
+
+		if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+			/*
+			 * There appears to be other cpus that can accept
+			 * current, so lets reschedule to try and push it away
+			 */
+			resched_task(rq->curr);
+	}
+#endif
 }

 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
@@ -720,73 +893,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)

 static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);

-static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
-{
-	int lowest_prio = -1;
-	int lowest_cpu = -1;
-	int count = 0;
-	int cpu;
-
-	cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
-
-	/*
-	 * Scan each rq for the lowest prio.
-	 */
-	for_each_cpu_mask(cpu, *lowest_mask) {
-		struct rq *rq = cpu_rq(cpu);
-
-		/* We look for lowest RT prio or non-rt CPU */
-		if (rq->rt.highest_prio >= MAX_RT_PRIO) {
-			/*
-			 * if we already found a low RT queue
-			 * and now we found this non-rt queue
-			 * clear the mask and set our bit.
-			 * Otherwise just return the queue as is
-			 * and the count==1 will cause the algorithm
-			 * to use the first bit found.
-			 */
-			if (lowest_cpu != -1) {
-				cpus_clear(*lowest_mask);
-				cpu_set(rq->cpu, *lowest_mask);
-			}
-			return 1;
-		}
-
-		/* no locking for now */
-		if ((rq->rt.highest_prio > task->prio)
-		    && (rq->rt.highest_prio >= lowest_prio)) {
-			if (rq->rt.highest_prio > lowest_prio) {
-				/* new low - clear old data */
-				lowest_prio = rq->rt.highest_prio;
-				lowest_cpu = cpu;
-				count = 0;
-			}
-			count++;
-		} else
-			cpu_clear(cpu, *lowest_mask);
-	}
-
-	/*
-	 * Clear out all the set bits that represent
-	 * runqueues that were of higher prio than
-	 * the lowest_prio.
-	 */
-	if (lowest_cpu > 0) {
-		/*
-		 * Perhaps we could add another cpumask op to
-		 * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
-		 * Then that could be optimized to use memset and such.
-		 */
-		for_each_cpu_mask(cpu, *lowest_mask) {
-			if (cpu >= lowest_cpu)
-				break;
-			cpu_clear(cpu, *lowest_mask);
-		}
-	}
-
-	return count;
-}
-
 static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 {
 	int first;

@@ -808,18 +914,13 @@ static int find_lowest_rq(struct task_struct *task)
 	cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
-	int count = find_lowest_cpus(task, lowest_mask);

-	if (!count)
+	if (task->rt.nr_cpus_allowed == 1)
+		return -1; /* No other targets possible */
+
+	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
 		return -1; /* No targets found */

-	/*
-	 * There is no sense in performing an optimal search if only one
-	 * target is found.
-	 */
-	if (count == 1)
-		return first_cpu(*lowest_mask);
-
 	/*
 	 * At this point we have built a mask of cpus representing the
 	 * lowest priority tasks in the system. Now we want to elect
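The removed find_lowest_cpus() walk over every runqueue is replaced by cpupri_find()/cpupri_set(), which the rest of this diff keeps current from inc_rt_tasks()/dec_rt_tasks() and rq_online_rt()/rq_offline_rt(). Below is a toy user-space model of the underlying idea — priority-indexed CPU sets — to make the trade-off concrete; every name, size and convention in it (including "larger level number means higher priority") is invented for illustration and is not the kernel's cpupri implementation.

#include <stdio.h>

#define NR_CPUS   8
#define NR_LEVELS 102                      /* idle, normal, RT0..RT99 */

static unsigned int level_mask[NR_LEVELS]; /* bit c set: cpu c runs at this level */
static int cpu_level[NR_CPUS];

static void toy_cpupri_set(int cpu, int level)
{
	/* move the cpu's bit from its old level's mask to the new one */
	level_mask[cpu_level[cpu]] &= ~(1u << cpu);
	cpu_level[cpu] = level;
	level_mask[level] |= 1u << cpu;
}

/* return a bitmask of CPUs running strictly below 'level', 0 if none */
static unsigned int toy_cpupri_find(int level)
{
	unsigned int mask = 0;
	int l;

	for (l = 0; l < level; l++)
		mask |= level_mask[l];
	return mask;
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		toy_cpupri_set(cpu, 0);        /* everyone starts idle */
	toy_cpupri_set(2, 50);                 /* cpu2 runs at level 50 */
	toy_cpupri_set(3, 10);                 /* cpu3 runs at level 10 */

	printf("cpus usable for a level-20 task: 0x%02x\n", toy_cpupri_find(20));
	return 0;
}

The point of the change is the same as in this toy: the per-wakeup cost stops scaling with the number of CPUs scanned and becomes a lookup over a small, fixed number of priority levels maintained incrementally as tasks are enqueued and dequeued.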
@@ -1163,17 +1264,25 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 }

 /* Assumes rq->lock is held */
-static void join_domain_rt(struct rq *rq)
+static void rq_online_rt(struct rq *rq)
 {
 	if (rq->rt.overloaded)
 		rt_set_overload(rq);
+
+	__enable_runtime(rq);
+
+	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
 }

 /* Assumes rq->lock is held */
-static void leave_domain_rt(struct rq *rq)
+static void rq_offline_rt(struct rq *rq)
 {
 	if (rq->rt.overloaded)
 		rt_clear_overload(rq);
+
+	__disable_runtime(rq);
+
+	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
 }

 /*

@@ -1336,8 +1445,8 @@ static const struct sched_class rt_sched_class = {
 	.load_balance = load_balance_rt,
 	.move_one_task = move_one_task_rt,
 	.set_cpus_allowed = set_cpus_allowed_rt,
-	.join_domain = join_domain_rt,
-	.leave_domain = leave_domain_rt,
+	.rq_online = rq_online_rt,
+	.rq_offline = rq_offline_rt,
 	.pre_schedule = pre_schedule_rt,
 	.post_schedule = post_schedule_rt,
 	.task_wake_up = task_wake_up_rt,

@@ -1350,3 +1459,17 @@ static const struct sched_class rt_sched_class = {
 	.prio_changed = prio_changed_rt,
 	.switched_to = switched_to_rt,
 };
+
+#ifdef CONFIG_SCHED_DEBUG
+extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+
+static void print_rt_stats(struct seq_file *m, int cpu)
+{
+	struct rt_rq *rt_rq;
+
+	rcu_read_lock();
+	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
+		print_rt_rq(m, cpu, rt_rq);
+	rcu_read_unlock();
+}
+#endif /* CONFIG_SCHED_DEBUG */

@@ -118,6 +118,13 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 	if (rq)
 		rq->rq_sched_info.cpu_time += delta;
 }
+
+static inline void
+rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
+{
+	if (rq)
+		rq->rq_sched_info.run_delay += delta;
+}
 # define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
 # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
 # define schedstat_set(var, val) do { var = (val); } while (0)

@@ -126,6 +133,9 @@ static inline void
 rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
 {}
 static inline void
+rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
+{}
+static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {}
 # define schedstat_inc(rq, field) do { } while (0)

@@ -134,6 +144,11 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 #endif

 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+static inline void sched_info_reset_dequeued(struct task_struct *t)
+{
+	t->sched_info.last_queued = 0;
+}
+
 /*
  * Called when a process is dequeued from the active array and given
  * the cpu. We should note that with the exception of interactive

@@ -143,15 +158,22 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
  * active queue, thus delaying tasks in the expired queue from running;
  * see scheduler_tick()).
  *
- * This function is only called from sched_info_arrive(), rather than
- * dequeue_task(). Even though a task may be queued and dequeued multiple
- * times as it is shuffled about, we're really interested in knowing how
- * long it was from the *first* time it was queued to the time that it
- * finally hit a cpu.
+ * Though we are interested in knowing how long it was from the *first* time a
+ * task was queued to the time that it finally hit a cpu, we call this routine
+ * from dequeue_task() to account for possible rq->clock skew across cpus. The
+ * delta taken on each cpu would annul the skew.
  */
 static inline void sched_info_dequeued(struct task_struct *t)
 {
-	t->sched_info.last_queued = 0;
+	unsigned long long now = task_rq(t)->clock, delta = 0;
+
+	if (unlikely(sched_info_on()))
+		if (t->sched_info.last_queued)
+			delta = now - t->sched_info.last_queued;
+	sched_info_reset_dequeued(t);
+	t->sched_info.run_delay += delta;
+
+	rq_sched_info_dequeued(task_rq(t), delta);
 }

 /*

@@ -165,7 +187,7 @@ static void sched_info_arrive(struct task_struct *t)

 	if (t->sched_info.last_queued)
 		delta = now - t->sched_info.last_queued;
-	sched_info_dequeued(t);
+	sched_info_reset_dequeued(t);
 	t->sched_info.run_delay += delta;
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcount++;

@@ -242,7 +264,9 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 		__sched_info_switch(prev, next);
 }
 #else
 #define sched_info_queued(t) do { } while (0)
-#define sched_info_switch(t, next) do { } while (0)
+#define sched_info_reset_dequeued(t) do { } while (0)
+#define sched_info_dequeued(t) do { } while (0)
+#define sched_info_switch(t, next) do { } while (0)
 #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */

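As I read the new accounting (a reviewer gloss, not text from the patch), run_delay is now accumulated piecewise, each piece measured against a single runqueue's clock:

\[ \Delta_{\text{dequeue}} = c_{\text{rq(old cpu)}} - \text{last\_queued}, \qquad \Delta_{\text{arrive}} = c_{\text{rq(new cpu)}} - \text{last\_queued} \]

so when a task is queued on one CPU and eventually runs on another, no single delta ever spans two CPUs' clocks; that is the skew the rewritten comment says is annulled. rq_sched_info_dequeued() mirrors the same delta into the per-runqueue counters.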
@@ -264,6 +264,14 @@ static struct ctl_table kern_table[] = {
 		.extra1 = &min_wakeup_granularity_ns,
 		.extra2 = &max_wakeup_granularity_ns,
 	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "sched_shares_ratelimit",
+		.data = &sysctl_sched_shares_ratelimit,
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+	},
 	{
 		.ctl_name = CTL_UNNUMBERED,
 		.procname = "sched_child_runs_first",
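Since the entry above sits in kern_table, the ratelimit becomes tunable at runtime as /proc/sys/kernel/sched_shares_ratelimit (the path is inferred from the procname; sysctl_sched_shares_ratelimit itself is defined outside this excerpt). A throwaway user-space read of it, as a hedged example — nothing below is taken from the patch:

#include <stdio.h>

int main(void)
{
	unsigned int val;
	FILE *f = fopen("/proc/sys/kernel/sched_shares_ratelimit", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%u", &val) == 1)
		printf("kernel.sched_shares_ratelimit = %u\n", val);
	fclose(f);
	return 0;
}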
@@ -276,6 +276,7 @@ void tick_nohz_stop_sched_tick(void)
 		ts->tick_stopped = 1;
 		ts->idle_jiffies = last_jiffies;
 		rcu_enter_nohz();
+		sched_clock_tick_stop(cpu);
 	}

 	/*

@@ -375,6 +376,7 @@ void tick_nohz_restart_sched_tick(void)
 	select_nohz_load_balancer(0);
 	now = ktime_get();
 	tick_do_update_jiffies64(now);
+	sched_clock_tick_start(cpu);
 	cpu_clear(cpu, nohz_cpu_mask);

 	/*