Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "The biggest changes in this cycle were:

   - Make kcpustat vtime aware (Frederic Weisbecker)

   - Rework the CFS load_balance() logic (Vincent Guittot)

   - Misc cleanups, smaller enhancements, fixes.

  The load-balancing rework is the most intrusive change: it replaces the
  old heuristics that have become less meaningful after the introduction
  of the PELT metrics, with a ground-up load-balancing algorithm.

  As such it's not really an iterative series, but replaces the old
  load-balancing logic with the new one. We hope there are no performance
  regressions left - but statistically it's highly probable that there
  *is* going to be some workload that is hurting from these changes. If
  so then we'd prefer to have a look at that workload and fix its
  scheduling, instead of reverting the changes"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (46 commits)
  rackmeter: Use vtime aware kcpustat accessor
  leds: Use all-in-one vtime aware kcpustat accessor
  cpufreq: Use vtime aware kcpustat accessors for user time
  procfs: Use all-in-one vtime aware kcpustat accessor
  sched/vtime: Bring up complete kcpustat accessor
  sched/cputime: Support other fields on kcpustat_field()
  sched/cpufreq: Move the cfs_rq_util_change() call to cpufreq_update_util()
  sched/fair: Add comments for group_type and balancing at SD_NUMA level
  sched/fair: Fix rework of find_idlest_group()
  sched/uclamp: Fix overzealous type replacement
  sched/Kconfig: Fix spelling mistake in user-visible help text
  sched/core: Further clarify sched_class::set_next_task()
  sched/fair: Use mul_u32_u32()
  sched/core: Simplify sched_class::pick_next_task()
  sched/core: Optimize pick_next_task()
  sched/core: Make pick_next_task_idle() more consistent
  sched/fair: Better document newidle_balance()
  leds: Use vtime aware kcpustat accessor to fetch CPUTIME_SYSTEM
  cpufreq: Use vtime aware kcpustat accessor to fetch CPUTIME_SYSTEM
  procfs: Use vtime aware kcpustat accessor to fetch CPUTIME_SYSTEM
  ...
commit 77a05940ee
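The central new API in this pull is the pair of vtime-aware kcpustat accessors that the hunks below switch cpufreq, leds, rackmeter and procfs over to: kcpustat_cpu_fetch() for a full per-CPU snapshot and kcpustat_field() for a single field. The following is only a minimal sketch of a consumer, modeled on those call sites; the helper names are made up for illustration and are not part of the kernel:

```c
#include <linux/kernel_stat.h>	/* kcpustat_cpu_fetch(), kcpustat_field() */

/*
 * Illustrative helper only: sum up "busy" time the way the cpufreq hunk
 * below does, but through the vtime-aware snapshot accessor so that
 * nohz_full CPUs report cputime that includes the still-pending vtime delta.
 */
static u64 example_busy_time(int cpu)
{
	struct kernel_cpustat kcpustat;
	u64 busy;

	/* Snapshot the per-CPU cpustat array, folding in pending vtime. */
	kcpustat_cpu_fetch(&kcpustat, cpu);

	busy  = kcpustat.cpustat[CPUTIME_USER];
	busy += kcpustat.cpustat[CPUTIME_SYSTEM];
	busy += kcpustat.cpustat[CPUTIME_IRQ];
	busy += kcpustat.cpustat[CPUTIME_SOFTIRQ];
	busy += kcpustat.cpustat[CPUTIME_STEAL];
	busy += kcpustat.cpustat[CPUTIME_NICE];

	return busy;
}

/* Single-field variant, as the cpufreq governors below use for CPUTIME_NICE. */
static u64 example_nice_time(int cpu)
{
	return kcpustat_field(&kcpustat_cpu(cpu), CPUTIME_NICE, cpu);
}
```

On kernels without CONFIG_VIRT_CPU_ACCOUNTING_GEN both accessors fall back to a plain read of kcpustat_cpu(), as the kernel_stat.h hunk below shows.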
@@ -132,7 +132,7 @@ static __u64 vtime_delta(struct task_struct *tsk)
 	return delta_stime;
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	struct thread_info *ti = task_thread_info(tsk);
 	__u64 stime = vtime_delta(tsk);
@@ -146,7 +146,7 @@ void vtime_account_system(struct task_struct *tsk)
 	else
 		ti->stime += stime;
 }
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 void vtime_account_idle(struct task_struct *tsk)
 {

@@ -338,7 +338,7 @@ static unsigned long vtime_delta(struct task_struct *tsk,
 	return stime;
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	unsigned long stime, stime_scaled, steal_time;
 	struct cpu_accounting_data *acct = get_accounting(tsk);
@@ -366,7 +366,7 @@ void vtime_account_system(struct task_struct *tsk)
 #endif
 	}
 }
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
@@ -395,7 +395,7 @@ static void vtime_flush_scaled(struct task_struct *tsk,
 /*
  * Account the whole cputime accumulated in the paca
  * Must be called with interrupts disabled.
- * Assumes that vtime_account_system/idle() has been called
+ * Assumes that vtime_account_kernel/idle() has been called
  * recently (i.e. since the last entry from usermode) so that
  * get_paca()->user_time_scaled is up to date.
  */

@@ -247,9 +247,9 @@ void vtime_account_irq_enter(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 	__attribute__((alias("vtime_account_irq_enter")));
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 /*
  * Sorted add to a list. List is linear searched until first bigger

@@ -354,7 +354,7 @@ For 32-bit we have the following conventions - kernel is built with
 .macro CALL_enter_from_user_mode
 #ifdef CONFIG_CONTEXT_TRACKING
 #ifdef CONFIG_JUMP_LABEL
-	STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
+	STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_key, def=0
 #endif
 	call enter_from_user_mode
 .Lafter_call_\@:

@@ -113,18 +113,21 @@ EXPORT_SYMBOL_GPL(get_governor_parent_kobj);
 
 static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
-	u64 idle_time;
+	struct kernel_cpustat kcpustat;
 	u64 cur_wall_time;
+	u64 idle_time;
 	u64 busy_time;
 
 	cur_wall_time = jiffies64_to_nsecs(get_jiffies_64());
 
-	busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
-	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+	kcpustat_cpu_fetch(&kcpustat, cpu);
+
+	busy_time = kcpustat.cpustat[CPUTIME_USER];
+	busy_time += kcpustat.cpustat[CPUTIME_SYSTEM];
+	busy_time += kcpustat.cpustat[CPUTIME_IRQ];
+	busy_time += kcpustat.cpustat[CPUTIME_SOFTIRQ];
+	busy_time += kcpustat.cpustat[CPUTIME_STEAL];
+	busy_time += kcpustat.cpustat[CPUTIME_NICE];
 
 	idle_time = cur_wall_time - busy_time;
 	if (wall)

@@ -105,7 +105,7 @@ void gov_update_cpu_data(struct dbs_data *dbs_data)
 			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time,
 								  dbs_data->io_is_busy);
 			if (dbs_data->ignore_nice_load)
-				j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+				j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
 		}
 	}
 }
@@ -149,7 +149,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 		j_cdbs->prev_cpu_idle = cur_idle_time;
 
 		if (ignore_nice) {
-			u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+			u64 cur_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
 
 			idle_time += div_u64(cur_nice - j_cdbs->prev_cpu_nice, NSEC_PER_USEC);
 			j_cdbs->prev_cpu_nice = cur_nice;
@@ -530,7 +530,7 @@ int cpufreq_dbs_governor_start(struct cpufreq_policy *policy)
 		j_cdbs->prev_load = 0;
 
 		if (ignore_nice)
-			j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+			j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
 	}
 
 	gov->start(policy);

@@ -57,11 +57,15 @@ static void led_activity_function(struct timer_list *t)
 	curr_used = 0;
 
 	for_each_possible_cpu(i) {
-		curr_used += kcpustat_cpu(i).cpustat[CPUTIME_USER]
-			  + kcpustat_cpu(i).cpustat[CPUTIME_NICE]
-			  + kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]
-			  + kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]
-			  + kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
+		struct kernel_cpustat kcpustat;
+
+		kcpustat_cpu_fetch(&kcpustat, i);
+
+		curr_used += kcpustat.cpustat[CPUTIME_USER]
+			  + kcpustat.cpustat[CPUTIME_NICE]
+			  + kcpustat.cpustat[CPUTIME_SYSTEM]
+			  + kcpustat.cpustat[CPUTIME_SOFTIRQ]
+			  + kcpustat.cpustat[CPUTIME_IRQ];
 		cpus++;
 	}
 

@@ -81,13 +81,14 @@ static int rackmeter_ignore_nice;
  */
 static inline u64 get_cpu_idle_time(unsigned int cpu)
 {
+	struct kernel_cpustat *kcpustat = &kcpustat_cpu(cpu);
 	u64 retval;
 
-	retval = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE] +
-		 kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
+	retval = kcpustat->cpustat[CPUTIME_IDLE] +
+		 kcpustat->cpustat[CPUTIME_IOWAIT];
 
 	if (rackmeter_ignore_nice)
-		retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+		retval += kcpustat_field(kcpustat, CPUTIME_NICE, cpu);
 
 	return retval;
 }

@@ -120,20 +120,23 @@ static int show_stat(struct seq_file *p, void *v)
 	getboottime64(&boottime);
 
 	for_each_possible_cpu(i) {
-		struct kernel_cpustat *kcs = &kcpustat_cpu(i);
+		struct kernel_cpustat kcpustat;
+		u64 *cpustat = kcpustat.cpustat;
 
-		user += kcs->cpustat[CPUTIME_USER];
-		nice += kcs->cpustat[CPUTIME_NICE];
-		system += kcs->cpustat[CPUTIME_SYSTEM];
-		idle += get_idle_time(kcs, i);
-		iowait += get_iowait_time(kcs, i);
-		irq += kcs->cpustat[CPUTIME_IRQ];
-		softirq += kcs->cpustat[CPUTIME_SOFTIRQ];
-		steal += kcs->cpustat[CPUTIME_STEAL];
-		guest += kcs->cpustat[CPUTIME_GUEST];
-		guest_nice += kcs->cpustat[CPUTIME_GUEST_NICE];
-		sum += kstat_cpu_irqs_sum(i);
-		sum += arch_irq_stat_cpu(i);
+		kcpustat_cpu_fetch(&kcpustat, i);
+
+		user += cpustat[CPUTIME_USER];
+		nice += cpustat[CPUTIME_NICE];
+		system += cpustat[CPUTIME_SYSTEM];
+		idle += get_idle_time(&kcpustat, i);
+		iowait += get_iowait_time(&kcpustat, i);
+		irq += cpustat[CPUTIME_IRQ];
+		softirq += cpustat[CPUTIME_SOFTIRQ];
+		steal += cpustat[CPUTIME_STEAL];
+		guest += cpustat[CPUTIME_GUEST];
+		guest_nice += cpustat[CPUTIME_USER];
+		sum += kstat_cpu_irqs_sum(i);
+		sum += arch_irq_stat_cpu(i);
 
 		for (j = 0; j < NR_SOFTIRQS; j++) {
 			unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
@@ -157,19 +160,22 @@ static int show_stat(struct seq_file *p, void *v)
 	seq_putc(p, '\n');
 
 	for_each_online_cpu(i) {
-		struct kernel_cpustat *kcs = &kcpustat_cpu(i);
+		struct kernel_cpustat kcpustat;
+		u64 *cpustat = kcpustat.cpustat;
 
+		kcpustat_cpu_fetch(&kcpustat, i);
+
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
-		user = kcs->cpustat[CPUTIME_USER];
-		nice = kcs->cpustat[CPUTIME_NICE];
-		system = kcs->cpustat[CPUTIME_SYSTEM];
-		idle = get_idle_time(kcs, i);
-		iowait = get_iowait_time(kcs, i);
-		irq = kcs->cpustat[CPUTIME_IRQ];
-		softirq = kcs->cpustat[CPUTIME_SOFTIRQ];
-		steal = kcs->cpustat[CPUTIME_STEAL];
-		guest = kcs->cpustat[CPUTIME_GUEST];
-		guest_nice = kcs->cpustat[CPUTIME_GUEST_NICE];
+		user = cpustat[CPUTIME_USER];
+		nice = cpustat[CPUTIME_NICE];
+		system = cpustat[CPUTIME_SYSTEM];
+		idle = get_idle_time(&kcpustat, i);
+		iowait = get_iowait_time(&kcpustat, i);
+		irq = cpustat[CPUTIME_IRQ];
+		softirq = cpustat[CPUTIME_SOFTIRQ];
+		steal = cpustat[CPUTIME_STEAL];
+		guest = cpustat[CPUTIME_GUEST];
+		guest_nice = cpustat[CPUTIME_USER];
 		seq_printf(p, "cpu%d", i);
 		seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
 		seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));

@@ -22,26 +22,26 @@ extern void context_tracking_user_exit(void);
 
 static inline void user_enter(void)
 {
-	if (context_tracking_is_enabled())
+	if (context_tracking_enabled())
 		context_tracking_enter(CONTEXT_USER);
 
 }
 static inline void user_exit(void)
 {
-	if (context_tracking_is_enabled())
+	if (context_tracking_enabled())
 		context_tracking_exit(CONTEXT_USER);
 }
 
 /* Called with interrupts disabled. */
 static inline void user_enter_irqoff(void)
 {
-	if (context_tracking_is_enabled())
+	if (context_tracking_enabled())
 		__context_tracking_enter(CONTEXT_USER);
 
 }
 static inline void user_exit_irqoff(void)
 {
-	if (context_tracking_is_enabled())
+	if (context_tracking_enabled())
 		__context_tracking_exit(CONTEXT_USER);
 }
 
@@ -49,7 +49,7 @@ static inline enum ctx_state exception_enter(void)
 {
 	enum ctx_state prev_ctx;
 
-	if (!context_tracking_is_enabled())
+	if (!context_tracking_enabled())
 		return 0;
 
 	prev_ctx = this_cpu_read(context_tracking.state);
@@ -61,7 +61,7 @@ static inline enum ctx_state exception_enter(void)
 
 static inline void exception_exit(enum ctx_state prev_ctx)
 {
-	if (context_tracking_is_enabled()) {
+	if (context_tracking_enabled()) {
 		if (prev_ctx != CONTEXT_KERNEL)
 			context_tracking_enter(prev_ctx);
 	}
@@ -77,7 +77,7 @@ static inline void exception_exit(enum ctx_state prev_ctx)
  */
 static inline enum ctx_state ct_state(void)
 {
-	return context_tracking_is_enabled() ?
+	return context_tracking_enabled() ?
 		this_cpu_read(context_tracking.state) : CONTEXT_DISABLED;
 }
 #else
@@ -90,7 +90,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) { }
 static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
 #endif /* !CONFIG_CONTEXT_TRACKING */
 
-#define CT_WARN_ON(cond) WARN_ON(context_tracking_is_enabled() && (cond))
+#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
 
 #ifdef CONFIG_CONTEXT_TRACKING_FORCE
 extern void context_tracking_init(void);
@@ -103,12 +103,12 @@ static inline void context_tracking_init(void) { }
 /* must be called with irqs disabled */
 static inline void guest_enter_irqoff(void)
 {
-	if (vtime_accounting_cpu_enabled())
+	if (vtime_accounting_enabled_this_cpu())
 		vtime_guest_enter(current);
 	else
 		current->flags |= PF_VCPU;
 
-	if (context_tracking_is_enabled())
+	if (context_tracking_enabled())
 		__context_tracking_enter(CONTEXT_GUEST);
 
 	/* KVM does not hold any references to rcu protected data when it
@@ -118,16 +118,16 @@ static inline void guest_enter_irqoff(void)
 	 * one time slice). Lets treat guest mode as quiescent state, just like
 	 * we do with user-mode execution.
 	 */
-	if (!context_tracking_cpu_is_enabled())
+	if (!context_tracking_enabled_this_cpu())
 		rcu_virt_note_context_switch(smp_processor_id());
 }
 
 static inline void guest_exit_irqoff(void)
 {
-	if (context_tracking_is_enabled())
+	if (context_tracking_enabled())
 		__context_tracking_exit(CONTEXT_GUEST);
 
-	if (vtime_accounting_cpu_enabled())
+	if (vtime_accounting_enabled_this_cpu())
 		vtime_guest_exit(current);
 	else
 		current->flags &= ~PF_VCPU;
@@ -141,7 +141,7 @@ static inline void guest_enter_irqoff(void)
 	 * to assume that it's the stime pending cputime
 	 * to flush.
 	 */
-	vtime_account_system(current);
+	vtime_account_kernel(current);
 	current->flags |= PF_VCPU;
 	rcu_virt_note_context_switch(smp_processor_id());
 }
@@ -149,7 +149,7 @@ static inline void guest_enter_irqoff(void)
 static inline void guest_exit_irqoff(void)
 {
 	/* Flush the guest cputime we spent on the guest */
-	vtime_account_system(current);
+	vtime_account_kernel(current);
 	current->flags &= ~PF_VCPU;
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */

@@ -23,17 +23,22 @@ struct context_tracking {
 };
 
 #ifdef CONFIG_CONTEXT_TRACKING
-extern struct static_key_false context_tracking_enabled;
+extern struct static_key_false context_tracking_key;
 DECLARE_PER_CPU(struct context_tracking, context_tracking);
 
-static inline bool context_tracking_is_enabled(void)
+static inline bool context_tracking_enabled(void)
 {
-	return static_branch_unlikely(&context_tracking_enabled);
+	return static_branch_unlikely(&context_tracking_key);
 }
 
-static inline bool context_tracking_cpu_is_enabled(void)
+static inline bool context_tracking_enabled_cpu(int cpu)
 {
-	return __this_cpu_read(context_tracking.active);
+	return context_tracking_enabled() && per_cpu(context_tracking.active, cpu);
 }
 
+static inline bool context_tracking_enabled_this_cpu(void)
+{
+	return context_tracking_enabled() && __this_cpu_read(context_tracking.active);
+}
+
 static inline bool context_tracking_in_user(void)
@@ -42,9 +47,9 @@ static inline bool context_tracking_in_user(void)
 }
 #else
 static inline bool context_tracking_in_user(void) { return false; }
 static inline bool context_tracking_active(void) { return false; }
-static inline bool context_tracking_is_enabled(void) { return false; }
-static inline bool context_tracking_cpu_is_enabled(void) { return false; }
+static inline bool context_tracking_enabled(void) { return false; }
+static inline bool context_tracking_enabled_cpu(int cpu) { return false; }
+static inline bool context_tracking_enabled_this_cpu(void) { return false; }
 #endif /* CONFIG_CONTEXT_TRACKING */
 
 #endif

@@ -78,6 +78,24 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 	return kstat_cpu(cpu).irqs_sum;
 }
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern u64 kcpustat_field(struct kernel_cpustat *kcpustat,
+			  enum cpu_usage_stat usage, int cpu);
+extern void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu);
+#else
+static inline u64 kcpustat_field(struct kernel_cpustat *kcpustat,
+				 enum cpu_usage_stat usage, int cpu)
+{
+	return kcpustat->cpustat[usage];
+}
+
+static inline void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
+{
+	*dst = kcpustat_cpu(cpu);
+}
+
+#endif
+
 extern void account_user_time(struct task_struct *, u64);
 extern void account_guest_time(struct task_struct *, u64);
 extern void account_system_time(struct task_struct *, int, u64);

@@ -250,16 +250,21 @@ struct prev_cputime {
 enum vtime_state {
 	/* Task is sleeping or running in a CPU with VTIME inactive: */
 	VTIME_INACTIVE = 0,
-	/* Task runs in userspace in a CPU with VTIME active: */
-	VTIME_USER,
+	/* Task is idle */
+	VTIME_IDLE,
 	/* Task runs in kernelspace in a CPU with VTIME active: */
 	VTIME_SYS,
+	/* Task runs in userspace in a CPU with VTIME active: */
+	VTIME_USER,
+	/* Task runs as guests in a CPU with VTIME active: */
+	VTIME_GUEST,
 };
 
 struct vtime {
 	seqcount_t		seqcount;
 	unsigned long long	starttime;
 	enum vtime_state	state;
+	unsigned int		cpu;
 	u64			utime;
 	u64			stime;
 	u64			gtime;

@@ -174,7 +174,7 @@ extern cpumask_var_t tick_nohz_full_mask;
 
 static inline bool tick_nohz_full_enabled(void)
 {
-	if (!context_tracking_is_enabled())
+	if (!context_tracking_enabled())
 		return false;
 
 	return tick_nohz_full_running;

@@ -11,11 +11,15 @@
 struct task_struct;
 
 /*
- * vtime_accounting_cpu_enabled() definitions/declarations
+ * vtime_accounting_enabled_this_cpu() definitions/declarations
  */
 #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE)
-static inline bool vtime_accounting_cpu_enabled(void) { return true; }
+
+static inline bool vtime_accounting_enabled_this_cpu(void) { return true; }
+extern void vtime_task_switch(struct task_struct *prev);
+
 #elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN)
+
 /*
  * Checks if vtime is enabled on some CPU. Cputime readers want to be careful
  * in that case and compute the tickless cputime.
@@ -24,46 +28,43 @@ static inline bool vtime_accounting_cpu_enabled(void) { return true; }
  */
 static inline bool vtime_accounting_enabled(void)
 {
-	return context_tracking_is_enabled();
+	return context_tracking_enabled();
 }
 
-static inline bool vtime_accounting_cpu_enabled(void)
+static inline bool vtime_accounting_enabled_cpu(int cpu)
 {
-	if (vtime_accounting_enabled()) {
-		if (context_tracking_cpu_is_enabled())
-			return true;
-	}
-
-	return false;
+	return context_tracking_enabled_cpu(cpu);
 }
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
-static inline bool vtime_accounting_cpu_enabled(void) { return false; }
-#endif
 
+static inline bool vtime_accounting_enabled_this_cpu(void)
+{
+	return context_tracking_enabled_this_cpu();
+}
+
+extern void vtime_task_switch_generic(struct task_struct *prev);
+
+static inline void vtime_task_switch(struct task_struct *prev)
+{
+	if (vtime_accounting_enabled_this_cpu())
+		vtime_task_switch_generic(prev);
+}
+
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
+static inline bool vtime_accounting_enabled_cpu(int cpu) {return false; }
+static inline bool vtime_accounting_enabled_this_cpu(void) { return false; }
+static inline void vtime_task_switch(struct task_struct *prev) { }
+
 #endif
 
 /*
  * Common vtime APIs
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 
-#ifdef __ARCH_HAS_VTIME_TASK_SWITCH
-extern void vtime_task_switch(struct task_struct *prev);
-#else
-extern void vtime_common_task_switch(struct task_struct *prev);
-static inline void vtime_task_switch(struct task_struct *prev)
-{
-	if (vtime_accounting_cpu_enabled())
-		vtime_common_task_switch(prev);
-}
-#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
-
-extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_kernel(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 static inline void vtime_task_switch(struct task_struct *prev) { }
-static inline void vtime_account_system(struct task_struct *tsk) { }
+static inline void vtime_account_kernel(struct task_struct *tsk) { }
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -86,7 +87,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk);
 static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
 	/* On hard|softirq exit we always account to hard|softirq cputime */
-	vtime_account_system(tsk);
+	vtime_account_kernel(tsk);
 }
 extern void vtime_flush(struct task_struct *tsk);
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

@@ -65,7 +65,7 @@ config PREEMPT_RT
	  preemptible priority-inheritance aware variants, enforcing
	  interrupt threading and introducing mechanisms to break up long
	  non-preemptible sections. This makes the kernel, except for very
-	  low level and critical code pathes (entry code, scheduler, low
+	  low level and critical code paths (entry code, scheduler, low
	  level interrupt handling) fully preemptible and brings most
	  execution contexts under scheduler control.
 

@@ -25,8 +25,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/context_tracking.h>
 
-DEFINE_STATIC_KEY_FALSE(context_tracking_enabled);
-EXPORT_SYMBOL_GPL(context_tracking_enabled);
+DEFINE_STATIC_KEY_FALSE(context_tracking_key);
+EXPORT_SYMBOL_GPL(context_tracking_key);
 
 DEFINE_PER_CPU(struct context_tracking, context_tracking);
 EXPORT_SYMBOL_GPL(context_tracking);
@@ -192,7 +192,7 @@ void __init context_tracking_cpu_set(int cpu)
 
 	if (!per_cpu(context_tracking.active, cpu)) {
 		per_cpu(context_tracking.active, cpu) = true;
-		static_branch_inc(&context_tracking_enabled);
+		static_branch_inc(&context_tracking_key);
 	}
 
 	if (initialized)

@@ -811,7 +811,7 @@ static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
 	return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
 }
 
-static inline enum uclamp_id uclamp_none(enum uclamp_id clamp_id)
+static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
 {
 	if (clamp_id == UCLAMP_MIN)
 		return 0;
@@ -854,7 +854,7 @@ static inline void uclamp_idle_reset(struct rq *rq, enum uclamp_id clamp_id,
 }
 
 static inline
-enum uclamp_id uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
+unsigned int uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
 				   unsigned int clamp_value)
 {
 	struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket;
@@ -919,7 +919,7 @@ uclamp_eff_get(struct task_struct *p, enum uclamp_id clamp_id)
 	return uc_req;
 }
 
-enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
+unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
 {
 	struct uclamp_se uc_eff;
 
@@ -3918,13 +3918,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 		    prev->sched_class == &fair_sched_class) &&
 		   rq->nr_running == rq->cfs.h_nr_running)) {
 
-		p = fair_sched_class.pick_next_task(rq, prev, rf);
+		p = pick_next_task_fair(rq, prev, rf);
 		if (unlikely(p == RETRY_TASK))
 			goto restart;
 
 		/* Assumes fair_sched_class->next == idle_sched_class */
-		if (unlikely(!p))
-			p = idle_sched_class.pick_next_task(rq, prev, rf);
+		if (!p) {
+			put_prev_task(rq, prev);
+			p = pick_next_task_idle(rq);
+		}
 
 		return p;
 	}
@@ -3948,7 +3950,7 @@ restart:
 	put_prev_task(rq, prev);
 
 	for_each_class(class) {
-		p = class->pick_next_task(rq, NULL, NULL);
+		p = class->pick_next_task(rq);
 		if (p)
 			return p;
 	}
@@ -6217,7 +6219,7 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
 	struct task_struct *next;
 
 	for_each_class(class) {
-		next = class->pick_next_task(rq, NULL, NULL);
+		next = class->pick_next_task(rq);
 		if (next) {
 			next->sched_class->put_prev_task(rq, next);
 			return next;

@@ -405,27 +405,25 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
 /*
  * Use precise platform statistics if available:
  */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
 # ifndef __ARCH_HAS_VTIME_TASK_SWITCH
-void vtime_common_task_switch(struct task_struct *prev)
+void vtime_task_switch(struct task_struct *prev)
 {
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
-		vtime_account_system(prev);
+		vtime_account_kernel(prev);
 
 	vtime_flush(prev);
 	arch_vtime_task_switch(prev);
 }
 # endif
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
-
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Archs that account the whole time spent in the idle task
  * (outside irq) as idle time can rely on this and just implement
- * vtime_account_system() and vtime_account_idle(). Archs that
+ * vtime_account_kernel() and vtime_account_idle(). Archs that
  * have other meaning of the idle time (s390 only includes the
  * time spent by the CPU when it's in low power mode) must override
  * vtime_account().
@@ -436,7 +434,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
 	if (!in_interrupt() && is_idle_task(tsk))
 		vtime_account_idle(tsk);
 	else
-		vtime_account_system(tsk);
+		vtime_account_kernel(tsk);
 }
 EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -477,7 +475,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	u64 cputime, steal;
 	struct rq *rq = this_rq();
 
-	if (vtime_accounting_cpu_enabled())
+	if (vtime_accounting_enabled_this_cpu())
 		return;
 
 	if (sched_clock_irqtime) {
@@ -711,8 +709,8 @@ static u64 get_vtime_delta(struct vtime *vtime)
 	return delta - other;
 }
 
-static void __vtime_account_system(struct task_struct *tsk,
-				   struct vtime *vtime)
+static void vtime_account_system(struct task_struct *tsk,
+				 struct vtime *vtime)
 {
 	vtime->stime += get_vtime_delta(vtime);
 	if (vtime->stime >= TICK_NSEC) {
@@ -731,7 +729,17 @@ static void vtime_account_guest(struct task_struct *tsk,
 	}
 }
 
-void vtime_account_system(struct task_struct *tsk)
+static void __vtime_account_kernel(struct task_struct *tsk,
+				   struct vtime *vtime)
+{
+	/* We might have scheduled out from guest path */
+	if (vtime->state == VTIME_GUEST)
+		vtime_account_guest(tsk, vtime);
+	else
+		vtime_account_system(tsk, vtime);
+}
+
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	struct vtime *vtime = &tsk->vtime;
 
@@ -739,11 +747,7 @@ void vtime_account_system(struct task_struct *tsk)
 		return;
 
 	write_seqcount_begin(&vtime->seqcount);
-	/* We might have scheduled out from guest path */
-	if (tsk->flags & PF_VCPU)
-		vtime_account_guest(tsk, vtime);
-	else
-		__vtime_account_system(tsk, vtime);
+	__vtime_account_kernel(tsk, vtime);
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -752,7 +756,7 @@ void vtime_user_enter(struct task_struct *tsk)
 	struct vtime *vtime = &tsk->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk, vtime);
+	vtime_account_system(tsk, vtime);
 	vtime->state = VTIME_USER;
 	write_seqcount_end(&vtime->seqcount);
 }
@@ -782,8 +786,9 @@ void vtime_guest_enter(struct task_struct *tsk)
 	 * that can thus safely catch up with a tickless delta.
 	 */
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk, vtime);
+	vtime_account_system(tsk, vtime);
 	tsk->flags |= PF_VCPU;
+	vtime->state = VTIME_GUEST;
 	write_seqcount_end(&vtime->seqcount);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_enter);
@@ -795,6 +800,7 @@ void vtime_guest_exit(struct task_struct *tsk)
 	write_seqcount_begin(&vtime->seqcount);
 	vtime_account_guest(tsk, vtime);
 	tsk->flags &= ~PF_VCPU;
+	vtime->state = VTIME_SYS;
 	write_seqcount_end(&vtime->seqcount);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_exit);
@@ -804,19 +810,30 @@ void vtime_account_idle(struct task_struct *tsk)
 	account_idle_time(get_vtime_delta(&tsk->vtime));
 }
 
-void arch_vtime_task_switch(struct task_struct *prev)
+void vtime_task_switch_generic(struct task_struct *prev)
 {
 	struct vtime *vtime = &prev->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
+	if (vtime->state == VTIME_IDLE)
+		vtime_account_idle(prev);
+	else
+		__vtime_account_kernel(prev, vtime);
 	vtime->state = VTIME_INACTIVE;
+	vtime->cpu = -1;
 	write_seqcount_end(&vtime->seqcount);
 
 	vtime = &current->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
-	vtime->state = VTIME_SYS;
+	if (is_idle_task(current))
+		vtime->state = VTIME_IDLE;
+	else if (current->flags & PF_VCPU)
+		vtime->state = VTIME_GUEST;
+	else
+		vtime->state = VTIME_SYS;
 	vtime->starttime = sched_clock();
+	vtime->cpu = smp_processor_id();
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -827,8 +844,9 @@ void vtime_init_idle(struct task_struct *t, int cpu)
 
 	local_irq_save(flags);
 	write_seqcount_begin(&vtime->seqcount);
-	vtime->state = VTIME_SYS;
+	vtime->state = VTIME_IDLE;
 	vtime->starttime = sched_clock();
+	vtime->cpu = cpu;
 	write_seqcount_end(&vtime->seqcount);
 	local_irq_restore(flags);
 }
@@ -846,7 +864,7 @@ u64 task_gtime(struct task_struct *t)
 		seq = read_seqcount_begin(&vtime->seqcount);
 
 		gtime = t->gtime;
-		if (vtime->state == VTIME_SYS && t->flags & PF_VCPU)
+		if (vtime->state == VTIME_GUEST)
 			gtime += vtime->gtime + vtime_delta(vtime);
 
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
@@ -877,20 +895,230 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 		*utime = t->utime;
 		*stime = t->stime;
 
-		/* Task is sleeping, nothing to add */
-		if (vtime->state == VTIME_INACTIVE || is_idle_task(t))
+		/* Task is sleeping or idle, nothing to add */
+		if (vtime->state < VTIME_SYS)
 			continue;
 
 		delta = vtime_delta(vtime);
 
 		/*
-		 * Task runs either in user or kernel space, add pending nohz time to
-		 * the right place.
+		 * Task runs either in user (including guest) or kernel space,
+		 * add pending nohz time to the right place.
 		 */
-		if (vtime->state == VTIME_USER || t->flags & PF_VCPU)
-			*utime += vtime->utime + delta;
-		else if (vtime->state == VTIME_SYS)
+		if (vtime->state == VTIME_SYS)
 			*stime += vtime->stime + delta;
+		else
+			*utime += vtime->utime + delta;
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 }
 
+static int vtime_state_check(struct vtime *vtime, int cpu)
+{
+	/*
+	 * We raced against a context switch, fetch the
+	 * kcpustat task again.
+	 */
+	if (vtime->cpu != cpu && vtime->cpu != -1)
+		return -EAGAIN;
+
+	/*
+	 * Two possible things here:
+	 * 1) We are seeing the scheduling out task (prev) or any past one.
+	 * 2) We are seeing the scheduling in task (next) but it hasn't
+	 *    passed though vtime_task_switch() yet so the pending
+	 *    cputime of the prev task may not be flushed yet.
+	 *
+	 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
+	 */
+	if (vtime->state == VTIME_INACTIVE)
+		return -EAGAIN;
+
+	return 0;
+}
+
+static u64 kcpustat_user_vtime(struct vtime *vtime)
+{
+	if (vtime->state == VTIME_USER)
+		return vtime->utime + vtime_delta(vtime);
+	else if (vtime->state == VTIME_GUEST)
+		return vtime->gtime + vtime_delta(vtime);
+	return 0;
+}
+
+static int kcpustat_field_vtime(u64 *cpustat,
+				struct task_struct *tsk,
+				enum cpu_usage_stat usage,
+				int cpu, u64 *val)
+{
+	struct vtime *vtime = &tsk->vtime;
+	unsigned int seq;
+	int err;
+
+	do {
+		seq = read_seqcount_begin(&vtime->seqcount);
+
+		err = vtime_state_check(vtime, cpu);
+		if (err < 0)
+			return err;
+
+		*val = cpustat[usage];
+
+		/*
+		 * Nice VS unnice cputime accounting may be inaccurate if
+		 * the nice value has changed since the last vtime update.
+		 * But proper fix would involve interrupting target on nice
+		 * updates which is a no go on nohz_full (although the scheduler
+		 * may still interrupt the target if rescheduling is needed...)
+		 */
+		switch (usage) {
+		case CPUTIME_SYSTEM:
+			if (vtime->state == VTIME_SYS)
+				*val += vtime->stime + vtime_delta(vtime);
+			break;
+		case CPUTIME_USER:
+			if (task_nice(tsk) <= 0)
+				*val += kcpustat_user_vtime(vtime);
+			break;
+		case CPUTIME_NICE:
+			if (task_nice(tsk) > 0)
+				*val += kcpustat_user_vtime(vtime);
+			break;
+		case CPUTIME_GUEST:
+			if (vtime->state == VTIME_GUEST && task_nice(tsk) <= 0)
+				*val += vtime->gtime + vtime_delta(vtime);
+			break;
+		case CPUTIME_GUEST_NICE:
+			if (vtime->state == VTIME_GUEST && task_nice(tsk) > 0)
+				*val += vtime->gtime + vtime_delta(vtime);
+			break;
+		default:
+			break;
+		}
+	} while (read_seqcount_retry(&vtime->seqcount, seq));
+
+	return 0;
+}
+
+u64 kcpustat_field(struct kernel_cpustat *kcpustat,
+		   enum cpu_usage_stat usage, int cpu)
+{
+	u64 *cpustat = kcpustat->cpustat;
+	struct rq *rq;
+	u64 val;
+	int err;
+
+	if (!vtime_accounting_enabled_cpu(cpu))
+		return cpustat[usage];
+
+	rq = cpu_rq(cpu);
+
+	for (;;) {
+		struct task_struct *curr;
+
+		rcu_read_lock();
+		curr = rcu_dereference(rq->curr);
+		if (WARN_ON_ONCE(!curr)) {
+			rcu_read_unlock();
+			return cpustat[usage];
+		}
+
+		err = kcpustat_field_vtime(cpustat, curr, usage, cpu, &val);
+		rcu_read_unlock();
+
+		if (!err)
+			return val;
+
+		cpu_relax();
+	}
+}
+EXPORT_SYMBOL_GPL(kcpustat_field);
+
+static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
+				    const struct kernel_cpustat *src,
+				    struct task_struct *tsk, int cpu)
+{
+	struct vtime *vtime = &tsk->vtime;
+	unsigned int seq;
+	int err;
+
+	do {
+		u64 *cpustat;
+		u64 delta;
+
+		seq = read_seqcount_begin(&vtime->seqcount);
+
+		err = vtime_state_check(vtime, cpu);
+		if (err < 0)
+			return err;
+
+		*dst = *src;
+		cpustat = dst->cpustat;
+
+		/* Task is sleeping, dead or idle, nothing to add */
+		if (vtime->state < VTIME_SYS)
+			continue;
+
+		delta = vtime_delta(vtime);
+
+		/*
+		 * Task runs either in user (including guest) or kernel space,
+		 * add pending nohz time to the right place.
+		 */
+		if (vtime->state == VTIME_SYS) {
+			cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
+		} else if (vtime->state == VTIME_USER) {
+			if (task_nice(tsk) > 0)
+				cpustat[CPUTIME_NICE] += vtime->utime + delta;
+			else
+				cpustat[CPUTIME_USER] += vtime->utime + delta;
+		} else {
+			WARN_ON_ONCE(vtime->state != VTIME_GUEST);
+			if (task_nice(tsk) > 0) {
+				cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
+				cpustat[CPUTIME_NICE] += vtime->gtime + delta;
+			} else {
+				cpustat[CPUTIME_GUEST] += vtime->gtime + delta;
+				cpustat[CPUTIME_USER] += vtime->gtime + delta;
+			}
+		}
+	} while (read_seqcount_retry(&vtime->seqcount, seq));
+
+	return err;
+}
+
+void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
+{
+	const struct kernel_cpustat *src = &kcpustat_cpu(cpu);
+	struct rq *rq;
+	int err;
+
+	if (!vtime_accounting_enabled_cpu(cpu)) {
+		*dst = *src;
+		return;
+	}
+
+	rq = cpu_rq(cpu);
+
+	for (;;) {
+		struct task_struct *curr;
+
+		rcu_read_lock();
+		curr = rcu_dereference(rq->curr);
+		if (WARN_ON_ONCE(!curr)) {
+			rcu_read_unlock();
+			*dst = *src;
+			return;
+		}
+
+		err = kcpustat_cpu_fetch_vtime(dst, src, curr, cpu);
+		rcu_read_unlock();
+
+		if (!err)
+			return;
+
+		cpu_relax();
+	}
+}
+EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);
+
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */

@@ -1743,13 +1743,16 @@ static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
 }
 #endif
 
-static void set_next_task_dl(struct rq *rq, struct task_struct *p)
+static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
 {
 	p->se.exec_start = rq_clock_task(rq);
 
 	/* You can't push away the running task */
 	dequeue_pushable_dl_task(rq, p);
 
+	if (!first)
+		return;
+
 	if (hrtick_enabled(rq))
 		start_hrtick_dl(rq, p);
 
@@ -1770,22 +1773,19 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
 	return rb_entry(left, struct sched_dl_entity, rb_node);
 }
 
-static struct task_struct *
-pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static struct task_struct *pick_next_task_dl(struct rq *rq)
 {
 	struct sched_dl_entity *dl_se;
 	struct dl_rq *dl_rq = &rq->dl;
 	struct task_struct *p;
 
-	WARN_ON_ONCE(prev || rf);
-
 	if (!sched_dl_runnable(rq))
 		return NULL;
 
 	dl_se = pick_next_dl_entity(rq, dl_rq);
 	BUG_ON(!dl_se);
 	p = dl_task_of(dl_se);
-	set_next_task_dl(rq, p);
+	set_next_task_dl(rq, p, true);
 	return p;
 }
 

kernel/sched/fair.c: 1441 changed lines (diff suppressed because it is too large)

@@ -89,3 +89,4 @@ SCHED_FEAT(WA_BIAS, true)
  * UtilEstimation. Use estimated CPU utilization.
  */
 SCHED_FEAT(UTIL_EST, true)
+SCHED_FEAT(UTIL_EST_FASTUP, true)

@@ -385,21 +385,17 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
 {
 }
 
-static void set_next_task_idle(struct rq *rq, struct task_struct *next)
+static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
 {
 	update_idle_core(rq);
 	schedstat_inc(rq->sched_goidle);
 }
 
-static struct task_struct *
-pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+struct task_struct *pick_next_task_idle(struct rq *rq)
 {
 	struct task_struct *next = rq->idle;
 
-	if (prev)
-		put_prev_task(rq, prev);
-
-	set_next_task_idle(rq, next);
+	set_next_task_idle(rq, next, true);
 
 	return next;
 }

@@ -1515,13 +1515,16 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag
 #endif
 }
 
-static inline void set_next_task_rt(struct rq *rq, struct task_struct *p)
+static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
 {
 	p->se.exec_start = rq_clock_task(rq);
 
 	/* The running task is never eligible for pushing */
 	dequeue_pushable_task(rq, p);
 
+	if (!first)
+		return;
+
 	/*
 	 * If prev task was rt, put_prev_task() has already updated the
 	 * utilization. We only care of the case where we start to schedule a
@@ -1564,18 +1567,15 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 	return rt_task_of(rt_se);
 }
 
-static struct task_struct *
-pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static struct task_struct *pick_next_task_rt(struct rq *rq)
 {
 	struct task_struct *p;
 
-	WARN_ON_ONCE(prev || rf);
-
 	if (!sched_rt_runnable(rq))
 		return NULL;
 
 	p = _pick_next_task_rt(rq);
-	set_next_task_rt(rq, p);
+	set_next_task_rt(rq, p, true);
 	return p;
 }
 

@@ -1713,22 +1713,10 @@ struct sched_class {
 
 	void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
 
-	/*
-	 * Both @prev and @rf are optional and may be NULL, in which case the
-	 * caller must already have invoked put_prev_task(rq, prev, rf).
-	 *
-	 * Otherwise it is the responsibility of the pick_next_task() to call
-	 * put_prev_task() on the @prev task or something equivalent, IFF it
-	 * returns a next task.
-	 *
-	 * In that case (@rf != NULL) it may return RETRY_TASK when it finds a
-	 * higher prio class has runnable tasks.
-	 */
-	struct task_struct * (*pick_next_task)(struct rq *rq,
-					       struct task_struct *prev,
-					       struct rq_flags *rf);
+	struct task_struct *(*pick_next_task)(struct rq *rq);
+
 	void (*put_prev_task)(struct rq *rq, struct task_struct *p);
-	void (*set_next_task)(struct rq *rq, struct task_struct *p);
+	void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
 
 #ifdef CONFIG_SMP
 	int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
@@ -1780,7 +1768,7 @@ static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 static inline void set_next_task(struct rq *rq, struct task_struct *next)
 {
 	WARN_ON_ONCE(rq->curr != next);
-	next->sched_class->set_next_task(rq, next);
+	next->sched_class->set_next_task(rq, next, false);
 }
 
 #ifdef CONFIG_SMP
@@ -1821,6 +1809,9 @@ static inline bool sched_fair_runnable(struct rq *rq)
 	return rq->cfs.nr_running > 0;
 }
 
+extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
+extern struct task_struct *pick_next_task_idle(struct rq *rq);
+
 #ifdef CONFIG_SMP
 
 extern void update_group_capacity(struct sched_domain *sd, int cpu);
@@ -2309,7 +2300,7 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
 #ifdef CONFIG_UCLAMP_TASK
-enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
+unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
 
 static __always_inline
 unsigned int uclamp_util_with(struct rq *rq, unsigned int util,

@@ -29,20 +29,17 @@ check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
 	/* we're never preempted */
 }
 
-static void set_next_task_stop(struct rq *rq, struct task_struct *stop)
+static void set_next_task_stop(struct rq *rq, struct task_struct *stop, bool first)
 {
 	stop->se.exec_start = rq_clock_task(rq);
 }
 
-static struct task_struct *
-pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
-	WARN_ON_ONCE(prev || rf);
-
 	if (!sched_stop_runnable(rq))
 		return NULL;
 
-	set_next_task_stop(rq, rq->stop);
+	set_next_task_stop(rq, rq->stop, true);
 	return rq->stop;
 }
 

@@ -1201,16 +1201,13 @@ static void set_domain_attribute(struct sched_domain *sd,
 	if (!attr || attr->relax_domain_level < 0) {
 		if (default_relax_domain_level < 0)
 			return;
-		else
-			request = default_relax_domain_level;
+		request = default_relax_domain_level;
 	} else
 		request = attr->relax_domain_level;
-	if (request < sd->level) {
+
+	if (sd->level > request) {
 		/* Turn off idle balance on this domain: */
 		sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
-	} else {
-		/* Turn on idle balance on this domain: */
-		sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
 	}
 }
 

@@ -1119,7 +1119,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	unsigned long ticks;
 
-	if (vtime_accounting_cpu_enabled())
+	if (vtime_accounting_enabled_this_cpu())
 		return;
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
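As an aside on the scheduler-core part of this pull: after the sched_class changes above, pick_next_task() implementations no longer take @prev and @rf, and the core puts the previous task itself before asking the classes. The following is only a hedged sketch of that calling convention, closely following the restart path shown in the kernel/sched/core.c hunk; the function name example_pick() is made up for illustration:

```c
/*
 * Sketch of the new convention: put the previous task first, then ask each
 * class for a next task with the simplified pick_next_task(rq) signature.
 */
static struct task_struct *example_pick(struct rq *rq, struct task_struct *prev)
{
	const struct sched_class *class;
	struct task_struct *p;

	put_prev_task(rq, prev);

	for_each_class(class) {
		p = class->pick_next_task(rq);
		if (p)
			return p;
	}

	/* The idle class should always have a runnable task. */
	BUG();
}
```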