From fd25b4c2f226de818e1d2b71e3e681d28bcaf5ba Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 13 Nov 2012 18:21:22 +0100 Subject: [PATCH 1/5] vtime: Remove the underscore prefix invasion Prepending irq-unsafe vtime APIs with underscores was actually a bad idea as the result is a big mess in the API namespace that is even waiting to be further extended. Also these helpers are always called from irq safe callers except kvm. Just provide a vtime_account_system_irqsafe() for this specific case so that we can remove the underscore prefix on other vtime functions. Signed-off-by: Frederic Weisbecker Reviewed-by: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens --- arch/ia64/kernel/time.c | 8 ++++---- arch/powerpc/kernel/time.c | 4 ++-- arch/s390/kernel/vtime.c | 4 ++-- include/linux/kvm_host.h | 4 ++-- include/linux/vtime.h | 8 ++++---- kernel/sched/cputime.c | 12 ++++++------ 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 5e4850305d3f..f6388216080d 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -106,9 +106,9 @@ void vtime_task_switch(struct task_struct *prev) struct thread_info *ni = task_thread_info(current); if (idle_task(smp_processor_id()) != prev) - __vtime_account_system(prev); + vtime_account_system(prev); else - __vtime_account_idle(prev); + vtime_account_idle(prev); vtime_account_user(prev); @@ -135,14 +135,14 @@ static cputime_t vtime_delta(struct task_struct *tsk) return delta_stime; } -void __vtime_account_system(struct task_struct *tsk) +void vtime_account_system(struct task_struct *tsk) { cputime_t delta = vtime_delta(tsk); account_system_time(tsk, 0, delta, delta); } -void __vtime_account_idle(struct task_struct *tsk) +void vtime_account_idle(struct task_struct *tsk) { 
account_idle_time(vtime_delta(tsk)); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 0db456f30d45..ce4cb772dc78 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -336,7 +336,7 @@ static u64 vtime_delta(struct task_struct *tsk, return delta; } -void __vtime_account_system(struct task_struct *tsk) +void vtime_account_system(struct task_struct *tsk) { u64 delta, sys_scaled, stolen; @@ -346,7 +346,7 @@ void __vtime_account_system(struct task_struct *tsk) account_steal_time(stolen); } -void __vtime_account_idle(struct task_struct *tsk) +void vtime_account_idle(struct task_struct *tsk) { u64 delta, sys_scaled, stolen; diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 783e988c4e1e..80d1dbc5d42e 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -140,9 +140,9 @@ void vtime_account(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(vtime_account); -void __vtime_account_system(struct task_struct *tsk) +void vtime_account_system(struct task_struct *tsk) __attribute__((alias("vtime_account"))); -EXPORT_SYMBOL_GPL(__vtime_account_system); +EXPORT_SYMBOL_GPL(vtime_account_system); void __kprobes vtime_stop_cpu(void) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0e2212fe4784..f17158bdd4fc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -741,7 +741,7 @@ static inline void kvm_guest_enter(void) * This is running in ioctl context so we can avoid * the call to vtime_account() with its unnecessary idle check. */ - vtime_account_system(current); + vtime_account_system_irqsafe(current); current->flags |= PF_VCPU; /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode @@ -759,7 +759,7 @@ static inline void kvm_guest_exit(void) * This is running in ioctl context so we can avoid * the call to vtime_account() with its unnecessary idle check. 
*/ - vtime_account_system(current); + vtime_account_system_irqsafe(current); current->flags &= ~PF_VCPU; } diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 0c2a2d303020..5ad13c325deb 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -5,14 +5,14 @@ struct task_struct; #ifdef CONFIG_VIRT_CPU_ACCOUNTING extern void vtime_task_switch(struct task_struct *prev); -extern void __vtime_account_system(struct task_struct *tsk); extern void vtime_account_system(struct task_struct *tsk); -extern void __vtime_account_idle(struct task_struct *tsk); +extern void vtime_account_system_irqsafe(struct task_struct *tsk); +extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account(struct task_struct *tsk); #else static inline void vtime_task_switch(struct task_struct *prev) { } -static inline void __vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_system(struct task_struct *tsk) { } +static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } static inline void vtime_account(struct task_struct *tsk) { } #endif @@ -40,7 +40,7 @@ static inline void vtime_account_irq_enter(struct task_struct *tsk) static inline void vtime_account_irq_exit(struct task_struct *tsk) { /* On hard|softirq exit we always account to hard|softirq cputime */ - __vtime_account_system(tsk); + vtime_account_system(tsk); irqtime_account_irq(tsk); } diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 8d859dae5bed..c0aa1ba752ea 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -433,20 +433,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) *st = cputime.stime; } -void vtime_account_system(struct task_struct *tsk) +void vtime_account_system_irqsafe(struct task_struct *tsk) { unsigned long flags; local_irq_save(flags); - __vtime_account_system(tsk); + vtime_account_system(tsk); local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(vtime_account_system); 
+EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe); /* * Archs that account the whole time spent in the idle task * (outside irq) as idle time can rely on this and just implement - * __vtime_account_system() and __vtime_account_idle(). Archs that + * vtime_account_system() and vtime_account_idle(). Archs that * have other meaning of the idle time (s390 only includes the * time spent by the CPU when it's in low power mode) must override * vtime_account(). @@ -459,9 +459,9 @@ void vtime_account(struct task_struct *tsk) local_irq_save(flags); if (in_interrupt() || !is_idle_task(tsk)) - __vtime_account_system(tsk); + vtime_account_system(tsk); else - __vtime_account_idle(tsk); + vtime_account_idle(tsk); local_irq_restore(flags); } From bcebdf846522056a84ba0b0cba5f5413868c9394 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 13 Nov 2012 23:51:06 +0100 Subject: [PATCH 2/5] vtime: Explicitly account pending user time on process tick All vtime implementations just flush the user time on process tick. Consolidate that in generic code by calling a user time accounting helper. This avoids an indirect call on ia64 and prepares to also consolidate the vtime context switch code. 
Signed-off-by: Frederic Weisbecker Reviewed-by: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens --- arch/ia64/kernel/time.c | 11 +---------- arch/powerpc/kernel/time.c | 14 +++++++------- arch/s390/kernel/vtime.c | 7 ++++++- include/linux/kernel_stat.h | 8 ++++++++ include/linux/vtime.h | 1 + 5 files changed, 23 insertions(+), 18 deletions(-) diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index f6388216080d..834c78bd3b5f 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -83,7 +83,7 @@ static struct clocksource *itc_clocksource; extern cputime_t cycle_to_cputime(u64 cyc); -static void vtime_account_user(struct task_struct *tsk) +void vtime_account_user(struct task_struct *tsk) { cputime_t delta_utime; struct thread_info *ti = task_thread_info(tsk); @@ -147,15 +147,6 @@ void vtime_account_idle(struct task_struct *tsk) account_idle_time(vtime_delta(tsk)); } -/* - * Called from the timer interrupt handler to charge accumulated user time - * to the current process. Must be called with interrupts disabled. - */ -void account_process_tick(struct task_struct *p, int user_tick) -{ - vtime_account_user(p); -} - #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ static irqreturn_t diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index ce4cb772dc78..a667aaf85846 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -355,15 +355,15 @@ void vtime_account_idle(struct task_struct *tsk) } /* - * Transfer the user and system times accumulated in the paca - * by the exception entry and exit code to the generic process - * user and system time records. + * Transfer the user time accumulated in the paca + * by the exception entry and exit code to the generic + * process user time records. * Must be called with interrupts disabled. 
- * Assumes that vtime_account() has been called recently - * (i.e. since the last entry from usermode) so that + * Assumes that vtime_account_system/idle() has been called + * recently (i.e. since the last entry from usermode) so that * get_paca()->user_time_scaled is up to date. */ -void account_process_tick(struct task_struct *tsk, int user_tick) +void vtime_account_user(struct task_struct *tsk) { cputime_t utime, utimescaled; @@ -378,7 +378,7 @@ void account_process_tick(struct task_struct *tsk, int user_tick) void vtime_task_switch(struct task_struct *prev) { vtime_account(prev); - account_process_tick(prev, 0); + vtime_account_user(prev); } #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 80d1dbc5d42e..7c6d861a1a40 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -112,7 +112,12 @@ void vtime_task_switch(struct task_struct *prev) S390_lowcore.system_timer = ti->system_timer; } -void account_process_tick(struct task_struct *tsk, int user_tick) +/* + * In s390, accounting pending user time also implies + * accounting system time in order to correctly compute + * the stolen time accounting. 
+ */ +void vtime_account_user(struct task_struct *tsk) { if (do_account_vtime(tsk, HARDIRQ_OFFSET)) virt_timer_expire(); diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 1865b1f29770..66b70780e910 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -127,7 +127,15 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t) extern void account_steal_time(cputime_t); extern void account_idle_time(cputime_t); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +static inline void account_process_tick(struct task_struct *tsk, int user) +{ + vtime_account_user(tsk); +} +#else extern void account_process_tick(struct task_struct *, int user); +#endif + extern void account_steal_ticks(unsigned long ticks); extern void account_idle_ticks(unsigned long ticks); diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 5ad13c325deb..ae30ab58431a 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -8,6 +8,7 @@ extern void vtime_task_switch(struct task_struct *prev); extern void vtime_account_system(struct task_struct *tsk); extern void vtime_account_system_irqsafe(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); +extern void vtime_account_user(struct task_struct *tsk); extern void vtime_account(struct task_struct *tsk); #else static inline void vtime_task_switch(struct task_struct *prev) { } From e3942ba04052364d3c6454103362cafd87456010 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 14 Nov 2012 00:24:25 +0100 Subject: [PATCH 3/5] vtime: Consolidate a bit the ctx switch code On ia64 and powerpc, the vtime context switch consists only of flushing pending system and user time, plus a little arch housekeeping. Consolidate that into a generic implementation. s390 is a special case because pending user and system time accounting there is hard to dissociate, so it keeps its own implementation. 
Signed-off-by: Frederic Weisbecker Reviewed-by: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens --- arch/ia64/include/asm/cputime.h | 2 ++ arch/ia64/kernel/time.c | 9 +-------- arch/powerpc/include/asm/cputime.h | 2 ++ arch/powerpc/kernel/time.c | 6 ------ arch/s390/include/asm/cputime.h | 1 + kernel/sched/cputime.c | 13 +++++++++++++ 6 files changed, 19 insertions(+), 14 deletions(-) diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 3deac956d325..7fcf7f08ab06 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -103,5 +103,7 @@ static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) #define cputime64_to_clock_t(__ct) \ cputime_to_clock_t((__force cputime_t)__ct) +extern void arch_vtime_task_switch(struct task_struct *tsk); + #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ #endif /* __IA64_CPUTIME_H */ diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 834c78bd3b5f..c9a7d2ebe089 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -100,18 +100,11 @@ void vtime_account_user(struct task_struct *tsk) * accumulated times to the current process, and to prepare accounting on * the next process. 
*/ -void vtime_task_switch(struct task_struct *prev) +void arch_vtime_task_switch(struct task_struct *prev) { struct thread_info *pi = task_thread_info(prev); struct thread_info *ni = task_thread_info(current); - if (idle_task(smp_processor_id()) != prev) - vtime_account_system(prev); - else - vtime_account_idle(prev); - - vtime_account_user(prev); - pi->ac_stamp = ni->ac_stamp; ni->ac_stime = ni->ac_utime = 0; } diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 487d46ff68a1..483733bd06d4 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -228,6 +228,8 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk) #define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct)) +static inline void arch_vtime_task_switch(struct task_struct *tsk) { } + #endif /* __KERNEL__ */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ #endif /* __POWERPC_CPUTIME_H */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a667aaf85846..3486cfad4a63 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -375,12 +375,6 @@ void vtime_account_user(struct task_struct *tsk) account_user_time(tsk, utime, utimescaled); } -void vtime_task_switch(struct task_struct *prev) -{ - vtime_account(prev); - vtime_account_user(prev); -} - #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ #define calc_cputime_factors() #endif diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 023d5ae24482..d2ff41370c0c 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -14,6 +14,7 @@ #define __ARCH_HAS_VTIME_ACCOUNT +#define __ARCH_HAS_VTIME_TASK_SWITCH /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. 
*/ diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index c0aa1ba752ea..2e8d34aac97e 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -443,6 +443,19 @@ void vtime_account_system_irqsafe(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe); +#ifndef __ARCH_HAS_VTIME_TASK_SWITCH +void vtime_task_switch(struct task_struct *prev) +{ + if (is_idle_task(prev)) + vtime_account_idle(prev); + else + vtime_account_system(prev); + + vtime_account_user(prev); + arch_vtime_task_switch(prev); +} +#endif + /* * Archs that account the whole time spent in the idle task * (outside irq) as idle time can rely on this and just implement From 1017769bd0073f0a73e066377cd79a10cf0a33ab Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 14 Nov 2012 00:26:54 +0100 Subject: [PATCH 4/5] vtime: No need to disable irqs on vtime_account() vtime_account() is only called from irq entry. irqs are always disabled at this point so we can safely remove the irq disabling guards on that function. 
Signed-off-by: Frederic Weisbecker Reviewed-by: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens --- kernel/sched/cputime.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 2e8d34aac97e..80b2fd5a7cf0 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -467,16 +467,10 @@ void vtime_task_switch(struct task_struct *prev) #ifndef __ARCH_HAS_VTIME_ACCOUNT void vtime_account(struct task_struct *tsk) { - unsigned long flags; - - local_irq_save(flags); - if (in_interrupt() || !is_idle_task(tsk)) vtime_account_system(tsk); else vtime_account_idle(tsk); - - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(vtime_account); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ From 1b2852b152be5150fbef7b585388ec43cf6f4415 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 19 Nov 2012 17:00:24 +0100 Subject: [PATCH 5/5] vtime: Warn if irqs aren't disabled on system time accounting APIs System time accounting APIs such as vtime_account_system() and vtime_account_idle() need to be irqsafe. Current callers include irq entry, exit and kvm, all of which have been checked against that requirement. Now it's better to grow that with an automatic check in case we have further callers or we missed something. 
Suggested-by: Steven Rostedt Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens --- arch/ia64/kernel/time.c | 2 ++ arch/powerpc/kernel/time.c | 2 ++ arch/s390/kernel/vtime.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index c9a7d2ebe089..b1995efbfd21 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -119,6 +119,8 @@ static cputime_t vtime_delta(struct task_struct *tsk) cputime_t delta_stime; __u64 now; + WARN_ON_ONCE(!irqs_disabled()); + now = ia64_get_itc(); delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 3486cfad4a63..b3b14352b05e 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -297,6 +297,8 @@ static u64 vtime_delta(struct task_struct *tsk, u64 now, nowscaled, deltascaled; u64 udelta, delta, user_scaled; + WARN_ON_ONCE(!irqs_disabled()); + now = mftb(); nowscaled = read_spurr(now); get_paca()->system_time += now - get_paca()->starttime; diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 7c6d861a1a40..e84b8b68444a 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -132,6 +132,8 @@ void vtime_account(struct task_struct *tsk) struct thread_info *ti = task_thread_info(tsk); u64 timer, system; + WARN_ON_ONCE(!irqs_disabled()); + timer = S390_lowcore.last_update_timer; S390_lowcore.last_update_timer = get_vtimer(); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;