Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull NOHZ updates from Ingo Molnar:
"NOHZ enhancements, by Frederic Weisbecker, which reorganize/refactor the NOHZ 'can the tick be stopped?' infrastructure and related code to be data driven, and harmonize the naming and handling of all the various properties"
[ This makes the ugly "fetch_or()" macro that the scheduler used internally a new generic helper, and does a bad job at it. I'm pulling it, but I've asked Ingo and Frederic to get this fixed up ]
* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched-clock: Migrate to use new tick dependency mask model
posix-cpu-timers: Migrate to use new tick dependency mask model
sched: Migrate sched to use new tick dependency mask model
sched: Account rr tasks
perf: Migrate perf to use new tick dependency mask model
nohz: Use enum code for tick stop failure tracing message
nohz: New tick dependency mask
nohz: Implement wide kick on top of irq work
atomic: Export fetch_or()
This commit is contained in:
commit
e23604edac
|
@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
|
|||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* fetch_or - perform *ptr |= mask and return old value of *ptr
|
||||
* @ptr: pointer to value
|
||||
* @mask: mask to OR on the value
|
||||
*
|
||||
* cmpxchg based fetch_or, macro so it works for different integer types
|
||||
*/
|
||||
#ifndef fetch_or
|
||||
#define fetch_or(ptr, mask) \
|
||||
({ typeof(*(ptr)) __old, __val = *(ptr); \
|
||||
for (;;) { \
|
||||
__old = cmpxchg((ptr), __val, __val | (mask)); \
|
||||
if (__old == __val) \
|
||||
break; \
|
||||
__val = __old; \
|
||||
} \
|
||||
__old; \
|
||||
})
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef CONFIG_GENERIC_ATOMIC64
|
||||
#include <asm-generic/atomic64.h>
|
||||
#endif
|
||||
|
|
|
@ -1110,12 +1110,6 @@ static inline void perf_event_task_tick(void) { }
|
|||
static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
|
||||
extern bool perf_event_can_stop_tick(void);
|
||||
#else
|
||||
static inline bool perf_event_can_stop_tick(void) { return true; }
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
|
||||
extern void perf_restore_debug_store(void);
|
||||
#else
|
||||
|
|
|
@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
|
|||
void run_posix_cpu_timers(struct task_struct *task);
|
||||
void posix_cpu_timers_exit(struct task_struct *task);
|
||||
void posix_cpu_timers_exit_group(struct task_struct *task);
|
||||
|
||||
bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
|
||||
|
||||
void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
|
||||
cputime_t *newval, cputime_t *oldval);
|
||||
|
||||
|
|
|
@ -717,6 +717,10 @@ struct signal_struct {
|
|||
/* Earliest-expiration cache. */
|
||||
struct task_cputime cputime_expires;
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
unsigned long tick_dep_mask;
|
||||
#endif
|
||||
|
||||
struct list_head cpu_timers[3];
|
||||
|
||||
struct pid *tty_old_pgrp;
|
||||
|
@ -1542,6 +1546,10 @@ struct task_struct {
|
|||
VTIME_SYS,
|
||||
} vtime_snap_whence;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
unsigned long tick_dep_mask;
|
||||
#endif
|
||||
unsigned long nvcsw, nivcsw; /* context switch counts */
|
||||
u64 start_time; /* monotonic time in nsec */
|
||||
u64 real_start_time; /* boot based time in nsec */
|
||||
|
@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
|
|||
#endif
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
extern bool sched_can_stop_tick(void);
|
||||
extern u64 scheduler_tick_max_deferment(void);
|
||||
#else
|
||||
static inline bool sched_can_stop_tick(void) { return false; }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_AUTOGROUP
|
||||
|
|
|
@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void)
|
|||
tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
|
||||
}
|
||||
|
||||
enum tick_dep_bits {
|
||||
TICK_DEP_BIT_POSIX_TIMER = 0,
|
||||
TICK_DEP_BIT_PERF_EVENTS = 1,
|
||||
TICK_DEP_BIT_SCHED = 2,
|
||||
TICK_DEP_BIT_CLOCK_UNSTABLE = 3
|
||||
};
|
||||
|
||||
#define TICK_DEP_MASK_NONE 0
|
||||
#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER)
|
||||
#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS)
|
||||
#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED)
|
||||
#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
|
||||
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
extern int tick_nohz_enabled;
|
||||
extern int tick_nohz_tick_stopped(void);
|
||||
|
@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void)
|
|||
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
|
||||
}
|
||||
|
||||
extern void tick_nohz_full_kick(void);
|
||||
extern void tick_nohz_dep_set(enum tick_dep_bits bit);
|
||||
extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
|
||||
extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
|
||||
extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
|
||||
extern void tick_nohz_dep_set_task(struct task_struct *tsk,
|
||||
enum tick_dep_bits bit);
|
||||
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
|
||||
enum tick_dep_bits bit);
|
||||
extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
|
||||
enum tick_dep_bits bit);
|
||||
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
|
||||
enum tick_dep_bits bit);
|
||||
|
||||
/*
|
||||
* The below are tick_nohz_dep_[set,clear]*() wrappers that optimize off-cases
|
||||
* on top of static keys.
|
||||
*/
|
||||
static inline void tick_dep_set(enum tick_dep_bits bit)
|
||||
{
|
||||
if (tick_nohz_full_enabled())
|
||||
tick_nohz_dep_set(bit);
|
||||
}
|
||||
|
||||
static inline void tick_dep_clear(enum tick_dep_bits bit)
|
||||
{
|
||||
if (tick_nohz_full_enabled())
|
||||
tick_nohz_dep_clear(bit);
|
||||
}
|
||||
|
||||
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
|
||||
{
|
||||
if (tick_nohz_full_cpu(cpu))
|
||||
tick_nohz_dep_set_cpu(cpu, bit);
|
||||
}
|
||||
|
||||
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
|
||||
{
|
||||
if (tick_nohz_full_cpu(cpu))
|
||||
tick_nohz_dep_clear_cpu(cpu, bit);
|
||||
}
|
||||
|
||||
static inline void tick_dep_set_task(struct task_struct *tsk,
|
||||
enum tick_dep_bits bit)
|
||||
{
|
||||
if (tick_nohz_full_enabled())
|
||||
tick_nohz_dep_set_task(tsk, bit);
|
||||
}
|
||||
static inline void tick_dep_clear_task(struct task_struct *tsk,
|
||||
enum tick_dep_bits bit)
|
||||
{
|
||||
if (tick_nohz_full_enabled())
|
||||
tick_nohz_dep_clear_task(tsk, bit);
|
||||
}
|
||||
static inline void tick_dep_set_signal(struct signal_struct *signal,
|
||||
enum tick_dep_bits bit)
|
||||
{
|
||||
if (tick_nohz_full_enabled())
|
||||
tick_nohz_dep_set_signal(signal, bit);
|
||||
}
|
||||
static inline void tick_dep_clear_signal(struct signal_struct *signal,
|
||||
enum tick_dep_bits bit)
|
||||
{
|
||||
if (tick_nohz_full_enabled())
|
||||
tick_nohz_dep_clear_signal(signal, bit);
|
||||
}
|
||||
|
||||
extern void tick_nohz_full_kick_cpu(int cpu);
|
||||
extern void tick_nohz_full_kick_all(void);
|
||||
extern void __tick_nohz_task_switch(void);
|
||||
#else
|
||||
static inline int housekeeping_any_cpu(void)
|
||||
|
@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void)
|
|||
static inline bool tick_nohz_full_enabled(void) { return false; }
|
||||
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
|
||||
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
|
||||
|
||||
static inline void tick_dep_set(enum tick_dep_bits bit) { }
|
||||
static inline void tick_dep_clear(enum tick_dep_bits bit) { }
|
||||
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
|
||||
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
|
||||
static inline void tick_dep_set_task(struct task_struct *tsk,
|
||||
enum tick_dep_bits bit) { }
|
||||
static inline void tick_dep_clear_task(struct task_struct *tsk,
|
||||
enum tick_dep_bits bit) { }
|
||||
static inline void tick_dep_set_signal(struct signal_struct *signal,
|
||||
enum tick_dep_bits bit) { }
|
||||
static inline void tick_dep_clear_signal(struct signal_struct *signal,
|
||||
enum tick_dep_bits bit) { }
|
||||
|
||||
static inline void tick_nohz_full_kick_cpu(int cpu) { }
|
||||
static inline void tick_nohz_full_kick(void) { }
|
||||
static inline void tick_nohz_full_kick_all(void) { }
|
||||
static inline void __tick_nohz_task_switch(void) { }
|
||||
#endif
|
||||
|
||||
|
|
|
@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
|
|||
);
|
||||
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
|
||||
#define TICK_DEP_NAMES \
|
||||
tick_dep_name(NONE) \
|
||||
tick_dep_name(POSIX_TIMER) \
|
||||
tick_dep_name(PERF_EVENTS) \
|
||||
tick_dep_name(SCHED) \
|
||||
tick_dep_name_end(CLOCK_UNSTABLE)
|
||||
|
||||
#undef tick_dep_name
|
||||
#undef tick_dep_name_end
|
||||
|
||||
#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
|
||||
#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
|
||||
|
||||
TICK_DEP_NAMES
|
||||
|
||||
#undef tick_dep_name
|
||||
#undef tick_dep_name_end
|
||||
|
||||
#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
|
||||
#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
|
||||
|
||||
#define show_tick_dep_name(val) \
|
||||
__print_symbolic(val, TICK_DEP_NAMES)
|
||||
|
||||
TRACE_EVENT(tick_stop,
|
||||
|
||||
TP_PROTO(int success, char *error_msg),
|
||||
TP_PROTO(int success, int dependency),
|
||||
|
||||
TP_ARGS(success, error_msg),
|
||||
TP_ARGS(success, dependency),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( int , success )
|
||||
__string( msg, error_msg )
|
||||
__field( int , dependency )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->success = success;
|
||||
__assign_str(msg, error_msg);
|
||||
__entry->dependency = dependency;
|
||||
),
|
||||
|
||||
TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg))
|
||||
TP_printk("success=%d dependency=%s", __entry->success, \
|
||||
show_tick_dep_name(__entry->dependency))
|
||||
);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -3112,17 +3112,6 @@ done:
|
|||
return rotate;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
bool perf_event_can_stop_tick(void)
|
||||
{
|
||||
if (atomic_read(&nr_freq_events) ||
|
||||
__this_cpu_read(perf_throttled_count))
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void perf_event_task_tick(void)
|
||||
{
|
||||
struct list_head *head = this_cpu_ptr(&active_ctx_list);
|
||||
|
@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)
|
|||
|
||||
__this_cpu_inc(perf_throttled_seq);
|
||||
throttled = __this_cpu_xchg(perf_throttled_count, 0);
|
||||
tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
|
||||
|
||||
list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
|
||||
perf_adjust_freq_unthr_context(ctx, throttled);
|
||||
|
@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
|
|||
atomic_dec(&per_cpu(perf_cgroup_events, cpu));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
static DEFINE_SPINLOCK(nr_freq_lock);
|
||||
#endif
|
||||
|
||||
static void unaccount_freq_event_nohz(void)
|
||||
{
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
spin_lock(&nr_freq_lock);
|
||||
if (atomic_dec_and_test(&nr_freq_events))
|
||||
tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
|
||||
spin_unlock(&nr_freq_lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void unaccount_freq_event(void)
|
||||
{
|
||||
if (tick_nohz_full_enabled())
|
||||
unaccount_freq_event_nohz();
|
||||
else
|
||||
atomic_dec(&nr_freq_events);
|
||||
}
|
||||
|
||||
static void unaccount_event(struct perf_event *event)
|
||||
{
|
||||
bool dec = false;
|
||||
|
@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
|
|||
if (event->attr.task)
|
||||
atomic_dec(&nr_task_events);
|
||||
if (event->attr.freq)
|
||||
atomic_dec(&nr_freq_events);
|
||||
unaccount_freq_event();
|
||||
if (event->attr.context_switch) {
|
||||
dec = true;
|
||||
atomic_dec(&nr_switch_events);
|
||||
|
@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
|
|||
if (unlikely(throttle
|
||||
&& hwc->interrupts >= max_samples_per_tick)) {
|
||||
__this_cpu_inc(perf_throttled_count);
|
||||
tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
|
||||
hwc->interrupts = MAX_INTERRUPTS;
|
||||
perf_log_throttle(event, 0);
|
||||
tick_nohz_full_kick();
|
||||
ret = 1;
|
||||
}
|
||||
}
|
||||
|
@ -7815,6 +7827,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
|
|||
atomic_inc(&per_cpu(perf_cgroup_events, cpu));
|
||||
}
|
||||
|
||||
/* Freq events need the tick to stay alive (see perf_event_task_tick). */
|
||||
static void account_freq_event_nohz(void)
|
||||
{
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
/* Lock so we don't race with concurrent unaccount */
|
||||
spin_lock(&nr_freq_lock);
|
||||
if (atomic_inc_return(&nr_freq_events) == 1)
|
||||
tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
|
||||
spin_unlock(&nr_freq_lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void account_freq_event(void)
|
||||
{
|
||||
if (tick_nohz_full_enabled())
|
||||
account_freq_event_nohz();
|
||||
else
|
||||
atomic_inc(&nr_freq_events);
|
||||
}
|
||||
|
||||
|
||||
static void account_event(struct perf_event *event)
|
||||
{
|
||||
bool inc = false;
|
||||
|
@ -7830,10 +7863,8 @@ static void account_event(struct perf_event *event)
|
|||
atomic_inc(&nr_comm_events);
|
||||
if (event->attr.task)
|
||||
atomic_inc(&nr_task_events);
|
||||
if (event->attr.freq) {
|
||||
if (atomic_inc_return(&nr_freq_events) == 1)
|
||||
tick_nohz_full_kick_all();
|
||||
}
|
||||
if (event->attr.freq)
|
||||
account_freq_event();
|
||||
if (event->attr.context_switch) {
|
||||
atomic_inc(&nr_switch_events);
|
||||
inc = true;
|
||||
|
|
|
@ -61,6 +61,7 @@
|
|||
#include <linux/static_key.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/tick.h>
|
||||
|
||||
/*
|
||||
* Scheduler clock - returns current time in nanosec units.
|
||||
|
@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void)
|
|||
{
|
||||
if (!sched_clock_stable())
|
||||
static_key_slow_inc(&__sched_clock_stable);
|
||||
|
||||
tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
|
||||
}
|
||||
|
||||
void set_sched_clock_stable(void)
|
||||
|
@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work)
|
|||
/* XXX worry about clock continuity */
|
||||
if (sched_clock_stable())
|
||||
static_key_slow_dec(&__sched_clock_stable);
|
||||
|
||||
tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
|
||||
}
|
||||
|
||||
static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
|
||||
|
|
|
@ -320,20 +320,6 @@ static inline void init_hrtick(void)
|
|||
}
|
||||
#endif /* CONFIG_SCHED_HRTICK */
|
||||
|
||||
/*
|
||||
* cmpxchg based fetch_or, macro so it works for different integer types
|
||||
*/
|
||||
#define fetch_or(ptr, val) \
|
||||
({ typeof(*(ptr)) __old, __val = *(ptr); \
|
||||
for (;;) { \
|
||||
__old = cmpxchg((ptr), __val, __val | (val)); \
|
||||
if (__old == __val) \
|
||||
break; \
|
||||
__val = __old; \
|
||||
} \
|
||||
__old; \
|
||||
})
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
|
||||
/*
|
||||
* Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
|
||||
|
@ -582,31 +568,36 @@ static inline bool got_nohz_idle_kick(void)
|
|||
#endif /* CONFIG_NO_HZ_COMMON */
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
bool sched_can_stop_tick(void)
|
||||
bool sched_can_stop_tick(struct rq *rq)
|
||||
{
|
||||
int fifo_nr_running;
|
||||
|
||||
/* Deadline tasks, even if single, need the tick */
|
||||
if (rq->dl.dl_nr_running)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* FIFO realtime policy runs the highest priority task. Other runnable
|
||||
* tasks are of a lower priority. The scheduler tick does nothing.
|
||||
* FIFO realtime policy runs the highest priority task (after DEADLINE).
|
||||
* Other runnable tasks are of a lower priority. The scheduler tick
|
||||
* isn't needed.
|
||||
*/
|
||||
if (current->policy == SCHED_FIFO)
|
||||
fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
|
||||
if (fifo_nr_running)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Round-robin realtime tasks time slice with other tasks at the same
|
||||
* realtime priority. Is this task the only one at this priority?
|
||||
* realtime priority.
|
||||
*/
|
||||
if (current->policy == SCHED_RR) {
|
||||
struct sched_rt_entity *rt_se = ¤t->rt;
|
||||
|
||||
return list_is_singular(&rt_se->run_list);
|
||||
if (rq->rt.rr_nr_running) {
|
||||
if (rq->rt.rr_nr_running == 1)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* More than one running task need preemption.
|
||||
* nr_running update is assumed to be visible
|
||||
* after IPI is sent from wakers.
|
||||
*/
|
||||
if (this_rq()->nr_running > 1)
|
||||
/* Normal multitasking needs periodic preemption checks */
|
||||
if (rq->cfs.nr_running > 1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
|
|
@ -1149,6 +1149,20 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
|
||||
{
|
||||
struct rt_rq *group_rq = group_rt_rq(rt_se);
|
||||
struct task_struct *tsk;
|
||||
|
||||
if (group_rq)
|
||||
return group_rq->rr_nr_running;
|
||||
|
||||
tsk = rt_task_of(rt_se);
|
||||
|
||||
return (tsk->policy == SCHED_RR) ? 1 : 0;
|
||||
}
|
||||
|
||||
static inline
|
||||
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
{
|
||||
|
@ -1156,6 +1170,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
|||
|
||||
WARN_ON(!rt_prio(prio));
|
||||
rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
|
||||
rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
|
||||
|
||||
inc_rt_prio(rt_rq, prio);
|
||||
inc_rt_migration(rt_se, rt_rq);
|
||||
|
@ -1168,6 +1183,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
|||
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
|
||||
WARN_ON(!rt_rq->rt_nr_running);
|
||||
rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
|
||||
rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
|
||||
|
||||
dec_rt_prio(rt_rq, rt_se_prio(rt_se));
|
||||
dec_rt_migration(rt_se, rt_rq);
|
||||
|
|
|
@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void)
|
|||
struct rt_rq {
|
||||
struct rt_prio_array active;
|
||||
unsigned int rt_nr_running;
|
||||
unsigned int rr_nr_running;
|
||||
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
|
||||
struct {
|
||||
int curr; /* highest queued rt task prio */
|
||||
|
@ -1313,6 +1314,35 @@ unsigned long to_ratio(u64 period, u64 runtime);
|
|||
|
||||
extern void init_entity_runnable_average(struct sched_entity *se);
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
extern bool sched_can_stop_tick(struct rq *rq);
|
||||
|
||||
/*
|
||||
* Tick may be needed by tasks in the runqueue depending on their policy and
|
||||
* requirements. If tick is needed, let's send the target an IPI to kick it out of
|
||||
* nohz mode if necessary.
|
||||
*/
|
||||
static inline void sched_update_tick_dependency(struct rq *rq)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (!tick_nohz_full_enabled())
|
||||
return;
|
||||
|
||||
cpu = cpu_of(rq);
|
||||
|
||||
if (!tick_nohz_full_cpu(cpu))
|
||||
return;
|
||||
|
||||
if (sched_can_stop_tick(rq))
|
||||
tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
|
||||
else
|
||||
tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
|
||||
}
|
||||
#else
|
||||
static inline void sched_update_tick_dependency(struct rq *rq) { }
|
||||
#endif
|
||||
|
||||
static inline void add_nr_running(struct rq *rq, unsigned count)
|
||||
{
|
||||
unsigned prev_nr = rq->nr_running;
|
||||
|
@ -1324,26 +1354,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
|
|||
if (!rq->rd->overload)
|
||||
rq->rd->overload = true;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
if (tick_nohz_full_cpu(rq->cpu)) {
|
||||
/*
|
||||
* Tick is needed if more than one task runs on a CPU.
|
||||
* Send the target an IPI to kick it out of nohz mode.
|
||||
*
|
||||
* We assume that IPI implies full memory barrier and the
|
||||
* new value of rq->nr_running is visible on reception
|
||||
* from the target.
|
||||
*/
|
||||
tick_nohz_full_kick_cpu(rq->cpu);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
sched_update_tick_dependency(rq);
|
||||
}
|
||||
|
||||
static inline void sub_nr_running(struct rq *rq, unsigned count)
|
||||
{
|
||||
rq->nr_running -= count;
|
||||
/* Check if we still need preemption */
|
||||
sched_update_tick_dependency(rq);
|
||||
}
|
||||
|
||||
static inline void rq_last_tick_reset(struct rq *rq)
|
||||
|
|
|
@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
|
|||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
|
||||
* This is called from sys_timer_create() and do_cpu_nanosleep() with the
|
||||
|
@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer)
|
|||
cputime_expires->sched_exp = exp;
|
||||
break;
|
||||
}
|
||||
if (CPUCLOCK_PERTHREAD(timer->it_clock))
|
||||
tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
|
||||
else
|
||||
tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
|
|||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
static void nohz_kick_work_fn(struct work_struct *work)
|
||||
{
|
||||
tick_nohz_full_kick_all();
|
||||
}
|
||||
|
||||
static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
|
||||
|
||||
/*
|
||||
* We need the IPIs to be sent from sane process context.
|
||||
* The posix cpu timers are always set with irqs disabled.
|
||||
*/
|
||||
static void posix_cpu_timer_kick_nohz(void)
|
||||
{
|
||||
if (context_tracking_is_enabled())
|
||||
schedule_work(&nohz_kick_work);
|
||||
}
|
||||
|
||||
bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
|
||||
{
|
||||
if (!task_cputime_zero(&tsk->cputime_expires))
|
||||
return false;
|
||||
|
||||
/* Check if cputimer is running. This is accessed without locking. */
|
||||
if (READ_ONCE(tsk->signal->cputimer.running))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
static inline void posix_cpu_timer_kick_nohz(void) { }
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Guts of sys_timer_settime for CPU timers.
|
||||
* This is called with the timer locked and interrupts disabled.
|
||||
|
@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
|
|||
sample_to_timespec(timer->it_clock,
|
||||
old_incr, &old->it_interval);
|
||||
}
|
||||
if (!ret)
|
||||
posix_cpu_timer_kick_nohz();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk,
|
|||
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
|
||||
}
|
||||
}
|
||||
if (task_cputime_zero(tsk_expires))
|
||||
tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
|
||||
}
|
||||
|
||||
static inline void stop_process_timers(struct signal_struct *sig)
|
||||
|
@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
|
|||
|
||||
/* Turn off cputimer->running. This is done without locking. */
|
||||
WRITE_ONCE(cputimer->running, false);
|
||||
tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
|
||||
}
|
||||
|
||||
static u32 onecputick;
|
||||
|
@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
|
|||
arm_timer(timer);
|
||||
unlock_task_sighand(p, &flags);
|
||||
|
||||
/* Kick full dynticks CPUs in case they need to tick on the new timer */
|
||||
posix_cpu_timer_kick_nohz();
|
||||
out:
|
||||
timer->it_overrun_last = timer->it_overrun;
|
||||
timer->it_overrun = -1;
|
||||
|
@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
|
|||
}
|
||||
|
||||
if (!*newval)
|
||||
goto out;
|
||||
return;
|
||||
*newval += now;
|
||||
}
|
||||
|
||||
|
@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
|
|||
tsk->signal->cputime_expires.virt_exp = *newval;
|
||||
break;
|
||||
}
|
||||
out:
|
||||
posix_cpu_timer_kick_nohz();
|
||||
|
||||
tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
|
||||
}
|
||||
|
||||
static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#include <linux/module.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/posix-timers.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/context_tracking.h>
|
||||
|
||||
#include <asm/irq_regs.h>
|
||||
|
@ -158,54 +157,63 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
|
|||
cpumask_var_t tick_nohz_full_mask;
|
||||
cpumask_var_t housekeeping_mask;
|
||||
bool tick_nohz_full_running;
|
||||
static unsigned long tick_dep_mask;
|
||||
|
||||
static bool can_stop_full_tick(void)
|
||||
static void trace_tick_dependency(unsigned long dep)
|
||||
{
|
||||
if (dep & TICK_DEP_MASK_POSIX_TIMER) {
|
||||
trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
|
||||
return;
|
||||
}
|
||||
|
||||
if (dep & TICK_DEP_MASK_PERF_EVENTS) {
|
||||
trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
|
||||
return;
|
||||
}
|
||||
|
||||
if (dep & TICK_DEP_MASK_SCHED) {
|
||||
trace_tick_stop(0, TICK_DEP_MASK_SCHED);
|
||||
return;
|
||||
}
|
||||
|
||||
if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
|
||||
trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
|
||||
}
|
||||
|
||||
static bool can_stop_full_tick(struct tick_sched *ts)
|
||||
{
|
||||
WARN_ON_ONCE(!irqs_disabled());
|
||||
|
||||
if (!sched_can_stop_tick()) {
|
||||
trace_tick_stop(0, "more than 1 task in runqueue\n");
|
||||
if (tick_dep_mask) {
|
||||
trace_tick_dependency(tick_dep_mask);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!posix_cpu_timers_can_stop_tick(current)) {
|
||||
trace_tick_stop(0, "posix timers running\n");
|
||||
if (ts->tick_dep_mask) {
|
||||
trace_tick_dependency(ts->tick_dep_mask);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!perf_event_can_stop_tick()) {
|
||||
trace_tick_stop(0, "perf events running\n");
|
||||
if (current->tick_dep_mask) {
|
||||
trace_tick_dependency(current->tick_dep_mask);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* sched_clock_tick() needs us? */
|
||||
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
|
||||
/*
|
||||
* TODO: kick full dynticks CPUs when
|
||||
* sched_clock_stable is set.
|
||||
*/
|
||||
if (!sched_clock_stable()) {
|
||||
trace_tick_stop(0, "unstable sched clock\n");
|
||||
/*
|
||||
* Don't allow the user to think they can get
|
||||
* full NO_HZ with this machine.
|
||||
*/
|
||||
WARN_ONCE(tick_nohz_full_running,
|
||||
"NO_HZ FULL will not work with unstable sched clock");
|
||||
if (current->signal->tick_dep_mask) {
|
||||
trace_tick_dependency(current->signal->tick_dep_mask);
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void nohz_full_kick_work_func(struct irq_work *work)
|
||||
static void nohz_full_kick_func(struct irq_work *work)
|
||||
{
|
||||
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
|
||||
.func = nohz_full_kick_work_func,
|
||||
.func = nohz_full_kick_func,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -214,7 +222,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
|
|||
* This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
|
||||
* is NMI safe.
|
||||
*/
|
||||
void tick_nohz_full_kick(void)
|
||||
static void tick_nohz_full_kick(void)
|
||||
{
|
||||
if (!tick_nohz_full_cpu(smp_processor_id()))
|
||||
return;
|
||||
|
@ -234,27 +242,112 @@ void tick_nohz_full_kick_cpu(int cpu)
|
|||
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
|
||||
}
|
||||
|
||||
static void nohz_full_kick_ipi(void *info)
|
||||
{
|
||||
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
|
||||
}
|
||||
|
||||
/*
|
||||
* Kick all full dynticks CPUs in order to force these to re-evaluate
|
||||
* their dependency on the tick and restart it if necessary.
|
||||
*/
|
||||
void tick_nohz_full_kick_all(void)
|
||||
static void tick_nohz_full_kick_all(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (!tick_nohz_full_running)
|
||||
return;
|
||||
|
||||
preempt_disable();
|
||||
smp_call_function_many(tick_nohz_full_mask,
|
||||
nohz_full_kick_ipi, NULL, false);
|
||||
tick_nohz_full_kick();
|
||||
for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
|
||||
tick_nohz_full_kick_cpu(cpu);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void tick_nohz_dep_set_all(unsigned long *dep,
|
||||
enum tick_dep_bits bit)
|
||||
{
|
||||
unsigned long prev;
|
||||
|
||||
prev = fetch_or(dep, BIT_MASK(bit));
|
||||
if (!prev)
|
||||
tick_nohz_full_kick_all();
|
||||
}
|
||||
|
||||
/*
|
||||
* Set a global tick dependency. Used by perf events that rely on freq and
|
||||
* by unstable clock.
|
||||
*/
|
||||
void tick_nohz_dep_set(enum tick_dep_bits bit)
|
||||
{
|
||||
tick_nohz_dep_set_all(&tick_dep_mask, bit);
|
||||
}
|
||||
|
||||
void tick_nohz_dep_clear(enum tick_dep_bits bit)
|
||||
{
|
||||
clear_bit(bit, &tick_dep_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set per-CPU tick dependency. Used by scheduler and perf events in order to
|
||||
* manage events throttling.
|
||||
*/
|
||||
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
|
||||
{
|
||||
unsigned long prev;
|
||||
struct tick_sched *ts;
|
||||
|
||||
ts = per_cpu_ptr(&tick_cpu_sched, cpu);
|
||||
|
||||
prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
|
||||
if (!prev) {
|
||||
preempt_disable();
|
||||
/* Perf needs local kick that is NMI safe */
|
||||
if (cpu == smp_processor_id()) {
|
||||
tick_nohz_full_kick();
|
||||
} else {
|
||||
/* Remote irq work not NMI-safe */
|
||||
if (!WARN_ON_ONCE(in_nmi()))
|
||||
tick_nohz_full_kick_cpu(cpu);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
}
|
||||
|
||||
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
|
||||
{
|
||||
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
|
||||
|
||||
clear_bit(bit, &ts->tick_dep_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set a per-task tick dependency. Posix CPU timers need this in order to elapse
|
||||
* per task timers.
|
||||
*/
|
||||
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
|
||||
{
|
||||
/*
|
||||
* We could optimize this with just kicking the target running the task
|
||||
* if that noise matters for nohz full users.
|
||||
*/
|
||||
tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
|
||||
}
|
||||
|
||||
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
|
||||
{
|
||||
clear_bit(bit, &tsk->tick_dep_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
|
||||
* per process timers.
|
||||
*/
|
||||
void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
|
||||
{
|
||||
tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
|
||||
}
|
||||
|
||||
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
|
||||
{
|
||||
clear_bit(bit, &sig->tick_dep_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Re-evaluate the need for the tick as we switch the current task.
|
||||
* It might need the tick due to per task/process properties:
|
||||
|
@ -263,15 +356,19 @@ void tick_nohz_full_kick_all(void)
|
|||
void __tick_nohz_task_switch(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct tick_sched *ts;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
if (!tick_nohz_full_cpu(smp_processor_id()))
|
||||
goto out;
|
||||
|
||||
if (tick_nohz_tick_stopped() && !can_stop_full_tick())
|
||||
tick_nohz_full_kick();
|
||||
ts = this_cpu_ptr(&tick_cpu_sched);
|
||||
|
||||
if (ts->tick_stopped) {
|
||||
if (current->tick_dep_mask || current->signal->tick_dep_mask)
|
||||
tick_nohz_full_kick();
|
||||
}
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
@ -689,7 +786,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
|
|||
|
||||
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
|
||||
ts->tick_stopped = 1;
|
||||
trace_tick_stop(1, " ");
|
||||
trace_tick_stop(1, TICK_DEP_MASK_NONE);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -740,7 +837,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
|
|||
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
|
||||
return;
|
||||
|
||||
if (can_stop_full_tick())
|
||||
if (can_stop_full_tick(ts))
|
||||
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
|
||||
else if (ts->tick_stopped)
|
||||
tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
|
||||
|
|
|
@ -60,6 +60,7 @@ struct tick_sched {
|
|||
u64 next_timer;
|
||||
ktime_t idle_expires;
|
||||
int do_timer_last;
|
||||
unsigned long tick_dep_mask;
|
||||
};
|
||||
|
||||
extern struct tick_sched *tick_get_tick_sched(int cpu);
|
||||
|
|
Loading…
Reference in New Issue