- A bunch of fixes: forced idle time accounting, utilization values
  propagation in the sched hierarchies and other minor cleanups and
  improvements
-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmHtNkcACgkQEsHwGGHe
VUru2xAAq2sJYOjb3AFQQskKDMjUqY42+Z2LnFk+zbv/2NfXPG17lGRNl8zIFWgK
en+RguHOnBDo4Lc4qcx06k02gmZmSA7YonLJVYtT/N1mwsW6zkW0wDho/W3+ssU5
5fJEFSd/y9XmoFOyFj7k+POND/Prk/sguxYcYDRMwjdw4pZoDZ4WgPU3oS3PCiBk
ISua8zqxNC+kqSnlKzDbc23K22mdcsneW/aLFK7npyaKqzypy9IvqaBL6h8tyOgb
Q7jOBavUQwmfi/J5A39JgUrYs90gMuQKMJ0wxWrix+YCgvdRLCX3gcWBvdxHwlmm
KkxmWmM3iGO4qKXUDmmTt8e8GO1c0HgR7tBiVKkG2977fIojLGXTXwZKjIz/gn7f
wg3oltKWj2JZ7X3Z3Te4TDjtWSfibUkUHhrVlm94HgZL9ZiFFY+qigBTUoa/QVAf
q1nkk/acpSDAKY2CGcjeQZtkuIcfz+5Z94n07NsV4O8OriwkEOgVWGGXkky3687C
/woT4a3iIeqiFzSQ8raJq0bdMj3J+wpDe4gmjKmx7oPjiS7FzsyGc8HckwQtiOQ3
kGTTB+9zJS9ChWEk2ViQQgNOUUaJJjAwsBoYkRQakFnQ4AhvQKHmD+MS02vSPBD7
j3k3RPkO0Gm+gUBnkgyKSRTQpAcoVY0lBwttJoEr0IlA/MUWMJ0=
=4m7x
-----END PGP SIGNATURE-----

Merge tag 'sched_urgent_for_v5.17_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Borislav Petkov:
 "A bunch of fixes: forced idle time accounting, utilization values
  propagation in the sched hierarchies and other minor cleanups and
  improvements"

* tag 'sched_urgent_for_v5.17_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  kernel/sched: Remove dl_boosted flag comment
  sched: Avoid double preemption in __cond_resched_*lock*()
  sched/fair: Fix all kernel-doc warnings
  sched/core: Accounting forceidle time for all tasks except idle task
  sched/pelt: Relax the sync of load_sum with load_avg
  sched/pelt: Relax the sync of runnable_sum with runnable_avg
  sched/pelt: Continue to relax the sync of util_sum with util_avg
  sched/pelt: Relax the sync of util_sum with util_avg
  psi: Fix uaf issue when psi trigger is destroyed while being polled
commit 10c64a0f28
@@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger
 for the same psi metric can be specified. However for each trigger a separate
 file descriptor is required to be able to poll it separately from others,
 therefore for each trigger a separate open() syscall should be made even
-when opening the same psi interface file.
+when opening the same psi interface file. Write operations to a file descriptor
+with an already existing psi trigger will fail with EBUSY.
 
 Monitors activate only when system enters stall state for the monitored
 psi metric and deactivates upon exit from the stall state. While system is
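The documentation hunk above describes the usage model that the psi fix in this pull enforces: one trigger per file descriptor, set up with open() plus a single write(), then consumed with poll(). As a hedged illustration (not part of this patch set; the trigger string and pressure file follow the psi documentation's monitor example), a minimal userspace sketch of that flow could look like this; a second write() on the same descriptor would now fail with EBUSY:

/* Illustrative only: monitor memory stall via a psi trigger. */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char trig[] = "some 150000 1000000";	/* 150ms of stall per 1s window */
	struct pollfd fds;
	int fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);

	if (fd < 0)
		return 1;
	if (write(fd, trig, strlen(trig) + 1) < 0)
		return 1;
	/* Writing a second trigger to the same fd now returns -1 with errno EBUSY. */

	fds.fd = fd;
	fds.events = POLLPRI;
	while (poll(&fds, 1, -1) >= 0) {
		if (fds.revents & POLLPRI)
			printf("memory pressure event\n");
		if (fds.revents & POLLERR)
			break;		/* the psi interface went away */
	}
	close(fd);
	return 0;
}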
@@ -33,7 +33,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to);
 
 struct psi_trigger *psi_trigger_create(struct psi_group *group,
 			char *buf, size_t nbytes, enum psi_res res);
-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
+void psi_trigger_destroy(struct psi_trigger *t);
 
 __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
 			poll_table *wait);
@@ -141,9 +141,6 @@ struct psi_trigger {
 	 * events to one per window
 	 */
 	u64 last_event_time;
-
-	/* Refcounting to prevent premature destruction */
-	struct kref refcount;
 };
 
 struct psi_group {
@@ -619,10 +619,6 @@ struct sched_dl_entity {
 	 * task has to wait for a replenishment to be performed at the
 	 * next firing of dl_timer.
 	 *
-	 * @dl_boosted tells if we are boosted due to DI. If so we are
-	 * outside bandwidth enforcement mechanism (but only until we
-	 * exit the critical section);
-	 *
 	 * @dl_yielded tells if task gave up the CPU before consuming
 	 * all its available runtime during the last job.
 	 *
@@ -3643,6 +3643,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
 	cgroup_get(cgrp);
 	cgroup_kn_unlock(of->kn);
 
+	/* Allow only one trigger per file descriptor */
+	if (ctx->psi.trigger) {
+		cgroup_put(cgrp);
+		return -EBUSY;
+	}
+
 	psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
 	new = psi_trigger_create(psi, buf, nbytes, res);
 	if (IS_ERR(new)) {
@@ -3650,8 +3656,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
 		return PTR_ERR(new);
 	}
 
-	psi_trigger_replace(&ctx->psi.trigger, new);
-
+	smp_store_release(&ctx->psi.trigger, new);
 	cgroup_put(cgrp);
 
 	return nbytes;
@@ -3690,7 +3695,7 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
 {
 	struct cgroup_file_ctx *ctx = of->priv;
 
-	psi_trigger_replace(&ctx->psi.trigger, NULL);
+	psi_trigger_destroy(ctx->psi.trigger);
 }
 
 bool cgroup_psi_enabled(void)
@@ -5822,8 +5822,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	}
 
 	if (schedstat_enabled() && rq->core->core_forceidle_count) {
-		if (cookie)
-			rq->core->core_forceidle_start = rq_clock(rq->core);
+		rq->core->core_forceidle_start = rq_clock(rq->core);
 		rq->core->core_forceidle_occupation = occ;
 	}
 
@@ -8219,9 +8218,7 @@ int __cond_resched_lock(spinlock_t *lock)
 
 	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
-		if (resched)
-			preempt_schedule_common();
-		else
+		if (!_cond_resched())
 			cpu_relax();
 		ret = 1;
 		spin_lock(lock);
@@ -8239,9 +8236,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock)
 
 	if (rwlock_needbreak(lock) || resched) {
 		read_unlock(lock);
-		if (resched)
-			preempt_schedule_common();
-		else
+		if (!_cond_resched())
 			cpu_relax();
 		ret = 1;
 		read_lock(lock);
@@ -8259,9 +8254,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock)
 
 	if (rwlock_needbreak(lock) || resched) {
 		write_unlock(lock);
-		if (resched)
-			preempt_schedule_common();
-		else
+		if (!_cond_resched())
 			cpu_relax();
 		ret = 1;
 		write_lock(lock);
@@ -277,7 +277,7 @@ void __sched_core_account_forceidle(struct rq *rq)
 		rq_i = cpu_rq(i);
 		p = rq_i->core_pick ?: rq_i->curr;
 
-		if (!p->core_cookie)
+		if (p == rq_i->idle)
 			continue;
 
 		__schedstat_add(p->stats.core_forceidle_sum, delta);
@@ -3028,9 +3028,11 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static inline void
 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	u32 divider = get_pelt_divider(&se->avg);
 	sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
-	cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+	sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+					  cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
 }
 #else
 static inline void
@@ -3381,7 +3383,6 @@ void set_task_rq_fair(struct sched_entity *se,
 	se->avg.last_update_time = n_last_update_time;
 }
 
-
 /*
  * When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
  * propagate its contribution. The key to this propagation is the invariant
@@ -3449,15 +3450,14 @@ void set_task_rq_fair(struct sched_entity *se,
  * XXX: only do this for the part of runnable > running ?
  *
  */
-
 static inline void
 update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
-	u32 divider;
+	long delta_sum, delta_avg = gcfs_rq->avg.util_avg - se->avg.util_avg;
+	u32 new_sum, divider;
 
 	/* Nothing to update */
-	if (!delta)
+	if (!delta_avg)
 		return;
 
 	/*
@@ -3466,23 +3466,30 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	 */
 	divider = get_pelt_divider(&cfs_rq->avg);
 
+
 	/* Set new sched_entity's utilization */
 	se->avg.util_avg = gcfs_rq->avg.util_avg;
-	se->avg.util_sum = se->avg.util_avg * divider;
+	new_sum = se->avg.util_avg * divider;
+	delta_sum = (long)new_sum - (long)se->avg.util_sum;
+	se->avg.util_sum = new_sum;
 
 	/* Update parent cfs_rq utilization */
-	add_positive(&cfs_rq->avg.util_avg, delta);
-	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+	add_positive(&cfs_rq->avg.util_avg, delta_avg);
+	add_positive(&cfs_rq->avg.util_sum, delta_sum);
+
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+					  cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
 }
 
 static inline void
 update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
-	u32 divider;
+	long delta_sum, delta_avg = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
+	u32 new_sum, divider;
 
 	/* Nothing to update */
-	if (!delta)
+	if (!delta_avg)
 		return;
 
 	/*
@@ -3493,19 +3500,25 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
 
 	/* Set new sched_entity's runnable */
 	se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
-	se->avg.runnable_sum = se->avg.runnable_avg * divider;
+	new_sum = se->avg.runnable_avg * divider;
+	delta_sum = (long)new_sum - (long)se->avg.runnable_sum;
+	se->avg.runnable_sum = new_sum;
 
 	/* Update parent cfs_rq runnable */
-	add_positive(&cfs_rq->avg.runnable_avg, delta);
-	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+	add_positive(&cfs_rq->avg.runnable_avg, delta_avg);
+	add_positive(&cfs_rq->avg.runnable_sum, delta_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+					      cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
 }
 
 static inline void
 update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+	long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
 	unsigned long load_avg;
 	u64 load_sum = 0;
+	s64 delta_sum;
 	u32 divider;
 
 	if (!runnable_sum)
@@ -3532,7 +3545,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	 * assuming all tasks are equally runnable.
 	 */
 	if (scale_load_down(gcfs_rq->load.weight)) {
-		load_sum = div_s64(gcfs_rq->avg.load_sum,
+		load_sum = div_u64(gcfs_rq->avg.load_sum,
 			scale_load_down(gcfs_rq->load.weight));
 	}
 
@@ -3549,19 +3562,22 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
 	runnable_sum = max(runnable_sum, running_sum);
 
-	load_sum = (s64)se_weight(se) * runnable_sum;
-	load_avg = div_s64(load_sum, divider);
+	load_sum = se_weight(se) * runnable_sum;
+	load_avg = div_u64(load_sum, divider);
 
-	se->avg.load_sum = runnable_sum;
-
-	delta = load_avg - se->avg.load_avg;
-	if (!delta)
+	delta_avg = load_avg - se->avg.load_avg;
+	if (!delta_avg)
 		return;
 
-	se->avg.load_avg = load_avg;
+	delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
 
-	add_positive(&cfs_rq->avg.load_avg, delta);
-	cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+	se->avg.load_sum = runnable_sum;
+	se->avg.load_avg = load_avg;
+	add_positive(&cfs_rq->avg.load_avg, delta_avg);
+	add_positive(&cfs_rq->avg.load_sum, delta_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+					  cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
 }
 
 static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
@@ -3652,7 +3668,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
 *
 * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
 *
-* Returns true if the load decayed or we removed load.
+* Return: true if the load decayed or we removed load.
 *
 * Since both these conditions indicate a changed cfs_rq->avg.load we should
 * call update_tg_load_avg() when this function returns true.
@@ -3677,15 +3693,32 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 
 		r = removed_load;
 		sub_positive(&sa->load_avg, r);
-		sa->load_sum = sa->load_avg * divider;
+		sub_positive(&sa->load_sum, r * divider);
+		/* See sa->util_sum below */
+		sa->load_sum = max_t(u32, sa->load_sum, sa->load_avg * PELT_MIN_DIVIDER);
 
 		r = removed_util;
 		sub_positive(&sa->util_avg, r);
-		sa->util_sum = sa->util_avg * divider;
+		sub_positive(&sa->util_sum, r * divider);
+		/*
+		 * Because of rounding, se->util_sum might ends up being +1 more than
+		 * cfs->util_sum. Although this is not a problem by itself, detaching
+		 * a lot of tasks with the rounding problem between 2 updates of
+		 * util_avg (~1ms) can make cfs->util_sum becoming null whereas
+		 * cfs_util_avg is not.
+		 * Check that util_sum is still above its lower bound for the new
+		 * util_avg. Given that period_contrib might have moved since the last
+		 * sync, we are only sure that util_sum must be above or equal to
+		 *	util_avg * minimum possible divider
+		 */
+		sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);
 
 		r = removed_runnable;
 		sub_positive(&sa->runnable_avg, r);
-		sa->runnable_sum = sa->runnable_avg * divider;
+		sub_positive(&sa->runnable_sum, r * divider);
+		/* See sa->util_sum above */
+		sa->runnable_sum = max_t(u32, sa->runnable_sum,
+					      sa->runnable_avg * PELT_MIN_DIVIDER);
 
 		/*
 		 * removed_runnable is the unweighted version of removed_load so we
@@ -3772,17 +3805,18 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 */
 static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	/*
-	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
-	 * See ___update_load_avg() for details.
-	 */
-	u32 divider = get_pelt_divider(&cfs_rq->avg);
 
 	dequeue_load_avg(cfs_rq, se);
 	sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
-	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+	sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+					  cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
 
 	sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
-	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+	sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+					      cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
 
 	add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
@@ -8539,6 +8573,8 @@ group_type group_classify(unsigned int imbalance_pct,
 *
 * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
 * of @dst_cpu are idle and @sg has lower priority.
+*
+* Return: true if @dst_cpu can pull tasks, false otherwise.
 */
 static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
 				    struct sg_lb_stats *sgs,
@@ -8614,6 +8650,7 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
 /**
 * update_sg_lb_stats - Update sched_group's statistics for load balancing.
 * @env: The load balancing environment.
+* @sds: Load-balancing data with statistics of the local group.
 * @group: sched_group whose statistics are to be updated.
 * @sgs: variable to hold the statistics for this group.
 * @sg_status: Holds flag indicating the status of the sched_group
@@ -9421,12 +9458,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 /**
 * find_busiest_group - Returns the busiest group within the sched_domain
 * if there is an imbalance.
+* @env: The load balancing environment.
 *
 * Also calculates the amount of runnable load which should be moved
 * to restore balance.
 *
-* @env: The load balancing environment.
-*
 * Return: - The busiest group if imbalance exists.
 */
 static struct sched_group *find_busiest_group(struct lb_env *env)
@@ -37,9 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running)
 }
 #endif
 
+#define PELT_MIN_DIVIDER	(LOAD_AVG_MAX - 1024)
+
 static inline u32 get_pelt_divider(struct sched_avg *avg)
 {
-	return LOAD_AVG_MAX - 1024 + avg->period_contrib;
+	return PELT_MIN_DIVIDER + avg->period_contrib;
 }
 
 static inline void cfs_se_util_change(struct sched_avg *avg)
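For context on the max_t() clamps added throughout the fair.c hunks above: get_pelt_divider() returns PELT_MIN_DIVIDER plus period_contrib, and period_contrib is the partially elapsed 1024us period, so any freshly synced *_sum equals *_avg multiplied by a divider of at least PELT_MIN_DIVIDER. The relaxed updates stop recomputing the sums from the averages and instead only clamp them to that floor. A hedged, self-contained sketch of the arithmetic (the constant mirrors the kernel's LOAD_AVG_MAX of 47742; pelt_divider() here is an illustrative stand-in, not the kernel helper):

/* Illustrative sketch of the PELT lower-bound invariant. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define LOAD_AVG_MAX		47742
#define PELT_MIN_DIVIDER	(LOAD_AVG_MAX - 1024)

/* period_contrib is the partially elapsed 1024us period: 0..1023 */
static uint32_t pelt_divider(uint32_t period_contrib)
{
	return PELT_MIN_DIVIDER + period_contrib;
}

int main(void)
{
	for (uint32_t contrib = 0; contrib < 1024; contrib++) {
		uint32_t avg = 512;			/* any utilization value */
		uint32_t sum = avg * pelt_divider(contrib);

		/* Whatever period_contrib was at the last sync, this holds: */
		assert(sum >= avg * PELT_MIN_DIVIDER);
	}
	printf("sum >= avg * PELT_MIN_DIVIDER holds for every period_contrib\n");
	return 0;
}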
@@ -1162,7 +1162,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
 	t->event = 0;
 	t->last_event_time = 0;
 	init_waitqueue_head(&t->event_wait);
-	kref_init(&t->refcount);
 
 	mutex_lock(&group->trigger_lock);
 
@@ -1191,15 +1190,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
 	return t;
 }
 
-static void psi_trigger_destroy(struct kref *ref)
+void psi_trigger_destroy(struct psi_trigger *t)
 {
-	struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
-	struct psi_group *group = t->group;
+	struct psi_group *group;
 	struct task_struct *task_to_destroy = NULL;
 
-	if (static_branch_likely(&psi_disabled))
+	/*
+	 * We do not check psi_disabled since it might have been disabled after
+	 * the trigger got created.
+	 */
+	if (!t)
 		return;
 
+	group = t->group;
 	/*
 	 * Wakeup waiters to stop polling. Can happen if cgroup is deleted
 	 * from under a polling process.
@@ -1235,9 +1238,9 @@ static void psi_trigger_destroy(struct kref *ref)
 	mutex_unlock(&group->trigger_lock);
 
 	/*
-	 * Wait for both *trigger_ptr from psi_trigger_replace and
-	 * poll_task RCUs to complete their read-side critical sections
-	 * before destroying the trigger and optionally the poll_task
+	 * Wait for psi_schedule_poll_work RCU to complete its read-side
+	 * critical section before destroying the trigger and optionally the
+	 * poll_task.
 	 */
 	synchronize_rcu();
 	/*
@@ -1254,18 +1257,6 @@ static void psi_trigger_destroy(struct kref *ref)
 	kfree(t);
 }
 
-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
-{
-	struct psi_trigger *old = *trigger_ptr;
-
-	if (static_branch_likely(&psi_disabled))
-		return;
-
-	rcu_assign_pointer(*trigger_ptr, new);
-	if (old)
-		kref_put(&old->refcount, psi_trigger_destroy);
-}
-
 __poll_t psi_trigger_poll(void **trigger_ptr,
 			  struct file *file, poll_table *wait)
 {
@@ -1275,24 +1266,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
 	if (static_branch_likely(&psi_disabled))
 		return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
 
-	rcu_read_lock();
-
-	t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
-	if (!t) {
-		rcu_read_unlock();
+	t = smp_load_acquire(trigger_ptr);
+	if (!t)
 		return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
-	}
-	kref_get(&t->refcount);
-
-	rcu_read_unlock();
 
 	poll_wait(file, &t->event_wait, wait);
 
 	if (cmpxchg(&t->event, 1, 0) == 1)
 		ret |= EPOLLPRI;
 
-	kref_put(&t->refcount, psi_trigger_destroy);
-
 	return ret;
 }
 
@@ -1316,14 +1298,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
 
 	buf[buf_size - 1] = '\0';
 
-	new = psi_trigger_create(&psi_system, buf, nbytes, res);
-	if (IS_ERR(new))
-		return PTR_ERR(new);
-
 	seq = file->private_data;
+
 	/* Take seq->lock to protect seq->private from concurrent writes */
 	mutex_lock(&seq->lock);
-	psi_trigger_replace(&seq->private, new);
+
+	/* Allow only one trigger per file descriptor */
+	if (seq->private) {
+		mutex_unlock(&seq->lock);
+		return -EBUSY;
+	}
+
+	new = psi_trigger_create(&psi_system, buf, nbytes, res);
+	if (IS_ERR(new)) {
+		mutex_unlock(&seq->lock);
+		return PTR_ERR(new);
+	}
+
+	smp_store_release(&seq->private, new);
 	mutex_unlock(&seq->lock);
 
 	return nbytes;
@@ -1358,7 +1350,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq = file->private_data;
 
-	psi_trigger_replace(&seq->private, NULL);
+	psi_trigger_destroy(seq->private);
 	return single_release(inode, file);
 }
 
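Taken together, the psi.c hunks replace the kref/RCU-protected psi_trigger_replace() scheme with a simpler lifetime rule: a trigger is published once with smp_store_release(), psi_trigger_poll() picks it up with smp_load_acquire(), and the trigger is destroyed only from the file release path, so a poller can no longer race with destruction. As a hedged userspace analogue of that publish/consume pairing (using C11 atomics rather than the kernel primitives; all names below are illustrative), a sketch might look like this:

/* Illustrative only: release/acquire publication, as in psi_write()/psi_trigger_poll(). */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct trigger {
	int threshold_us;
	int window_us;
};

static _Atomic(struct trigger *) published;	/* plays the role of seq->private */

/* Writer: allow only one trigger per "file descriptor", then publish it. */
static int trigger_write(int threshold_us, int window_us)
{
	struct trigger *t;

	if (atomic_load(&published))
		return -1;			/* -EBUSY in the kernel code */

	t = malloc(sizeof(*t));
	if (!t)
		return -1;
	t->threshold_us = threshold_us;
	t->window_us = window_us;
	/* Like smp_store_release(): the initialization is visible before the pointer. */
	atomic_store_explicit(&published, t, memory_order_release);
	return 0;
}

/* Reader: the acquire load pairs with the release store above. */
static void trigger_poll(void)
{
	struct trigger *t = atomic_load_explicit(&published, memory_order_acquire);

	if (t)
		printf("polling trigger: %dus stall per %dus window\n",
		       t->threshold_us, t->window_us);
}

int main(void)
{
	trigger_write(150000, 1000000);
	trigger_poll();
	free(atomic_load(&published));		/* only the "release path" destroys it */
	return 0;
}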