cgroup: rstat: fix A-A deadlock on 32bit around u64_stats_sync

0fa294fb19 ("cgroup: Replace cgroup_rstat_mutex with a spinlock") added
cgroup_rstat_flush_irqsafe() allowing flushing to happen from the irq
context. However, rstat paths use u64_stats_sync to synchronize access to
64bit stat counters on 32bit machines. u64_stats_sync is implemented using
seq_lock and trying to read from an irq context can lead to A-A deadlock if
the irq happens to interrupt the stat update.

Fix it by using the irqsafe variants - u64_stats_update_begin_irqsave() and
u64_stats_update_end_irqrestore() - in the update paths. Note that none of
this matters on 64bit machines. All these are just for 32bit SMP setups.

Note that the interface was introduced way back, its first and currently
only use was recently added by 2d146aa3aa ("mm: memcontrol: switch to
rstat"). Stable tagging targets this commit.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Rik van Riel <riel@surriel.com>
Fixes: 2d146aa3aa ("mm: memcontrol: switch to rstat")
Cc: stable@vger.kernel.org # v5.13+
This commit is contained in:
Tejun Heo 2021-07-27 13:12:20 -10:00
parent 1e7107c5ef
commit c3df5fb57f
2 changed files with 19 additions and 14 deletions

View File

@ -790,6 +790,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
struct blkcg_gq *parent = blkg->parent; struct blkcg_gq *parent = blkg->parent;
struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu); struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
struct blkg_iostat cur, delta; struct blkg_iostat cur, delta;
unsigned long flags;
unsigned int seq; unsigned int seq;
/* fetch the current per-cpu values */ /* fetch the current per-cpu values */
@ -799,21 +800,21 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
} while (u64_stats_fetch_retry(&bisc->sync, seq)); } while (u64_stats_fetch_retry(&bisc->sync, seq));
/* propagate percpu delta to global */ /* propagate percpu delta to global */
u64_stats_update_begin(&blkg->iostat.sync); flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
blkg_iostat_set(&delta, &cur); blkg_iostat_set(&delta, &cur);
blkg_iostat_sub(&delta, &bisc->last); blkg_iostat_sub(&delta, &bisc->last);
blkg_iostat_add(&blkg->iostat.cur, &delta); blkg_iostat_add(&blkg->iostat.cur, &delta);
blkg_iostat_add(&bisc->last, &delta); blkg_iostat_add(&bisc->last, &delta);
u64_stats_update_end(&blkg->iostat.sync); u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
/* propagate global delta to parent (unless that's root) */ /* propagate global delta to parent (unless that's root) */
if (parent && parent->parent) { if (parent && parent->parent) {
u64_stats_update_begin(&parent->iostat.sync); flags = u64_stats_update_begin_irqsave(&parent->iostat.sync);
blkg_iostat_set(&delta, &blkg->iostat.cur); blkg_iostat_set(&delta, &blkg->iostat.cur);
blkg_iostat_sub(&delta, &blkg->iostat.last); blkg_iostat_sub(&delta, &blkg->iostat.last);
blkg_iostat_add(&parent->iostat.cur, &delta); blkg_iostat_add(&parent->iostat.cur, &delta);
blkg_iostat_add(&blkg->iostat.last, &delta); blkg_iostat_add(&blkg->iostat.last, &delta);
u64_stats_update_end(&parent->iostat.sync); u64_stats_update_end_irqrestore(&parent->iostat.sync, flags);
} }
} }
@ -848,6 +849,7 @@ static void blkcg_fill_root_iostats(void)
memset(&tmp, 0, sizeof(tmp)); memset(&tmp, 0, sizeof(tmp));
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
struct disk_stats *cpu_dkstats; struct disk_stats *cpu_dkstats;
unsigned long flags;
cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu); cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu);
tmp.ios[BLKG_IOSTAT_READ] += tmp.ios[BLKG_IOSTAT_READ] +=
@ -864,9 +866,9 @@ static void blkcg_fill_root_iostats(void)
tmp.bytes[BLKG_IOSTAT_DISCARD] += tmp.bytes[BLKG_IOSTAT_DISCARD] +=
cpu_dkstats->sectors[STAT_DISCARD] << 9; cpu_dkstats->sectors[STAT_DISCARD] << 9;
u64_stats_update_begin(&blkg->iostat.sync); flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
blkg_iostat_set(&blkg->iostat.cur, &tmp); blkg_iostat_set(&blkg->iostat.cur, &tmp);
u64_stats_update_end(&blkg->iostat.sync); u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
} }
} }
} }

View File

@ -347,19 +347,20 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
} }
static struct cgroup_rstat_cpu * static struct cgroup_rstat_cpu *
cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp) cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags)
{ {
struct cgroup_rstat_cpu *rstatc; struct cgroup_rstat_cpu *rstatc;
rstatc = get_cpu_ptr(cgrp->rstat_cpu); rstatc = get_cpu_ptr(cgrp->rstat_cpu);
u64_stats_update_begin(&rstatc->bsync); *flags = u64_stats_update_begin_irqsave(&rstatc->bsync);
return rstatc; return rstatc;
} }
static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
struct cgroup_rstat_cpu *rstatc) struct cgroup_rstat_cpu *rstatc,
unsigned long flags)
{ {
u64_stats_update_end(&rstatc->bsync); u64_stats_update_end_irqrestore(&rstatc->bsync, flags);
cgroup_rstat_updated(cgrp, smp_processor_id()); cgroup_rstat_updated(cgrp, smp_processor_id());
put_cpu_ptr(rstatc); put_cpu_ptr(rstatc);
} }
@ -367,18 +368,20 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec) void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{ {
struct cgroup_rstat_cpu *rstatc; struct cgroup_rstat_cpu *rstatc;
unsigned long flags;
rstatc = cgroup_base_stat_cputime_account_begin(cgrp); rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
rstatc->bstat.cputime.sum_exec_runtime += delta_exec; rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
cgroup_base_stat_cputime_account_end(cgrp, rstatc); cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
} }
void __cgroup_account_cputime_field(struct cgroup *cgrp, void __cgroup_account_cputime_field(struct cgroup *cgrp,
enum cpu_usage_stat index, u64 delta_exec) enum cpu_usage_stat index, u64 delta_exec)
{ {
struct cgroup_rstat_cpu *rstatc; struct cgroup_rstat_cpu *rstatc;
unsigned long flags;
rstatc = cgroup_base_stat_cputime_account_begin(cgrp); rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
switch (index) { switch (index) {
case CPUTIME_USER: case CPUTIME_USER:
@ -394,7 +397,7 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
break; break;
} }
cgroup_base_stat_cputime_account_end(cgrp, rstatc); cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
} }
/* /*