blk-cgroup: move the cgroup information to struct gendisk

cgroup information only makes sense on a live gendisk that allows
file system I/O (which includes the raw block device).  So move the
cgroup-related members from struct request_queue to struct gendisk.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20230203150400.3199230-20-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Christoph Hellwig 2023-02-03 16:04:00 +01:00 committed by Jens Axboe
parent 479664cee1
commit 3f13ab7c80
6 changed files with 54 additions and 50 deletions

View File

@ -1001,7 +1001,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
{ {
struct blkcg_gq *blkg; struct blkcg_gq *blkg;
list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { list_for_each_entry(blkg, &bfqd->queue->disk->blkg_list, entry) {
struct bfq_group *bfqg = blkg_to_bfqg(blkg); struct bfq_group *bfqg = blkg_to_bfqg(blkg);
bfq_end_wr_async_queues(bfqd, bfqg); bfq_end_wr_async_queues(bfqd, bfqg);
@ -1295,7 +1295,7 @@ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
if (ret) if (ret)
return NULL; return NULL;
return blkg_to_bfqg(bfqd->queue->root_blkg); return blkg_to_bfqg(bfqd->queue->disk->root_blkg);
} }
struct blkcg_policy blkcg_policy_bfq = { struct blkcg_policy blkcg_policy_bfq = {

View File

@ -108,10 +108,10 @@ static struct cgroup_subsys_state *blkcg_css(void)
return task_css(current, io_cgrp_id); return task_css(current, io_cgrp_id);
} }
static bool blkcg_policy_enabled(struct request_queue *q, static bool blkcg_policy_enabled(struct gendisk *disk,
const struct blkcg_policy *pol) const struct blkcg_policy *pol)
{ {
return pol && test_bit(pol->plid, q->blkcg_pols); return pol && test_bit(pol->plid, disk->blkcg_pols);
} }
static void blkg_free(struct blkcg_gq *blkg) static void blkg_free(struct blkcg_gq *blkg)
@ -121,18 +121,18 @@ static void blkg_free(struct blkcg_gq *blkg)
/* /*
* pd_free_fn() can also be called from blkcg_deactivate_policy(), * pd_free_fn() can also be called from blkcg_deactivate_policy(),
* in order to make sure pd_free_fn() is called in order, the deletion * in order to make sure pd_free_fn() is called in order, the deletion
* of the list blkg->q_node is delayed to here from blkg_destroy(), and * of the list blkg->entry is delayed to here from blkg_destroy(), and
* blkcg_mutex is used to synchronize blkg_free_workfn() and * blkcg_mutex is used to synchronize blkg_free_workfn() and
* blkcg_deactivate_policy(). * blkcg_deactivate_policy().
*/ */
mutex_lock(&blkg->disk->queue->blkcg_mutex); mutex_lock(&blkg->disk->blkcg_mutex);
for (i = 0; i < BLKCG_MAX_POLS; i++) for (i = 0; i < BLKCG_MAX_POLS; i++)
if (blkg->pd[i]) if (blkg->pd[i])
blkcg_policy[i]->pd_free_fn(blkg->pd[i]); blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
if (blkg->parent) if (blkg->parent)
blkg_put(blkg->parent); blkg_put(blkg->parent);
list_del_init(&blkg->q_node); list_del_init(&blkg->entry);
mutex_unlock(&blkg->disk->queue->blkcg_mutex); mutex_unlock(&blkg->disk->blkcg_mutex);
put_disk(blkg->disk); put_disk(blkg->disk);
free_percpu(blkg->iostat_cpu); free_percpu(blkg->iostat_cpu);
@ -256,7 +256,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
get_device(disk_to_dev(disk)); get_device(disk_to_dev(disk));
blkg->disk = disk; blkg->disk = disk;
INIT_LIST_HEAD(&blkg->q_node); INIT_LIST_HEAD(&blkg->entry);
spin_lock_init(&blkg->async_bio_lock); spin_lock_init(&blkg->async_bio_lock);
bio_list_init(&blkg->async_bios); bio_list_init(&blkg->async_bios);
INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
@ -272,7 +272,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
struct blkcg_policy *pol = blkcg_policy[i]; struct blkcg_policy *pol = blkcg_policy[i];
struct blkg_policy_data *pd; struct blkg_policy_data *pd;
if (!blkcg_policy_enabled(disk->queue, pol)) if (!blkcg_policy_enabled(disk, pol))
continue; continue;
/* alloc per-policy data and attach it to blkg */ /* alloc per-policy data and attach it to blkg */
@ -358,7 +358,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
ret = radix_tree_insert(&blkcg->blkg_tree, disk->queue->id, blkg); ret = radix_tree_insert(&blkcg->blkg_tree, disk->queue->id, blkg);
if (likely(!ret)) { if (likely(!ret)) {
hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
list_add(&blkg->q_node, &disk->queue->blkg_list); list_add(&blkg->entry, &disk->blkg_list);
for (i = 0; i < BLKCG_MAX_POLS; i++) { for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i]; struct blkcg_policy *pol = blkcg_policy[i];
@ -430,7 +430,7 @@ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
while (true) { while (true) {
struct blkcg *pos = blkcg; struct blkcg *pos = blkcg;
struct blkcg *parent = blkcg_parent(blkcg); struct blkcg *parent = blkcg_parent(blkcg);
struct blkcg_gq *ret_blkg = q->root_blkg; struct blkcg_gq *ret_blkg = disk->root_blkg;
while (parent) { while (parent) {
blkg = blkg_lookup(parent, disk); blkg = blkg_lookup(parent, disk);
@ -512,7 +512,7 @@ static void blkg_destroy_all(struct gendisk *disk)
restart: restart:
spin_lock_irq(&q->queue_lock); spin_lock_irq(&q->queue_lock);
list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { list_for_each_entry_safe(blkg, n, &disk->blkg_list, entry) {
struct blkcg *blkcg = blkg->blkcg; struct blkcg *blkcg = blkg->blkcg;
spin_lock(&blkcg->lock); spin_lock(&blkcg->lock);
@ -531,7 +531,7 @@ restart:
} }
} }
q->root_blkg = NULL; disk->root_blkg = NULL;
spin_unlock_irq(&q->queue_lock); spin_unlock_irq(&q->queue_lock);
} }
@ -606,7 +606,7 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
rcu_read_lock(); rcu_read_lock();
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
spin_lock_irq(&blkg->disk->queue->queue_lock); spin_lock_irq(&blkg->disk->queue->queue_lock);
if (blkcg_policy_enabled(blkg->disk->queue, pol)) if (blkcg_policy_enabled(blkg->disk, pol))
total += prfill(sf, blkg->pd[pol->plid], data); total += prfill(sf, blkg->pd[pol->plid], data);
spin_unlock_irq(&blkg->disk->queue->queue_lock); spin_unlock_irq(&blkg->disk->queue->queue_lock);
} }
@ -714,7 +714,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
rcu_read_lock(); rcu_read_lock();
spin_lock_irq(&q->queue_lock); spin_lock_irq(&q->queue_lock);
if (!blkcg_policy_enabled(q, pol)) { if (!blkcg_policy_enabled(disk, pol)) {
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
goto fail_unlock; goto fail_unlock;
} }
@ -757,7 +757,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
rcu_read_lock(); rcu_read_lock();
spin_lock_irq(&q->queue_lock); spin_lock_irq(&q->queue_lock);
if (!blkcg_policy_enabled(q, pol)) { if (!blkcg_policy_enabled(disk, pol)) {
blkg_free(new_blkg); blkg_free(new_blkg);
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
goto fail_preloaded; goto fail_preloaded;
@ -937,7 +937,7 @@ static void blkcg_fill_root_iostats(void)
class_dev_iter_init(&iter, &block_class, NULL, &disk_type); class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
while ((dev = class_dev_iter_next(&iter))) { while ((dev = class_dev_iter_next(&iter))) {
struct block_device *bdev = dev_to_bdev(dev); struct block_device *bdev = dev_to_bdev(dev);
struct blkcg_gq *blkg = bdev->bd_disk->queue->root_blkg; struct blkcg_gq *blkg = bdev->bd_disk->root_blkg;
struct blkg_iostat tmp; struct blkg_iostat tmp;
int cpu; int cpu;
unsigned long flags; unsigned long flags;
@ -1284,8 +1284,8 @@ int blkcg_init_disk(struct gendisk *disk)
bool preloaded; bool preloaded;
int ret; int ret;
INIT_LIST_HEAD(&q->blkg_list); INIT_LIST_HEAD(&disk->blkg_list);
mutex_init(&q->blkcg_mutex); mutex_init(&disk->blkcg_mutex);
new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL); new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
if (!new_blkg) if (!new_blkg)
@ -1299,7 +1299,7 @@ int blkcg_init_disk(struct gendisk *disk)
blkg = blkg_create(&blkcg_root, disk, new_blkg); blkg = blkg_create(&blkcg_root, disk, new_blkg);
if (IS_ERR(blkg)) if (IS_ERR(blkg))
goto err_unlock; goto err_unlock;
q->root_blkg = blkg; disk->root_blkg = blkg;
spin_unlock_irq(&q->queue_lock); spin_unlock_irq(&q->queue_lock);
if (preloaded) if (preloaded)
@ -1412,7 +1412,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
struct blkcg_gq *blkg, *pinned_blkg = NULL; struct blkcg_gq *blkg, *pinned_blkg = NULL;
int ret; int ret;
if (blkcg_policy_enabled(q, pol)) if (blkcg_policy_enabled(disk, pol))
return 0; return 0;
if (queue_is_mq(q)) if (queue_is_mq(q))
@ -1421,7 +1421,7 @@ retry:
spin_lock_irq(&q->queue_lock); spin_lock_irq(&q->queue_lock);
/* blkg_list is pushed at the head, reverse walk to allocate parents first */ /* blkg_list is pushed at the head, reverse walk to allocate parents first */
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { list_for_each_entry_reverse(blkg, &disk->blkg_list, entry) {
struct blkg_policy_data *pd; struct blkg_policy_data *pd;
if (blkg->pd[pol->plid]) if (blkg->pd[pol->plid])
@ -1466,16 +1466,16 @@ retry:
/* all allocated, init in the same order */ /* all allocated, init in the same order */
if (pol->pd_init_fn) if (pol->pd_init_fn)
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) list_for_each_entry_reverse(blkg, &disk->blkg_list, entry)
pol->pd_init_fn(blkg->pd[pol->plid]); pol->pd_init_fn(blkg->pd[pol->plid]);
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { list_for_each_entry_reverse(blkg, &disk->blkg_list, entry) {
if (pol->pd_online_fn) if (pol->pd_online_fn)
pol->pd_online_fn(blkg->pd[pol->plid]); pol->pd_online_fn(blkg->pd[pol->plid]);
blkg->pd[pol->plid]->online = true; blkg->pd[pol->plid]->online = true;
} }
__set_bit(pol->plid, q->blkcg_pols); __set_bit(pol->plid, disk->blkcg_pols);
ret = 0; ret = 0;
spin_unlock_irq(&q->queue_lock); spin_unlock_irq(&q->queue_lock);
@ -1491,7 +1491,7 @@ out:
enomem: enomem:
/* alloc failed, nothing's initialized yet, free everything */ /* alloc failed, nothing's initialized yet, free everything */
spin_lock_irq(&q->queue_lock); spin_lock_irq(&q->queue_lock);
list_for_each_entry(blkg, &q->blkg_list, q_node) { list_for_each_entry(blkg, &disk->blkg_list, entry) {
struct blkcg *blkcg = blkg->blkcg; struct blkcg *blkcg = blkg->blkcg;
spin_lock(&blkcg->lock); spin_lock(&blkcg->lock);
@ -1521,18 +1521,18 @@ void blkcg_deactivate_policy(struct gendisk *disk,
struct request_queue *q = disk->queue; struct request_queue *q = disk->queue;
struct blkcg_gq *blkg; struct blkcg_gq *blkg;
if (!blkcg_policy_enabled(q, pol)) if (!blkcg_policy_enabled(disk, pol))
return; return;
if (queue_is_mq(q)) if (queue_is_mq(q))
blk_mq_freeze_queue(q); blk_mq_freeze_queue(q);
mutex_lock(&q->blkcg_mutex); mutex_lock(&disk->blkcg_mutex);
spin_lock_irq(&q->queue_lock); spin_lock_irq(&q->queue_lock);
__clear_bit(pol->plid, q->blkcg_pols); __clear_bit(pol->plid, disk->blkcg_pols);
list_for_each_entry(blkg, &q->blkg_list, q_node) { list_for_each_entry(blkg, &disk->blkg_list, entry) {
struct blkcg *blkcg = blkg->blkcg; struct blkcg *blkcg = blkg->blkcg;
spin_lock(&blkcg->lock); spin_lock(&blkcg->lock);
@ -1546,7 +1546,7 @@ void blkcg_deactivate_policy(struct gendisk *disk,
} }
spin_unlock_irq(&q->queue_lock); spin_unlock_irq(&q->queue_lock);
mutex_unlock(&q->blkcg_mutex); mutex_unlock(&disk->blkcg_mutex);
if (queue_is_mq(q)) if (queue_is_mq(q))
blk_mq_unfreeze_queue(q); blk_mq_unfreeze_queue(q);
@ -1943,7 +1943,7 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio,
* Associate @bio with the blkg found by combining the css's blkg and the * Associate @bio with the blkg found by combining the css's blkg and the
* request_queue of the @bio. An association failure is handled by walking up * request_queue of the @bio. An association failure is handled by walking up
* the blkg tree. Therefore, the blkg associated can be anything between @blkg * the blkg tree. Therefore, the blkg associated can be anything between @blkg
* and q->root_blkg. This situation only happens when a cgroup is dying and * and disk->root_blkg. This situation only happens when a cgroup is dying and
* then the remaining bios will spill to the closest alive blkg. * then the remaining bios will spill to the closest alive blkg.
* *
* A reference will be taken on the blkg and will be released when @bio is * A reference will be taken on the blkg and will be released when @bio is
@ -1958,8 +1958,8 @@ void bio_associate_blkg_from_css(struct bio *bio,
if (css && css->parent) { if (css && css->parent) {
bio->bi_blkg = blkg_tryget_closest(bio, css); bio->bi_blkg = blkg_tryget_closest(bio, css);
} else { } else {
blkg_get(bdev_get_queue(bio->bi_bdev)->root_blkg); blkg_get(bio->bi_bdev->bd_disk->root_blkg);
bio->bi_blkg = bdev_get_queue(bio->bi_bdev)->root_blkg; bio->bi_blkg = bio->bi_bdev->bd_disk->root_blkg;
} }
} }
EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);

View File

@ -54,7 +54,7 @@ struct blkg_iostat_set {
/* association between a blk cgroup and a request queue */ /* association between a blk cgroup and a request queue */
struct blkcg_gq { struct blkcg_gq {
struct gendisk *disk; struct gendisk *disk;
struct list_head q_node; struct list_head entry;
struct hlist_node blkcg_node; struct hlist_node blkcg_node;
struct blkcg *blkcg; struct blkcg *blkcg;
@ -250,7 +250,7 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
WARN_ON_ONCE(!rcu_read_lock_held()); WARN_ON_ONCE(!rcu_read_lock_held());
if (blkcg == &blkcg_root) if (blkcg == &blkcg_root)
return disk->queue->root_blkg; return disk->root_blkg;
blkg = rcu_dereference(blkcg->blkg_hint); blkg = rcu_dereference(blkcg->blkg_hint);
if (blkg && blkg->disk == disk) if (blkg && blkg->disk == disk)

View File

@ -665,7 +665,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
rcu_read_lock(); rcu_read_lock();
blkg_for_each_descendant_pre(blkg, pos_css, blkg_for_each_descendant_pre(blkg, pos_css,
blkiolat->rqos.disk->queue->root_blkg) { blkiolat->rqos.disk->root_blkg) {
struct iolatency_grp *iolat; struct iolatency_grp *iolat;
struct child_latency_info *lat_info; struct child_latency_info *lat_info;
unsigned long flags; unsigned long flags;

View File

@ -451,7 +451,8 @@ static void blk_throtl_update_limit_valid(struct throtl_data *td)
bool low_valid = false; bool low_valid = false;
rcu_read_lock(); rcu_read_lock();
blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { blkg_for_each_descendant_post(blkg, pos_css,
td->queue->disk->root_blkg) {
struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_grp *tg = blkg_to_tg(blkg);
if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] || if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] ||
@ -1180,7 +1181,7 @@ static void throtl_pending_timer_fn(struct timer_list *t)
spin_lock_irq(&q->queue_lock); spin_lock_irq(&q->queue_lock);
if (!q->root_blkg) if (!q->disk->root_blkg)
goto out_unlock; goto out_unlock;
if (throtl_can_upgrade(td, NULL)) if (throtl_can_upgrade(td, NULL))
@ -1322,7 +1323,8 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
* blk-throttle. * blk-throttle.
*/ */
blkg_for_each_descendant_pre(blkg, pos_css, blkg_for_each_descendant_pre(blkg, pos_css,
global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) { global ? tg->td->queue->disk->root_blkg :
tg_to_blkg(tg)) {
struct throtl_grp *this_tg = blkg_to_tg(blkg); struct throtl_grp *this_tg = blkg_to_tg(blkg);
struct throtl_grp *parent_tg; struct throtl_grp *parent_tg;
@ -1717,7 +1719,7 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
* path need RCU protection and to prevent warning from lockdep. * path need RCU protection and to prevent warning from lockdep.
*/ */
rcu_read_lock(); rcu_read_lock();
blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) { blkg_for_each_descendant_post(blkg, pos_css, disk->root_blkg) {
struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_grp *tg = blkg_to_tg(blkg);
struct throtl_service_queue *sq = &tg->service_queue; struct throtl_service_queue *sq = &tg->service_queue;
@ -1871,7 +1873,8 @@ static bool throtl_can_upgrade(struct throtl_data *td,
return false; return false;
rcu_read_lock(); rcu_read_lock();
blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { blkg_for_each_descendant_post(blkg, pos_css,
td->queue->disk->root_blkg) {
struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_grp *tg = blkg_to_tg(blkg);
if (tg == this_tg) if (tg == this_tg)
@ -1917,7 +1920,8 @@ static void throtl_upgrade_state(struct throtl_data *td)
td->low_upgrade_time = jiffies; td->low_upgrade_time = jiffies;
td->scale = 0; td->scale = 0;
rcu_read_lock(); rcu_read_lock();
blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { blkg_for_each_descendant_post(blkg, pos_css,
td->queue->disk->root_blkg) {
struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_grp *tg = blkg_to_tg(blkg);
struct throtl_service_queue *sq = &tg->service_queue; struct throtl_service_queue *sq = &tg->service_queue;

View File

@ -163,6 +163,12 @@ struct gendisk {
struct timer_rand_state *random; struct timer_rand_state *random;
atomic_t sync_io; /* RAID */ atomic_t sync_io; /* RAID */
struct disk_events *ev; struct disk_events *ev;
#ifdef CONFIG_BLK_CGROUP
DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS);
struct blkcg_gq *root_blkg;
struct list_head blkg_list;
struct mutex blkcg_mutex;
#endif /* CONFIG_BLK_CGROUP */
#ifdef CONFIG_BLK_DEV_INTEGRITY #ifdef CONFIG_BLK_DEV_INTEGRITY
struct kobject integrity_kobj; struct kobject integrity_kobj;
#endif /* CONFIG_BLK_DEV_INTEGRITY */ #endif /* CONFIG_BLK_DEV_INTEGRITY */
@ -481,12 +487,6 @@ struct request_queue {
struct blk_mq_tags *sched_shared_tags; struct blk_mq_tags *sched_shared_tags;
struct list_head icq_list; struct list_head icq_list;
#ifdef CONFIG_BLK_CGROUP
DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS);
struct blkcg_gq *root_blkg;
struct list_head blkg_list;
struct mutex blkcg_mutex;
#endif
struct queue_limits limits; struct queue_limits limits;