blkcg: use double locking instead of RCU for blkg synchronization
blkgs are chained from both blkcgs and request_queues and are thus subject to two locks - blkcg->lock and q->queue_lock. As both blkcg and q can go away at any time, locking during removal is tricky. It's currently solved by wrapping removal inside RCU, which makes the synchronization complex. There are three locks to worry about - the outer RCU, q lock and blkcg lock - and this leads to nasty subtle complications like the conditional synchronize_rcu() on queue exit paths.

For all other paths, blkcg lock is naturally nested inside q lock. The only exception is the blkcg removal path, which is a very cold path and can be implemented as clumsy but conceptually simple reverse double lock dancing.

This patch updates the blkg removal path such that blkgs are removed while holding both q and blkcg locks, which is trivial for the request queue exit path - blkg_destroy_all(). The blkcg removal path, blkiocg_pre_destroy(), implements reverse double lock dancing essentially identical to ioc_release_fn().

This simplifies blkg locking - there are no half-dead blkgs to worry about. The now-unnecessary RCU annotations will be removed by the next patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
e8989fae38
commit
9f13ef678e
|
@ -620,32 +620,6 @@ out:
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(blkg_lookup_create);
|
EXPORT_SYMBOL_GPL(blkg_lookup_create);
|
||||||
|
|
||||||
static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
|
|
||||||
{
|
|
||||||
hlist_del_init_rcu(&blkg->blkcg_node);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
|
|
||||||
* indicating that blk_group was unhashed by the time we got to it.
|
|
||||||
*/
|
|
||||||
int blkiocg_del_blkio_group(struct blkio_group *blkg)
|
|
||||||
{
|
|
||||||
struct blkio_cgroup *blkcg = blkg->blkcg;
|
|
||||||
unsigned long flags;
|
|
||||||
int ret = 1;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&blkcg->lock, flags);
|
|
||||||
if (!hlist_unhashed(&blkg->blkcg_node)) {
|
|
||||||
__blkiocg_del_blkio_group(blkg);
|
|
||||||
ret = 0;
|
|
||||||
}
|
|
||||||
spin_unlock_irqrestore(&blkcg->lock, flags);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
|
|
||||||
|
|
||||||
/* called under rcu_read_lock(). */
|
/* called under rcu_read_lock(). */
|
||||||
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
|
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
|
||||||
struct request_queue *q)
|
struct request_queue *q)
|
||||||
|
@ -663,12 +637,16 @@ EXPORT_SYMBOL_GPL(blkg_lookup);
|
||||||
static void blkg_destroy(struct blkio_group *blkg)
|
static void blkg_destroy(struct blkio_group *blkg)
|
||||||
{
|
{
|
||||||
struct request_queue *q = blkg->q;
|
struct request_queue *q = blkg->q;
|
||||||
|
struct blkio_cgroup *blkcg = blkg->blkcg;
|
||||||
|
|
||||||
lockdep_assert_held(q->queue_lock);
|
lockdep_assert_held(q->queue_lock);
|
||||||
|
lockdep_assert_held(&blkcg->lock);
|
||||||
|
|
||||||
/* Something wrong if we are trying to remove same group twice */
|
/* Something wrong if we are trying to remove same group twice */
|
||||||
WARN_ON_ONCE(list_empty(&blkg->q_node));
|
WARN_ON_ONCE(list_empty(&blkg->q_node));
|
||||||
|
WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
|
||||||
list_del_init(&blkg->q_node);
|
list_del_init(&blkg->q_node);
|
||||||
|
hlist_del_init_rcu(&blkg->blkcg_node);
|
||||||
|
|
||||||
WARN_ON_ONCE(q->nr_blkgs <= 0);
|
WARN_ON_ONCE(q->nr_blkgs <= 0);
|
||||||
q->nr_blkgs--;
|
q->nr_blkgs--;
|
||||||
|
@ -713,45 +691,33 @@ void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(update_root_blkg_pd);
|
EXPORT_SYMBOL_GPL(update_root_blkg_pd);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* blkg_destroy_all - destroy all blkgs associated with a request_queue
|
||||||
|
* @q: request_queue of interest
|
||||||
|
* @destroy_root: whether to destroy root blkg or not
|
||||||
|
*
|
||||||
|
* Destroy blkgs associated with @q. If @destroy_root is %true, all are
|
||||||
|
* destroyed; otherwise, root blkg is left alone.
|
||||||
|
*/
|
||||||
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
|
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
|
||||||
{
|
{
|
||||||
struct blkio_group *blkg, *n;
|
struct blkio_group *blkg, *n;
|
||||||
|
|
||||||
while (true) {
|
spin_lock_irq(q->queue_lock);
|
||||||
bool done = true;
|
|
||||||
|
|
||||||
spin_lock_irq(q->queue_lock);
|
list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
|
||||||
|
struct blkio_cgroup *blkcg = blkg->blkcg;
|
||||||
|
|
||||||
list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
|
/* skip root? */
|
||||||
/* skip root? */
|
if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
|
||||||
if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
|
continue;
|
||||||
continue;
|
|
||||||
|
|
||||||
/*
|
spin_lock(&blkcg->lock);
|
||||||
* If cgroup removal path got to blk_group first
|
blkg_destroy(blkg);
|
||||||
* and removed it from cgroup list, then it will
|
spin_unlock(&blkcg->lock);
|
||||||
* take care of destroying cfqg also.
|
|
||||||
*/
|
|
||||||
if (!blkiocg_del_blkio_group(blkg))
|
|
||||||
blkg_destroy(blkg);
|
|
||||||
else
|
|
||||||
done = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
spin_unlock_irq(q->queue_lock);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Group list may not be empty if we raced cgroup removal
|
|
||||||
* and lost. cgroup removal is guaranteed to make forward
|
|
||||||
* progress and retrying after a while is enough. This
|
|
||||||
* ugliness is scheduled to be removed after locking
|
|
||||||
* update.
|
|
||||||
*/
|
|
||||||
if (done)
|
|
||||||
break;
|
|
||||||
|
|
||||||
msleep(10); /* just some random duration I like */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spin_unlock_irq(q->queue_lock);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(blkg_destroy_all);
|
EXPORT_SYMBOL_GPL(blkg_destroy_all);
|
||||||
|
|
||||||
|
@ -1600,45 +1566,45 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
|
||||||
ARRAY_SIZE(blkio_files));
|
ARRAY_SIZE(blkio_files));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* blkiocg_pre_destroy - cgroup pre_destroy callback
|
||||||
|
* @subsys: cgroup subsys
|
||||||
|
* @cgroup: cgroup of interest
|
||||||
|
*
|
||||||
|
* This function is called when @cgroup is about to go away and responsible
|
||||||
|
* for shooting down all blkgs associated with @cgroup. blkgs should be
|
||||||
|
* removed while holding both q and blkcg locks. As blkcg lock is nested
|
||||||
|
* inside q lock, this function performs reverse double lock dancing.
|
||||||
|
*
|
||||||
|
* This is the blkcg counterpart of ioc_release_fn().
|
||||||
|
*/
|
||||||
static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
|
static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
|
||||||
struct cgroup *cgroup)
|
struct cgroup *cgroup)
|
||||||
{
|
{
|
||||||
struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
|
struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
|
||||||
unsigned long flags;
|
|
||||||
struct blkio_group *blkg;
|
|
||||||
struct request_queue *q;
|
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
spin_lock_irq(&blkcg->lock);
|
||||||
|
|
||||||
do {
|
while (!hlist_empty(&blkcg->blkg_list)) {
|
||||||
spin_lock_irqsave(&blkcg->lock, flags);
|
struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
|
||||||
|
struct blkio_group, blkcg_node);
|
||||||
|
struct request_queue *q = rcu_dereference(blkg->q);
|
||||||
|
|
||||||
if (hlist_empty(&blkcg->blkg_list)) {
|
if (spin_trylock(q->queue_lock)) {
|
||||||
spin_unlock_irqrestore(&blkcg->lock, flags);
|
blkg_destroy(blkg);
|
||||||
break;
|
spin_unlock(q->queue_lock);
|
||||||
|
} else {
|
||||||
|
spin_unlock_irq(&blkcg->lock);
|
||||||
|
rcu_read_unlock();
|
||||||
|
cpu_relax();
|
||||||
|
rcu_read_lock();
|
||||||
|
spin_lock(&blkcg->lock);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
|
spin_unlock_irq(&blkcg->lock);
|
||||||
blkcg_node);
|
|
||||||
q = rcu_dereference(blkg->q);
|
|
||||||
__blkiocg_del_blkio_group(blkg);
|
|
||||||
|
|
||||||
spin_unlock_irqrestore(&blkcg->lock, flags);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This blkio_group is being unlinked as associated cgroup is
|
|
||||||
* going away. Let all the IO controlling policies know about
|
|
||||||
* this event.
|
|
||||||
*/
|
|
||||||
spin_lock(&blkio_list_lock);
|
|
||||||
spin_lock_irqsave(q->queue_lock, flags);
|
|
||||||
blkg_destroy(blkg);
|
|
||||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
|
||||||
spin_unlock(&blkio_list_lock);
|
|
||||||
} while (1);
|
|
||||||
|
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -380,7 +380,6 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
|
||||||
extern struct blkio_cgroup blkio_root_cgroup;
|
extern struct blkio_cgroup blkio_root_cgroup;
|
||||||
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
|
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
|
||||||
extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
|
extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
|
||||||
extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
|
|
||||||
extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
|
extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
|
||||||
struct request_queue *q);
|
struct request_queue *q);
|
||||||
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
|
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
|
||||||
|
@ -416,9 +415,6 @@ cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
|
||||||
static inline struct blkio_cgroup *
|
static inline struct blkio_cgroup *
|
||||||
task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
|
task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
|
||||||
|
|
||||||
static inline int
|
|
||||||
blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
|
|
||||||
|
|
||||||
static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
|
static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
|
||||||
void *key) { return NULL; }
|
void *key) { return NULL; }
|
||||||
static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
|
static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
|
||||||
|
|
10
block/cfq.h
10
block/cfq.h
|
@ -79,11 +79,6 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
|
||||||
direction, sync);
|
direction, sync);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
|
|
||||||
{
|
|
||||||
return blkiocg_del_blkio_group(blkg);
|
|
||||||
}
|
|
||||||
|
|
||||||
#else /* CFQ_GROUP_IOSCHED */
|
#else /* CFQ_GROUP_IOSCHED */
|
||||||
static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group *blkg,
|
static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group *blkg,
|
||||||
struct blkio_policy_type *pol,
|
struct blkio_policy_type *pol,
|
||||||
|
@ -119,10 +114,5 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
|
||||||
struct blkio_policy_type *pol, uint64_t start_time,
|
struct blkio_policy_type *pol, uint64_t start_time,
|
||||||
uint64_t io_start_time, bool direction, bool sync) { }
|
uint64_t io_start_time, bool direction, bool sync) { }
|
||||||
|
|
||||||
static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* CFQ_GROUP_IOSCHED */
|
#endif /* CFQ_GROUP_IOSCHED */
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue