Merge branch 'for-3.11/core' of git://git.kernel.dk/linux-block
Pull core block IO updates from Jens Axboe:
 "Here are the core IO block bits for 3.11. It contains:

   - A tweak to the reserved tag logic from Jan, for weirdo devices with
     just 3 free tags. But for those it improves things substantially
     for random writes.

   - Periodic writeback fix from Jan. Marked for stable as well.

   - Fix for a race condition in IO scheduler switching from Jianpeng.

   - The hierarchical blk-cgroup support from Tejun. This is the grunt
     of the series.

   - blk-throttle fix from Vivek.

  Just a note that I'm in the middle of a relocation, whole family is
  flying out tomorrow. Hence I will be AWOL the remainder of this week,
  but back at work again on Monday the 15th.

  CC'ing Tejun, since any potential "surprises" will most likely be from
  the blk-cgroup work. But it's been brewing for a while and sitting in
  my tree and linux-next for a long time, so should be solid."

* 'for-3.11/core' of git://git.kernel.dk/linux-block: (36 commits)
  elevator: Fix a race in elevator switching
  block: Reserve only one queue tag for sync IO if only 3 tags are available
  writeback: Fix periodic writeback after fs mount
  blk-throttle: implement proper hierarchy support
  blk-throttle: implement throtl_grp->has_rules[]
  blk-throttle: Account for child group's start time in parent while bio climbs up
  blk-throttle: add throtl_qnode for dispatch fairness
  blk-throttle: make throtl_pending_timer_fn() ready for hierarchy
  blk-throttle: make tg_dispatch_one_bio() ready for hierarchy
  blk-throttle: make blk_throtl_bio() ready for hierarchy
  blk-throttle: make blk_throtl_drain() ready for hierarchy
  blk-throttle: dispatch from throtl_pending_timer_fn()
  blk-throttle: implement dispatch looping
  blk-throttle: separate out throtl_service_queue->pending_timer from throtl_data->dispatch_work
  blk-throttle: set REQ_THROTTLED from throtl_charge_bio() and gate stats update with it
  blk-throttle: implement sq_to_tg(), sq_to_td() and throtl_log()
  blk-throttle: add throtl_service_queue->parent_sq
  blk-throttle: generalize update_disptime optimization in blk_throtl_bio()
  blk-throttle: dispatch to throtl_data->service_queue.bio_lists[]
  blk-throttle: move bio_lists[] and friends to throtl_service_queue
  ...
commit 36805aaea5

Documentation/cgroups/blkio-controller.txt

@@ -94,11 +94,13 @@ Throttling/Upper Limit policy
 
 Hierarchical Cgroups
 ====================
-- Currently only CFQ supports hierarchical groups. For throttling,
-  cgroup interface does allow creation of hierarchical cgroups and
-  internally it treats them as flat hierarchy.
 
-  If somebody created a hierarchy like as follows.
+Both CFQ and throttling implement hierarchy support; however,
+throttling's hierarchy support is enabled iff "sane_behavior" is
+enabled from cgroup side, which currently is a development option and
+not publicly available.
+
+If somebody created a hierarchy like as follows.
 
 			root
 			/  \
@@ -106,21 +108,20 @@ Hierarchical Cgroups
 			|
 		     test3
 
-  CFQ will handle the hierarchy correctly but and throttling will
-  practically treat all groups at same level. For details on CFQ
-  hierarchy support, refer to Documentation/block/cfq-iosched.txt.
-  Throttling will treat the hierarchy as if it looks like the
-  following.
+CFQ by default and throttling with "sane_behavior" will handle the
+hierarchy correctly. For details on CFQ hierarchy support, refer to
+Documentation/block/cfq-iosched.txt. For throttling, all limits apply
+to the whole subtree while all statistics are local to the IOs
+directly generated by tasks in that cgroup.
+
+Throttling without "sane_behavior" enabled from cgroup side will
+practically treat all groups at same level as if it looks like the
+following.
 
 				pivot
 			     /  /   \  \
 			root  test1 test2  test3
 
-  Nesting cgroups, while allowed, isn't officially supported and blkio
-  genereates warning when cgroups nest. Once throttling implements
-  hierarchy support, hierarchy will be supported and the warning will
-  be removed.
-
 Various user visible config options
 ===================================
 CONFIG_BLK_CGROUP

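Concretely, in the hierarchy above this means a throttling limit configured on test1 also caps IO issued from test3, while test3's statistics only count IO generated directly by tasks in test3.
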
block/blk-cgroup.c

@@ -32,26 +32,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-				      struct request_queue *q, bool update_hint);
-
-/**
- * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
- * @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
- * @p_blkg: target blkg to walk descendants of
- *
- * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU
- * read locked. If called under either blkcg or queue lock, the iteration
- * is guaranteed to include all and only online blkgs. The caller may
- * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
- * subtree.
- */
-#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)		\
-	cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
-		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp),	\
-					      (p_blkg)->q, false)))
-
 static bool blkcg_policy_enabled(struct request_queue *q,
 				 const struct blkcg_policy *pol)
 {

@@ -71,18 +51,8 @@ static void blkg_free(struct blkcg_gq *blkg)
 	if (!blkg)
 		return;
 
-	for (i = 0; i < BLKCG_MAX_POLS; i++) {
-		struct blkcg_policy *pol = blkcg_policy[i];
-		struct blkg_policy_data *pd = blkg->pd[i];
-
-		if (!pd)
-			continue;
-
-		if (pol && pol->pd_exit_fn)
-			pol->pd_exit_fn(blkg);
-
-		kfree(pd);
-	}
+	for (i = 0; i < BLKCG_MAX_POLS; i++)
+		kfree(blkg->pd[i]);
 
 	blk_exit_rl(&blkg->rl);
 	kfree(blkg);

@@ -134,10 +104,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 		blkg->pd[i] = pd;
 		pd->blkg = blkg;
 		pd->plid = i;
-
-		/* invoke per-policy init */
-		if (pol->pd_init_fn)
-			pol->pd_init_fn(blkg);
 	}
 
 	return blkg;

@@ -158,8 +124,8 @@ err_free:
  * @q's bypass state. If @update_hint is %true, the caller should be
  * holding @q->queue_lock and lookup hint is updated on success.
  */
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-				      struct request_queue *q, bool update_hint)
+struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
+			       bool update_hint)
 {
 	struct blkcg_gq *blkg;
 

@@ -234,16 +200,25 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 	}
 	blkg = new_blkg;
 
-	/* link parent and insert */
+	/* link parent */
 	if (blkcg_parent(blkcg)) {
 		blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
 		if (WARN_ON_ONCE(!blkg->parent)) {
-			blkg = ERR_PTR(-EINVAL);
+			ret = -EINVAL;
 			goto err_put_css;
 		}
 		blkg_get(blkg->parent);
 	}
 
+	/* invoke per-policy init */
+	for (i = 0; i < BLKCG_MAX_POLS; i++) {
+		struct blkcg_policy *pol = blkcg_policy[i];
+
+		if (blkg->pd[i] && pol->pd_init_fn)
+			pol->pd_init_fn(blkg);
+	}
+
+	/* insert */
 	spin_lock(&blkcg->lock);
 	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
 	if (likely(!ret)) {

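With this change, per-policy initialization has moved out of blkg_alloc() (see the hunk above) and now runs from blkg_create() only after the parent blkg has been looked up and pinned, so a policy's pd_init_fn() can already see blkg->parent; the hierarchical blk-throttle code relies on that.
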
@@ -394,30 +369,38 @@ static void blkg_destroy_all(struct request_queue *q)
 	q->root_rl.blkg = NULL;
 }
 
-static void blkg_rcu_free(struct rcu_head *rcu_head)
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid. For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
 {
-	blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
-}
+	struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+	int i;
 
-void __blkg_release(struct blkcg_gq *blkg)
-{
+	/* tell policies that this one is being freed */
+	for (i = 0; i < BLKCG_MAX_POLS; i++) {
+		struct blkcg_policy *pol = blkcg_policy[i];
+
+		if (blkg->pd[i] && pol->pd_exit_fn)
+			pol->pd_exit_fn(blkg);
+	}
+
 	/* release the blkcg and parent blkg refs this blkg has been holding */
 	css_put(&blkg->blkcg->css);
-	if (blkg->parent)
+	if (blkg->parent) {
+		spin_lock_irq(blkg->q->queue_lock);
 		blkg_put(blkg->parent);
+		spin_unlock_irq(blkg->q->queue_lock);
+	}
 
-	/*
-	 * A group is freed in rcu manner. But having an rcu lock does not
-	 * mean that one can access all the fields of blkg and assume these
-	 * are valid. For example, don't try to follow throtl_data and
-	 * request queue links.
-	 *
-	 * Having a reference to blkg under an rcu allows acess to only
-	 * values local to groups like group stats and group rate limits
-	 */
-	call_rcu(&blkg->rcu_head, blkg_rcu_free);
+	blkg_free(blkg);
 }
-EXPORT_SYMBOL_GPL(__blkg_release);
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
 
 /*
  * The next function used by blk_queue_for_each_rl(). It's a bit tricky
 
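Note that the per-policy pd_exit_fn() callbacks and the parent blkg_put() now happen from the RCU callback rather than from the final blkg_put(), so a blkg and its policy data remain valid for RCU-protected walkers until the grace period ends; the parent is put under the queue lock because blkg_put() asserts that lock (see the blk-cgroup.h hunk below).
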
@@ -928,14 +911,6 @@ struct cgroup_subsys blkio_subsys = {
 	.subsys_id = blkio_subsys_id,
 	.base_cftypes = blkcg_files,
 	.module = THIS_MODULE,
-
-	/*
-	 * blkio subsystem is utterly broken in terms of hierarchy support.
-	 * It treats all cgroups equally regardless of where they're
-	 * located in the hierarchy - all cgroups are treated as if they're
-	 * right below the root. Fix it and remove the following.
-	 */
-	.broken_hierarchy = true,
 };
 EXPORT_SYMBOL_GPL(blkio_subsys);
 

block/blk-cgroup.h

@@ -266,7 +266,7 @@ static inline void blkg_get(struct blkcg_gq *blkg)
 	blkg->refcnt++;
 }
 
-void __blkg_release(struct blkcg_gq *blkg);
+void __blkg_release_rcu(struct rcu_head *rcu);
 
 /**
  * blkg_put - put a blkg reference

@@ -279,9 +279,43 @@ static inline void blkg_put(struct blkcg_gq *blkg)
 	lockdep_assert_held(blkg->q->queue_lock);
 	WARN_ON_ONCE(blkg->refcnt <= 0);
 	if (!--blkg->refcnt)
-		__blkg_release(blkg);
+		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
 }
 
+struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
+			       bool update_hint);
+
+/**
+ * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_cgrp: used for iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU
+ * read locked. If called under either blkcg or queue lock, the iteration
+ * is guaranteed to include all and only online blkgs. The caller may
+ * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
+ * subtree.
+ */
+#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)		\
+	cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
+		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp),	\
+					      (p_blkg)->q, false)))
+
+/**
+ * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_cgrp: used for iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Similar to blkg_for_each_descendant_pre() but performs post-order
+ * traversal instead. Synchronization rules are the same.
+ */
+#define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg)	\
+	cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
+		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp),	\
+					      (p_blkg)->q, false)))
+
 /**
  * blk_get_rl - get request_list to use
  * @q: request_queue of interest

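The descendant iterators are now available outside blk-cgroup.c, which is what the hierarchical blk-throttle changes build on. A hypothetical usage sketch follows (parent_blkg and visit_blkg() are made-up names for illustration, not part of this series); per the comment above, the walk must run with RCU read-locked, and only group-local data should be touched under just the RCU lock:

	struct blkcg_gq *blkg;
	struct cgroup *pos_cgrp;

	rcu_read_lock();
	blkg_for_each_descendant_pre(blkg, pos_cgrp, parent_blkg) {
		/* blkg is an online descendant of parent_blkg */
		visit_blkg(blkg);	/* hypothetical per-group callback */
	}
	rcu_read_unlock();
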
block/blk-tag.c

@@ -348,9 +348,16 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	 */
 	max_depth = bqt->max_depth;
 	if (!rq_is_sync(rq) && max_depth > 1) {
-		max_depth -= 2;
-		if (!max_depth)
+		switch (max_depth) {
+		case 2:
 			max_depth = 1;
+			break;
+		case 3:
+			max_depth = 2;
+			break;
+		default:
+			max_depth -= 2;
+		}
 		if (q->in_flight[BLK_RW_ASYNC] > max_depth)
 			return 1;
 	}

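This is the "reserved tag logic" tweak from the pull message. A minimal sketch of the depth calculation above, written as a standalone helper purely for illustration (async_max_depth() is not a kernel function): with only 3 tags, async requests may now occupy two tags instead of one, which is where the improvement for random writes comes from.

	/* Effective tag depth available to async requests, per the switch above. */
	unsigned int async_max_depth(unsigned int max_depth)
	{
		if (max_depth <= 1)
			return max_depth;	/* nothing to reserve */

		switch (max_depth) {
		case 2:
			return 1;		/* one tag kept for sync IO */
		case 3:
			return 2;		/* old code returned 1 here */
		default:
			return max_depth - 2;	/* two tags kept for sync IO */
		}
	}
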
block/blk-throttle.c: 1070 changed lines; file diff suppressed because it is too large.

block/cfq-iosched.c

@@ -4347,18 +4347,28 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	kfree(cfqd);
 }
 
-static int cfq_init_queue(struct request_queue *q)
+static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
 {
 	struct cfq_data *cfqd;
 	struct blkcg_gq *blkg __maybe_unused;
 	int i, ret;
+	struct elevator_queue *eq;
 
-	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
-	if (!cfqd)
+	eq = elevator_alloc(q, e);
+	if (!eq)
 		return -ENOMEM;
 
+	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
+	if (!cfqd) {
+		kobject_put(&eq->kobj);
+		return -ENOMEM;
+	}
+	eq->elevator_data = cfqd;
+
 	cfqd->queue = q;
-	q->elevator->elevator_data = cfqd;
+	spin_lock_irq(q->queue_lock);
+	q->elevator = eq;
+	spin_unlock_irq(q->queue_lock);
 
 	/* Init root service tree */
 	cfqd->grp_service_tree = CFQ_RB_ROOT;

@@ -4433,6 +4443,7 @@ static int cfq_init_queue(struct request_queue *q)
 
 out_free:
 	kfree(cfqd);
+	kobject_put(&eq->kobj);
 	return ret;
 }
 

block/deadline-iosched.c

@@ -337,13 +337,21 @@ static void deadline_exit_queue(struct elevator_queue *e)
 /*
  * initialize elevator private data (deadline_data).
  */
-static int deadline_init_queue(struct request_queue *q)
+static int deadline_init_queue(struct request_queue *q, struct elevator_type *e)
 {
 	struct deadline_data *dd;
+	struct elevator_queue *eq;
+
+	eq = elevator_alloc(q, e);
+	if (!eq)
+		return -ENOMEM;
 
 	dd = kmalloc_node(sizeof(*dd), GFP_KERNEL | __GFP_ZERO, q->node);
-	if (!dd)
+	if (!dd) {
+		kobject_put(&eq->kobj);
 		return -ENOMEM;
+	}
+	eq->elevator_data = dd;
 
 	INIT_LIST_HEAD(&dd->fifo_list[READ]);
 	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);

@@ -355,7 +363,9 @@ static int deadline_init_queue(struct request_queue *q)
 	dd->front_merges = 1;
 	dd->fifo_batch = fifo_batch;
 
-	q->elevator->elevator_data = dd;
+	spin_lock_irq(q->queue_lock);
+	q->elevator = eq;
+	spin_unlock_irq(q->queue_lock);
 	return 0;
 }
 

block/elevator.c

@@ -150,7 +150,7 @@ void __init load_default_elevator_module(void)
 
 static struct kobj_type elv_ktype;
 
-static struct elevator_queue *elevator_alloc(struct request_queue *q,
+struct elevator_queue *elevator_alloc(struct request_queue *q,
 				  struct elevator_type *e)
 {
 	struct elevator_queue *eq;

@@ -170,6 +170,7 @@ err:
 	elevator_put(e);
 	return NULL;
 }
+EXPORT_SYMBOL(elevator_alloc);
 
 static void elevator_release(struct kobject *kobj)
 {

@@ -221,16 +222,7 @@ int elevator_init(struct request_queue *q, char *name)
 		}
 	}
 
-	q->elevator = elevator_alloc(q, e);
-	if (!q->elevator)
-		return -ENOMEM;
-
-	err = e->ops.elevator_init_fn(q);
-	if (err) {
-		kobject_put(&q->elevator->kobj);
-		return err;
-	}
-
+	err = e->ops.elevator_init_fn(q, e);
 	return 0;
 }
 EXPORT_SYMBOL(elevator_init);

@@ -935,17 +927,10 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	spin_unlock_irq(q->queue_lock);
 
 	/* allocate, init and register new elevator */
-	err = -ENOMEM;
-	q->elevator = elevator_alloc(q, new_e);
-	if (!q->elevator)
+	err = new_e->ops.elevator_init_fn(q, new_e);
+	if (err)
 		goto fail_init;
 
-	err = new_e->ops.elevator_init_fn(q);
-	if (err) {
-		kobject_put(&q->elevator->kobj);
-		goto fail_init;
-	}
-
 	if (registered) {
 		err = elv_register_queue(q);
 		if (err)

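The race fixed here: elevator_switch() used to publish a freshly allocated elevator_queue whose ->elevator_data was still NULL and only then call the scheduler's init function, so IO arriving in that window could dereference uninitialized elevator data. Each elevator_init_fn() now allocates the elevator_queue itself via the newly exported elevator_alloc(), fills in ->elevator_data, and only then installs q->elevator under q->queue_lock, as the cfq, deadline and noop hunks show.
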
block/noop-iosched.c

@@ -59,16 +59,27 @@ noop_latter_request(struct request_queue *q, struct request *rq)
 	return list_entry(rq->queuelist.next, struct request, queuelist);
 }
 
-static int noop_init_queue(struct request_queue *q)
+static int noop_init_queue(struct request_queue *q, struct elevator_type *e)
 {
 	struct noop_data *nd;
+	struct elevator_queue *eq;
+
+	eq = elevator_alloc(q, e);
+	if (!eq)
+		return -ENOMEM;
 
 	nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
-	if (!nd)
+	if (!nd) {
+		kobject_put(&eq->kobj);
 		return -ENOMEM;
+	}
+	eq->elevator_data = nd;
 
 	INIT_LIST_HEAD(&nd->queue);
-	q->elevator->elevator_data = nd;
+
+	spin_lock_irq(q->queue_lock);
+	q->elevator = eq;
+	spin_unlock_irq(q->queue_lock);
 	return 0;
 }
 

fs/block_dev.c

@@ -58,17 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode,
 			struct backing_dev_info *dst)
 {
 	struct backing_dev_info *old = inode->i_data.backing_dev_info;
+	bool wakeup_bdi = false;
 
 	if (unlikely(dst == old))		/* deadlock avoidance */
 		return;
 	bdi_lock_two(&old->wb, &dst->wb);
 	spin_lock(&inode->i_lock);
 	inode->i_data.backing_dev_info = dst;
-	if (inode->i_state & I_DIRTY)
+	if (inode->i_state & I_DIRTY) {
+		if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb))
+			wakeup_bdi = true;
 		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
+	}
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&old->wb.list_lock);
 	spin_unlock(&dst->wb.list_lock);
+
+	if (wakeup_bdi)
+		bdi_wakeup_thread_delayed(dst);
 }
 
 /* Kill _all_ buffers and pagecache , dirty or not.. */

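This is the "periodic writeback fix" from the pull message: when a dirty inode is switched to a backing_dev_info whose writeback list was empty (for example when a filesystem is mounted on a block device with dirty pagecache), the per-bdi flusher is now woken via bdi_wakeup_thread_delayed() so periodic writeback actually picks the inode up.
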
include/linux/cgroup.h

@@ -278,6 +278,8 @@ enum {
 	 *
 	 * - memcg: use_hierarchy is on by default and the cgroup file for
 	 *   the flag is not created.
+	 *
+	 * - blkcg: blk-throttle becomes properly hierarchical.
 	 */
 	CGRP_ROOT_SANE_BEHAVIOR = (1 << 0),
 

include/linux/elevator.h

@@ -7,6 +7,7 @@
 #ifdef CONFIG_BLOCK
 
 struct io_cq;
+struct elevator_type;
 
 typedef int (elevator_merge_fn) (struct request_queue *, struct request **,
 				 struct bio *);

@@ -35,7 +36,8 @@ typedef void (elevator_put_req_fn) (struct request *);
 typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
 typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
 
-typedef int (elevator_init_fn) (struct request_queue *);
+typedef int (elevator_init_fn) (struct request_queue *,
+				struct elevator_type *e);
 typedef void (elevator_exit_fn) (struct elevator_queue *);
 
 struct elevator_ops

@@ -155,6 +157,8 @@ extern int elevator_init(struct request_queue *, char *);
 extern void elevator_exit(struct elevator_queue *);
 extern int elevator_change(struct request_queue *, const char *);
 extern bool elv_rq_merge_ok(struct request *, struct bio *);
+extern struct elevator_queue *elevator_alloc(struct request_queue *,
+					struct elevator_type *);
 
 /*
  * Helper functions.