Merge branch 'for-3.6/core' of git://git.kernel.dk/linux-block
Pull core block IO bits from Jens Axboe:
 "The most complicated part of this is the request allocation rework by
  Tejun, which has been queued up for a long time and has been in
  for-next ditto as well.

  There are a few commits from yesterday and today, mostly trivial and
  obvious fixes. So I'm pretty confident that it is sound. It's also
  smaller than usual."

* 'for-3.6/core' of git://git.kernel.dk/linux-block:
  block: remove dead func declaration
  block: add partition resize function to blkpg ioctl
  block: uninitialized ioc->nr_tasks triggers WARN_ON
  block: do not artificially constrain max_sectors for stacking drivers
  blkcg: implement per-blkg request allocation
  block: prepare for multiple request_lists
  block: add q->nr_rqs[] and move q->rq.elvpriv to q->nr_rqs_elvpriv
  blkcg: inline bio_blkcg() and friends
  block: allocate io_context upfront
  block: refactor get_request[_wait]()
  block: drop custom queue draining used by scsi_transport_{iscsi|fc}
  mempool: add @gfp_mask to mempool_create_node()
  blkcg: make root blkcg allocation use %GFP_KERNEL
  blkcg: __blkg_lookup_create() doesn't need radix preload
This commit is contained in:
commit 8cf1a3fce0
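The heart of the pull is the request allocation rework: instead of every allocation drawing from the single per-queue request_list (q->rq), the allocator now picks a request_list per blkcg through blk_get_rl() and drops it with blk_put_rl(). A minimal sketch of that flow, paraphrasing the reworked get_request() hunk further down in block/blk-core.c (the wrapper name and simplified error handling here are illustrative, not part of the commit):

/* Sketch only: mirrors the reworked get_request() path below. */
static struct request *get_request_sketch(struct request_queue *q, int rw_flags,
					  struct bio *bio, gfp_t gfp_mask)
{
	struct request_list *rl;
	struct request *rq;

	/* called with q->queue_lock held, as get_request() is */
	rl = blk_get_rl(q, bio);		/* per-blkcg rl, or q->root_rl */
	rq = __get_request(rl, rw_flags, bio, gfp_mask);
	if (rq)
		return rq;			/* rl reference now travels in rq->rl */

	blk_put_rl(rl);				/* failed: drop the rl reference */
	return NULL;
}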
@@ -38,6 +38,13 @@ read or write requests. Note that the total allocated number may be twice
 this amount, since it applies only to reads or writes (not the accumulated
 sum).
 
+To avoid priority inversion through request starvation, a request
+queue maintains a separate request pool per each cgroup when
+CONFIG_BLK_CGROUP is enabled, and this parameter applies to each such
+per-block-cgroup request pool.  IOW, if there are N block cgroups,
+each request queue may have upto N request pools, each independently
+regulated by nr_requests.
+
 read_ahead_kb (RW)
 ------------------
 Maximum number of kilobytes to read-ahead for filesystems on this block
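Since nr_requests now sizes each per-cgroup pool independently, the sysfs knob itself is unchanged; it is still read and written through /sys/block/<dev>/queue/nr_requests. A small userspace sketch (the device name and the doubling are illustrative assumptions):

/* Sketch: read and raise nr_requests for one block device via sysfs. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/block/sda/queue/nr_requests";	/* assumed device */
	unsigned long nr = 0;
	FILE *f = fopen(path, "r+");		/* needs root to write */

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%lu", &nr) == 1)
		printf("current nr_requests: %lu\n", nr);
	rewind(f);
	fprintf(f, "%lu\n", nr * 2);		/* each blkcg pool is sized by this value */
	fclose(f);
	return 0;
}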
@@ -31,27 +31,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
-struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
-{
-	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
-			    struct blkcg, css);
-}
-EXPORT_SYMBOL_GPL(cgroup_to_blkcg);
-
-static struct blkcg *task_blkcg(struct task_struct *tsk)
-{
-	return container_of(task_subsys_state(tsk, blkio_subsys_id),
-			    struct blkcg, css);
-}
-
-struct blkcg *bio_blkcg(struct bio *bio)
-{
-	if (bio && bio->bi_css)
-		return container_of(bio->bi_css, struct blkcg, css);
-	return task_blkcg(current);
-}
-EXPORT_SYMBOL_GPL(bio_blkcg);
-
 static bool blkcg_policy_enabled(struct request_queue *q,
 				 const struct blkcg_policy *pol)
 {
@@ -84,6 +63,7 @@ static void blkg_free(struct blkcg_gq *blkg)
 		kfree(pd);
 	}
 
+	blk_exit_rl(&blkg->rl);
 	kfree(blkg);
 }
 
@@ -91,16 +71,18 @@ static void blkg_free(struct blkcg_gq *blkg)
  * blkg_alloc - allocate a blkg
  * @blkcg: block cgroup the new blkg is associated with
  * @q: request_queue the new blkg is associated with
+ * @gfp_mask: allocation mask to use
  *
  * Allocate a new blkg assocating @blkcg and @q.
  */
-static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
+static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
+				   gfp_t gfp_mask)
 {
 	struct blkcg_gq *blkg;
 	int i;
 
 	/* alloc and init base part */
-	blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
+	blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
 	if (!blkg)
 		return NULL;
 
@@ -109,6 +91,13 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
 	blkg->blkcg = blkcg;
 	blkg->refcnt = 1;
 
+	/* root blkg uses @q->root_rl, init rl only for !root blkgs */
+	if (blkcg != &blkcg_root) {
+		if (blk_init_rl(&blkg->rl, q, gfp_mask))
+			goto err_free;
+		blkg->rl.blkg = blkg;
+	}
+
 	for (i = 0; i < BLKCG_MAX_POLS; i++) {
 		struct blkcg_policy *pol = blkcg_policy[i];
 		struct blkg_policy_data *pd;
@@ -117,11 +106,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
 			continue;
 
 		/* alloc per-policy data and attach it to blkg */
-		pd = kzalloc_node(pol->pd_size, GFP_ATOMIC, q->node);
-		if (!pd) {
-			blkg_free(blkg);
-			return NULL;
-		}
+		pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
+		if (!pd)
+			goto err_free;
 
 		blkg->pd[i] = pd;
 		pd->blkg = blkg;
@@ -132,6 +119,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
 	}
 
 	return blkg;
+
+err_free:
+	blkg_free(blkg);
+	return NULL;
 }
 
 static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
@@ -175,9 +166,13 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blkg_lookup);
 
+/*
+ * If @new_blkg is %NULL, this function tries to allocate a new one as
+ * necessary using %GFP_ATOMIC.  @new_blkg is always consumed on return.
+ */
 static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-					     struct request_queue *q)
-	__releases(q->queue_lock) __acquires(q->queue_lock)
+					     struct request_queue *q,
+					     struct blkcg_gq *new_blkg)
 {
 	struct blkcg_gq *blkg;
 	int ret;
@@ -189,24 +184,26 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 	blkg = __blkg_lookup(blkcg, q);
 	if (blkg) {
 		rcu_assign_pointer(blkcg->blkg_hint, blkg);
-		return blkg;
+		goto out_free;
 	}
 
 	/* blkg holds a reference to blkcg */
-	if (!css_tryget(&blkcg->css))
-		return ERR_PTR(-EINVAL);
+	if (!css_tryget(&blkcg->css)) {
+		blkg = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
 
 	/* allocate */
-	ret = -ENOMEM;
-	blkg = blkg_alloc(blkcg, q);
-	if (unlikely(!blkg))
-		goto err_put;
+	if (!new_blkg) {
+		new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
+		if (unlikely(!new_blkg)) {
+			blkg = ERR_PTR(-ENOMEM);
+			goto out_put;
+		}
+	}
+	blkg = new_blkg;
 
 	/* insert */
-	ret = radix_tree_preload(GFP_ATOMIC);
-	if (ret)
-		goto err_free;
-
 	spin_lock(&blkcg->lock);
 	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
 	if (likely(!ret)) {
@@ -215,15 +212,15 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 	}
 	spin_unlock(&blkcg->lock);
 
-	radix_tree_preload_end();
-
 	if (!ret)
 		return blkg;
-err_free:
-	blkg_free(blkg);
-err_put:
+
+	blkg = ERR_PTR(ret);
+out_put:
 	css_put(&blkcg->css);
-	return ERR_PTR(ret);
+out_free:
+	blkg_free(new_blkg);
+	return blkg;
 }
 
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
@@ -235,7 +232,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 	 */
 	if (unlikely(blk_queue_bypass(q)))
 		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
-	return __blkg_lookup_create(blkcg, q);
+	return __blkg_lookup_create(blkcg, q, NULL);
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
 
@@ -313,6 +310,38 @@ void __blkg_release(struct blkcg_gq *blkg)
 }
 EXPORT_SYMBOL_GPL(__blkg_release);
 
+/*
+ * The next function used by blk_queue_for_each_rl().  It's a bit tricky
+ * because the root blkg uses @q->root_rl instead of its own rl.
+ */
+struct request_list *__blk_queue_next_rl(struct request_list *rl,
+					 struct request_queue *q)
+{
+	struct list_head *ent;
+	struct blkcg_gq *blkg;
+
+	/*
+	 * Determine the current blkg list_head.  The first entry is
+	 * root_rl which is off @q->blkg_list and mapped to the head.
+	 */
+	if (rl == &q->root_rl) {
+		ent = &q->blkg_list;
+	} else {
+		blkg = container_of(rl, struct blkcg_gq, rl);
+		ent = &blkg->q_node;
+	}
+
+	/* walk to the next list_head, skip root blkcg */
+	ent = ent->next;
+	if (ent == &q->root_blkg->q_node)
+		ent = ent->next;
+	if (ent == &q->blkg_list)
+		return NULL;
+
+	blkg = container_of(ent, struct blkcg_gq, q_node);
+	return &blkg->rl;
+}
+
 static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
 			     u64 val)
 {
@@ -734,24 +763,36 @@ int blkcg_activate_policy(struct request_queue *q,
 	struct blkcg_gq *blkg;
 	struct blkg_policy_data *pd, *n;
 	int cnt = 0, ret;
+	bool preloaded;
 
 	if (blkcg_policy_enabled(q, pol))
 		return 0;
 
+	/* preallocations for root blkg */
+	blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
+	if (!blkg)
+		return -ENOMEM;
+
+	preloaded = !radix_tree_preload(GFP_KERNEL);
+
 	blk_queue_bypass_start(q);
 
 	/* make sure the root blkg exists and count the existing blkgs */
 	spin_lock_irq(q->queue_lock);
 
 	rcu_read_lock();
-	blkg = __blkg_lookup_create(&blkcg_root, q);
+	blkg = __blkg_lookup_create(&blkcg_root, q, blkg);
 	rcu_read_unlock();
 
+	if (preloaded)
+		radix_tree_preload_end();
+
 	if (IS_ERR(blkg)) {
 		ret = PTR_ERR(blkg);
 		goto out_unlock;
 	}
 	q->root_blkg = blkg;
+	q->root_rl.blkg = blkg;
 
 	list_for_each_entry(blkg, &q->blkg_list, q_node)
 		cnt++;
|
@ -17,6 +17,7 @@
|
|||
#include <linux/u64_stats_sync.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
/* Max limits for throttle policy */
|
||||
#define THROTL_IOPS_MAX UINT_MAX
|
||||
|
@@ -93,6 +94,8 @@ struct blkcg_gq {
 	struct list_head		q_node;
 	struct hlist_node		blkcg_node;
 	struct blkcg			*blkcg;
+	/* request allocation list for this blkcg-q pair */
+	struct request_list		rl;
 	/* reference count */
 	int				refcnt;
 
@@ -120,8 +123,6 @@ struct blkcg_policy {
 
 extern struct blkcg blkcg_root;
 
-struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup);
-struct blkcg *bio_blkcg(struct bio *bio);
 struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 				    struct request_queue *q);
@@ -160,6 +161,25 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
 
+static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
+{
+	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
+			    struct blkcg, css);
+}
+
+static inline struct blkcg *task_blkcg(struct task_struct *tsk)
+{
+	return container_of(task_subsys_state(tsk, blkio_subsys_id),
+			    struct blkcg, css);
+}
+
+static inline struct blkcg *bio_blkcg(struct bio *bio)
+{
+	if (bio && bio->bi_css)
+		return container_of(bio->bi_css, struct blkcg, css);
+	return task_blkcg(current);
+}
+
 /**
  * blkg_to_pdata - get policy private data
  * @blkg: blkg of interest
@@ -233,6 +253,95 @@ static inline void blkg_put(struct blkcg_gq *blkg)
 		__blkg_release(blkg);
 }
 
+/**
+ * blk_get_rl - get request_list to use
+ * @q: request_queue of interest
+ * @bio: bio which will be attached to the allocated request (may be %NULL)
+ *
+ * The caller wants to allocate a request from @q to use for @bio.  Find
+ * the request_list to use and obtain a reference on it.  Should be called
+ * under queue_lock.  This function is guaranteed to return non-%NULL
+ * request_list.
+ */
+static inline struct request_list *blk_get_rl(struct request_queue *q,
+					      struct bio *bio)
+{
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+
+	rcu_read_lock();
+
+	blkcg = bio_blkcg(bio);
+
+	/* bypass blkg lookup and use @q->root_rl directly for root */
+	if (blkcg == &blkcg_root)
+		goto root_rl;
+
+	/*
+	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
+	 * or if either the blkcg or queue is going away.  Fall back to
+	 * root_rl in such cases.
+	 */
+	blkg = blkg_lookup_create(blkcg, q);
+	if (unlikely(IS_ERR(blkg)))
+		goto root_rl;
+
+	blkg_get(blkg);
+	rcu_read_unlock();
+	return &blkg->rl;
+root_rl:
+	rcu_read_unlock();
+	return &q->root_rl;
+}
+
+/**
+ * blk_put_rl - put request_list
+ * @rl: request_list to put
+ *
+ * Put the reference acquired by blk_get_rl().  Should be called under
+ * queue_lock.
+ */
+static inline void blk_put_rl(struct request_list *rl)
+{
+	/* root_rl may not have blkg set */
+	if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
+		blkg_put(rl->blkg);
+}
+
+/**
+ * blk_rq_set_rl - associate a request with a request_list
+ * @rq: request of interest
+ * @rl: target request_list
+ *
+ * Associate @rq with @rl so that accounting and freeing can know the
+ * request_list @rq came from.
+ */
+static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
+{
+	rq->rl = rl;
+}
+
+/**
+ * blk_rq_rl - return the request_list a request came from
+ * @rq: request of interest
+ *
+ * Return the request_list @rq is allocated from.
+ */
+static inline struct request_list *blk_rq_rl(struct request *rq)
+{
+	return rq->rl;
+}
+
+struct request_list *__blk_queue_next_rl(struct request_list *rl,
+					 struct request_queue *q);
+/**
+ * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
+ *
+ * Should be used under queue_lock.
+ */
+#define blk_queue_for_each_rl(rl, q)	\
+	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
+
 /**
  * blkg_stat_add - add a value to a blkg_stat
  * @stat: target blkg_stat
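blk_queue_for_each_rl() is what lets queue-wide code visit every request pool without knowing about blkcg; the drain path in block/blk-core.c below uses it exactly this way. A condensed sketch of that pattern (the wrapper function is illustrative, not from the commit):

/* Sketch: wake all request-list waiters, as blk_drain_queue() now does. */
static void wake_all_rl_waiters(struct request_queue *q)
{
	struct request_list *rl;
	int i;

	spin_lock_irq(q->queue_lock);
	blk_queue_for_each_rl(rl, q)		/* root_rl first, then each blkg->rl */
		for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
			wake_up_all(&rl->wait[i]);
	spin_unlock_irq(q->queue_lock);
}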
@@ -351,6 +460,7 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
 #else	/* CONFIG_BLK_CGROUP */
 
 struct cgroup;
+struct blkcg;
 
 struct blkg_policy_data {
 };
@@ -361,8 +471,6 @@ struct blkcg_gq {
 struct blkcg_policy {
 };
 
-static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
-static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
@@ -374,6 +482,9 @@ static inline int blkcg_activate_policy(struct request_queue *q,
 static inline void blkcg_deactivate_policy(struct request_queue *q,
 					   const struct blkcg_policy *pol) { }
 
+static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
+static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
+
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
 						  struct blkcg_policy *pol) { return NULL; }
 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
@@ -381,5 +492,14 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
 static inline void blkg_get(struct blkcg_gq *blkg) { }
 static inline void blkg_put(struct blkcg_gq *blkg) { }
 
+static inline struct request_list *blk_get_rl(struct request_queue *q,
+					      struct bio *bio) { return &q->root_rl; }
+static inline void blk_put_rl(struct request_list *rl) { }
+static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
+static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
+
+#define blk_queue_for_each_rl(rl, q) \
+	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
+
 #endif	/* CONFIG_BLK_CGROUP */
 #endif	/* _BLK_CGROUP_H */
block/blk-core.c | 209
@ -387,7 +387,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
|
|||
if (!list_empty(&q->queue_head) && q->request_fn)
|
||||
__blk_run_queue(q);
|
||||
|
||||
drain |= q->rq.elvpriv;
|
||||
drain |= q->nr_rqs_elvpriv;
|
||||
|
||||
/*
|
||||
* Unfortunately, requests are queued at and tracked from
|
||||
|
@ -397,7 +397,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
|
|||
if (drain_all) {
|
||||
drain |= !list_empty(&q->queue_head);
|
||||
for (i = 0; i < 2; i++) {
|
||||
drain |= q->rq.count[i];
|
||||
drain |= q->nr_rqs[i];
|
||||
drain |= q->in_flight[i];
|
||||
drain |= !list_empty(&q->flush_queue[i]);
|
||||
}
|
||||
|
@ -416,9 +416,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
|
|||
* left with hung waiters. We need to wake up those waiters.
|
||||
*/
|
||||
if (q->request_fn) {
|
||||
struct request_list *rl;
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++)
|
||||
wake_up_all(&q->rq.wait[i]);
|
||||
|
||||
blk_queue_for_each_rl(rl, q)
|
||||
for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
|
||||
wake_up_all(&rl->wait[i]);
|
||||
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
}
|
||||
|
@ -517,28 +522,33 @@ void blk_cleanup_queue(struct request_queue *q)
|
|||
}
|
||||
EXPORT_SYMBOL(blk_cleanup_queue);
|
||||
|
||||
static int blk_init_free_list(struct request_queue *q)
|
||||
int blk_init_rl(struct request_list *rl, struct request_queue *q,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct request_list *rl = &q->rq;
|
||||
|
||||
if (unlikely(rl->rq_pool))
|
||||
return 0;
|
||||
|
||||
rl->q = q;
|
||||
rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
|
||||
rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
|
||||
rl->elvpriv = 0;
|
||||
init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
|
||||
init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
|
||||
|
||||
rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
|
||||
mempool_free_slab, request_cachep, q->node);
|
||||
|
||||
mempool_free_slab, request_cachep,
|
||||
gfp_mask, q->node);
|
||||
if (!rl->rq_pool)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void blk_exit_rl(struct request_list *rl)
|
||||
{
|
||||
if (rl->rq_pool)
|
||||
mempool_destroy(rl->rq_pool);
|
||||
}
|
||||
|
||||
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
|
||||
{
|
||||
return blk_alloc_queue_node(gfp_mask, -1);
|
||||
|
@ -680,7 +690,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
|
|||
if (!q)
|
||||
return NULL;
|
||||
|
||||
if (blk_init_free_list(q))
|
||||
if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
|
||||
return NULL;
|
||||
|
||||
q->request_fn = rfn;
|
||||
|
@ -722,15 +732,15 @@ bool blk_get_queue(struct request_queue *q)
|
|||
}
|
||||
EXPORT_SYMBOL(blk_get_queue);
|
||||
|
||||
static inline void blk_free_request(struct request_queue *q, struct request *rq)
|
||||
static inline void blk_free_request(struct request_list *rl, struct request *rq)
|
||||
{
|
||||
if (rq->cmd_flags & REQ_ELVPRIV) {
|
||||
elv_put_request(q, rq);
|
||||
elv_put_request(rl->q, rq);
|
||||
if (rq->elv.icq)
|
||||
put_io_context(rq->elv.icq->ioc);
|
||||
}
|
||||
|
||||
mempool_free(rq, q->rq.rq_pool);
|
||||
mempool_free(rq, rl->rq_pool);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -767,18 +777,23 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
|
|||
ioc->last_waited = jiffies;
|
||||
}
|
||||
|
||||
static void __freed_request(struct request_queue *q, int sync)
|
||||
static void __freed_request(struct request_list *rl, int sync)
|
||||
{
|
||||
struct request_list *rl = &q->rq;
|
||||
struct request_queue *q = rl->q;
|
||||
|
||||
if (rl->count[sync] < queue_congestion_off_threshold(q))
|
||||
/*
|
||||
* bdi isn't aware of blkcg yet. As all async IOs end up root
|
||||
* blkcg anyway, just use root blkcg state.
|
||||
*/
|
||||
if (rl == &q->root_rl &&
|
||||
rl->count[sync] < queue_congestion_off_threshold(q))
|
||||
blk_clear_queue_congested(q, sync);
|
||||
|
||||
if (rl->count[sync] + 1 <= q->nr_requests) {
|
||||
if (waitqueue_active(&rl->wait[sync]))
|
||||
wake_up(&rl->wait[sync]);
|
||||
|
||||
blk_clear_queue_full(q, sync);
|
||||
blk_clear_rl_full(rl, sync);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -786,19 +801,20 @@ static void __freed_request(struct request_queue *q, int sync)
|
|||
* A request has just been released. Account for it, update the full and
|
||||
* congestion status, wake up any waiters. Called under q->queue_lock.
|
||||
*/
|
||||
static void freed_request(struct request_queue *q, unsigned int flags)
|
||||
static void freed_request(struct request_list *rl, unsigned int flags)
|
||||
{
|
||||
struct request_list *rl = &q->rq;
|
||||
struct request_queue *q = rl->q;
|
||||
int sync = rw_is_sync(flags);
|
||||
|
||||
q->nr_rqs[sync]--;
|
||||
rl->count[sync]--;
|
||||
if (flags & REQ_ELVPRIV)
|
||||
rl->elvpriv--;
|
||||
q->nr_rqs_elvpriv--;
|
||||
|
||||
__freed_request(q, sync);
|
||||
__freed_request(rl, sync);
|
||||
|
||||
if (unlikely(rl->starved[sync ^ 1]))
|
||||
__freed_request(q, sync ^ 1);
|
||||
__freed_request(rl, sync ^ 1);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -837,8 +853,8 @@ static struct io_context *rq_ioc(struct bio *bio)
|
|||
}
|
||||
|
||||
/**
|
||||
* get_request - get a free request
|
||||
* @q: request_queue to allocate request from
|
||||
* __get_request - get a free request
|
||||
* @rl: request list to allocate from
|
||||
* @rw_flags: RW and SYNC flags
|
||||
* @bio: bio to allocate request for (can be %NULL)
|
||||
* @gfp_mask: allocation mask
|
||||
|
@ -850,20 +866,16 @@ static struct io_context *rq_ioc(struct bio *bio)
|
|||
* Returns %NULL on failure, with @q->queue_lock held.
|
||||
* Returns !%NULL on success, with @q->queue_lock *not held*.
|
||||
*/
|
||||
static struct request *get_request(struct request_queue *q, int rw_flags,
|
||||
struct bio *bio, gfp_t gfp_mask)
|
||||
static struct request *__get_request(struct request_list *rl, int rw_flags,
|
||||
struct bio *bio, gfp_t gfp_mask)
|
||||
{
|
||||
struct request_queue *q = rl->q;
|
||||
struct request *rq;
|
||||
struct request_list *rl = &q->rq;
|
||||
struct elevator_type *et;
|
||||
struct io_context *ioc;
|
||||
struct elevator_type *et = q->elevator->type;
|
||||
struct io_context *ioc = rq_ioc(bio);
|
||||
struct io_cq *icq = NULL;
|
||||
const bool is_sync = rw_is_sync(rw_flags) != 0;
|
||||
bool retried = false;
|
||||
int may_queue;
|
||||
retry:
|
||||
et = q->elevator->type;
|
||||
ioc = rq_ioc(bio);
|
||||
|
||||
if (unlikely(blk_queue_dead(q)))
|
||||
return NULL;
|
||||
|
@ -874,29 +886,15 @@ retry:
|
|||
|
||||
if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
|
||||
if (rl->count[is_sync]+1 >= q->nr_requests) {
|
||||
/*
|
||||
* We want ioc to record batching state. If it's
|
||||
* not already there, creating a new one requires
|
||||
* dropping queue_lock, which in turn requires
|
||||
* retesting conditions to avoid queue hang.
|
||||
*/
|
||||
if (!ioc && !retried) {
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
create_io_context(gfp_mask, q->node);
|
||||
spin_lock_irq(q->queue_lock);
|
||||
retried = true;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/*
|
||||
* The queue will fill after this allocation, so set
|
||||
* it as full, and mark this process as "batching".
|
||||
* This process will be allowed to complete a batch of
|
||||
* requests, others will be blocked.
|
||||
*/
|
||||
if (!blk_queue_full(q, is_sync)) {
|
||||
if (!blk_rl_full(rl, is_sync)) {
|
||||
ioc_set_batching(q, ioc);
|
||||
blk_set_queue_full(q, is_sync);
|
||||
blk_set_rl_full(rl, is_sync);
|
||||
} else {
|
||||
if (may_queue != ELV_MQUEUE_MUST
|
||||
&& !ioc_batching(q, ioc)) {
|
||||
|
@ -909,7 +907,12 @@ retry:
|
|||
}
|
||||
}
|
||||
}
|
||||
blk_set_queue_congested(q, is_sync);
|
||||
/*
|
||||
* bdi isn't aware of blkcg yet. As all async IOs end up
|
||||
* root blkcg anyway, just use root blkcg state.
|
||||
*/
|
||||
if (rl == &q->root_rl)
|
||||
blk_set_queue_congested(q, is_sync);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -920,6 +923,7 @@ retry:
|
|||
if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
|
||||
return NULL;
|
||||
|
||||
q->nr_rqs[is_sync]++;
|
||||
rl->count[is_sync]++;
|
||||
rl->starved[is_sync] = 0;
|
||||
|
||||
|
@ -935,7 +939,7 @@ retry:
|
|||
*/
|
||||
if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
|
||||
rw_flags |= REQ_ELVPRIV;
|
||||
rl->elvpriv++;
|
||||
q->nr_rqs_elvpriv++;
|
||||
if (et->icq_cache && ioc)
|
||||
icq = ioc_lookup_icq(ioc, q);
|
||||
}
|
||||
|
@ -945,22 +949,19 @@ retry:
|
|||
spin_unlock_irq(q->queue_lock);
|
||||
|
||||
/* allocate and init request */
|
||||
rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
|
||||
rq = mempool_alloc(rl->rq_pool, gfp_mask);
|
||||
if (!rq)
|
||||
goto fail_alloc;
|
||||
|
||||
blk_rq_init(q, rq);
|
||||
blk_rq_set_rl(rq, rl);
|
||||
rq->cmd_flags = rw_flags | REQ_ALLOCED;
|
||||
|
||||
/* init elvpriv */
|
||||
if (rw_flags & REQ_ELVPRIV) {
|
||||
if (unlikely(et->icq_cache && !icq)) {
|
||||
create_io_context(gfp_mask, q->node);
|
||||
ioc = rq_ioc(bio);
|
||||
if (!ioc)
|
||||
goto fail_elvpriv;
|
||||
|
||||
icq = ioc_create_icq(ioc, q, gfp_mask);
|
||||
if (ioc)
|
||||
icq = ioc_create_icq(ioc, q, gfp_mask);
|
||||
if (!icq)
|
||||
goto fail_elvpriv;
|
||||
}
|
||||
|
@ -1000,7 +1001,7 @@ fail_elvpriv:
|
|||
rq->elv.icq = NULL;
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
rl->elvpriv--;
|
||||
q->nr_rqs_elvpriv--;
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
goto out;
|
||||
|
||||
|
@ -1013,7 +1014,7 @@ fail_alloc:
|
|||
* queue, but this is pretty rare.
|
||||
*/
|
||||
spin_lock_irq(q->queue_lock);
|
||||
freed_request(q, rw_flags);
|
||||
freed_request(rl, rw_flags);
|
||||
|
||||
/*
|
||||
* in the very unlikely event that allocation failed and no
|
||||
|
@ -1029,56 +1030,58 @@ rq_starved:
|
|||
}
|
||||
|
||||
/**
|
||||
* get_request_wait - get a free request with retry
|
||||
* get_request - get a free request
|
||||
* @q: request_queue to allocate request from
|
||||
* @rw_flags: RW and SYNC flags
|
||||
* @bio: bio to allocate request for (can be %NULL)
|
||||
* @gfp_mask: allocation mask
|
||||
*
|
||||
* Get a free request from @q. This function keeps retrying under memory
|
||||
* pressure and fails iff @q is dead.
|
||||
* Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this
|
||||
* function keeps retrying under memory pressure and fails iff @q is dead.
|
||||
*
|
||||
* Must be callled with @q->queue_lock held and,
|
||||
* Returns %NULL on failure, with @q->queue_lock held.
|
||||
* Returns !%NULL on success, with @q->queue_lock *not held*.
|
||||
*/
|
||||
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
|
||||
struct bio *bio)
|
||||
static struct request *get_request(struct request_queue *q, int rw_flags,
|
||||
struct bio *bio, gfp_t gfp_mask)
|
||||
{
|
||||
const bool is_sync = rw_is_sync(rw_flags) != 0;
|
||||
DEFINE_WAIT(wait);
|
||||
struct request_list *rl;
|
||||
struct request *rq;
|
||||
|
||||
rq = get_request(q, rw_flags, bio, GFP_NOIO);
|
||||
while (!rq) {
|
||||
DEFINE_WAIT(wait);
|
||||
struct request_list *rl = &q->rq;
|
||||
rl = blk_get_rl(q, bio); /* transferred to @rq on success */
|
||||
retry:
|
||||
rq = __get_request(rl, rw_flags, bio, gfp_mask);
|
||||
if (rq)
|
||||
return rq;
|
||||
|
||||
if (unlikely(blk_queue_dead(q)))
|
||||
return NULL;
|
||||
if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
|
||||
blk_put_rl(rl);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
/* wait on @rl and retry */
|
||||
prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
|
||||
trace_block_sleeprq(q, bio, rw_flags & 1);
|
||||
trace_block_sleeprq(q, bio, rw_flags & 1);
|
||||
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
io_schedule();
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
io_schedule();
|
||||
|
||||
/*
|
||||
* After sleeping, we become a "batching" process and
|
||||
* will be able to allocate at least one request, and
|
||||
* up to a big batch of them for a small period time.
|
||||
* See ioc_batching, ioc_set_batching
|
||||
*/
|
||||
create_io_context(GFP_NOIO, q->node);
|
||||
ioc_set_batching(q, current->io_context);
|
||||
/*
|
||||
* After sleeping, we become a "batching" process and will be able
|
||||
* to allocate at least one request, and up to a big batch of them
|
||||
* for a small period time. See ioc_batching, ioc_set_batching
|
||||
*/
|
||||
ioc_set_batching(q, current->io_context);
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
finish_wait(&rl->wait[is_sync], &wait);
|
||||
spin_lock_irq(q->queue_lock);
|
||||
finish_wait(&rl->wait[is_sync], &wait);
|
||||
|
||||
rq = get_request(q, rw_flags, bio, GFP_NOIO);
|
||||
};
|
||||
|
||||
return rq;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
|
||||
|
@ -1087,11 +1090,11 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
|
|||
|
||||
BUG_ON(rw != READ && rw != WRITE);
|
||||
|
||||
/* create ioc upfront */
|
||||
create_io_context(gfp_mask, q->node);
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (gfp_mask & __GFP_WAIT)
|
||||
rq = get_request_wait(q, rw, NULL);
|
||||
else
|
||||
rq = get_request(q, rw, NULL, gfp_mask);
|
||||
rq = get_request(q, rw, NULL, gfp_mask);
|
||||
if (!rq)
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
/* q->queue_lock is unlocked at this point */
|
||||
|
@ -1248,12 +1251,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
|
|||
*/
|
||||
if (req->cmd_flags & REQ_ALLOCED) {
|
||||
unsigned int flags = req->cmd_flags;
|
||||
struct request_list *rl = blk_rq_rl(req);
|
||||
|
||||
BUG_ON(!list_empty(&req->queuelist));
|
||||
BUG_ON(!hlist_unhashed(&req->hash));
|
||||
|
||||
blk_free_request(q, req);
|
||||
freed_request(q, flags);
|
||||
blk_free_request(rl, req);
|
||||
freed_request(rl, flags);
|
||||
blk_put_rl(rl);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__blk_put_request);
|
||||
|
@ -1481,7 +1486,7 @@ get_rq:
|
|||
* Grab a free request. This is might sleep but can not fail.
|
||||
* Returns with the queue unlocked.
|
||||
*/
|
||||
req = get_request_wait(q, rw_flags, bio);
|
||||
req = get_request(q, rw_flags, bio, GFP_NOIO);
|
||||
if (unlikely(!req)) {
|
||||
bio_endio(bio, -ENODEV); /* @q is dead */
|
||||
goto out_unlock;
|
||||
|
@ -1702,6 +1707,14 @@ generic_make_request_checks(struct bio *bio)
|
|||
goto end_io;
|
||||
}
|
||||
|
||||
/*
|
||||
* Various block parts want %current->io_context and lazy ioc
|
||||
* allocation ends up trading a lot of pain for a small amount of
|
||||
* memory. Just allocate it upfront. This may fail and block
|
||||
* layer knows how to live with it.
|
||||
*/
|
||||
create_io_context(GFP_ATOMIC, q->node);
|
||||
|
||||
if (blk_throtl_bio(q, bio))
|
||||
return false; /* throttled, will be resubmitted later */
|
||||
|
||||
|
|
|
@ -244,6 +244,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)
|
|||
|
||||
/* initialize */
|
||||
atomic_long_set(&ioc->refcount, 1);
|
||||
atomic_set(&ioc->nr_tasks, 1);
|
||||
atomic_set(&ioc->active_ref, 1);
|
||||
spin_lock_init(&ioc->lock);
|
||||
INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
|
||||
|
|
|
@ -143,8 +143,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
|
|||
lim->discard_zeroes_data = 1;
|
||||
lim->max_segments = USHRT_MAX;
|
||||
lim->max_hw_sectors = UINT_MAX;
|
||||
|
||||
lim->max_sectors = BLK_DEF_MAX_SECTORS;
|
||||
lim->max_sectors = UINT_MAX;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_stacking_limits);
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page)
|
|||
static ssize_t
|
||||
queue_requests_store(struct request_queue *q, const char *page, size_t count)
|
||||
{
|
||||
struct request_list *rl = &q->rq;
|
||||
struct request_list *rl;
|
||||
unsigned long nr;
|
||||
int ret;
|
||||
|
||||
|
@ -55,6 +55,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
|
|||
q->nr_requests = nr;
|
||||
blk_queue_congestion_threshold(q);
|
||||
|
||||
/* congestion isn't cgroup aware and follows root blkcg for now */
|
||||
rl = &q->root_rl;
|
||||
|
||||
if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
|
||||
blk_set_queue_congested(q, BLK_RW_SYNC);
|
||||
else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
|
||||
|
@ -65,19 +68,22 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
|
|||
else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
|
||||
blk_clear_queue_congested(q, BLK_RW_ASYNC);
|
||||
|
||||
if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
|
||||
blk_set_queue_full(q, BLK_RW_SYNC);
|
||||
} else {
|
||||
blk_clear_queue_full(q, BLK_RW_SYNC);
|
||||
wake_up(&rl->wait[BLK_RW_SYNC]);
|
||||
blk_queue_for_each_rl(rl, q) {
|
||||
if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
|
||||
blk_set_rl_full(rl, BLK_RW_SYNC);
|
||||
} else {
|
||||
blk_clear_rl_full(rl, BLK_RW_SYNC);
|
||||
wake_up(&rl->wait[BLK_RW_SYNC]);
|
||||
}
|
||||
|
||||
if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
|
||||
blk_set_rl_full(rl, BLK_RW_ASYNC);
|
||||
} else {
|
||||
blk_clear_rl_full(rl, BLK_RW_ASYNC);
|
||||
wake_up(&rl->wait[BLK_RW_ASYNC]);
|
||||
}
|
||||
}
|
||||
|
||||
if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
|
||||
blk_set_queue_full(q, BLK_RW_ASYNC);
|
||||
} else {
|
||||
blk_clear_queue_full(q, BLK_RW_ASYNC);
|
||||
wake_up(&rl->wait[BLK_RW_ASYNC]);
|
||||
}
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
return ret;
|
||||
}
|
||||
|
@ -476,7 +482,6 @@ static void blk_release_queue(struct kobject *kobj)
|
|||
{
|
||||
struct request_queue *q =
|
||||
container_of(kobj, struct request_queue, kobj);
|
||||
struct request_list *rl = &q->rq;
|
||||
|
||||
blk_sync_queue(q);
|
||||
|
||||
|
@ -489,8 +494,7 @@ static void blk_release_queue(struct kobject *kobj)
|
|||
elevator_exit(q->elevator);
|
||||
}
|
||||
|
||||
if (rl->rq_pool)
|
||||
mempool_destroy(rl->rq_pool);
|
||||
blk_exit_rl(&q->root_rl);
|
||||
|
||||
if (q->queue_tags)
|
||||
__blk_queue_free_tags(q);
|
||||
|
|
|
@ -1123,9 +1123,6 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
|
|||
goto out;
|
||||
}
|
||||
|
||||
/* bio_associate_current() needs ioc, try creating */
|
||||
create_io_context(GFP_ATOMIC, q->node);
|
||||
|
||||
/*
|
||||
* A throtl_grp pointer retrieved under rcu can be used to access
|
||||
* basic fields like stats and io rates. If a group has no rules,
|
||||
|
|
|
@ -18,6 +18,9 @@ static inline void __blk_get_queue(struct request_queue *q)
|
|||
kobject_get(&q->kobj);
|
||||
}
|
||||
|
||||
int blk_init_rl(struct request_list *rl, struct request_queue *q,
|
||||
gfp_t gfp_mask);
|
||||
void blk_exit_rl(struct request_list *rl);
|
||||
void init_request_from_bio(struct request *req, struct bio *bio);
|
||||
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
|
||||
struct bio *bio);
|
||||
|
@ -33,7 +36,6 @@ bool __blk_end_bidi_request(struct request *rq, int error,
|
|||
void blk_rq_timed_out_timer(unsigned long data);
|
||||
void blk_delete_timer(struct request *);
|
||||
void blk_add_timer(struct request *);
|
||||
void __generic_unplug_device(struct request_queue *);
|
||||
|
||||
/*
|
||||
* Internal atomic flags for request handling
|
||||
|
|
|
@ -243,56 +243,3 @@ int bsg_setup_queue(struct device *dev, struct request_queue *q,
|
|||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bsg_setup_queue);
|
||||
|
||||
/**
|
||||
* bsg_remove_queue - Deletes the bsg dev from the q
|
||||
* @q: the request_queue that is to be torn down.
|
||||
*
|
||||
* Notes:
|
||||
* Before unregistering the queue empty any requests that are blocked
|
||||
*/
|
||||
void bsg_remove_queue(struct request_queue *q)
|
||||
{
|
||||
struct request *req; /* block request */
|
||||
int counts; /* totals for request_list count and starved */
|
||||
|
||||
if (!q)
|
||||
return;
|
||||
|
||||
/* Stop taking in new requests */
|
||||
spin_lock_irq(q->queue_lock);
|
||||
blk_stop_queue(q);
|
||||
|
||||
/* drain all requests in the queue */
|
||||
while (1) {
|
||||
/* need the lock to fetch a request
|
||||
* this may fetch the same reqeust as the previous pass
|
||||
*/
|
||||
req = blk_fetch_request(q);
|
||||
/* save requests in use and starved */
|
||||
counts = q->rq.count[0] + q->rq.count[1] +
|
||||
q->rq.starved[0] + q->rq.starved[1];
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
/* any requests still outstanding? */
|
||||
if (counts == 0)
|
||||
break;
|
||||
|
||||
/* This may be the same req as the previous iteration,
|
||||
* always send the blk_end_request_all after a prefetch.
|
||||
* It is not okay to not end the request because the
|
||||
* prefetch started the request.
|
||||
*/
|
||||
if (req) {
|
||||
/* return -ENXIO to indicate that this queue is
|
||||
* going away
|
||||
*/
|
||||
req->errors = -ENXIO;
|
||||
blk_end_request_all(req, -ENXIO);
|
||||
}
|
||||
|
||||
msleep(200); /* allow bsg to possibly finish */
|
||||
spin_lock_irq(q->queue_lock);
|
||||
}
|
||||
bsg_unregister_queue(q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bsg_remove_queue);
|
||||
|
|
|
@ -154,7 +154,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
|
|||
part = rcu_dereference(ptbl->part[piter->idx]);
|
||||
if (!part)
|
||||
continue;
|
||||
if (!part->nr_sects &&
|
||||
if (!part_nr_sects_read(part) &&
|
||||
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
|
||||
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
|
||||
piter->idx == 0))
|
||||
|
@ -191,7 +191,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
|
|||
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
|
||||
{
|
||||
return part->start_sect <= sector &&
|
||||
sector < part->start_sect + part->nr_sects;
|
||||
sector < part->start_sect + part_nr_sects_read(part);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -769,8 +769,8 @@ void __init printk_all_partitions(void)
|
|||
|
||||
printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
|
||||
bdevt_str(part_devt(part), devt_buf),
|
||||
(unsigned long long)part->nr_sects >> 1,
|
||||
disk_name(disk, part->partno, name_buf),
|
||||
(unsigned long long)part_nr_sects_read(part) >> 1
|
||||
, disk_name(disk, part->partno, name_buf),
|
||||
uuid_buf);
|
||||
if (is_part0) {
|
||||
if (disk->driverfs_dev != NULL &&
|
||||
|
@ -862,7 +862,7 @@ static int show_partition(struct seq_file *seqf, void *v)
|
|||
while ((part = disk_part_iter_next(&piter)))
|
||||
seq_printf(seqf, "%4d %7d %10llu %s\n",
|
||||
MAJOR(part_devt(part)), MINOR(part_devt(part)),
|
||||
(unsigned long long)part->nr_sects >> 1,
|
||||
(unsigned long long)part_nr_sects_read(part) >> 1,
|
||||
disk_name(sgp, part->partno, buf));
|
||||
disk_part_iter_exit(&piter);
|
||||
|
||||
|
@ -1268,6 +1268,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
|
|||
}
|
||||
disk->part_tbl->part[0] = &disk->part0;
|
||||
|
||||
/*
|
||||
* set_capacity() and get_capacity() currently don't use
|
||||
* seqcounter to read/update the part0->nr_sects. Still init
|
||||
* the counter as we can read the sectors in IO submission
|
||||
* patch using seqence counters.
|
||||
*
|
||||
* TODO: Ideally set_capacity() and get_capacity() should be
|
||||
* converted to make use of bd_mutex and sequence counters.
|
||||
*/
|
||||
seqcount_init(&disk->part0.nr_sects_seq);
|
||||
hd_ref_init(&disk->part0);
|
||||
|
||||
disk->minors = minors;
|
||||
|
|
|
@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
|
|||
{
|
||||
struct block_device *bdevp;
|
||||
struct gendisk *disk;
|
||||
struct hd_struct *part;
|
||||
struct hd_struct *part, *lpart;
|
||||
struct blkpg_ioctl_arg a;
|
||||
struct blkpg_partition p;
|
||||
struct disk_part_iter piter;
|
||||
|
@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
|
|||
case BLKPG_ADD_PARTITION:
|
||||
start = p.start >> 9;
|
||||
length = p.length >> 9;
|
||||
/* check for fit in a hd_struct */
|
||||
if (sizeof(sector_t) == sizeof(long) &&
|
||||
/* check for fit in a hd_struct */
|
||||
if (sizeof(sector_t) == sizeof(long) &&
|
||||
sizeof(long long) > sizeof(long)) {
|
||||
long pstart = start, plength = length;
|
||||
if (pstart != start || plength != length
|
||||
|
@ -91,6 +91,59 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
|
|||
mutex_unlock(&bdevp->bd_mutex);
|
||||
bdput(bdevp);
|
||||
|
||||
return 0;
|
||||
case BLKPG_RESIZE_PARTITION:
|
||||
start = p.start >> 9;
|
||||
/* new length of partition in bytes */
|
||||
length = p.length >> 9;
|
||||
/* check for fit in a hd_struct */
|
||||
if (sizeof(sector_t) == sizeof(long) &&
|
||||
sizeof(long long) > sizeof(long)) {
|
||||
long pstart = start, plength = length;
|
||||
if (pstart != start || plength != length
|
||||
|| pstart < 0 || plength < 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
part = disk_get_part(disk, partno);
|
||||
if (!part)
|
||||
return -ENXIO;
|
||||
bdevp = bdget(part_devt(part));
|
||||
if (!bdevp) {
|
||||
disk_put_part(part);
|
||||
return -ENOMEM;
|
||||
}
|
||||
mutex_lock(&bdevp->bd_mutex);
|
||||
mutex_lock_nested(&bdev->bd_mutex, 1);
|
||||
if (start != part->start_sect) {
|
||||
mutex_unlock(&bdevp->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
bdput(bdevp);
|
||||
disk_put_part(part);
|
||||
return -EINVAL;
|
||||
}
|
||||
/* overlap? */
|
||||
disk_part_iter_init(&piter, disk,
|
||||
DISK_PITER_INCL_EMPTY);
|
||||
while ((lpart = disk_part_iter_next(&piter))) {
|
||||
if (lpart->partno != partno &&
|
||||
!(start + length <= lpart->start_sect ||
|
||||
start >= lpart->start_sect + lpart->nr_sects)
|
||||
) {
|
||||
disk_part_iter_exit(&piter);
|
||||
mutex_unlock(&bdevp->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
bdput(bdevp);
|
||||
disk_put_part(part);
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
disk_part_iter_exit(&piter);
|
||||
part_nr_sects_write(part, (sector_t)length);
|
||||
i_size_write(bdevp->bd_inode, p.length);
|
||||
mutex_unlock(&bdevp->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
bdput(bdevp);
|
||||
disk_put_part(part);
|
||||
return 0;
|
||||
default:
|
||||
return -EINVAL;
|
||||
|
|
|
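The new BLKPG_RESIZE_PARTITION op is driven from userspace through the existing BLKPG ioctl on the whole-disk node. A hedged example of resizing partition 1 (the device path, current start offset, and new length are made-up values):

/* Sketch: resize partition 1 via the new BLKPG_RESIZE_PARTITION op. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/blkpg.h>

int main(void)
{
	struct blkpg_partition part = {
		.pno	= 1,
		.start	= 1048576,		/* must match the current start (bytes) */
		.length	= 8LL << 30,		/* new length in bytes */
	};
	struct blkpg_ioctl_arg arg = {
		.op	= BLKPG_RESIZE_PARTITION,
		.datalen = sizeof(part),
		.data	= &part,
	};
	int fd = open("/dev/sda", O_RDWR);	/* whole-disk node, assumed */

	if (fd < 0 || ioctl(fd, BLKPG, &arg) < 0) {
		perror("BLKPG_RESIZE_PARTITION");
		return 1;
	}
	close(fd);
	return 0;
}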
@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev,
|
|||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hd_struct *p = dev_to_part(dev);
|
||||
return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
|
||||
return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
|
||||
}
|
||||
|
||||
static ssize_t part_ro_show(struct device *dev,
|
||||
|
@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
|
|||
err = -ENOMEM;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
seqcount_init(&p->nr_sects_seq);
|
||||
pdev = part_to_dev(p);
|
||||
|
||||
p->start_sect = start;
|
||||
|
|
|
@ -4146,45 +4146,7 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
|
|||
static void
|
||||
fc_bsg_remove(struct request_queue *q)
|
||||
{
|
||||
struct request *req; /* block request */
|
||||
int counts; /* totals for request_list count and starved */
|
||||
|
||||
if (q) {
|
||||
/* Stop taking in new requests */
|
||||
spin_lock_irq(q->queue_lock);
|
||||
blk_stop_queue(q);
|
||||
|
||||
/* drain all requests in the queue */
|
||||
while (1) {
|
||||
/* need the lock to fetch a request
|
||||
* this may fetch the same reqeust as the previous pass
|
||||
*/
|
||||
req = blk_fetch_request(q);
|
||||
/* save requests in use and starved */
|
||||
counts = q->rq.count[0] + q->rq.count[1] +
|
||||
q->rq.starved[0] + q->rq.starved[1];
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
/* any requests still outstanding? */
|
||||
if (counts == 0)
|
||||
break;
|
||||
|
||||
/* This may be the same req as the previous iteration,
|
||||
* always send the blk_end_request_all after a prefetch.
|
||||
* It is not okay to not end the request because the
|
||||
* prefetch started the request.
|
||||
*/
|
||||
if (req) {
|
||||
/* return -ENXIO to indicate that this queue is
|
||||
* going away
|
||||
*/
|
||||
req->errors = -ENXIO;
|
||||
blk_end_request_all(req, -ENXIO);
|
||||
}
|
||||
|
||||
msleep(200); /* allow bsg to possibly finish */
|
||||
spin_lock_irq(q->queue_lock);
|
||||
}
|
||||
|
||||
bsg_unregister_queue(q);
|
||||
blk_cleanup_queue(q);
|
||||
}
|
||||
|
|
|
@ -575,7 +575,7 @@ static int iscsi_remove_host(struct transport_container *tc,
|
|||
struct iscsi_cls_host *ihost = shost->shost_data;
|
||||
|
||||
if (ihost->bsg_q) {
|
||||
bsg_remove_queue(ihost->bsg_q);
|
||||
bsg_unregister_queue(ihost->bsg_q);
|
||||
blk_cleanup_queue(ihost->bsg_q);
|
||||
}
|
||||
return 0;
|
||||
|
|
|
@ -46,16 +46,23 @@ struct blkcg_gq;
|
|||
struct request;
|
||||
typedef void (rq_end_io_fn)(struct request *, int);
|
||||
|
||||
#define BLK_RL_SYNCFULL (1U << 0)
|
||||
#define BLK_RL_ASYNCFULL (1U << 1)
|
||||
|
||||
struct request_list {
|
||||
struct request_queue *q; /* the queue this rl belongs to */
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
struct blkcg_gq *blkg; /* blkg this request pool belongs to */
|
||||
#endif
|
||||
/*
|
||||
* count[], starved[], and wait[] are indexed by
|
||||
* BLK_RW_SYNC/BLK_RW_ASYNC
|
||||
*/
|
||||
int count[2];
|
||||
int starved[2];
|
||||
int elvpriv;
|
||||
mempool_t *rq_pool;
|
||||
wait_queue_head_t wait[2];
|
||||
int count[2];
|
||||
int starved[2];
|
||||
mempool_t *rq_pool;
|
||||
wait_queue_head_t wait[2];
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -138,6 +145,7 @@ struct request {
|
|||
struct hd_struct *part;
|
||||
unsigned long start_time;
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
struct request_list *rl; /* rl this rq is alloced from */
|
||||
unsigned long long start_time_ns;
|
||||
unsigned long long io_start_time_ns; /* when passed to hardware */
|
||||
#endif
|
||||
|
@ -282,11 +290,16 @@ struct request_queue {
|
|||
struct list_head queue_head;
|
||||
struct request *last_merge;
|
||||
struct elevator_queue *elevator;
|
||||
int nr_rqs[2]; /* # allocated [a]sync rqs */
|
||||
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
|
||||
|
||||
/*
|
||||
* the queue request freelist, one for reads and one for writes
|
||||
* If blkcg is not used, @q->root_rl serves all requests. If blkcg
|
||||
* is used, root blkg allocates from @q->root_rl and all other
|
||||
* blkgs from their own blkg->rl. Which one to use should be
|
||||
* determined using bio_request_list().
|
||||
*/
|
||||
struct request_list rq;
|
||||
struct request_list root_rl;
|
||||
|
||||
request_fn_proc *request_fn;
|
||||
make_request_fn *make_request_fn;
|
||||
|
@ -561,27 +574,25 @@ static inline bool rq_is_sync(struct request *rq)
|
|||
return rw_is_sync(rq->cmd_flags);
|
||||
}
|
||||
|
||||
static inline int blk_queue_full(struct request_queue *q, int sync)
|
||||
static inline bool blk_rl_full(struct request_list *rl, bool sync)
|
||||
{
|
||||
if (sync)
|
||||
return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags);
|
||||
return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags);
|
||||
unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
|
||||
|
||||
return rl->flags & flag;
|
||||
}
|
||||
|
||||
static inline void blk_set_queue_full(struct request_queue *q, int sync)
|
||||
static inline void blk_set_rl_full(struct request_list *rl, bool sync)
|
||||
{
|
||||
if (sync)
|
||||
queue_flag_set(QUEUE_FLAG_SYNCFULL, q);
|
||||
else
|
||||
queue_flag_set(QUEUE_FLAG_ASYNCFULL, q);
|
||||
unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
|
||||
|
||||
rl->flags |= flag;
|
||||
}
|
||||
|
||||
static inline void blk_clear_queue_full(struct request_queue *q, int sync)
|
||||
static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
|
||||
{
|
||||
if (sync)
|
||||
queue_flag_clear(QUEUE_FLAG_SYNCFULL, q);
|
||||
else
|
||||
queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q);
|
||||
unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
|
||||
|
||||
rl->flags &= ~flag;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ struct blkpg_ioctl_arg {
|
|||
/* The subfunctions (for the op field) */
|
||||
#define BLKPG_ADD_PARTITION 1
|
||||
#define BLKPG_DEL_PARTITION 2
|
||||
#define BLKPG_RESIZE_PARTITION 3
|
||||
|
||||
/* Sizes of name fields. Unused at present. */
|
||||
#define BLKPG_DEVNAMELTH 64
|
||||
|
|
|
@ -67,7 +67,6 @@ void bsg_job_done(struct bsg_job *job, int result,
|
|||
int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
|
||||
bsg_job_fn *job_fn, int dd_job_size);
|
||||
void bsg_request_fn(struct request_queue *q);
|
||||
void bsg_remove_queue(struct request_queue *q);
|
||||
void bsg_goose_queue(struct request_queue *q);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -97,7 +97,13 @@ struct partition_meta_info {
|
|||
|
||||
struct hd_struct {
|
||||
sector_t start_sect;
|
||||
/*
|
||||
* nr_sects is protected by sequence counter. One might extend a
|
||||
* partition while IO is happening to it and update of nr_sects
|
||||
* can be non-atomic on 32bit machines with 64bit sector_t.
|
||||
*/
|
||||
sector_t nr_sects;
|
||||
seqcount_t nr_sects_seq;
|
||||
sector_t alignment_offset;
|
||||
unsigned int discard_alignment;
|
||||
struct device __dev;
|
||||
|
@ -647,6 +653,57 @@ static inline void hd_struct_put(struct hd_struct *part)
|
|||
__delete_partition(part);
|
||||
}
|
||||
|
||||
/*
|
||||
* Any access of part->nr_sects which is not protected by partition
|
||||
* bd_mutex or gendisk bdev bd_mutex, should be done using this
|
||||
* accessor function.
|
||||
*
|
||||
* Code written along the lines of i_size_read() and i_size_write().
|
||||
* CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
|
||||
* on.
|
||||
*/
|
||||
static inline sector_t part_nr_sects_read(struct hd_struct *part)
|
||||
{
|
||||
#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
|
||||
sector_t nr_sects;
|
||||
unsigned seq;
|
||||
do {
|
||||
seq = read_seqcount_begin(&part->nr_sects_seq);
|
||||
nr_sects = part->nr_sects;
|
||||
} while (read_seqcount_retry(&part->nr_sects_seq, seq));
|
||||
return nr_sects;
|
||||
#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
|
||||
sector_t nr_sects;
|
||||
|
||||
preempt_disable();
|
||||
nr_sects = part->nr_sects;
|
||||
preempt_enable();
|
||||
return nr_sects;
|
||||
#else
|
||||
return part->nr_sects;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Should be called with mutex lock held (typically bd_mutex) of partition
|
||||
* to provide mutual exlusion among writers otherwise seqcount might be
|
||||
* left in wrong state leaving the readers spinning infinitely.
|
||||
*/
|
||||
static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
|
||||
{
|
||||
#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
|
||||
write_seqcount_begin(&part->nr_sects_seq);
|
||||
part->nr_sects = size;
|
||||
write_seqcount_end(&part->nr_sects_seq);
|
||||
#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
|
||||
preempt_disable();
|
||||
part->nr_sects = size;
|
||||
preempt_enable();
|
||||
#else
|
||||
part->nr_sects = size;
|
||||
#endif
|
||||
}
|
||||
|
||||
#else /* CONFIG_BLOCK */
|
||||
|
||||
static inline void printk_all_partitions(void) { }
|
||||
|
|
|
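The two accessors are meant to be used as a pair: the resize path holds bd_mutex and calls part_nr_sects_write(), while lockless readers such as the partition iterator go through part_nr_sects_read(). A condensed sketch of the writer side, mirroring the BLKPG_RESIZE_PARTITION handler earlier in this merge (the wrapper function is illustrative):

/* Sketch: update a partition's size under bd_mutex, as ioctl.c does. */
static void resize_part_sketch(struct block_device *bdevp, struct hd_struct *part,
			       long long new_bytes)
{
	mutex_lock(&bdevp->bd_mutex);
	part_nr_sects_write(part, (sector_t)(new_bytes >> 9));	/* bytes -> sectors */
	i_size_write(bdevp->bd_inode, new_bytes);
	mutex_unlock(&bdevp->bd_mutex);
}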
@ -26,7 +26,8 @@ typedef struct mempool_s {
|
|||
extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
|
||||
mempool_free_t *free_fn, void *pool_data);
|
||||
extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
|
||||
mempool_free_t *free_fn, void *pool_data, int nid);
|
||||
mempool_free_t *free_fn, void *pool_data,
|
||||
gfp_t gfp_mask, int nid);
|
||||
|
||||
extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask);
|
||||
extern void mempool_destroy(mempool_t *pool);
|
||||
|
|
mm/mempool.c | 12
|
@ -63,19 +63,21 @@ EXPORT_SYMBOL(mempool_destroy);
|
|||
mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
|
||||
mempool_free_t *free_fn, void *pool_data)
|
||||
{
|
||||
return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,-1);
|
||||
return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,
|
||||
GFP_KERNEL, NUMA_NO_NODE);
|
||||
}
|
||||
EXPORT_SYMBOL(mempool_create);
|
||||
|
||||
mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
|
||||
mempool_free_t *free_fn, void *pool_data, int node_id)
|
||||
mempool_free_t *free_fn, void *pool_data,
|
||||
gfp_t gfp_mask, int node_id)
|
||||
{
|
||||
mempool_t *pool;
|
||||
pool = kmalloc_node(sizeof(*pool), GFP_KERNEL | __GFP_ZERO, node_id);
|
||||
pool = kmalloc_node(sizeof(*pool), gfp_mask | __GFP_ZERO, node_id);
|
||||
if (!pool)
|
||||
return NULL;
|
||||
pool->elements = kmalloc_node(min_nr * sizeof(void *),
|
||||
GFP_KERNEL, node_id);
|
||||
gfp_mask, node_id);
|
||||
if (!pool->elements) {
|
||||
kfree(pool);
|
||||
return NULL;
|
||||
|
@ -93,7 +95,7 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
|
|||
while (pool->curr_nr < pool->min_nr) {
|
||||
void *element;
|
||||
|
||||
element = pool->alloc(GFP_KERNEL, pool->pool_data);
|
||||
element = pool->alloc(gfp_mask, pool->pool_data);
|
||||
if (unlikely(!element)) {
|
||||
mempool_destroy(pool);
|
||||
return NULL;
|
||||
|
|
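With the extra parameter, callers that can sleep simply pass GFP_KERNEL while atomic contexts keep a stricter mask; blk_init_rl() earlier in this merge is the first such user. A minimal sketch of the new call shape (my_cachep is an assumed kmem_cache, not from the commit):

/* Sketch: a sleeping caller sizes a slab-backed pool on a specific NUMA node. */
mempool_t *pool = mempool_create_node(16, mempool_alloc_slab,
				      mempool_free_slab, my_cachep,
				      GFP_KERNEL, numa_node_id());
if (!pool)
	return -ENOMEM;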