Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "This contains a set of fixes for xen-blkback by way of Konrad, and a performance regression fix for blk-mq for shared tags. The latter could account for as much as a 50x reduction in performance, with the test case from the user with 500 name spaces. A more realistic setup on my end with 32 drives showed a 3.5x drop. The fix has been thoroughly tested before being committed" * 'for-linus' of git://git.kernel.dk/linux-block: blk-mq: fix performance regression with shared tags xen-blkback: don't leak stack data via response ring xen/blkback: don't use xen_blkif_get() in xen-blkback kthread xen/blkback: don't free be structure too early xen/blkback: fix disconnect while I/Os in flight
This commit is contained in:
commit
8d829b9bb8
|
@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q,
|
|||
__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark a hardware queue as needing a restart. For shared queues, maintain
|
||||
* a count of how many hardware queues are marked for restart.
|
||||
*/
|
||||
static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
||||
return;
|
||||
|
||||
if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
|
||||
struct request_queue *q = hctx->queue;
|
||||
|
||||
if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
||||
atomic_inc(&q->shared_hctx_restart);
|
||||
} else
|
||||
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
||||
}
|
||||
|
||||
static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
||||
return false;
|
||||
|
||||
if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
|
||||
struct request_queue *q = hctx->queue;
|
||||
|
||||
if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
||||
atomic_dec(&q->shared_hctx_restart);
|
||||
} else
|
||||
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
||||
|
||||
if (blk_mq_hctx_has_pending(hctx)) {
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
struct request *blk_mq_sched_get_request(struct request_queue *q,
|
||||
struct bio *bio,
|
||||
unsigned int op,
|
||||
|
@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
|
||||
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
||||
if (blk_mq_hctx_has_pending(hctx)) {
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
|
||||
* @pos: loop cursor.
|
||||
|
@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
|
|||
unsigned int i, j;
|
||||
|
||||
if (set->flags & BLK_MQ_F_TAG_SHARED) {
|
||||
/*
|
||||
* If this is 0, then we know that no hardware queues
|
||||
* have RESTART marked. We're done.
|
||||
*/
|
||||
if (!atomic_read(&queue->shared_hctx_restart))
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
|
||||
tag_set_list) {
|
||||
|
|
|
@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
|
|||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark a hardware queue as needing a restart.
|
||||
*/
|
||||
static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
||||
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
||||
}
|
||||
|
||||
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
||||
|
|
|
@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Caller needs to ensure that we're either frozen/quiesced, or that
|
||||
* the queue isn't live yet.
|
||||
*/
|
||||
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (shared)
|
||||
if (shared) {
|
||||
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
||||
atomic_inc(&q->shared_hctx_restart);
|
||||
hctx->flags |= BLK_MQ_F_TAG_SHARED;
|
||||
else
|
||||
} else {
|
||||
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
||||
atomic_dec(&q->shared_hctx_restart);
|
||||
hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
|
||||
static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
|
||||
bool shared)
|
||||
{
|
||||
struct request_queue *q;
|
||||
|
||||
|
|
|
@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg)
|
|||
unsigned long timeout;
|
||||
int ret;
|
||||
|
||||
xen_blkif_get(blkif);
|
||||
|
||||
set_freezable();
|
||||
while (!kthread_should_stop()) {
|
||||
if (try_to_freeze())
|
||||
|
@ -665,7 +663,6 @@ purge_gnt_list:
|
|||
print_stats(ring);
|
||||
|
||||
ring->xenblkd = NULL;
|
||||
xen_blkif_put(blkif);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1436,34 +1433,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
|
|||
static void make_response(struct xen_blkif_ring *ring, u64 id,
|
||||
unsigned short op, int st)
|
||||
{
|
||||
struct blkif_response resp;
|
||||
struct blkif_response *resp;
|
||||
unsigned long flags;
|
||||
union blkif_back_rings *blk_rings;
|
||||
int notify;
|
||||
|
||||
resp.id = id;
|
||||
resp.operation = op;
|
||||
resp.status = st;
|
||||
|
||||
spin_lock_irqsave(&ring->blk_ring_lock, flags);
|
||||
blk_rings = &ring->blk_rings;
|
||||
/* Place on the response ring for the relevant domain. */
|
||||
switch (ring->blkif->blk_protocol) {
|
||||
case BLKIF_PROTOCOL_NATIVE:
|
||||
memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
|
||||
&resp, sizeof(resp));
|
||||
resp = RING_GET_RESPONSE(&blk_rings->native,
|
||||
blk_rings->native.rsp_prod_pvt);
|
||||
break;
|
||||
case BLKIF_PROTOCOL_X86_32:
|
||||
memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
|
||||
&resp, sizeof(resp));
|
||||
resp = RING_GET_RESPONSE(&blk_rings->x86_32,
|
||||
blk_rings->x86_32.rsp_prod_pvt);
|
||||
break;
|
||||
case BLKIF_PROTOCOL_X86_64:
|
||||
memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
|
||||
&resp, sizeof(resp));
|
||||
resp = RING_GET_RESPONSE(&blk_rings->x86_64,
|
||||
blk_rings->x86_64.rsp_prod_pvt);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
resp->id = id;
|
||||
resp->operation = op;
|
||||
resp->status = st;
|
||||
|
||||
blk_rings->common.rsp_prod_pvt++;
|
||||
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
|
||||
spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
|
||||
|
|
|
@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues;
|
|||
struct blkif_common_request {
|
||||
char dummy;
|
||||
};
|
||||
struct blkif_common_response {
|
||||
char dummy;
|
||||
};
|
||||
|
||||
/* i386 protocol version */
|
||||
|
||||
struct blkif_x86_32_request_rw {
|
||||
uint8_t nr_segments; /* number of segments */
|
||||
|
@ -129,14 +128,6 @@ struct blkif_x86_32_request {
|
|||
} u;
|
||||
} __attribute__((__packed__));
|
||||
|
||||
/* i386 protocol version */
|
||||
#pragma pack(push, 4)
|
||||
struct blkif_x86_32_response {
|
||||
uint64_t id; /* copied from request */
|
||||
uint8_t operation; /* copied from request */
|
||||
int16_t status; /* BLKIF_RSP_??? */
|
||||
};
|
||||
#pragma pack(pop)
|
||||
/* x86_64 protocol version */
|
||||
|
||||
struct blkif_x86_64_request_rw {
|
||||
|
@ -193,18 +184,12 @@ struct blkif_x86_64_request {
|
|||
} u;
|
||||
} __attribute__((__packed__));
|
||||
|
||||
struct blkif_x86_64_response {
|
||||
uint64_t __attribute__((__aligned__(8))) id;
|
||||
uint8_t operation; /* copied from request */
|
||||
int16_t status; /* BLKIF_RSP_??? */
|
||||
};
|
||||
|
||||
DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
|
||||
struct blkif_common_response);
|
||||
struct blkif_response);
|
||||
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
|
||||
struct blkif_x86_32_response);
|
||||
struct blkif_response __packed);
|
||||
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
|
||||
struct blkif_x86_64_response);
|
||||
struct blkif_response);
|
||||
|
||||
union blkif_back_rings {
|
||||
struct blkif_back_ring native;
|
||||
|
@ -281,6 +266,7 @@ struct xen_blkif_ring {
|
|||
|
||||
wait_queue_head_t wq;
|
||||
atomic_t inflight;
|
||||
bool active;
|
||||
/* One thread per blkif ring. */
|
||||
struct task_struct *xenblkd;
|
||||
unsigned int waiting_reqs;
|
||||
|
|
|
@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
|
|||
init_waitqueue_head(&ring->shutdown_wq);
|
||||
ring->blkif = blkif;
|
||||
ring->st_print = jiffies;
|
||||
xen_blkif_get(blkif);
|
||||
ring->active = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
|
|||
struct xen_blkif_ring *ring = &blkif->rings[r];
|
||||
unsigned int i = 0;
|
||||
|
||||
if (!ring->active)
|
||||
continue;
|
||||
|
||||
if (ring->xenblkd) {
|
||||
kthread_stop(ring->xenblkd);
|
||||
wake_up(&ring->shutdown_wq);
|
||||
ring->xenblkd = NULL;
|
||||
}
|
||||
|
||||
/* The above kthread_stop() guarantees that at this point we
|
||||
|
@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
|
|||
BUG_ON(ring->free_pages_num != 0);
|
||||
BUG_ON(ring->persistent_gnt_c != 0);
|
||||
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
|
||||
xen_blkif_put(blkif);
|
||||
ring->active = false;
|
||||
}
|
||||
blkif->nr_ring_pages = 0;
|
||||
/*
|
||||
|
@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
|
|||
|
||||
static void xen_blkif_free(struct xen_blkif *blkif)
|
||||
{
|
||||
|
||||
xen_blkif_disconnect(blkif);
|
||||
WARN_ON(xen_blkif_disconnect(blkif));
|
||||
xen_vbd_free(&blkif->vbd);
|
||||
kfree(blkif->be->mode);
|
||||
kfree(blkif->be);
|
||||
|
||||
/* Make sure everything is drained before shutting down */
|
||||
kmem_cache_free(xen_blkif_cachep, blkif);
|
||||
|
@ -511,8 +514,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
|
|||
xen_blkif_put(be->blkif);
|
||||
}
|
||||
|
||||
kfree(be->mode);
|
||||
kfree(be);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -391,6 +391,8 @@ struct request_queue {
|
|||
int nr_rqs[2]; /* # allocated [a]sync rqs */
|
||||
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
|
||||
|
||||
atomic_t shared_hctx_restart;
|
||||
|
||||
struct blk_queue_stats *stats;
|
||||
struct rq_wb *rq_wb;
|
||||
|
||||
|
|
Loading…
Reference in New Issue