block: use normal I/O path for discard requests
prepare_discard_fn() was being called in a place where memory allocation was effectively impossible. This makes it inappropriate for all but the most trivial translations of Linux's DISCARD operation to the block command set. Additionally, adding a payload there makes the ownership of the bio backing unclear, as it is now allocated by the device driver and not the submitter as usual.

It is replaced with QUEUE_FLAG_DISCARD, which indicates whether the queue supports discard operations or not. blkdev_issue_discard() now allocates a one-page, sector-length payload, which is the right thing for the common ATA and SCSI implementations.

The mtd implementation of prepare_discard_fn() is replaced with a simple check for the request being a discard.

Largely based on a previous patch from Matthew Wilcox <matthew@wil.cx> which did the prepare_discard_fn conversion but not the different payload allocation yet.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
commit c15227de13
parent 3bd0f0c763
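A minimal sketch of the new driver-side contract (the example_* names are invented for illustration; queue_flag_set_unlocked(), QUEUE_FLAG_DISCARD, blk_discard_rq(), blk_fs_request(), blk_rq_pos() and blk_rq_sectors() are the existing block-layer interfaces this patch uses): a driver that supports discard sets the queue flag once at setup time and handles discard requests on its normal request path, as the mtd conversion further below does.

#include <linux/blkdev.h>

/* hypothetical hardware hook; stands in for tr->discard() in the mtd code */
static int example_discard_sectors(sector_t block, unsigned int nsect);

/* hypothetical handler for ordinary read/write fs requests */
static int example_rw_request(struct request *req);

static void example_init_queue(struct request_queue *q)
{
        /* advertise discard support; replaces blk_queue_set_discard() */
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}

static int example_do_request(struct request *req)
{
        sector_t block = blk_rq_pos(req);
        unsigned int nsect = blk_rq_sectors(req);

        if (!blk_fs_request(req))
                return -EIO;

        /* discards now arrive on the normal I/O path, flagged REQ_DISCARD */
        if (blk_discard_rq(req))
                return example_discard_sectors(block, nsect);

        return example_rw_request(req);
}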
@@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
 	if (bio->bi_private)
 		complete(bio->bi_private);
+	__free_page(bio_page(bio));

 	bio_put(bio);
 }

@@ -372,26 +373,44 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = flags & DISCARD_FL_BARRIER ?
 		DISCARD_BARRIER : DISCARD_NOBARRIER;
+	struct bio *bio;
+	struct page *page;
 	int ret = 0;

 	if (!q)
 		return -ENXIO;

-	if (!q->prepare_discard_fn)
+	if (!blk_queue_discard(q))
 		return -EOPNOTSUPP;

 	while (nr_sects && !ret) {
-		struct bio *bio = bio_alloc(gfp_mask, 0);
-		if (!bio)
-			return -ENOMEM;
+		unsigned int sector_size = q->limits.logical_block_size;

+		bio = bio_alloc(gfp_mask, 1);
+		if (!bio)
+			goto out;
+		bio->bi_sector = sector;
 		bio->bi_end_io = blkdev_discard_end_io;
 		bio->bi_bdev = bdev;
 		if (flags & DISCARD_FL_WAIT)
 			bio->bi_private = &wait;

-		bio->bi_sector = sector;
+		/*
+		 * Add a zeroed one-sector payload as that's what
+		 * our current implementations need.  If we'll ever need
+		 * more the interface will need revisiting.
+		 */
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!page)
+			goto out_free_bio;
+		if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+			goto out_free_page;

+		/*
+		 * And override the bio size - the way discard works we
+		 * touch many more blocks on disk than the actual payload
+		 * length.
+		 */
 		if (nr_sects > queue_max_hw_sectors(q)) {
 			bio->bi_size = queue_max_hw_sectors(q) << 9;
 			nr_sects -= queue_max_hw_sectors(q);

@@ -414,5 +433,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		bio_put(bio);
 	}
 	return ret;
+out_free_page:
+	__free_page(page);
+out_free_bio:
+	bio_put(bio);
+out:
+	return -ENOMEM;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);

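For submitters nothing visible changes, except that the payload handling is now internal: blkdev_issue_discard() builds the one-sector payload page itself and frees it in blkdev_discard_end_io() above. A minimal caller sketch, assuming a filesystem freeing an extent (the function name is invented; the interface and the DISCARD_FL_* flags are the ones shown in the hunks above):

#include <linux/blkdev.h>

static int example_trim_extent(struct block_device *bdev, sector_t start,
                               sector_t nr_sects)
{
        /* wait for the discard to finish before reusing the blocks */
        int ret = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
                                       DISCARD_FL_WAIT);

        /* devices without QUEUE_FLAG_DISCARD report -EOPNOTSUPP; not fatal */
        if (ret == -EOPNOTSUPP)
                ret = 0;
        return ret;
}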
@@ -1124,7 +1124,6 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 		req->cmd_flags |= REQ_DISCARD;
 		if (bio_rw_flagged(bio, BIO_RW_BARRIER))
 			req->cmd_flags |= REQ_SOFTBARRIER;
-		req->q->prepare_discard_fn(req->q, req);
 	} else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
 		req->cmd_flags |= REQ_HARDBARRIER;

@@ -1470,7 +1469,7 @@ static inline void __generic_make_request(struct bio *bio)
 		goto end_io;

 	if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
-	    !q->prepare_discard_fn) {
+	    !blk_queue_discard(q)) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}

@@ -33,23 +33,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
 }
 EXPORT_SYMBOL(blk_queue_prep_rq);

-/**
- * blk_queue_set_discard - set a discard_sectors function for queue
- * @q:   queue
- * @dfn: prepare_discard function
- *
- * It's possible for a queue to register a discard callback which is used
- * to transform a discard request into the appropriate type for the
- * hardware. If none is registered, then discard requests are failed
- * with %EOPNOTSUPP.
- *
- */
-void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
-{
-	q->prepare_discard_fn = dfn;
-}
-EXPORT_SYMBOL(blk_queue_set_discard);
-
 /**
  * blk_queue_merge_bvec - set a merge_bvec function for queue
  * @q:   queue

@@ -32,14 +32,6 @@ struct mtd_blkcore_priv {
 	spinlock_t queue_lock;
 };

-static int blktrans_discard_request(struct request_queue *q,
-				    struct request *req)
-{
-	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
-	req->cmd[0] = REQ_LB_OP_DISCARD;
-	return 0;
-}
-
 static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 			       struct mtd_blktrans_dev *dev,
 			       struct request *req)

@@ -52,10 +44,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,

 	buf = req->buffer;

-	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-	    req->cmd[0] == REQ_LB_OP_DISCARD)
-		return tr->discard(dev, block, nsect);
-
 	if (!blk_fs_request(req))
 		return -EIO;

@@ -63,6 +51,9 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	    get_capacity(req->rq_disk))
 		return -EIO;

+	if (blk_discard_rq(req))
+		return tr->discard(dev, block, nsect);
+
 	switch(rq_data_dir(req)) {
 	case READ:
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)

@@ -380,8 +371,8 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
 	tr->blkcore_priv->rq->queuedata = tr;
 	blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize);
 	if (tr->discard)
-		blk_queue_set_discard(tr->blkcore_priv->rq,
-				      blktrans_discard_request);
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+					tr->blkcore_priv->rq);

 	tr->blkshift = ffs(tr->blksize) - 1;

@@ -102,7 +102,7 @@ static int dst_request(struct request_queue *q, struct bio *bio)
 	struct dst_node *n = q->queuedata;
 	int err = -EIO;

-	if (bio_empty_barrier(bio) && !q->prepare_discard_fn) {
+	if (bio_empty_barrier(bio) && !blk_queue_discard(q)) {
 		/*
 		 * This is a dirty^Wnice hack, but if we complete this
 		 * operation with -EOPNOTSUPP like intended, XFS

@@ -82,7 +82,6 @@ enum rq_cmd_type_bits {
 enum {
 	REQ_LB_OP_EJECT   = 0x40,	/* eject request */
 	REQ_LB_OP_FLUSH   = 0x41,	/* flush request */
-	REQ_LB_OP_DISCARD = 0x42,	/* discard sectors */
 };

 /*

@@ -261,7 +260,6 @@ typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unplug_fn) (struct request_queue *);
-typedef int (prepare_discard_fn) (struct request_queue *, struct request *);

 struct bio_vec;
 struct bvec_merge_data {

@@ -340,7 +338,6 @@ struct request_queue
 	make_request_fn		*make_request_fn;
 	prep_rq_fn		*prep_rq_fn;
 	unplug_fn		*unplug_fn;
-	prepare_discard_fn	*prepare_discard_fn;
 	merge_bvec_fn		*merge_bvec_fn;
 	prepare_flush_fn	*prepare_flush_fn;
 	softirq_done_fn		*softirq_done_fn;

|
||||||
#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
|
#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
|
||||||
#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */
|
#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */
|
||||||
#define QUEUE_FLAG_CQ 16 /* hardware does queuing */
|
#define QUEUE_FLAG_CQ 16 /* hardware does queuing */
|
||||||
|
#define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */
|
||||||
|
|
||||||
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
|
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
|
||||||
(1 << QUEUE_FLAG_CLUSTER) | \
|
(1 << QUEUE_FLAG_CLUSTER) | \
|
||||||
|
@ -591,6 +589,7 @@ enum {
|
||||||
#define blk_queue_flushing(q) ((q)->ordseq)
|
#define blk_queue_flushing(q) ((q)->ordseq)
|
||||||
#define blk_queue_stackable(q) \
|
#define blk_queue_stackable(q) \
|
||||||
test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
|
test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
|
||||||
|
#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
|
||||||
|
|
||||||
#define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)
|
#define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)
|
||||||
#define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
|
#define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
|
||||||
|
@@ -955,7 +954,6 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
-extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);