Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - the virtio_blk stack DMA corruption fix from Christoph, fixing an
   issue with VMAP stacks.

 - O_DIRECT blkbits calculation fix from Chandan.

 - discard regression fix from Christoph.

 - queue init error handling fixes for nbd and virtio_blk, from Omar and
   Jeff.

 - two small nvme fixes, from Christoph and Guilherme.

 - rename of blk_queue_zone_size and bdev_zone_size to _sectors instead,
   to more closely follow what we do in other places in the block layer.
   This interface is new for this series, so let's get the naming right
   before releasing a kernel with this feature. From Damien.

* 'for-linus' of git://git.kernel.dk/linux-block:
  block: don't try to discard from __blkdev_issue_zeroout
  sd: remove __data_len hack for WRITE SAME
  nvme: use blk_rq_payload_bytes
  scsi: use blk_rq_payload_bytes
  block: add blk_rq_payload_bytes
  block: Rename blk_queue_zone_size and bdev_zone_size
  nvme: apply DELAY_BEFORE_CHK_RDY quirk at probe time too
  nvme-rdma: fix nvme_rdma_queue_is_ready
  virtio_blk: fix panic in initialization error path
  nbd: blk_mq_init_queue returns an error code on failure, not NULL
  virtio_blk: avoid DMA to stack for the sense buffer
  do_direct_IO: Use inode->i_blkbits to compute block count to be cleaned
This commit is contained in:
Linus Torvalds 2017-01-14 17:07:04 -08:00
commit 34241af77b
16 changed files with 66 additions and 83 deletions

View File

@ -301,13 +301,6 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
if ((sector | nr_sects) & bs_mask)
return -EINVAL;
if (discard) {
ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
BLKDEV_DISCARD_ZERO, biop);
if (ret == 0 || (ret && ret != -EOPNOTSUPP))
goto out;
}
ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
biop);
if (ret == 0 || (ret && ret != -EOPNOTSUPP))
@ -370,6 +363,12 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
struct bio *bio = NULL;
struct blk_plug plug;
if (discard) {
if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
BLKDEV_DISCARD_ZERO))
return 0;
}
blk_start_plug(&plug);
ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
&bio, discard);

View File

@ -16,7 +16,7 @@
static inline sector_t blk_zone_start(struct request_queue *q,
sector_t sector)
{
sector_t zone_mask = blk_queue_zone_size(q) - 1;
sector_t zone_mask = blk_queue_zone_sectors(q) - 1;
return sector & ~zone_mask;
}
@ -222,7 +222,7 @@ int blkdev_reset_zones(struct block_device *bdev,
return -EINVAL;
/* Check alignment (handle eventual smaller last zone) */
zone_sectors = blk_queue_zone_size(q);
zone_sectors = blk_queue_zone_sectors(q);
if (sector & (zone_sectors - 1))
return -EINVAL;

View File

@ -434,7 +434,7 @@ static bool part_zone_aligned(struct gendisk *disk,
struct block_device *bdev,
sector_t from, sector_t size)
{
unsigned int zone_size = bdev_zone_size(bdev);
unsigned int zone_sectors = bdev_zone_sectors(bdev);
/*
* If this function is called, then the disk is a zoned block device
@ -446,7 +446,7 @@ static bool part_zone_aligned(struct gendisk *disk,
* regular block devices (no zone operation) and their zone size will
* be reported as 0. Allow this case.
*/
if (!zone_size)
if (!zone_sectors)
return true;
/*
@ -455,24 +455,24 @@ static bool part_zone_aligned(struct gendisk *disk,
* use it. Check the zone size too: it should be a power of 2 number
* of sectors.
*/
if (WARN_ON_ONCE(!is_power_of_2(zone_size))) {
if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) {
u32 rem;
div_u64_rem(from, zone_size, &rem);
div_u64_rem(from, zone_sectors, &rem);
if (rem)
return false;
if ((from + size) < get_capacity(disk)) {
div_u64_rem(size, zone_size, &rem);
div_u64_rem(size, zone_sectors, &rem);
if (rem)
return false;
}
} else {
if (from & (zone_size - 1))
if (from & (zone_sectors - 1))
return false;
if ((from + size) < get_capacity(disk) &&
(size & (zone_size - 1)))
(size & (zone_sectors - 1)))
return false;
}

View File

@ -1042,6 +1042,7 @@ static int __init nbd_init(void)
return -ENOMEM;
for (i = 0; i < nbds_max; i++) {
struct request_queue *q;
struct gendisk *disk = alloc_disk(1 << part_shift);
if (!disk)
goto out;
@ -1067,12 +1068,13 @@ static int __init nbd_init(void)
* every gendisk to have its very own request_queue struct.
* These structs are big so we dynamically allocate them.
*/
disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set);
if (!disk->queue) {
q = blk_mq_init_queue(&nbd_dev[i].tag_set);
if (IS_ERR(q)) {
blk_mq_free_tag_set(&nbd_dev[i].tag_set);
put_disk(disk);
goto out;
}
disk->queue = q;
/*
* Tell the block layer that we are not a rotational device

View File

@ -56,6 +56,7 @@ struct virtblk_req {
struct virtio_blk_outhdr out_hdr;
struct virtio_scsi_inhdr in_hdr;
u8 status;
u8 sense[SCSI_SENSE_BUFFERSIZE];
struct scatterlist sg[];
};
@ -102,7 +103,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
}
if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
memcpy(vbr->sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE);
sgs[num_out + num_in++] = &sense;
sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
sgs[num_out + num_in++] = &inhdr;
@ -628,11 +630,12 @@ static int virtblk_probe(struct virtio_device *vdev)
if (err)
goto out_put_disk;
q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
q = blk_mq_init_queue(&vblk->tag_set);
if (IS_ERR(q)) {
err = -ENOMEM;
goto out_free_tags;
}
vblk->disk->queue = q;
q->queuedata = vblk;

View File

@ -1106,12 +1106,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
if (ret)
return ret;
/* Checking for ctrl->tagset is a trick to avoid sleeping on module
* load, since we only need the quirk on reset_controller. Notice
* that the HGST device needs this delay only in firmware activation
* procedure; unfortunately we have no (easy) way to verify this.
*/
if ((ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) && ctrl->tagset)
if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
msleep(NVME_QUIRK_DELAY_AMOUNT);
return nvme_wait_ready(ctrl, cap, false);

View File

@ -1654,13 +1654,12 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
struct nvme_fc_fcp_op *op)
{
struct nvmefc_fcp_req *freq = &op->fcp_req;
u32 map_len = nvme_map_len(rq);
enum dma_data_direction dir;
int ret;
freq->sg_cnt = 0;
if (!map_len)
if (!blk_rq_payload_bytes(rq))
return 0;
freq->sg_table.sgl = freq->first_sgl;
@ -1854,7 +1853,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ret)
return ret;
data_len = nvme_map_len(rq);
data_len = blk_rq_payload_bytes(rq);
if (data_len)
io_dir = ((rq_data_dir(rq) == WRITE) ?
NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);

View File

@ -225,14 +225,6 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
return (sector >> (ns->lba_shift - 9));
}
static inline unsigned nvme_map_len(struct request *rq)
{
if (req_op(rq) == REQ_OP_DISCARD)
return sizeof(struct nvme_dsm_range);
else
return blk_rq_bytes(rq);
}
static inline void nvme_cleanup_cmd(struct request *req)
{
if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {

View File

@ -306,11 +306,11 @@ static __le64 **iod_list(struct request *req)
return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
}
static int nvme_init_iod(struct request *rq, unsigned size,
struct nvme_dev *dev)
static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
int nseg = blk_rq_nr_phys_segments(rq);
unsigned int size = blk_rq_payload_bytes(rq);
if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
@ -420,12 +420,11 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
}
#endif
static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req,
int total_len)
static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct dma_pool *pool;
int length = total_len;
int length = blk_rq_payload_bytes(req);
struct scatterlist *sg = iod->sg;
int dma_len = sg_dma_len(sg);
u64 dma_addr = sg_dma_address(sg);
@ -501,7 +500,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req,
}
static int nvme_map_data(struct nvme_dev *dev, struct request *req,
unsigned size, struct nvme_command *cmnd)
struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct request_queue *q = req->q;
@ -519,7 +518,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
DMA_ATTR_NO_WARN))
goto out;
if (!nvme_setup_prps(dev, req, size))
if (!nvme_setup_prps(dev, req))
goto out_unmap;
ret = BLK_MQ_RQ_QUEUE_ERROR;
@ -580,7 +579,6 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_dev *dev = nvmeq->dev;
struct request *req = bd->rq;
struct nvme_command cmnd;
unsigned map_len;
int ret = BLK_MQ_RQ_QUEUE_OK;
/*
@ -600,13 +598,12 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ret != BLK_MQ_RQ_QUEUE_OK)
return ret;
map_len = nvme_map_len(req);
ret = nvme_init_iod(req, map_len, dev);
ret = nvme_init_iod(req, dev);
if (ret != BLK_MQ_RQ_QUEUE_OK)
goto out_free_cmd;
if (blk_rq_nr_phys_segments(req))
ret = nvme_map_data(dev, req, map_len, &cmnd);
ret = nvme_map_data(dev, req, &cmnd);
if (ret != BLK_MQ_RQ_QUEUE_OK)
goto out_cleanup_iod;

View File

@ -981,8 +981,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
}
static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
struct request *rq, unsigned int map_len,
struct nvme_command *c)
struct request *rq, struct nvme_command *c)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
@ -1014,9 +1013,9 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
}
if (count == 1) {
if (rq_data_dir(rq) == WRITE &&
map_len <= nvme_rdma_inline_data_size(queue) &&
nvme_rdma_queue_idx(queue))
if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
blk_rq_payload_bytes(rq) <=
nvme_rdma_inline_data_size(queue))
return nvme_rdma_map_sg_inline(queue, req, c);
if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
@ -1422,7 +1421,7 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
struct request *rq)
{
if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
struct nvme_command *cmd = (struct nvme_command *)rq->cmd;
struct nvme_command *cmd = nvme_req(rq)->cmd;
if (rq->cmd_type != REQ_TYPE_DRV_PRIV ||
cmd->common.opcode != nvme_fabrics_command ||
@ -1444,7 +1443,6 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_command *c = sqe->data;
bool flush = false;
struct ib_device *dev;
unsigned int map_len;
int ret;
WARN_ON_ONCE(rq->tag < 0);
@ -1462,8 +1460,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
map_len = nvme_map_len(rq);
ret = nvme_rdma_map_data(queue, rq, map_len, c);
ret = nvme_rdma_map_data(queue, rq, c);
if (ret < 0) {
dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", ret);

View File

@ -1018,7 +1018,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
BUG_ON(count > sdb->table.nents);
sdb->table.nents = count;
sdb->length = blk_rq_bytes(req);
sdb->length = blk_rq_payload_bytes(req);
return BLKPREP_OK;
}

View File

@ -836,7 +836,6 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
struct bio *bio = rq->bio;
sector_t sector = blk_rq_pos(rq);
unsigned int nr_sectors = blk_rq_sectors(rq);
unsigned int nr_bytes = blk_rq_bytes(rq);
int ret;
if (sdkp->device->no_write_same)
@ -869,21 +868,7 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
cmd->transfersize = sdp->sector_size;
cmd->allowed = SD_MAX_RETRIES;
/*
* For WRITE_SAME the data transferred in the DATA IN buffer is
* different from the amount of data actually written to the target.
*
* We set up __data_len to the amount of data transferred from the
* DATA IN buffer so that blk_rq_map_sg set up the proper S/G list
* to transfer a single sector of data first, but then reset it to
* the amount of data to be written right after so that the I/O path
* knows how much to actually write.
*/
rq->__data_len = sdp->sector_size;
ret = scsi_init_io(cmd);
rq->__data_len = nr_bytes;
return ret;
return scsi_init_io(cmd);
}
static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd)

View File

@ -906,6 +906,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
struct buffer_head *map_bh)
{
const unsigned blkbits = sdio->blkbits;
const unsigned i_blkbits = blkbits + sdio->blkfactor;
int ret = 0;
while (sdio->block_in_file < sdio->final_block_in_request) {
@ -949,7 +950,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
clean_bdev_aliases(
map_bh->b_bdev,
map_bh->b_blocknr,
map_bh->b_size >> blkbits);
map_bh->b_size >> i_blkbits);
}
if (!sdio->blkfactor)

View File

@ -713,8 +713,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
}
sector = SECTOR_FROM_BLOCK(blkstart);
if (sector & (bdev_zone_size(bdev) - 1) ||
nr_sects != bdev_zone_size(bdev)) {
if (sector & (bdev_zone_sectors(bdev) - 1) ||
nr_sects != bdev_zone_sectors(bdev)) {
f2fs_msg(sbi->sb, KERN_INFO,
"(%d) %s: Unaligned discard attempted (block %x + %x)",
devi, sbi->s_ndevs ? FDEV(devi).path: "",

View File

@ -1553,16 +1553,16 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
return 0;
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
SECTOR_TO_BLOCK(bdev_zone_size(bdev)))
SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
return -EINVAL;
sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev));
sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
__ilog2_u32(sbi->blocks_per_blkz))
return -EINVAL;
sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
sbi->log_blocks_per_blkz;
if (nr_sectors & (bdev_zone_size(bdev) - 1))
if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
FDEV(devi).nr_blkz++;
FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);

View File

@ -739,7 +739,7 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
}
}
static inline unsigned int blk_queue_zone_size(struct request_queue *q)
static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
{
return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
}
@ -1000,6 +1000,19 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
return blk_rq_cur_bytes(rq) >> 9;
}
/*
* Some commands like WRITE SAME have a payload or data transfer size which
* is different from the size of the request. Any driver that supports such
* commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
* calculate the data transfer size.
*/
static inline unsigned int blk_rq_payload_bytes(struct request *rq)
{
if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
return rq->special_vec.bv_len;
return blk_rq_bytes(rq);
}
static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
int op)
{
@ -1536,12 +1549,12 @@ static inline bool bdev_is_zoned(struct block_device *bdev)
return false;
}
static inline unsigned int bdev_zone_size(struct block_device *bdev)
static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
if (q)
return blk_queue_zone_size(q);
return blk_queue_zone_sectors(q);
return 0;
}