block-5.16-2021-11-25

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmGfuqoQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpgYlD/wPqQJ+DuJc6jgK/F0gVIwcxjAemIilItqD
 LMyUfOeYWSINAL7iCPc8J2lLYt/JjlCZ/AcHpnaaqtLBC7YDf8GQxX6ocMSFRJxg
 aEwaNO63ngauW/aMuTmWBQ1i1Vzt57EgH/TGp8XX7Yee1h7O9rPrSWFpEdV/cbk3
 R7mQAzpGgDIDYhMU/odaGitLDVLKoiNmsIoP4QuUXharLChJ6oApJNcFFeTiX6Wz
 5ttwvJmNlZ2ZQMchz2upxM1k2ftGhh7PNi47GEeMuxJBBkOLxkjx8lxtFFQwjgcz
 7fL0ctQWzwA60JD9hifMacOpcVVcx7Nqk6NxFnPybxXPd2VRtuPS8G3OEN6AIM7N
 Vgbd0q0auS8DTBHkCWg+zXJ55uGBM7kZW/H8Urk/prO3qhYRf8cDaP6tBVBRkAPM
 WmrrhgohUpjo0FxCByfyHSFoUdRwM4znyMkF2DhKHoCO44qrAW2b4ucJx/pJG9mU
 mZoGVETtM9dabiQwoTfArBqcWppE8KAFMc+a3We8NHEZO4xBaJz6+1tuh+hSriAm
 rsR1zm/SuJS+66mgCjp4ECzpo8NDx7u328kSddM3dWnT/Fywrz9AZg4PJoC1V9lD
 tz2DF8dSRZFLce5NQroow8BPWpdHTDKJqqi0WY51i8eODWm6pOjGS9XDmqUN/fjH
 mZo7WlJiZA==
 =76bB
 -----END PGP SIGNATURE-----

Merge tag 'block-5.16-2021-11-25' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request via Christoph:
      - Add a NO APST quirk for a Kioxia device (Enzo Matsumiya)
      - Fix write zeroes pi (Klaus Jensen)
      - Various TCP transport fixes (Maurizio Lombardi and Varun
        Prakash)
      - Ignore invalid fast_io_fail_tmo values (Maurizio Lombardi)
      - Use IOCB_NOWAIT only if the filesystem supports it (Maurizio
        Lombardi)

 - Module loading fix (Ming)

 - Kerneldoc warning fix (Yang)

* tag 'block-5.16-2021-11-25' of git://git.kernel.dk/linux-block:
  block: fix parameter not described warning
  nvmet: use IOCB_NOWAIT only if the filesystem supports it
  nvme: fix write zeroes pi
  nvme-fabrics: ignore invalid fast_io_fail_tmo values
  nvme-pci: add NO APST quirk for Kioxia device
  nvme-tcp: fix memory leak when freeing a queue
  nvme-tcp: validate R2T PDU in nvme_tcp_handle_r2t()
  nvmet-tcp: fix incomplete data digest send
  nvmet-tcp: fix memory leak when performing a controller reset
  nvmet-tcp: add an helper to free the cmd buffers
  nvmet-tcp: fix a race condition between release_queue and io_work
  block: avoid to touch unloaded module instance when opening bdev

This commit is contained in:
Linus Torvalds 2021-11-25 11:06:05 -08:00
commit 8ced7ca357
7 changed files with 102 additions and 52 deletions

View File

@ -753,8 +753,7 @@ struct block_device *blkdev_get_no_open(dev_t dev)
if (!bdev) if (!bdev)
return NULL; return NULL;
if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) || if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN)) {
!try_module_get(bdev->bd_disk->fops->owner)) {
put_device(&bdev->bd_device); put_device(&bdev->bd_device);
return NULL; return NULL;
} }
@ -764,7 +763,6 @@ struct block_device *blkdev_get_no_open(dev_t dev)
void blkdev_put_no_open(struct block_device *bdev) void blkdev_put_no_open(struct block_device *bdev)
{ {
module_put(bdev->bd_disk->fops->owner);
put_device(&bdev->bd_device); put_device(&bdev->bd_device);
} }
@ -820,12 +818,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
ret = -ENXIO; ret = -ENXIO;
if (!disk_live(disk)) if (!disk_live(disk))
goto abort_claiming; goto abort_claiming;
if (!try_module_get(disk->fops->owner))
goto abort_claiming;
if (bdev_is_partition(bdev)) if (bdev_is_partition(bdev))
ret = blkdev_get_part(bdev, mode); ret = blkdev_get_part(bdev, mode);
else else
ret = blkdev_get_whole(bdev, mode); ret = blkdev_get_whole(bdev, mode);
if (ret) if (ret)
goto abort_claiming; goto put_module;
if (mode & FMODE_EXCL) { if (mode & FMODE_EXCL) {
bd_finish_claiming(bdev, holder); bd_finish_claiming(bdev, holder);
@ -847,7 +847,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
if (unblock_events) if (unblock_events)
disk_unblock_events(disk); disk_unblock_events(disk);
return bdev; return bdev;
put_module:
module_put(disk->fops->owner);
abort_claiming: abort_claiming:
if (mode & FMODE_EXCL) if (mode & FMODE_EXCL)
bd_abort_claiming(bdev, holder); bd_abort_claiming(bdev, holder);
@ -956,6 +957,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
blkdev_put_whole(bdev, mode); blkdev_put_whole(bdev, mode);
mutex_unlock(&disk->open_mutex); mutex_unlock(&disk->open_mutex);
module_put(disk->fops->owner);
blkdev_put_no_open(bdev); blkdev_put_no_open(bdev);
} }
EXPORT_SYMBOL(blkdev_put); EXPORT_SYMBOL(blkdev_put);

View File

@ -1017,6 +1017,7 @@ EXPORT_SYMBOL(submit_bio);
/** /**
* bio_poll - poll for BIO completions * bio_poll - poll for BIO completions
* @bio: bio to poll for * @bio: bio to poll for
* @iob: batches of IO
* @flags: BLK_POLL_* flags that control the behavior * @flags: BLK_POLL_* flags that control the behavior
* *
* Poll for completions on queue associated with the bio. Returns number of * Poll for completions on queue associated with the bio. Returns number of

View File

@ -895,10 +895,19 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->write_zeroes.length = cmnd->write_zeroes.length =
cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
if (nvme_ns_has_pi(ns))
if (nvme_ns_has_pi(ns)) {
cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT); cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
else
cmnd->write_zeroes.control = 0; switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE1:
case NVME_NS_DPS_PI_TYPE2:
cmnd->write_zeroes.reftag =
cpu_to_le32(t10_pi_ref_tag(req));
break;
}
}
return BLK_STS_OK; return BLK_STS_OK;
} }
@ -2469,6 +2478,20 @@ static const struct nvme_core_quirk_entry core_quirks[] = {
.vid = 0x14a4, .vid = 0x14a4,
.fr = "22301111", .fr = "22301111",
.quirks = NVME_QUIRK_SIMPLE_SUSPEND, .quirks = NVME_QUIRK_SIMPLE_SUSPEND,
},
{
/*
* This Kioxia CD6-V Series / HPE PE8030 device times out and
* aborts I/O during any load, but more easily reproducible
* with discards (fstrim).
*
* The device is left in a state where it is also not possible
* to use "nvme set-feature" to disable APST, but booting with
* nvme_core.default_ps_max_latency=0 works.
*/
.vid = 0x1e0f,
.mn = "KCD6XVUL6T40",
.quirks = NVME_QUIRK_NO_APST,
} }
}; };

View File

@ -698,6 +698,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
if (token >= 0) if (token >= 0)
pr_warn("I/O fail on reconnect controller after %d sec\n", pr_warn("I/O fail on reconnect controller after %d sec\n",
token); token);
else
token = -1;
opts->fast_io_fail_tmo = token; opts->fast_io_fail_tmo = token;
break; break;
case NVMF_OPT_HOSTNQN: case NVMF_OPT_HOSTNQN:

View File

@ -572,7 +572,7 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
return ret; return ret;
} }
static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
struct nvme_tcp_r2t_pdu *pdu) struct nvme_tcp_r2t_pdu *pdu)
{ {
struct nvme_tcp_data_pdu *data = req->pdu; struct nvme_tcp_data_pdu *data = req->pdu;
@ -581,32 +581,11 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
u8 hdgst = nvme_tcp_hdgst_len(queue); u8 hdgst = nvme_tcp_hdgst_len(queue);
u8 ddgst = nvme_tcp_ddgst_len(queue); u8 ddgst = nvme_tcp_ddgst_len(queue);
req->state = NVME_TCP_SEND_H2C_PDU;
req->offset = 0;
req->pdu_len = le32_to_cpu(pdu->r2t_length); req->pdu_len = le32_to_cpu(pdu->r2t_length);
req->pdu_sent = 0; req->pdu_sent = 0;
if (unlikely(!req->pdu_len)) {
dev_err(queue->ctrl->ctrl.device,
"req %d r2t len is %u, probably a bug...\n",
rq->tag, req->pdu_len);
return -EPROTO;
}
if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
dev_err(queue->ctrl->ctrl.device,
"req %d r2t len %u exceeded data len %u (%zu sent)\n",
rq->tag, req->pdu_len, req->data_len,
req->data_sent);
return -EPROTO;
}
if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
dev_err(queue->ctrl->ctrl.device,
"req %d unexpected r2t offset %u (expected %zu)\n",
rq->tag, le32_to_cpu(pdu->r2t_offset),
req->data_sent);
return -EPROTO;
}
memset(data, 0, sizeof(*data)); memset(data, 0, sizeof(*data));
data->hdr.type = nvme_tcp_h2c_data; data->hdr.type = nvme_tcp_h2c_data;
data->hdr.flags = NVME_TCP_F_DATA_LAST; data->hdr.flags = NVME_TCP_F_DATA_LAST;
@ -622,7 +601,6 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
data->command_id = nvme_cid(rq); data->command_id = nvme_cid(rq);
data->data_offset = pdu->r2t_offset; data->data_offset = pdu->r2t_offset;
data->data_length = cpu_to_le32(req->pdu_len); data->data_length = cpu_to_le32(req->pdu_len);
return 0;
} }
static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
@ -630,7 +608,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
{ {
struct nvme_tcp_request *req; struct nvme_tcp_request *req;
struct request *rq; struct request *rq;
int ret; u32 r2t_length = le32_to_cpu(pdu->r2t_length);
rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id); rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) { if (!rq) {
@ -641,13 +619,28 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
} }
req = blk_mq_rq_to_pdu(rq); req = blk_mq_rq_to_pdu(rq);
ret = nvme_tcp_setup_h2c_data_pdu(req, pdu); if (unlikely(!r2t_length)) {
if (unlikely(ret)) dev_err(queue->ctrl->ctrl.device,
return ret; "req %d r2t len is %u, probably a bug...\n",
rq->tag, r2t_length);
return -EPROTO;
}
req->state = NVME_TCP_SEND_H2C_PDU; if (unlikely(req->data_sent + r2t_length > req->data_len)) {
req->offset = 0; dev_err(queue->ctrl->ctrl.device,
"req %d r2t len %u exceeded data len %u (%zu sent)\n",
rq->tag, r2t_length, req->data_len, req->data_sent);
return -EPROTO;
}
if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
dev_err(queue->ctrl->ctrl.device,
"req %d unexpected r2t offset %u (expected %zu)\n",
rq->tag, le32_to_cpu(pdu->r2t_offset), req->data_sent);
return -EPROTO;
}
nvme_tcp_setup_h2c_data_pdu(req, pdu);
nvme_tcp_queue_request(req, false, true); nvme_tcp_queue_request(req, false, true);
return 0; return 0;
@ -1232,6 +1225,7 @@ static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
{ {
struct page *page;
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid]; struct nvme_tcp_queue *queue = &ctrl->queues[qid];
@ -1241,6 +1235,11 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
if (queue->hdr_digest || queue->data_digest) if (queue->hdr_digest || queue->data_digest)
nvme_tcp_free_crypto(queue); nvme_tcp_free_crypto(queue);
if (queue->pf_cache.va) {
page = virt_to_head_page(queue->pf_cache.va);
__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
queue->pf_cache.va = NULL;
}
sock_release(queue->sock); sock_release(queue->sock);
kfree(queue->pdu); kfree(queue->pdu);
mutex_destroy(&queue->send_mutex); mutex_destroy(&queue->send_mutex);

View File

@ -8,6 +8,7 @@
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/falloc.h> #include <linux/falloc.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/fs.h>
#include "nvmet.h" #include "nvmet.h"
#define NVMET_MAX_MPOOL_BVEC 16 #define NVMET_MAX_MPOOL_BVEC 16
@ -266,7 +267,8 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
if (req->ns->buffered_io) { if (req->ns->buffered_io) {
if (likely(!req->f.mpool_alloc) && if (likely(!req->f.mpool_alloc) &&
nvmet_file_execute_io(req, IOCB_NOWAIT)) (req->ns->file->f_mode & FMODE_NOWAIT) &&
nvmet_file_execute_io(req, IOCB_NOWAIT))
return; return;
nvmet_file_submit_buffered_io(req); nvmet_file_submit_buffered_io(req);
} else } else

View File

@ -166,6 +166,8 @@ static struct workqueue_struct *nvmet_tcp_wq;
static const struct nvmet_fabrics_ops nvmet_tcp_ops; static const struct nvmet_fabrics_ops nvmet_tcp_ops;
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd);
static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd);
static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
struct nvmet_tcp_cmd *cmd) struct nvmet_tcp_cmd *cmd)
@ -297,6 +299,16 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu)
return 0; return 0;
} }
static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd)
{
WARN_ON(unlikely(cmd->nr_mapped > 0));
kfree(cmd->iov);
sgl_free(cmd->req.sg);
cmd->iov = NULL;
cmd->req.sg = NULL;
}
static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd) static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
{ {
struct scatterlist *sg; struct scatterlist *sg;
@ -306,6 +318,8 @@ static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
for (i = 0; i < cmd->nr_mapped; i++) for (i = 0; i < cmd->nr_mapped; i++)
kunmap(sg_page(&sg[i])); kunmap(sg_page(&sg[i]));
cmd->nr_mapped = 0;
} }
static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
@ -387,7 +401,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
return 0; return 0;
err: err:
sgl_free(cmd->req.sg); nvmet_tcp_free_cmd_buffers(cmd);
return NVME_SC_INTERNAL; return NVME_SC_INTERNAL;
} }
@ -632,10 +646,8 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
} }
} }
if (queue->nvme_sq.sqhd_disabled) { if (queue->nvme_sq.sqhd_disabled)
kfree(cmd->iov); nvmet_tcp_free_cmd_buffers(cmd);
sgl_free(cmd->req.sg);
}
return 1; return 1;
@ -664,8 +676,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
if (left) if (left)
return -EAGAIN; return -EAGAIN;
kfree(cmd->iov); nvmet_tcp_free_cmd_buffers(cmd);
sgl_free(cmd->req.sg);
cmd->queue->snd_cmd = NULL; cmd->queue->snd_cmd = NULL;
nvmet_tcp_put_cmd(cmd); nvmet_tcp_put_cmd(cmd);
return 1; return 1;
@ -700,10 +711,11 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{ {
struct nvmet_tcp_queue *queue = cmd->queue; struct nvmet_tcp_queue *queue = cmd->queue;
int left = NVME_TCP_DIGEST_LENGTH - cmd->offset;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = { struct kvec iov = {
.iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset, .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset,
.iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset .iov_len = left
}; };
int ret; int ret;
@ -717,6 +729,10 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
return ret; return ret;
cmd->offset += ret; cmd->offset += ret;
left -= ret;
if (left)
return -EAGAIN;
if (queue->nvme_sq.sqhd_disabled) { if (queue->nvme_sq.sqhd_disabled) {
cmd->queue->snd_cmd = NULL; cmd->queue->snd_cmd = NULL;
@ -1406,8 +1422,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
{ {
nvmet_req_uninit(&cmd->req); nvmet_req_uninit(&cmd->req);
nvmet_tcp_unmap_pdu_iovec(cmd); nvmet_tcp_unmap_pdu_iovec(cmd);
kfree(cmd->iov); nvmet_tcp_free_cmd_buffers(cmd);
sgl_free(cmd->req.sg);
} }
static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
@ -1417,7 +1432,10 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
for (i = 0; i < queue->nr_cmds; i++, cmd++) { for (i = 0; i < queue->nr_cmds; i++, cmd++) {
if (nvmet_tcp_need_data_in(cmd)) if (nvmet_tcp_need_data_in(cmd))
nvmet_tcp_finish_cmd(cmd); nvmet_req_uninit(&cmd->req);
nvmet_tcp_unmap_pdu_iovec(cmd);
nvmet_tcp_free_cmd_buffers(cmd);
} }
if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) { if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
@ -1437,7 +1455,9 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
mutex_unlock(&nvmet_tcp_queue_mutex); mutex_unlock(&nvmet_tcp_queue_mutex);
nvmet_tcp_restore_socket_callbacks(queue); nvmet_tcp_restore_socket_callbacks(queue);
flush_work(&queue->io_work); cancel_work_sync(&queue->io_work);
/* stop accepting incoming data */
queue->rcv_state = NVMET_TCP_RECV_ERR;
nvmet_tcp_uninit_data_in_cmds(queue); nvmet_tcp_uninit_data_in_cmds(queue);
nvmet_sq_destroy(&queue->nvme_sq); nvmet_sq_destroy(&queue->nvme_sq);