block-5.12-2021-03-19
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmBVI64QHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpsRdD/9DONnkH5KrrqE0vltBOcSefpuDsT0ET6Fl
xEfmPw3ndW2WWG7U/Elb3dXiOGA2AslmDe2XaU+5gaodKn6u3XpRiCFSvwX0At8l
Ple5wyvQg3z0XiecL+iKyUcrfqamcTy4Aa2lrN+ffvYo4fsY0a546hbTPkqhckcZ
1Svuc+C+Gh+TuCUwINWT+A7n2ji5xUjsuX19vZ8iWhTECsWRwa64HGku5PHB+zF0
z4eFPhOrKYxus/4ArH2/QnZXXb2K3jE1sULCoGC08DJBDnC7pRaxwaVvhZWNIxrT
0evKZRHtzCmkmWqZPwIc82wcNGfJ0URPQ2817ZWLZY9fbk0rY2LBgo+iWsIi95iO
g+OkETweJbBOOcibto337i+VaeYaQ6MFRvTA4IM46fhuHBAXFf+4qPl3SisnXDyx
xT+cF7GPXG+qg9/FytmMC1Ngj+C1y15gF9tmpsTnol3mud354al99hs+WU934of9
buYKPQ8DmdHTWNtOFv8/brHnaa0jPOQZxQEN+no+bHfZbGYjr8Oo+KUuxx/OY164
ZNj3FGQU98MMky3nqJLgPr806G+wNdTVgtRpmlMnf0doQLPyoEaYJrNKy6Uz0BxV
OEtJEWuEPlsgQpkk0Aqs2ZnzNtke/lUKhupV9oiqNapEaxHflnmnmZt0T/easbPu
EWwYltg0Xw==
=hP6T
-----END PGP SIGNATURE-----

Merge tag 'block-5.12-2021-03-19' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Just an NVMe pull request this week:

   - fix tag allocation for keep alive

   - fix a unit mismatch for the Write Zeroes limits

   - various TCP transport fixes (Sagi Grimberg, Elad Grupi)

   - fix iosqes and iocqes validation for discovery controllers (Sagi Grimberg)"

* tag 'block-5.12-2021-03-19' of git://git.kernel.dk/linux-block:
  nvmet-tcp: fix kmap leak when data digest in use
  nvmet: don't check iosqes,iocqes for discovery controllers
  nvme-rdma: fix possible hang when failing to set io queues
  nvme-tcp: fix possible hang when failing to set io queues
  nvme-tcp: fix misuse of __smp_processor_id with preemption enabled
  nvme-tcp: fix a NULL deref when receiving a 0-length r2t PDU
  nvme: fix Write Zeroes limitations
  nvme: allocate the keep alive request using BLK_MQ_REQ_NOWAIT
  nvme: merge nvme_keep_alive into nvme_keep_alive_work
  nvme-fabrics: only reserve a single tag
commit d626c692aa
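For orientation, here is a condensed sketch of the keep-alive path after this merge, assembled from the drivers/nvme/host/core.c hunks below rather than copied verbatim from the tree (the traffic-based keep-alive branch is elided): the separate nvme_keep_alive() helper is gone, and the work function now allocates the reserved request itself with BLK_MQ_REQ_NOWAIT so it never blocks waiting for a tag.

static void nvme_keep_alive_work(struct work_struct *work)
{
	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvme_ctrl, ka_work);
	struct request *rq;

	/* ... traffic-based keep-alive (TBKAS) rescheduling elided ... */

	/* non-blocking allocation from the reserved tag */
	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
				BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(rq)) {
		/* allocation failure, reset the controller */
		dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq));
		nvme_reset_ctrl(ctrl);
		return;
	}

	rq->timeout = ctrl->kato * HZ;
	rq->end_io_data = ctrl;
	blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io);
}

That single reserved tag is what the new NVMF_RESERVED_TAGS definition in fabrics.h accounts for across all fabrics transports.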
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1226,28 +1226,12 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 	queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
 }
 
-static int nvme_keep_alive(struct nvme_ctrl *ctrl)
-{
-	struct request *rq;
-
-	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
-			BLK_MQ_REQ_RESERVED);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
-
-	rq->timeout = ctrl->kato * HZ;
-	rq->end_io_data = ctrl;
-
-	blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io);
-
-	return 0;
-}
-
 static void nvme_keep_alive_work(struct work_struct *work)
 {
 	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
 			struct nvme_ctrl, ka_work);
 	bool comp_seen = ctrl->comp_seen;
+	struct request *rq;
 
 	if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) {
 		dev_dbg(ctrl->device,
@@ -1257,12 +1241,18 @@ static void nvme_keep_alive_work(struct work_struct *work)
 		return;
 	}
 
-	if (nvme_keep_alive(ctrl)) {
+	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
+				BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
+	if (IS_ERR(rq)) {
 		/* allocation failure, reset the controller */
-		dev_err(ctrl->device, "keep-alive failed\n");
+		dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq));
 		nvme_reset_ctrl(ctrl);
 		return;
 	}
+
+	rq->timeout = ctrl->kato * HZ;
+	rq->end_io_data = ctrl;
+	blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io);
 }
 
 static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
@@ -1964,30 +1954,18 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
 		blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
 }
 
-static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
+/*
+ * Even though NVMe spec explicitly states that MDTS is not applicable to the
+ * write-zeroes, we are cautious and limit the size to the controllers
+ * max_hw_sectors value, which is based on the MDTS field and possibly other
+ * limiting factors.
+ */
+static void nvme_config_write_zeroes(struct request_queue *q,
+		struct nvme_ctrl *ctrl)
 {
-	u64 max_blocks;
-
-	if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
-	    (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
-		return;
-	/*
-	 * Even though NVMe spec explicitly states that MDTS is not
-	 * applicable to the write-zeroes:- "The restriction does not apply to
-	 * commands that do not transfer data between the host and the
-	 * controller (e.g., Write Uncorrectable ro Write Zeroes command).".
-	 * In order to be more cautious use controller's max_hw_sectors value
-	 * to configure the maximum sectors for the write-zeroes which is
-	 * configured based on the controller's MDTS field in the
-	 * nvme_init_identify() if available.
-	 */
-	if (ns->ctrl->max_hw_sectors == UINT_MAX)
-		max_blocks = (u64)USHRT_MAX + 1;
-	else
-		max_blocks = ns->ctrl->max_hw_sectors + 1;
-
-	blk_queue_max_write_zeroes_sectors(disk->queue,
-					   nvme_lba_to_sect(ns, max_blocks));
+	if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) &&
+	    !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
+		blk_queue_max_write_zeroes_sectors(q, ctrl->max_hw_sectors);
 }
 
 static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
@@ -2159,7 +2137,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	set_capacity_and_notify(disk, capacity);
 
 	nvme_config_discard(disk, ns);
-	nvme_config_write_zeroes(disk, ns);
+	nvme_config_write_zeroes(disk->queue, ns->ctrl);
 
 	set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) ||
 		test_bit(NVME_NS_FORCE_RO, &ns->flags));
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -18,6 +18,13 @@
 /* default is -1: the fail fast mechanism is disabled  */
 #define NVMF_DEF_FAIL_FAST_TMO		-1
 
+/*
+ * Reserved one command for internal usage.  This command is used for sending
+ * the connect command, as well as for the keep alive command on the admin
+ * queue once live.
+ */
+#define NVMF_RESERVED_TAGS	1
+
 /*
  * Define a host as seen by the target.  We allocate one at boot, but also
  * allow the override it when creating controllers.  This is both to provide
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2863,7 +2863,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
 	ctrl->tag_set.ops = &nvme_fc_mq_ops;
 	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
-	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
+	ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS;
 	ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
 	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 	ctrl->tag_set.cmd_size =
@@ -3485,7 +3485,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
 	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
 	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
+	ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS;
 	ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
 	ctrl->admin_tag_set.cmd_size =
 		struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -736,8 +736,11 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
 		return ret;
 
 	ctrl->ctrl.queue_count = nr_io_queues + 1;
-	if (ctrl->ctrl.queue_count < 2)
-		return 0;
+	if (ctrl->ctrl.queue_count < 2) {
+		dev_err(ctrl->ctrl.device,
+			"unable to set any I/O queues\n");
+		return -ENOMEM;
+	}
 
 	dev_info(ctrl->ctrl.device,
 		"creating %d I/O queues.\n", nr_io_queues);
@@ -798,7 +801,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 		memset(set, 0, sizeof(*set));
 		set->ops = &nvme_rdma_admin_mq_ops;
 		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-		set->reserved_tags = 2; /* connect + keep-alive */
+		set->reserved_tags = NVMF_RESERVED_TAGS;
 		set->numa_node = nctrl->numa_node;
 		set->cmd_size = sizeof(struct nvme_rdma_request) +
 				NVME_RDMA_DATA_SGL_SIZE;
@@ -811,7 +814,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 		memset(set, 0, sizeof(*set));
 		set->ops = &nvme_rdma_mq_ops;
 		set->queue_depth = nctrl->sqsize + 1;
-		set->reserved_tags = 1; /* fabric connect */
+		set->reserved_tags = NVMF_RESERVED_TAGS;
 		set->numa_node = nctrl->numa_node;
 		set->flags = BLK_MQ_F_SHOULD_MERGE;
 		set->cmd_size = sizeof(struct nvme_rdma_request) +
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -287,7 +287,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 	 * directly, otherwise queue io_work. Also, only do that if we
 	 * are on the same cpu, so we don't introduce contention.
 	 */
-	if (queue->io_cpu == __smp_processor_id() &&
+	if (queue->io_cpu == raw_smp_processor_id() &&
 	    sync && empty && mutex_trylock(&queue->send_mutex)) {
 		queue->more_requests = !last;
 		nvme_tcp_send_all(queue);
@@ -568,6 +568,13 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
 	req->pdu_len = le32_to_cpu(pdu->r2t_length);
 	req->pdu_sent = 0;
 
+	if (unlikely(!req->pdu_len)) {
+		dev_err(queue->ctrl->ctrl.device,
+			"req %d r2t len is %u, probably a bug...\n",
+			rq->tag, req->pdu_len);
+		return -EPROTO;
+	}
+
 	if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
 		dev_err(queue->ctrl->ctrl.device,
 			"req %d r2t len %u exceeded data len %u (%zu sent)\n",
@@ -1575,7 +1582,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		memset(set, 0, sizeof(*set));
 		set->ops = &nvme_tcp_admin_mq_ops;
 		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-		set->reserved_tags = 2; /* connect + keep-alive */
+		set->reserved_tags = NVMF_RESERVED_TAGS;
 		set->numa_node = nctrl->numa_node;
 		set->flags = BLK_MQ_F_BLOCKING;
 		set->cmd_size = sizeof(struct nvme_tcp_request);
@@ -1587,7 +1594,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		memset(set, 0, sizeof(*set));
 		set->ops = &nvme_tcp_mq_ops;
 		set->queue_depth = nctrl->sqsize + 1;
-		set->reserved_tags = 1; /* fabric connect */
+		set->reserved_tags = NVMF_RESERVED_TAGS;
 		set->numa_node = nctrl->numa_node;
 		set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
 		set->cmd_size = sizeof(struct nvme_tcp_request);
@@ -1745,8 +1752,11 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 		return ret;
 
 	ctrl->queue_count = nr_io_queues + 1;
-	if (ctrl->queue_count < 2)
-		return 0;
+	if (ctrl->queue_count < 2) {
+		dev_err(ctrl->device,
+			"unable to set any I/O queues\n");
+		return -ENOMEM;
+	}
 
 	dev_info(ctrl->device,
 		"creating %d I/O queues.\n", nr_io_queues);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1118,9 +1118,20 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 {
 	lockdep_assert_held(&ctrl->lock);
 
-	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
-	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
-	    nvmet_cc_mps(ctrl->cc) != 0 ||
+	/*
+	 * Only I/O controllers should verify iosqes,iocqes.
+	 * Strictly speaking, the spec says a discovery controller
+	 * should verify iosqes,iocqes are zeroed, however that
+	 * would break backwards compatibility, so don't enforce it.
+	 */
+	if (ctrl->subsys->type != NVME_NQN_DISC &&
+	    (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
+	     nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) {
+		ctrl->csts = NVME_CSTS_CFS;
+		return;
+	}
+
+	if (nvmet_cc_mps(ctrl->cc) != 0 ||
 	    nvmet_cc_ams(ctrl->cc) != 0 ||
 	    nvmet_cc_css(ctrl->cc) != 0) {
 		ctrl->csts = NVME_CSTS_CFS;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -349,7 +349,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
 	ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
 	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-	ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
+	ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS;
 	ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
 	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
 		NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
@@ -520,7 +520,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
 	ctrl->tag_set.ops = &nvme_loop_mq_ops;
 	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
-	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
+	ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS;
 	ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
 	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 	ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1098,11 +1098,11 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
 		cmd->rbytes_done += ret;
 	}
 
+	nvmet_tcp_unmap_pdu_iovec(cmd);
 	if (queue->data_digest) {
 		nvmet_tcp_prep_recv_ddgst(cmd);
 		return 0;
 	}
-	nvmet_tcp_unmap_pdu_iovec(cmd);
 
 	if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
 	    cmd->rbytes_done == cmd->req.transfer_len) {