nvme-fc: reject reconnect if io queue count is reduced to zero
Consider the following sequence:
- A successful connect has occurred with an io queue count greater than zero, and namespaces have been detected and are running.
- An error or other event occurs which terminates the prior association and then starts a reconnect.
- The reconnect then creates a new controller but, for whatever reason, nvme_set_queue_count() results in the io queue count being set to zero. This skips the io queue and tag set changes.
- However, the controller still transitions to live, calling nvme_start_ctrl(), which calls nvme_start_queues(), which then releases I/Os into the transport, which in turn sends them to the driver.

As there are no queues, things eventually hit the driver looking for a handle, which was cleared when the original controller was reset, and it can't proceed. Worst case, things progress, but everything fails.

In the failing scenario, the nvme_set_features(NVME_FEAT_NUM_QUEUES) command actually failed with an NVME_SC_INTERNAL error. For some reason, although nvme_set_queue_count() saw the error and set the io queue count to zero, it doesn't return a failure status to the transport, which allows the transport to continue using the controller.

Fix the problem by simply rejecting the new association if at least 1 I/O queue can't be created. The association reject will fail the reconnect attempt and fall into the reconnect retry policy.

Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
06f3d71ea0
commit
834d3710a0
|
@ -2475,6 +2475,7 @@ static int
|
|||
nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
|
||||
{
|
||||
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
|
||||
u32 prior_ioq_cnt = ctrl->ctrl.queue_count - 1;
|
||||
unsigned int nr_io_queues;
|
||||
int ret;
|
||||
|
||||
|
@ -2487,6 +2488,13 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
|
|||
return ret;
|
||||
}
|
||||
|
||||
if (!nr_io_queues && prior_ioq_cnt) {
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"Fail Reconnect: At least 1 io queue "
|
||||
"required (was %d)\n", prior_ioq_cnt);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
ctrl->ctrl.queue_count = nr_io_queues + 1;
|
||||
/* check for io queues existing */
|
||||
if (ctrl->ctrl.queue_count == 1)
|
||||
|
@ -2500,6 +2508,10 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
|
|||
if (ret)
|
||||
goto out_delete_hw_queues;
|
||||
|
||||
if (prior_ioq_cnt != nr_io_queues)
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"reconnect: revising io queue count from %d to %d\n",
|
||||
prior_ioq_cnt, nr_io_queues);
|
||||
blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
|
||||
|
||||
return 0;
|
||||
|
|
Loading…
Reference in New Issue