nvme-rdma: fix sysfs invoked reset_ctrl error flow
When reset_controller that is invoked by sysfs fails, it enters an error flow which practically removes the nvme ctrl entirely (similar to delete_ctrl flow). It causes the system to hang, since a sysfs attribute cannot be unregistered by one of its own methods. This can be fixed by calling delete_ctrl as a work rather than sequential code. In addition, it should give the ctrl a chance to recover using reconnection mechanism (consistant with FC reset_ctrl error flow). Also, while we're here, return suitable errno in case the reset ended with non live ctrl. Signed-off-by: Nitzan Carmi <nitzanc@mellanox.com> Reviewed-by: Max Gurtovoy <maxg@mellanox.com> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
This commit is contained in:
parent
7756f72ccd
commit
8000d1fdb0
|
@ -120,8 +120,12 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
|
|||
int ret;
|
||||
|
||||
ret = nvme_reset_ctrl(ctrl);
|
||||
if (!ret)
|
||||
if (!ret) {
|
||||
flush_work(&ctrl->reset_work);
|
||||
if (ctrl->state != NVME_CTRL_LIVE)
|
||||
ret = -ENETRESET;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
|
||||
|
|
|
@ -1784,11 +1784,8 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
|
|||
return;
|
||||
|
||||
out_fail:
|
||||
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
|
||||
nvme_remove_namespaces(&ctrl->ctrl);
|
||||
nvme_rdma_shutdown_ctrl(ctrl, true);
|
||||
nvme_uninit_ctrl(&ctrl->ctrl);
|
||||
nvme_put_ctrl(&ctrl->ctrl);
|
||||
++ctrl->ctrl.nr_reconnects;
|
||||
nvme_rdma_reconnect_or_remove(ctrl);
|
||||
}
|
||||
|
||||
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
|
||||
|
|
Loading…
Reference in New Issue