nvme-fc: add dev_loss_tmo timeout and remoteport resume support
When a remoteport is unregistered (connectivity lost), the following actions are taken: - the remoteport is marked DELETED - the time when dev_loss_tmo would expire is set in the remoteport - all controllers on the remoteport are reset. After a controller resets, it will stall in a RECONNECTING state waiting for one of the following: - the controller will continue to attempt reconnect per max_retries and reconnect_delay. As there is no remoteport connectivity, the reconnect attempt will immediately fail. If max reconnects has not been reached, a new reconnect_delay timer will be scheduled. If the current time plus another reconnect_delay exceeds when dev_loss_tmo expires on the remote port, then the reconnect_delay will be shortened to schedule no later than when dev_loss_tmo expires. If max reconnect attempts are reached (e.g. ctrl_loss_tmo reached) or dev_loss_tmo is exceeded without connectivity, the controller is deleted. - the remoteport is re-registered prior to dev_loss_tmo expiring. The resume of the remoteport will immediately attempt to reconnect each of its suspended controllers. Signed-off-by: James Smart <james.smart@broadcom.com> Reviewed-by: Hannes Reinecke <hare@suse.com> [hch: updated to use nvme_delete_ctrl] Signed-off-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
parent
3cec7f9de4
commit
2b632970da
|
@ -138,6 +138,7 @@ struct nvme_fc_rport {
|
|||
struct nvme_fc_lport *lport;
|
||||
spinlock_t lock;
|
||||
struct kref ref;
|
||||
unsigned long dev_loss_end;
|
||||
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
|
||||
|
||||
enum nvme_fcctrl_flags {
|
||||
|
@ -528,6 +529,102 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport)
|
|||
return kref_get_unless_zero(&rport->ref);
|
||||
}
|
||||
|
||||
static void
|
||||
nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
|
||||
{
|
||||
switch (ctrl->ctrl.state) {
|
||||
case NVME_CTRL_NEW:
|
||||
case NVME_CTRL_RECONNECTING:
|
||||
/*
|
||||
* As all reconnects were suppressed, schedule a
|
||||
* connect.
|
||||
*/
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: connectivity re-established. "
|
||||
"Attempting reconnect\n", ctrl->cnum);
|
||||
|
||||
queue_delayed_work(nvme_wq, &ctrl->connect_work, 0);
|
||||
break;
|
||||
|
||||
case NVME_CTRL_RESETTING:
|
||||
/*
|
||||
* Controller is already in the process of terminating the
|
||||
* association. No need to do anything further. The reconnect
|
||||
* step will naturally occur after the reset completes.
|
||||
*/
|
||||
break;
|
||||
|
||||
default:
|
||||
/* no action to take - let it delete */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static struct nvme_fc_rport *
|
||||
nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
|
||||
struct nvme_fc_port_info *pinfo)
|
||||
{
|
||||
struct nvme_fc_rport *rport;
|
||||
struct nvme_fc_ctrl *ctrl;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&nvme_fc_lock, flags);
|
||||
|
||||
list_for_each_entry(rport, &lport->endp_list, endp_list) {
|
||||
if (rport->remoteport.node_name != pinfo->node_name ||
|
||||
rport->remoteport.port_name != pinfo->port_name)
|
||||
continue;
|
||||
|
||||
if (!nvme_fc_rport_get(rport)) {
|
||||
rport = ERR_PTR(-ENOLCK);
|
||||
goto out_done;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
||||
|
||||
spin_lock_irqsave(&rport->lock, flags);
|
||||
|
||||
/* has it been unregistered */
|
||||
if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
|
||||
/* means lldd called us twice */
|
||||
spin_unlock_irqrestore(&rport->lock, flags);
|
||||
nvme_fc_rport_put(rport);
|
||||
return ERR_PTR(-ESTALE);
|
||||
}
|
||||
|
||||
rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
|
||||
rport->dev_loss_end = 0;
|
||||
|
||||
/*
|
||||
* kick off a reconnect attempt on all associations to the
|
||||
* remote port. A successful reconnects will resume i/o.
|
||||
*/
|
||||
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
|
||||
nvme_fc_resume_controller(ctrl);
|
||||
|
||||
spin_unlock_irqrestore(&rport->lock, flags);
|
||||
|
||||
return rport;
|
||||
}
|
||||
|
||||
rport = NULL;
|
||||
|
||||
out_done:
|
||||
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
||||
|
||||
return rport;
|
||||
}
|
||||
|
||||
static inline void
|
||||
__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
|
||||
struct nvme_fc_port_info *pinfo)
|
||||
{
|
||||
if (pinfo->dev_loss_tmo)
|
||||
rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
|
||||
else
|
||||
rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
|
||||
}
|
||||
|
||||
/**
|
||||
* nvme_fc_register_remoteport - transport entry point called by an
|
||||
* LLDD to register the existence of a NVME
|
||||
|
@ -554,22 +651,45 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
|
|||
unsigned long flags;
|
||||
int ret, idx;
|
||||
|
||||
if (!nvme_fc_lport_get(lport)) {
|
||||
ret = -ESHUTDOWN;
|
||||
goto out_reghost_failed;
|
||||
}
|
||||
|
||||
/*
|
||||
* look to see if there is already a remoteport that is waiting
|
||||
* for a reconnect (within dev_loss_tmo) with the same WWN's.
|
||||
* If so, transition to it and reconnect.
|
||||
*/
|
||||
newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);
|
||||
|
||||
/* found an rport, but something about its state is bad */
|
||||
if (IS_ERR(newrec)) {
|
||||
ret = PTR_ERR(newrec);
|
||||
goto out_lport_put;
|
||||
|
||||
/* found existing rport, which was resumed */
|
||||
} else if (newrec) {
|
||||
nvme_fc_lport_put(lport);
|
||||
__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
|
||||
nvme_fc_signal_discovery_scan(lport, newrec);
|
||||
*portptr = &newrec->remoteport;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* nothing found - allocate a new remoteport struct */
|
||||
|
||||
newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
|
||||
GFP_KERNEL);
|
||||
if (!newrec) {
|
||||
ret = -ENOMEM;
|
||||
goto out_reghost_failed;
|
||||
}
|
||||
|
||||
if (!nvme_fc_lport_get(lport)) {
|
||||
ret = -ESHUTDOWN;
|
||||
goto out_kfree_rport;
|
||||
goto out_lport_put;
|
||||
}
|
||||
|
||||
idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
|
||||
if (idx < 0) {
|
||||
ret = -ENOSPC;
|
||||
goto out_lport_put;
|
||||
goto out_kfree_rport;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&newrec->endp_list);
|
||||
|
@ -587,11 +707,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
|
|||
newrec->remoteport.port_id = pinfo->port_id;
|
||||
newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
|
||||
newrec->remoteport.port_num = idx;
|
||||
/* a registration value of dev_loss_tmo=0 results in the default */
|
||||
if (pinfo->dev_loss_tmo)
|
||||
newrec->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
|
||||
else
|
||||
newrec->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
|
||||
__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
|
||||
|
||||
spin_lock_irqsave(&nvme_fc_lock, flags);
|
||||
list_add_tail(&newrec->endp_list, &lport->endp_list);
|
||||
|
@ -602,10 +718,10 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
|
|||
*portptr = &newrec->remoteport;
|
||||
return 0;
|
||||
|
||||
out_lport_put:
|
||||
nvme_fc_lport_put(lport);
|
||||
out_kfree_rport:
|
||||
kfree(newrec);
|
||||
out_lport_put:
|
||||
nvme_fc_lport_put(lport);
|
||||
out_reghost_failed:
|
||||
*portptr = NULL;
|
||||
return ret;
|
||||
|
@ -636,6 +752,58 @@ restart:
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
|
||||
{
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: controller connectivity lost. Awaiting "
|
||||
"Reconnect", ctrl->cnum);
|
||||
|
||||
switch (ctrl->ctrl.state) {
|
||||
case NVME_CTRL_NEW:
|
||||
case NVME_CTRL_LIVE:
|
||||
/*
|
||||
* Schedule a controller reset. The reset will terminate the
|
||||
* association and schedule the reconnect timer. Reconnects
|
||||
* will be attempted until either the ctlr_loss_tmo
|
||||
* (max_retries * connect_delay) expires or the remoteport's
|
||||
* dev_loss_tmo expires.
|
||||
*/
|
||||
if (nvme_reset_ctrl(&ctrl->ctrl)) {
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: Couldn't schedule reset. "
|
||||
"Deleting controller.\n",
|
||||
ctrl->cnum);
|
||||
nvme_delete_ctrl(&ctrl->ctrl);
|
||||
}
|
||||
break;
|
||||
|
||||
case NVME_CTRL_RECONNECTING:
|
||||
/*
|
||||
* The association has already been terminated and the
|
||||
* controller is attempting reconnects. No need to do anything
|
||||
* futher. Reconnects will be attempted until either the
|
||||
* ctlr_loss_tmo (max_retries * connect_delay) expires or the
|
||||
* remoteport's dev_loss_tmo expires.
|
||||
*/
|
||||
break;
|
||||
|
||||
case NVME_CTRL_RESETTING:
|
||||
/*
|
||||
* Controller is already in the process of terminating the
|
||||
* association. No need to do anything further. The reconnect
|
||||
* step will kick in naturally after the association is
|
||||
* terminated.
|
||||
*/
|
||||
break;
|
||||
|
||||
case NVME_CTRL_DELETING:
|
||||
default:
|
||||
/* no action to take - let it delete */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* nvme_fc_unregister_remoteport - transport entry point called by an
|
||||
* LLDD to deregister/remove a previously
|
||||
|
@ -665,15 +833,31 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
|
|||
}
|
||||
portptr->port_state = FC_OBJSTATE_DELETED;
|
||||
|
||||
/* tear down all associations to the remote port */
|
||||
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
|
||||
nvme_delete_ctrl(&ctrl->ctrl);
|
||||
rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ);
|
||||
|
||||
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
|
||||
/* if dev_loss_tmo==0, dev loss is immediate */
|
||||
if (!portptr->dev_loss_tmo) {
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: controller connectivity lost. "
|
||||
"Deleting controller.\n",
|
||||
ctrl->cnum);
|
||||
nvme_delete_ctrl(&ctrl->ctrl);
|
||||
} else
|
||||
nvme_fc_ctrl_connectivity_loss(ctrl);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&rport->lock, flags);
|
||||
|
||||
nvme_fc_abort_lsops(rport);
|
||||
|
||||
/*
|
||||
* release the reference, which will allow, if all controllers
|
||||
* go away, which should only occur after dev_loss_tmo occurs,
|
||||
* for the rport to be torn down.
|
||||
*/
|
||||
nvme_fc_rport_put(rport);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
|
||||
|
@ -700,7 +884,6 @@ nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
|
|||
u32 dev_loss_tmo)
|
||||
{
|
||||
struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
|
||||
struct nvme_fc_ctrl *ctrl;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&rport->lock, flags);
|
||||
|
@ -2676,28 +2859,43 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
|
|||
static void
|
||||
nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
|
||||
{
|
||||
struct nvme_fc_rport *rport = ctrl->rport;
|
||||
struct nvme_fc_remote_port *portptr = &rport->remoteport;
|
||||
unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
|
||||
bool recon = true;
|
||||
|
||||
if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
|
||||
return;
|
||||
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
|
||||
ctrl->cnum, status);
|
||||
|
||||
if (nvmf_should_reconnect(&ctrl->ctrl)) {
|
||||
/* Only schedule the reconnect if the remote port is online */
|
||||
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
|
||||
return;
|
||||
|
||||
if (portptr->port_state == FC_OBJSTATE_ONLINE)
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
|
||||
ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
|
||||
queue_delayed_work(nvme_wq, &ctrl->connect_work,
|
||||
ctrl->ctrl.opts->reconnect_delay * HZ);
|
||||
"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
|
||||
ctrl->cnum, status);
|
||||
else if (time_after_eq(jiffies, rport->dev_loss_end))
|
||||
recon = false;
|
||||
|
||||
if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
|
||||
if (portptr->port_state == FC_OBJSTATE_ONLINE)
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: Reconnect attempt in %ld "
|
||||
"seconds\n",
|
||||
ctrl->cnum, recon_delay / HZ);
|
||||
else if (time_after(jiffies + recon_delay, rport->dev_loss_end))
|
||||
recon_delay = rport->dev_loss_end - jiffies;
|
||||
|
||||
queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
|
||||
} else {
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
if (portptr->port_state == FC_OBJSTATE_ONLINE)
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: Max reconnect attempts (%d) "
|
||||
"reached. Removing controller\n",
|
||||
ctrl->cnum, ctrl->ctrl.nr_reconnects);
|
||||
else
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: dev_loss_tmo (%d) expired "
|
||||
"while waiting for remoteport connectivity. "
|
||||
"Removing controller\n", ctrl->cnum,
|
||||
portptr->dev_loss_tmo);
|
||||
WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
|
||||
}
|
||||
}
|
||||
|
@ -2721,15 +2919,17 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
|
|||
return;
|
||||
}
|
||||
|
||||
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
|
||||
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
|
||||
ret = nvme_fc_create_association(ctrl);
|
||||
if (ret)
|
||||
nvme_fc_reconnect_or_delete(ctrl, ret);
|
||||
else
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: controller reset complete\n",
|
||||
ctrl->cnum);
|
||||
}
|
||||
else
|
||||
ret = -ENOTCONN;
|
||||
|
||||
if (ret)
|
||||
nvme_fc_reconnect_or_delete(ctrl, ret);
|
||||
else
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: controller reset complete\n",
|
||||
ctrl->cnum);
|
||||
}
|
||||
|
||||
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
|
||||
|
|
Loading…
Reference in New Issue