nvme-fc: add dev_loss_tmo timeout and remoteport resume support

When a remoteport is unregistered (connectivity lost), the following
actions are taken:

 - the remoteport is marked DELETED
 - the time when dev_loss_tmo would expire is set in the remoteport
 - all controllers on the remoteport are reset.

After a controller resets, it will stall in a RECONNECTING state waiting
for one of the following:

 - the controller will continue to attempt reconnect per max_retries and
   reconnect_delay.  As no remoteport connectivity, the reconnect attempt
   will immediately fail.  If max reconnects has not been reached, a new
   reconnect_delay timer will be schedule.  If the current time plus
   another reconnect_delay exceeds when dev_loss_tmo expires on the remote
   port, then the reconnect_delay will be shortend to schedule no later
   than when dev_loss_tmo expires.  If max reconnect attempts are reached
   (e.g. ctrl_loss_tmo reached) or dev_loss_tmo ix exceeded without
   connectivity, the controller is deleted.
 - the remoteport is re-registered prior to dev_loss_tmo expiring.
   The resume of the remoteport will immediately attempt to reconnect
   each of its suspended controllers.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
[hch: updated to use nvme_delete_ctrl]
Signed-off-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
James Smart 2017-10-25 16:43:17 -07:00 committed by Christoph Hellwig
parent 3cec7f9de4
commit 2b632970da
1 changed files with 240 additions and 40 deletions

View File

@ -138,6 +138,7 @@ struct nvme_fc_rport {
struct nvme_fc_lport *lport; struct nvme_fc_lport *lport;
spinlock_t lock; spinlock_t lock;
struct kref ref; struct kref ref;
unsigned long dev_loss_end;
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
enum nvme_fcctrl_flags { enum nvme_fcctrl_flags {
@ -528,6 +529,102 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport)
return kref_get_unless_zero(&rport->ref); return kref_get_unless_zero(&rport->ref);
} }
static void
nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
{
switch (ctrl->ctrl.state) {
case NVME_CTRL_NEW:
case NVME_CTRL_RECONNECTING:
/*
* As all reconnects were suppressed, schedule a
* connect.
*/
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: connectivity re-established. "
"Attempting reconnect\n", ctrl->cnum);
queue_delayed_work(nvme_wq, &ctrl->connect_work, 0);
break;
case NVME_CTRL_RESETTING:
/*
* Controller is already in the process of terminating the
* association. No need to do anything further. The reconnect
* step will naturally occur after the reset completes.
*/
break;
default:
/* no action to take - let it delete */
break;
}
}
static struct nvme_fc_rport *
nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
struct nvme_fc_port_info *pinfo)
{
struct nvme_fc_rport *rport;
struct nvme_fc_ctrl *ctrl;
unsigned long flags;
spin_lock_irqsave(&nvme_fc_lock, flags);
list_for_each_entry(rport, &lport->endp_list, endp_list) {
if (rport->remoteport.node_name != pinfo->node_name ||
rport->remoteport.port_name != pinfo->port_name)
continue;
if (!nvme_fc_rport_get(rport)) {
rport = ERR_PTR(-ENOLCK);
goto out_done;
}
spin_unlock_irqrestore(&nvme_fc_lock, flags);
spin_lock_irqsave(&rport->lock, flags);
/* has it been unregistered */
if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
/* means lldd called us twice */
spin_unlock_irqrestore(&rport->lock, flags);
nvme_fc_rport_put(rport);
return ERR_PTR(-ESTALE);
}
rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
rport->dev_loss_end = 0;
/*
* kick off a reconnect attempt on all associations to the
* remote port. A successful reconnects will resume i/o.
*/
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
nvme_fc_resume_controller(ctrl);
spin_unlock_irqrestore(&rport->lock, flags);
return rport;
}
rport = NULL;
out_done:
spin_unlock_irqrestore(&nvme_fc_lock, flags);
return rport;
}
static inline void
__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
struct nvme_fc_port_info *pinfo)
{
if (pinfo->dev_loss_tmo)
rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
else
rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
}
/** /**
* nvme_fc_register_remoteport - transport entry point called by an * nvme_fc_register_remoteport - transport entry point called by an
* LLDD to register the existence of a NVME * LLDD to register the existence of a NVME
@ -554,22 +651,45 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
unsigned long flags; unsigned long flags;
int ret, idx; int ret, idx;
if (!nvme_fc_lport_get(lport)) {
ret = -ESHUTDOWN;
goto out_reghost_failed;
}
/*
* look to see if there is already a remoteport that is waiting
* for a reconnect (within dev_loss_tmo) with the same WWN's.
* If so, transition to it and reconnect.
*/
newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);
/* found an rport, but something about its state is bad */
if (IS_ERR(newrec)) {
ret = PTR_ERR(newrec);
goto out_lport_put;
/* found existing rport, which was resumed */
} else if (newrec) {
nvme_fc_lport_put(lport);
__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
nvme_fc_signal_discovery_scan(lport, newrec);
*portptr = &newrec->remoteport;
return 0;
}
/* nothing found - allocate a new remoteport struct */
newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
GFP_KERNEL); GFP_KERNEL);
if (!newrec) { if (!newrec) {
ret = -ENOMEM; ret = -ENOMEM;
goto out_reghost_failed; goto out_lport_put;
}
if (!nvme_fc_lport_get(lport)) {
ret = -ESHUTDOWN;
goto out_kfree_rport;
} }
idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
if (idx < 0) { if (idx < 0) {
ret = -ENOSPC; ret = -ENOSPC;
goto out_lport_put; goto out_kfree_rport;
} }
INIT_LIST_HEAD(&newrec->endp_list); INIT_LIST_HEAD(&newrec->endp_list);
@ -587,11 +707,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
newrec->remoteport.port_id = pinfo->port_id; newrec->remoteport.port_id = pinfo->port_id;
newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
newrec->remoteport.port_num = idx; newrec->remoteport.port_num = idx;
/* a registration value of dev_loss_tmo=0 results in the default */ __nvme_fc_set_dev_loss_tmo(newrec, pinfo);
if (pinfo->dev_loss_tmo)
newrec->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
else
newrec->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
spin_lock_irqsave(&nvme_fc_lock, flags); spin_lock_irqsave(&nvme_fc_lock, flags);
list_add_tail(&newrec->endp_list, &lport->endp_list); list_add_tail(&newrec->endp_list, &lport->endp_list);
@ -602,10 +718,10 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
*portptr = &newrec->remoteport; *portptr = &newrec->remoteport;
return 0; return 0;
out_lport_put:
nvme_fc_lport_put(lport);
out_kfree_rport: out_kfree_rport:
kfree(newrec); kfree(newrec);
out_lport_put:
nvme_fc_lport_put(lport);
out_reghost_failed: out_reghost_failed:
*portptr = NULL; *portptr = NULL;
return ret; return ret;
@ -636,6 +752,58 @@ nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
return 0; return 0;
} }
static void
nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
{
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller connectivity lost. Awaiting "
"Reconnect", ctrl->cnum);
switch (ctrl->ctrl.state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
/*
* Schedule a controller reset. The reset will terminate the
* association and schedule the reconnect timer. Reconnects
* will be attempted until either the ctlr_loss_tmo
* (max_retries * connect_delay) expires or the remoteport's
* dev_loss_tmo expires.
*/
if (nvme_reset_ctrl(&ctrl->ctrl)) {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Couldn't schedule reset. "
"Deleting controller.\n",
ctrl->cnum);
nvme_delete_ctrl(&ctrl->ctrl);
}
break;
case NVME_CTRL_RECONNECTING:
/*
* The association has already been terminated and the
* controller is attempting reconnects. No need to do anything
* futher. Reconnects will be attempted until either the
* ctlr_loss_tmo (max_retries * connect_delay) expires or the
* remoteport's dev_loss_tmo expires.
*/
break;
case NVME_CTRL_RESETTING:
/*
* Controller is already in the process of terminating the
* association. No need to do anything further. The reconnect
* step will kick in naturally after the association is
* terminated.
*/
break;
case NVME_CTRL_DELETING:
default:
/* no action to take - let it delete */
break;
}
}
/** /**
* nvme_fc_unregister_remoteport - transport entry point called by an * nvme_fc_unregister_remoteport - transport entry point called by an
* LLDD to deregister/remove a previously * LLDD to deregister/remove a previously
@ -665,15 +833,31 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
} }
portptr->port_state = FC_OBJSTATE_DELETED; portptr->port_state = FC_OBJSTATE_DELETED;
/* tear down all associations to the remote port */ rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ);
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
nvme_delete_ctrl(&ctrl->ctrl); list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
/* if dev_loss_tmo==0, dev loss is immediate */
if (!portptr->dev_loss_tmo) {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: controller connectivity lost. "
"Deleting controller.\n",
ctrl->cnum);
nvme_delete_ctrl(&ctrl->ctrl);
} else
nvme_fc_ctrl_connectivity_loss(ctrl);
}
spin_unlock_irqrestore(&rport->lock, flags); spin_unlock_irqrestore(&rport->lock, flags);
nvme_fc_abort_lsops(rport); nvme_fc_abort_lsops(rport);
/*
* release the reference, which will allow, if all controllers
* go away, which should only occur after dev_loss_tmo occurs,
* for the rport to be torn down.
*/
nvme_fc_rport_put(rport); nvme_fc_rport_put(rport);
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
@ -700,7 +884,6 @@ nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
u32 dev_loss_tmo) u32 dev_loss_tmo)
{ {
struct nvme_fc_rport *rport = remoteport_to_rport(portptr); struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
struct nvme_fc_ctrl *ctrl;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&rport->lock, flags); spin_lock_irqsave(&rport->lock, flags);
@ -2676,28 +2859,43 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
static void static void
nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
{ {
struct nvme_fc_rport *rport = ctrl->rport;
struct nvme_fc_remote_port *portptr = &rport->remoteport;
unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
bool recon = true;
if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
return; return;
dev_info(ctrl->ctrl.device, if (portptr->port_state == FC_OBJSTATE_ONLINE)
"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
ctrl->cnum, status);
if (nvmf_should_reconnect(&ctrl->ctrl)) {
/* Only schedule the reconnect if the remote port is online */
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
return;
dev_info(ctrl->ctrl.device, dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n", "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); ctrl->cnum, status);
queue_delayed_work(nvme_wq, &ctrl->connect_work, else if (time_after_eq(jiffies, rport->dev_loss_end))
ctrl->ctrl.opts->reconnect_delay * HZ); recon = false;
if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
if (portptr->port_state == FC_OBJSTATE_ONLINE)
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %ld "
"seconds\n",
ctrl->cnum, recon_delay / HZ);
else if (time_after(jiffies + recon_delay, rport->dev_loss_end))
recon_delay = rport->dev_loss_end - jiffies;
queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
} else { } else {
dev_warn(ctrl->ctrl.device, if (portptr->port_state == FC_OBJSTATE_ONLINE)
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) " "NVME-FC{%d}: Max reconnect attempts (%d) "
"reached. Removing controller\n", "reached. Removing controller\n",
ctrl->cnum, ctrl->ctrl.nr_reconnects); ctrl->cnum, ctrl->ctrl.nr_reconnects);
else
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: dev_loss_tmo (%d) expired "
"while waiting for remoteport connectivity. "
"Removing controller\n", ctrl->cnum,
portptr->dev_loss_tmo);
WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
} }
} }
@ -2721,15 +2919,17 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
return; return;
} }
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
ret = nvme_fc_create_association(ctrl); ret = nvme_fc_create_association(ctrl);
if (ret) else
nvme_fc_reconnect_or_delete(ctrl, ret); ret = -ENOTCONN;
else
dev_info(ctrl->ctrl.device, if (ret)
"NVME-FC{%d}: controller reset complete\n", nvme_fc_reconnect_or_delete(ctrl, ret);
ctrl->cnum); else
} dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller reset complete\n",
ctrl->cnum);
} }
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {