Merge branch 'nvmf-4.8-rc' of git://git.infradead.org/nvme-fabrics into for-linus

Sagi writes:

Mostly stability fixes for nvmet, rdma:
- fix uninitialized rdma_cm private data from Roland.
- rdma device removal handling (host and target).
- fix controller disconnect during active mounts.
- fix namespaces lost after fabric reconnects.
- remove redundant calls to namespace removal (rdma, loop).
- actually send controller shutdown when disconnecting.
- reconnect fixes (ns rescan and aen requeue)
- nvmet controller serial number inconsistency fix.
This commit is contained in:
Jens Axboe 2016-08-08 07:42:42 -06:00
commit d3f422c8d5
6 changed files with 128 additions and 74 deletions

View File

@ -12,13 +12,11 @@
* more details. * more details.
*/ */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/delay.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/jiffies.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/blk-mq.h> #include <linux/blk-mq.h>
#include <linux/types.h> #include <linux/types.h>
@ -26,7 +24,6 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#include <linux/nvme.h> #include <linux/nvme.h>
#include <linux/t10-pi.h>
#include <asm/unaligned.h> #include <asm/unaligned.h>
#include <rdma/ib_verbs.h> #include <rdma/ib_verbs.h>
@ -169,7 +166,6 @@ MODULE_PARM_DESC(register_always,
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event); struct rdma_cm_event *event);
static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl);
/* XXX: really should move to a generic header sooner or later.. */ /* XXX: really should move to a generic header sooner or later.. */
static inline void put_unaligned_le24(u32 val, u8 *p) static inline void put_unaligned_le24(u32 val, u8 *p)
@ -687,11 +683,6 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
list_del(&ctrl->list); list_del(&ctrl->list);
mutex_unlock(&nvme_rdma_ctrl_mutex); mutex_unlock(&nvme_rdma_ctrl_mutex);
if (ctrl->ctrl.tagset) {
blk_cleanup_queue(ctrl->ctrl.connect_q);
blk_mq_free_tag_set(&ctrl->tag_set);
nvme_rdma_dev_put(ctrl->device);
}
kfree(ctrl->queues); kfree(ctrl->queues);
nvmf_free_options(nctrl->opts); nvmf_free_options(nctrl->opts);
free_ctrl: free_ctrl:
@ -748,8 +739,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed); WARN_ON_ONCE(!changed);
if (ctrl->queue_count > 1) if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl); nvme_start_queues(&ctrl->ctrl);
nvme_queue_scan(&ctrl->ctrl);
nvme_queue_async_events(&ctrl->ctrl);
}
dev_info(ctrl->ctrl.device, "Successfully reconnected\n"); dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
@ -1269,7 +1263,7 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
{ {
struct nvme_rdma_ctrl *ctrl = queue->ctrl; struct nvme_rdma_ctrl *ctrl = queue->ctrl;
struct rdma_conn_param param = { }; struct rdma_conn_param param = { };
struct nvme_rdma_cm_req priv; struct nvme_rdma_cm_req priv = { };
int ret; int ret;
param.qp_num = queue->qp->qp_num; param.qp_num = queue->qp->qp_num;
@ -1318,37 +1312,39 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
* that caught the event. Since we hold the callout until the controller * that caught the event. Since we hold the callout until the controller
* deletion is completed, we'll deadlock if the controller deletion will * deletion is completed, we'll deadlock if the controller deletion will
* call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
* of destroying this queue before-hand, destroy the queue resources * of destroying this queue before-hand, destroy the queue resources,
* after the controller deletion completed with the exception of destroying * then queue the controller deletion which won't destroy this queue and
* the cm_id implicitely by returning a non-zero rc to the callout. * we destroy the cm_id implicitely by returning a non-zero rc to the callout.
*/ */
static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
{ {
struct nvme_rdma_ctrl *ctrl = queue->ctrl; struct nvme_rdma_ctrl *ctrl = queue->ctrl;
int ret, ctrl_deleted = 0; int ret;
/* First disable the queue so ctrl delete won't free it */ /* Own the controller deletion */
if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
goto out; return 0;
/* delete the controller */ dev_warn(ctrl->ctrl.device,
ret = __nvme_rdma_del_ctrl(ctrl); "Got rdma device removal event, deleting ctrl\n");
if (!ret) {
dev_warn(ctrl->ctrl.device, /* Get rid of reconnect work if its running */
"Got rdma device removal event, deleting ctrl\n"); cancel_delayed_work_sync(&ctrl->reconnect_work);
flush_work(&ctrl->delete_work);
/* Disable the queue so ctrl delete won't free it */
if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) {
/* Free this queue ourselves */
nvme_rdma_stop_queue(queue);
nvme_rdma_destroy_queue_ib(queue);
/* Return non-zero so the cm_id will destroy implicitly */ /* Return non-zero so the cm_id will destroy implicitly */
ctrl_deleted = 1; ret = 1;
/* Free this queue ourselves */
rdma_disconnect(queue->cm_id);
ib_drain_qp(queue->qp);
nvme_rdma_destroy_queue_ib(queue);
} }
out: /* Queue controller deletion */
return ctrl_deleted; queue_work(nvme_rdma_wq, &ctrl->delete_work);
flush_work(&ctrl->delete_work);
return ret;
} }
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
@ -1648,7 +1644,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
nvme_rdma_free_io_queues(ctrl); nvme_rdma_free_io_queues(ctrl);
} }
if (ctrl->ctrl.state == NVME_CTRL_LIVE) if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
nvme_shutdown_ctrl(&ctrl->ctrl); nvme_shutdown_ctrl(&ctrl->ctrl);
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@ -1657,15 +1653,27 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
nvme_rdma_destroy_admin_queue(ctrl); nvme_rdma_destroy_admin_queue(ctrl);
} }
static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
nvme_uninit_ctrl(&ctrl->ctrl);
if (shutdown)
nvme_rdma_shutdown_ctrl(ctrl);
if (ctrl->ctrl.tagset) {
blk_cleanup_queue(ctrl->ctrl.connect_q);
blk_mq_free_tag_set(&ctrl->tag_set);
nvme_rdma_dev_put(ctrl->device);
}
nvme_put_ctrl(&ctrl->ctrl);
}
static void nvme_rdma_del_ctrl_work(struct work_struct *work) static void nvme_rdma_del_ctrl_work(struct work_struct *work)
{ {
struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work); struct nvme_rdma_ctrl, delete_work);
nvme_remove_namespaces(&ctrl->ctrl); __nvme_rdma_remove_ctrl(ctrl, true);
nvme_rdma_shutdown_ctrl(ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
} }
static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
@ -1698,9 +1706,7 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work); struct nvme_rdma_ctrl, delete_work);
nvme_remove_namespaces(&ctrl->ctrl); __nvme_rdma_remove_ctrl(ctrl, false);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
} }
static void nvme_rdma_reset_ctrl_work(struct work_struct *work) static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
@ -1739,6 +1745,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
if (ctrl->queue_count > 1) { if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl); nvme_start_queues(&ctrl->ctrl);
nvme_queue_scan(&ctrl->ctrl); nvme_queue_scan(&ctrl->ctrl);
nvme_queue_async_events(&ctrl->ctrl);
} }
return; return;

View File

@ -13,7 +13,6 @@
*/ */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h> #include <linux/module.h>
#include <linux/random.h>
#include <generated/utsrelease.h> #include <generated/utsrelease.h>
#include "nvmet.h" #include "nvmet.h"
@ -83,7 +82,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{ {
struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id; struct nvme_id_ctrl *id;
u64 serial;
u16 status = 0; u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL); id = kzalloc(sizeof(*id), GFP_KERNEL);
@ -96,10 +94,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->vid = 0; id->vid = 0;
id->ssvid = 0; id->ssvid = 0;
/* generate a random serial number as our controllers are ephemeral: */
get_random_bytes(&serial, sizeof(serial));
memset(id->sn, ' ', sizeof(id->sn)); memset(id->sn, ' ', sizeof(id->sn));
snprintf(id->sn, sizeof(id->sn), "%llx", serial); snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial);
memset(id->mn, ' ', sizeof(id->mn)); memset(id->mn, ' ', sizeof(id->mn));
strncpy((char *)id->mn, "Linux", sizeof(id->mn)); strncpy((char *)id->mn, "Linux", sizeof(id->mn));

View File

@ -13,6 +13,7 @@
*/ */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h> #include <linux/module.h>
#include <linux/random.h>
#include "nvmet.h" #include "nvmet.h"
static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
@ -728,6 +729,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE); memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE); memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
/* generate a random serial number as our controllers are ephemeral: */
get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
kref_init(&ctrl->ref); kref_init(&ctrl->ref);
ctrl->subsys = subsys; ctrl->subsys = subsys;

View File

@ -414,9 +414,8 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work)
struct nvme_loop_ctrl *ctrl = container_of(work, struct nvme_loop_ctrl *ctrl = container_of(work,
struct nvme_loop_ctrl, delete_work); struct nvme_loop_ctrl, delete_work);
nvme_remove_namespaces(&ctrl->ctrl);
nvme_loop_shutdown_ctrl(ctrl);
nvme_uninit_ctrl(&ctrl->ctrl); nvme_uninit_ctrl(&ctrl->ctrl);
nvme_loop_shutdown_ctrl(ctrl);
nvme_put_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl);
} }
@ -501,7 +500,6 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
nvme_loop_destroy_admin_queue(ctrl); nvme_loop_destroy_admin_queue(ctrl);
out_disable: out_disable:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
nvme_remove_namespaces(&ctrl->ctrl);
nvme_uninit_ctrl(&ctrl->ctrl); nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl);
} }

View File

@ -113,6 +113,7 @@ struct nvmet_ctrl {
struct mutex lock; struct mutex lock;
u64 cap; u64 cap;
u64 serial;
u32 cc; u32 cc;
u32 csts; u32 csts;

View File

@ -77,6 +77,7 @@ enum nvmet_rdma_queue_state {
NVMET_RDMA_Q_CONNECTING, NVMET_RDMA_Q_CONNECTING,
NVMET_RDMA_Q_LIVE, NVMET_RDMA_Q_LIVE,
NVMET_RDMA_Q_DISCONNECTING, NVMET_RDMA_Q_DISCONNECTING,
NVMET_RDMA_IN_DEVICE_REMOVAL,
}; };
struct nvmet_rdma_queue { struct nvmet_rdma_queue {
@ -615,15 +616,10 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
if (!len) if (!len)
return 0; return 0;
/* use the already allocated data buffer if possible */ status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
if (len <= NVMET_RDMA_INLINE_DATA_SIZE && rsp->queue->host_qid) { len);
nvmet_rdma_use_inline_sg(rsp, len, 0); if (status)
} else { return status;
status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
len);
if (status)
return status;
}
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key, rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
@ -984,7 +980,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
struct nvmet_rdma_device *dev = queue->dev; struct nvmet_rdma_device *dev = queue->dev;
nvmet_rdma_free_queue(queue); nvmet_rdma_free_queue(queue);
rdma_destroy_id(cm_id);
if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
rdma_destroy_id(cm_id);
kref_put(&dev->ref, nvmet_rdma_free_dev); kref_put(&dev->ref, nvmet_rdma_free_dev);
} }
@ -1233,8 +1232,9 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
switch (queue->state) { switch (queue->state) {
case NVMET_RDMA_Q_CONNECTING: case NVMET_RDMA_Q_CONNECTING:
case NVMET_RDMA_Q_LIVE: case NVMET_RDMA_Q_LIVE:
disconnect = true;
queue->state = NVMET_RDMA_Q_DISCONNECTING; queue->state = NVMET_RDMA_Q_DISCONNECTING;
case NVMET_RDMA_IN_DEVICE_REMOVAL:
disconnect = true;
break; break;
case NVMET_RDMA_Q_DISCONNECTING: case NVMET_RDMA_Q_DISCONNECTING:
break; break;
@ -1272,6 +1272,62 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
schedule_work(&queue->release_work); schedule_work(&queue->release_work);
} }
/**
* nvme_rdma_device_removal() - Handle RDMA device removal
* @queue: nvmet rdma queue (cm id qp_context)
* @addr: nvmet address (cm_id context)
*
* DEVICE_REMOVAL event notifies us that the RDMA device is about
* to unplug so we should take care of destroying our RDMA resources.
* This event will be generated for each allocated cm_id.
*
* Note that this event can be generated on a normal queue cm_id
* and/or a device bound listener cm_id (where in this case
* queue will be null).
*
* we claim ownership on destroying the cm_id. For queues we move
* the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
* we nullify the priv to prevent double cm_id destruction and destroying
* the cm_id implicitely by returning a non-zero rc to the callout.
*/
static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
struct nvmet_rdma_queue *queue)
{
unsigned long flags;
if (!queue) {
struct nvmet_port *port = cm_id->context;
/*
* This is a listener cm_id. Make sure that
* future remove_port won't invoke a double
* cm_id destroy. use atomic xchg to make sure
* we don't compete with remove_port.
*/
if (xchg(&port->priv, NULL) != cm_id)
return 0;
} else {
/*
* This is a queue cm_id. Make sure that
* release queue will not destroy the cm_id
* and schedule all ctrl queues removal (only
* if the queue is not disconnecting already).
*/
spin_lock_irqsave(&queue->state_lock, flags);
if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
spin_unlock_irqrestore(&queue->state_lock, flags);
nvmet_rdma_queue_disconnect(queue);
flush_scheduled_work();
}
/*
* We need to return 1 so that the core will destroy
* it's own ID. What a great API design..
*/
return 1;
}
static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event) struct rdma_cm_event *event)
{ {
@ -1294,20 +1350,11 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
break; break;
case RDMA_CM_EVENT_ADDR_CHANGE: case RDMA_CM_EVENT_ADDR_CHANGE:
case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_TIMEWAIT_EXIT: case RDMA_CM_EVENT_TIMEWAIT_EXIT:
/* nvmet_rdma_queue_disconnect(queue);
* We can get the device removal callback even for a break;
* CM ID that we aren't actually using. In that case case RDMA_CM_EVENT_DEVICE_REMOVAL:
* the context pointer is NULL, so we shouldn't try ret = nvmet_rdma_device_removal(cm_id, queue);
* to disconnect a non-existing queue. But we also
* need to return 1 so that the core will destroy
* it's own ID. What a great API design..
*/
if (queue)
nvmet_rdma_queue_disconnect(queue);
else
ret = 1;
break; break;
case RDMA_CM_EVENT_REJECTED: case RDMA_CM_EVENT_REJECTED:
case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_UNREACHABLE:
@ -1396,9 +1443,10 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
static void nvmet_rdma_remove_port(struct nvmet_port *port) static void nvmet_rdma_remove_port(struct nvmet_port *port)
{ {
struct rdma_cm_id *cm_id = port->priv; struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
rdma_destroy_id(cm_id); if (cm_id)
rdma_destroy_id(cm_id);
} }
static struct nvmet_fabrics_ops nvmet_rdma_ops = { static struct nvmet_fabrics_ops nvmet_rdma_ops = {