Merge branch 'nvmf-4.8-rc' of git://git.infradead.org/nvme-fabrics into for-linus

Sagi writes:

Mostly stability fixes for nvmet, rdma:
- fix uninitialized rdma_cm private data from Roland.
- rdma device removal handling (host and target).
- fix controller disconnect during active mounts.
- fix namespaces lost after fabric reconnects.
- remove redundant calls to namespace removal (rdma, loop).
- actually send controller shutdown when disconnecting.
- reconnect fixes (ns rescan and aen requeue).
- nvmet controller serial number inconsistency fix.
This commit is contained in:
Jens Axboe 2016-08-08 07:42:42 -06:00
commit d3f422c8d5
6 changed files with 128 additions and 74 deletions

View File

@ -12,13 +12,11 @@
* more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/jiffies.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
#include <linux/types.h>
@ -26,7 +24,6 @@
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/nvme.h>
#include <linux/t10-pi.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
@ -169,7 +166,6 @@ MODULE_PARM_DESC(register_always,
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl);
/* XXX: really should move to a generic header sooner or later.. */
static inline void put_unaligned_le24(u32 val, u8 *p)
@ -687,11 +683,6 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
list_del(&ctrl->list);
mutex_unlock(&nvme_rdma_ctrl_mutex);
if (ctrl->ctrl.tagset) {
blk_cleanup_queue(ctrl->ctrl.connect_q);
blk_mq_free_tag_set(&ctrl->tag_set);
nvme_rdma_dev_put(ctrl->device);
}
kfree(ctrl->queues);
nvmf_free_options(nctrl->opts);
free_ctrl:
@ -748,8 +739,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
if (ctrl->queue_count > 1)
if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
nvme_queue_scan(&ctrl->ctrl);
nvme_queue_async_events(&ctrl->ctrl);
}
dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
@ -1269,7 +1263,7 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
{
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
struct rdma_conn_param param = { };
struct nvme_rdma_cm_req priv;
struct nvme_rdma_cm_req priv = { };
int ret;
param.qp_num = queue->qp->qp_num;
@ -1318,37 +1312,39 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
* that caught the event. Since we hold the callout until the controller
* deletion is completed, we'll deadlock if the controller deletion will
* call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
* of destroying this queue before-hand, destroy the queue resources
* after the controller deletion completed with the exception of destroying
* the cm_id implicitly by returning a non-zero rc to the callout.
* of destroying this queue before-hand, destroy the queue resources,
* then queue the controller deletion which won't destroy this queue and
* we destroy the cm_id implicitly by returning a non-zero rc to the callout.
*/
static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
{
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
int ret, ctrl_deleted = 0;
int ret;
/* First disable the queue so ctrl delete won't free it */
if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
goto out;
/* Own the controller deletion */
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return 0;
/* delete the controller */
ret = __nvme_rdma_del_ctrl(ctrl);
if (!ret) {
dev_warn(ctrl->ctrl.device,
"Got rdma device removal event, deleting ctrl\n");
flush_work(&ctrl->delete_work);
dev_warn(ctrl->ctrl.device,
"Got rdma device removal event, deleting ctrl\n");
/* Get rid of reconnect work if it's running */
cancel_delayed_work_sync(&ctrl->reconnect_work);
/* Disable the queue so ctrl delete won't free it */
if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) {
/* Free this queue ourselves */
nvme_rdma_stop_queue(queue);
nvme_rdma_destroy_queue_ib(queue);
/* Return non-zero so the cm_id will destroy implicitly */
ctrl_deleted = 1;
/* Free this queue ourselves */
rdma_disconnect(queue->cm_id);
ib_drain_qp(queue->qp);
nvme_rdma_destroy_queue_ib(queue);
ret = 1;
}
out:
return ctrl_deleted;
/* Queue controller deletion */
queue_work(nvme_rdma_wq, &ctrl->delete_work);
flush_work(&ctrl->delete_work);
return ret;
}
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
@ -1648,7 +1644,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
nvme_rdma_free_io_queues(ctrl);
}
if (ctrl->ctrl.state == NVME_CTRL_LIVE)
if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
nvme_shutdown_ctrl(&ctrl->ctrl);
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@ -1657,15 +1653,27 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
nvme_rdma_destroy_admin_queue(ctrl);
}
/*
* __nvme_rdma_remove_ctrl() - common controller teardown for the work handlers
* @ctrl: rdma controller being torn down
* @shutdown: when true, nvme_rdma_shutdown_ctrl() is called on @ctrl after
* nvme_uninit_ctrl(); when false that step is skipped
*
* Called by nvme_rdma_del_ctrl_work() with @shutdown == true and by
* nvme_rdma_remove_ctrl_work() with @shutdown == false.
*
* The statement order is significant: uninit first, optional shutdown,
* then the connect queue / tag set / device reference, and only then the
* final nvme_put_ctrl().
*/
static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
nvme_uninit_ctrl(&ctrl->ctrl);
if (shutdown)
nvme_rdma_shutdown_ctrl(ctrl);
/*
* NOTE(review): presumably ctrl.tagset is only non-NULL once the I/O tag
* set was set up - confirm. Only then are connect_q, the tag set and
* the reference taken on ctrl->device released here.
*/
if (ctrl->ctrl.tagset) {
blk_cleanup_queue(ctrl->ctrl.connect_q);
blk_mq_free_tag_set(&ctrl->tag_set);
nvme_rdma_dev_put(ctrl->device);
}
/* Must come last: nothing above may run after this put. */
nvme_put_ctrl(&ctrl->ctrl);
}
static void nvme_rdma_del_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work);
nvme_remove_namespaces(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
__nvme_rdma_remove_ctrl(ctrl, true);
}
static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
@ -1698,9 +1706,7 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work);
nvme_remove_namespaces(&ctrl->ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
__nvme_rdma_remove_ctrl(ctrl, false);
}
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
@ -1739,6 +1745,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
nvme_queue_scan(&ctrl->ctrl);
nvme_queue_async_events(&ctrl->ctrl);
}
return;

View File

@ -13,7 +13,6 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <generated/utsrelease.h>
#include "nvmet.h"
@ -83,7 +82,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
u64 serial;
u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@ -96,10 +94,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->vid = 0;
id->ssvid = 0;
/* generate a random serial number as our controllers are ephemeral: */
get_random_bytes(&serial, sizeof(serial));
memset(id->sn, ' ', sizeof(id->sn));
snprintf(id->sn, sizeof(id->sn), "%llx", serial);
snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial);
memset(id->mn, ' ', sizeof(id->mn));
strncpy((char *)id->mn, "Linux", sizeof(id->mn));

View File

@ -13,6 +13,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include "nvmet.h"
static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
@ -728,6 +729,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
/* generate a random serial number as our controllers are ephemeral: */
get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
kref_init(&ctrl->ref);
ctrl->subsys = subsys;

View File

@ -414,9 +414,8 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work)
struct nvme_loop_ctrl *ctrl = container_of(work,
struct nvme_loop_ctrl, delete_work);
nvme_remove_namespaces(&ctrl->ctrl);
nvme_loop_shutdown_ctrl(ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_loop_shutdown_ctrl(ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
@ -501,7 +500,6 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
nvme_loop_destroy_admin_queue(ctrl);
out_disable:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
nvme_remove_namespaces(&ctrl->ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}

View File

@ -113,6 +113,7 @@ struct nvmet_ctrl {
struct mutex lock;
u64 cap;
u64 serial;
u32 cc;
u32 csts;

View File

@ -77,6 +77,7 @@ enum nvmet_rdma_queue_state {
NVMET_RDMA_Q_CONNECTING,
NVMET_RDMA_Q_LIVE,
NVMET_RDMA_Q_DISCONNECTING,
NVMET_RDMA_IN_DEVICE_REMOVAL,
};
struct nvmet_rdma_queue {
@ -615,15 +616,10 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
if (!len)
return 0;
/* use the already allocated data buffer if possible */
if (len <= NVMET_RDMA_INLINE_DATA_SIZE && rsp->queue->host_qid) {
nvmet_rdma_use_inline_sg(rsp, len, 0);
} else {
status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
len);
if (status)
return status;
}
status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
len);
if (status)
return status;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
@ -984,7 +980,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
struct nvmet_rdma_device *dev = queue->dev;
nvmet_rdma_free_queue(queue);
rdma_destroy_id(cm_id);
if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
rdma_destroy_id(cm_id);
kref_put(&dev->ref, nvmet_rdma_free_dev);
}
@ -1233,8 +1232,9 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
switch (queue->state) {
case NVMET_RDMA_Q_CONNECTING:
case NVMET_RDMA_Q_LIVE:
disconnect = true;
queue->state = NVMET_RDMA_Q_DISCONNECTING;
case NVMET_RDMA_IN_DEVICE_REMOVAL:
disconnect = true;
break;
case NVMET_RDMA_Q_DISCONNECTING:
break;
@ -1272,6 +1272,62 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
schedule_work(&queue->release_work);
}
/**
* nvmet_rdma_device_removal() - Handle RDMA device removal
* @cm_id: rdma cm id the DEVICE_REMOVAL event was delivered on
* @queue: nvmet rdma queue (cm id qp_context), or NULL when @cm_id is a
* device bound listener; in that case cm_id->context is read as the
* nvmet_port
*
* DEVICE_REMOVAL event notifies us that the RDMA device is about
* to unplug so we should take care of destroying our RDMA resources.
* This event will be generated for each allocated cm_id.
*
* Note that this event can be generated on a normal queue cm_id
* and/or a device bound listener cm_id (where in this case
* queue will be null).
*
* We claim ownership of destroying the cm_id. For queues we move
* the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL, and for a port
* we nullify the priv to prevent double cm_id destruction. The cm_id
* is then destroyed implicitly by returning a non-zero rc to the callout.
*
* Return: 1 when ownership of the cm_id destruction was claimed (the value
* is handed back to the cm callout by nvmet_rdma_cm_handler()), 0 when
* @cm_id is a listener whose port->priv no longer points at it.
*/
static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
struct nvmet_rdma_queue *queue)
{
unsigned long flags;
if (!queue) {
struct nvmet_port *port = cm_id->context;
/*
* This is a listener cm_id. Make sure that
* future remove_port won't invoke a double
* cm_id destroy. Use atomic xchg to make sure
* we don't compete with remove_port.
*/
if (xchg(&port->priv, NULL) != cm_id)
return 0;
} else {
/*
* This is a queue cm_id. Make sure that
* release queue will not destroy the cm_id
* and schedule all ctrl queues removal (only
* if the queue is not disconnecting already).
*
* NOTE(review): when the queue is already in
* NVMET_RDMA_Q_DISCONNECTING the state is left untouched,
* yet 1 is still returned below - confirm that the release
* work cannot also destroy this cm_id in that case.
*/
spin_lock_irqsave(&queue->state_lock, flags);
if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
spin_unlock_irqrestore(&queue->state_lock, flags);
nvmet_rdma_queue_disconnect(queue);
/*
* Presumably waits for the release work scheduled by the
* disconnect to finish before we return - TODO confirm.
*/
flush_scheduled_work();
}
/*
* We need to return 1 so that the core will destroy
* its own ID. What a great API design..
*/
return 1;
}
static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@ -1294,20 +1350,11 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_ADDR_CHANGE:
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
/*
* We can get the device removal callback even for a
* CM ID that we aren't actually using. In that case
* the context pointer is NULL, so we shouldn't try
* to disconnect a non-existing queue. But we also
* need to return 1 so that the core will destroy
* its own ID. What a great API design..
*/
if (queue)
nvmet_rdma_queue_disconnect(queue);
else
ret = 1;
nvmet_rdma_queue_disconnect(queue);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
ret = nvmet_rdma_device_removal(cm_id, queue);
break;
case RDMA_CM_EVENT_REJECTED:
case RDMA_CM_EVENT_UNREACHABLE:
@ -1396,9 +1443,10 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
static void nvmet_rdma_remove_port(struct nvmet_port *port)
{
struct rdma_cm_id *cm_id = port->priv;
struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
rdma_destroy_id(cm_id);
if (cm_id)
rdma_destroy_id(cm_id);
}
static struct nvmet_fabrics_ops nvmet_rdma_ops = {