IB/srpt: Detect session shutdown reliably

The Last WQE Reached event is only generated after one or more work
requests have been queued on the QP associated with a session. Since
session shutdown can start before any work requests have been queued,
use a zero-length RDMA write to wait until a QP has been drained.

Additionally, rework the code for closing and disconnecting a session.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Alex Estrin <alex.estrin@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Bart Van Assche 2016-02-11 11:08:53 -08:00 committed by Doug Ledford
parent 8628991fbe
commit aaf45bd83e
2 changed files with 150 additions and 150 deletions

View File

@ -92,10 +92,11 @@ MODULE_PARM_DESC(srpt_service_guid,
static struct ib_client srpt_client;
static void srpt_release_cmd(struct se_cmd *se_cmd);
static void srpt_release_channel(struct srpt_rdma_ch *ch);
static void srpt_free_ch(struct kref *kref);
static int srpt_queue_status(struct se_cmd *cmd);
static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc);
/*
* The only allowed channel state changes are those that change the channel
@ -175,6 +176,23 @@ static void srpt_srq_event(struct ib_event *event, void *ctx)
pr_info("SRQ event %d\n", event->event);
}
/**
 * get_ch_state_name() - Translate a channel state into a printable name.
 * @s: Channel state to describe.
 *
 * Returns a pointer to a string literal. "???" is returned for any value
 * that is not handled by the switch statement below.
 */
static const char *get_ch_state_name(enum rdma_ch_state s)
{
	/* Fallback for values without a matching case label. */
	const char *name = "???";

	switch (s) {
	case CH_CONNECTING:
		name = "connecting";
		break;
	case CH_LIVE:
		name = "live";
		break;
	case CH_DISCONNECTING:
		name = "disconnecting";
		break;
	case CH_DRAINING:
		name = "draining";
		break;
	case CH_DISCONNECTED:
		name = "disconnected";
		break;
	}

	return name;
}
/**
* srpt_qp_event() - QP event callback function.
*/
@ -188,11 +206,9 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
ib_cm_notify(ch->cm_id, event->event);
break;
case IB_EVENT_QP_LAST_WQE_REACHED:
if (srpt_set_ch_state(ch, CH_RELEASING))
srpt_release_channel(ch);
else
pr_debug("%s: state %d - ignored LAST_WQE.\n",
ch->sess_name, ch->state);
pr_debug("%s-%d, state %s: received Last WQE event.\n",
ch->sess_name, ch->qp->qp_num,
get_ch_state_name(ch->state));
break;
default:
pr_err("received unrecognized IB QP event %d\n", event->event);
@ -794,6 +810,37 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
return ret;
}
/**
* srpt_zerolength_write() - Perform a zero-length RDMA write.
*
* A quote from the InfiniBand specification: C9-88: For an HCA responder
* using Reliable Connection service, for each zero-length RDMA READ or WRITE
* request, the R_Key shall not be validated, even if the request includes
* Immediate data.
*/
static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
{
struct ib_send_wr wr, *bad_wr;
memset(&wr, 0, sizeof(wr));
wr.opcode = IB_WR_RDMA_WRITE;
wr.wr_cqe = &ch->zw_cqe;
wr.send_flags = IB_SEND_SIGNALED;
return ib_post_send(ch->qp, &wr, &bad_wr);
}
/**
 * srpt_zerolength_write_done() - Completion handler for the zero-length write.
 * @cq: Completion queue; cq->cq_context holds the owning channel.
 * @wc: Work completion of the zero-length RDMA write.
 *
 * Warns if the work request completed successfully, since the message
 * printed in that case states that the QP is then not in the error state.
 * Afterwards the channel is switched to CH_DISCONNECTED and, if that state
 * change was accepted, ch->release_work is scheduled.
 */
static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct srpt_rdma_ch *ch = cq->cq_context;

	WARN(wc->status == IB_WC_SUCCESS, "%s-%d: QP not in error state\n",
	     ch->sess_name, ch->qp->qp_num);

	/*
	 * A rejected state change is unexpected here. WARN_ONCE() takes the
	 * condition as its first argument and the format string second, so
	 * pass an explicit always-true condition to get the message printed.
	 */
	if (srpt_set_ch_state(ch, CH_DISCONNECTED))
		schedule_work(&ch->release_work);
	else
		WARN_ONCE(1, "%s-%d\n", ch->sess_name, ch->qp->qp_num);
}
/**
* srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
* @ioctx: Pointer to the I/O context associated with the request.
@ -1816,57 +1863,87 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
}
/**
* __srpt_close_ch() - Close an RDMA channel by setting the QP error state.
* srpt_close_ch() - Close an RDMA channel.
*
* Reset the QP and make sure all resources associated with the channel will
* be deallocated at an appropriate time.
* Make sure all resources associated with the channel will be deallocated at
* an appropriate time.
*
* Note: The caller must hold ch->sport->sdev->spinlock.
* Returns true if and only if the channel state has been modified into
* CH_DRAINING.
*/
static void __srpt_close_ch(struct srpt_rdma_ch *ch)
static bool srpt_close_ch(struct srpt_rdma_ch *ch)
{
enum rdma_ch_state prev_state;
unsigned long flags;
int ret;
spin_lock_irqsave(&ch->spinlock, flags);
prev_state = ch->state;
switch (prev_state) {
case CH_CONNECTING:
case CH_LIVE:
ch->state = CH_DISCONNECTING;
break;
default:
break;
if (!srpt_set_ch_state(ch, CH_DRAINING)) {
pr_debug("%s-%d: already closed\n", ch->sess_name,
ch->qp->qp_num);
return false;
}
spin_unlock_irqrestore(&ch->spinlock, flags);
switch (prev_state) {
case CH_CONNECTING:
ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0,
NULL, 0);
/* fall through */
case CH_LIVE:
if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
pr_err("sending CM DREQ failed.\n");
break;
case CH_DISCONNECTING:
break;
case CH_DRAINING:
case CH_RELEASING:
break;
kref_get(&ch->kref);
ret = srpt_ch_qp_err(ch);
if (ret < 0)
pr_err("%s-%d: changing queue pair into error state failed: %d\n",
ch->sess_name, ch->qp->qp_num, ret);
pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
ch->qp->qp_num);
ret = srpt_zerolength_write(ch);
if (ret < 0) {
pr_err("%s-%d: queuing zero-length write failed: %d\n",
ch->sess_name, ch->qp->qp_num, ret);
if (srpt_set_ch_state(ch, CH_DISCONNECTED))
schedule_work(&ch->release_work);
else
WARN_ON_ONCE(true);
}
kref_put(&ch->kref, srpt_free_ch);
return true;
}
/**
* srpt_close_ch() - Close an RDMA channel.
/*
* Change the channel state into CH_DISCONNECTING. If a channel has not yet
* reached the connected state, close it. If a channel is in the connected
* state, send a DREQ. If a DREQ has been received, send a DREP. Note: it is
* the responsibility of the caller to ensure that this function is not
* invoked concurrently with the code that accepts a connection. This means
* that this function must either be invoked from inside a CM callback
* function or that it must be invoked with the srpt_port.mutex held.
*/
static void srpt_close_ch(struct srpt_rdma_ch *ch)
static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
{
struct srpt_device *sdev = ch->sport->sdev;
int ret;
mutex_lock(&sdev->mutex);
__srpt_close_ch(ch);
mutex_unlock(&sdev->mutex);
if (!srpt_set_ch_state(ch, CH_DISCONNECTING))
return -ENOTCONN;
ret = ib_send_cm_dreq(ch->cm_id, NULL, 0);
if (ret < 0)
ret = ib_send_cm_drep(ch->cm_id, NULL, 0);
if (ret < 0 && srpt_close_ch(ch))
ret = 0;
return ret;
}
/**
 * __srpt_close_all_ch() - Disconnect and close all channels of a device.
 * @sdev: SRPT device whose channel list (sdev->rch_list) is walked.
 *
 * For every channel srpt_disconnect_ch() is invoked first; an informational
 * message is logged when it does not report an error. srpt_close_ch() is then
 * called for the channel regardless of that result.
 *
 * The caller must hold sdev->mutex.
 */
static void __srpt_close_all_ch(struct srpt_device *sdev)
{
	struct srpt_rdma_ch *chan;
	int ret;

	lockdep_assert_held(&sdev->mutex);

	list_for_each_entry(chan, &sdev->rch_list, list) {
		ret = srpt_disconnect_ch(chan);
		if (ret >= 0) {
			pr_info("Closing channel %s-%d because target %s has been disabled\n",
				chan->sess_name, chan->qp->qp_num,
				sdev->device->name);
		}
		srpt_close_ch(chan);
	}
}
/**
@ -1877,49 +1954,11 @@ static int srpt_shutdown_session(struct se_session *se_sess)
return 1;
}
/**
* srpt_drain_channel() - Drain a channel by resetting the IB queue pair.
* @cm_id: Pointer to the CM ID of the channel to be drained.
*
* Note: Must be called from inside srpt_cm_handler to avoid a race between
* accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
* (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one()
* waits until all target sessions for the associated IB device have been
* unregistered and target session registration involves a call to
* ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until
* this function has finished).
*/
static void srpt_drain_channel(struct srpt_rdma_ch *ch)
static void srpt_free_ch(struct kref *kref)
{
int ret;
bool do_reset = false;
struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref);
WARN_ON_ONCE(irqs_disabled());
do_reset = srpt_set_ch_state(ch, CH_DRAINING);
if (do_reset) {
if (ch->sess)
srpt_shutdown_session(ch->sess);
ret = srpt_ch_qp_err(ch);
if (ret < 0)
pr_err("Setting queue pair in error state"
" failed: %d\n", ret);
}
}
/**
* srpt_release_channel() - Release channel resources.
*
* Schedules the actual release because:
* - Calling the ib_destroy_cm_id() call from inside an IB CM callback would
* trigger a deadlock.
* - It is not safe to call TCM transport_* functions from interrupt context.
*/
static void srpt_release_channel(struct srpt_rdma_ch *ch)
{
schedule_work(&ch->release_work);
kfree(ch);
}
static void srpt_release_channel_work(struct work_struct *w)
@ -1961,7 +2000,7 @@ static void srpt_release_channel_work(struct work_struct *w)
wake_up(&sdev->ch_releaseQ);
kfree(ch);
kref_put(&ch->kref, srpt_free_ch);
}
/**
@ -2046,17 +2085,10 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
&& param->port == ch->sport->port
&& param->listen_id == ch->sport->sdev->cm_id
&& ch->cm_id) {
if (ch->state != CH_CONNECTING
&& ch->state != CH_LIVE)
if (srpt_disconnect_ch(ch) < 0)
continue;
/* found an existing channel */
pr_debug("Found existing channel %s"
" cm_id= %p state= %d\n",
ch->sess_name, ch->cm_id, ch->state);
__srpt_close_ch(ch);
pr_info("Relogin - closed existing channel %s\n",
ch->sess_name);
rsp->rsp_flags =
SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
}
@ -2087,6 +2119,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
goto reject;
}
kref_init(&ch->kref);
ch->zw_cqe.done = srpt_zerolength_write_done;
INIT_WORK(&ch->release_work, srpt_release_channel_work);
memcpy(ch->i_port_id, req->initiator_port_id, 16);
memcpy(ch->t_port_id, req->target_port_id, 16);
@ -2214,7 +2248,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
goto out;
release_channel:
srpt_set_ch_state(ch, CH_RELEASING);
srpt_disconnect_ch(ch);
transport_deregister_session_configfs(ch->sess);
transport_deregister_session(ch->sess);
ch->sess = NULL;
@ -2263,7 +2297,6 @@ static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
ch->sess_name, ch->qp->qp_num, reason, private_data_len ?
"; private data" : "", priv ? priv : " (?)");
kfree(priv);
srpt_drain_channel(ch);
}
/**
@ -2291,40 +2324,6 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch)
}
}
/**
* srpt_cm_dreq_recv() - Process reception of a DREQ message.
*/
static void srpt_cm_dreq_recv(struct srpt_rdma_ch *ch)
{
unsigned long flags;
bool send_drep = false;
pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num,
ch->state);
spin_lock_irqsave(&ch->spinlock, flags);
switch (ch->state) {
case CH_CONNECTING:
case CH_LIVE:
send_drep = true;
ch->state = CH_DISCONNECTING;
break;
case CH_DISCONNECTING:
case CH_DRAINING:
case CH_RELEASING:
WARN(true, "unexpected channel state %d\n", ch->state);
break;
}
spin_unlock_irqrestore(&ch->spinlock, flags);
if (send_drep) {
if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
pr_err("Sending IB DREP failed.\n");
pr_info("Received DREQ and sent DREP for session %s.\n",
ch->sess_name);
}
}
/**
* srpt_cm_handler() - IB connection manager callback function.
*
@ -2356,22 +2355,21 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
srpt_cm_rtu_recv(ch);
break;
case IB_CM_DREQ_RECEIVED:
srpt_cm_dreq_recv(ch);
srpt_disconnect_ch(ch);
break;
case IB_CM_DREP_RECEIVED:
pr_info("Received CM DREP message for ch %s-%d.\n",
ch->sess_name, ch->qp->qp_num);
srpt_drain_channel(ch);
srpt_close_ch(ch);
break;
case IB_CM_TIMEWAIT_EXIT:
pr_info("Received CM TimeWait exit for ch %s-%d.\n",
ch->sess_name, ch->qp->qp_num);
srpt_drain_channel(ch);
srpt_close_ch(ch);
break;
case IB_CM_REP_ERROR:
pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name,
ch->qp->qp_num);
srpt_drain_channel(ch);
break;
case IB_CM_DREQ_ERROR:
pr_info("Received CM DREQ ERROR event.\n");
@ -2511,7 +2509,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd)
break;
case CH_DISCONNECTING:
case CH_DRAINING:
case CH_RELEASING:
case CH_DISCONNECTED:
pr_debug("cmd with tag %lld: channel disconnecting\n",
ioctx->cmd.tag);
srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN);
@ -2657,16 +2655,16 @@ static void srpt_refresh_port_work(struct work_struct *work)
*/
static int srpt_release_sdev(struct srpt_device *sdev)
{
struct srpt_rdma_ch *ch, *tmp_ch;
int res;
int i, res;
WARN_ON_ONCE(irqs_disabled());
BUG_ON(!sdev);
mutex_lock(&sdev->mutex);
list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
__srpt_close_ch(ch);
for (i = 0; i < ARRAY_SIZE(sdev->port); i++)
sdev->port[i].enabled = false;
__srpt_close_all_ch(sdev);
mutex_unlock(&sdev->mutex);
res = wait_event_interruptible(sdev->ch_releaseQ,
@ -2963,7 +2961,7 @@ static void srpt_close_session(struct se_session *se_sess)
BUG_ON(ch->release_done);
ch->release_done = &release_done;
wait = !list_empty(&ch->list);
__srpt_close_ch(ch);
srpt_disconnect_ch(ch);
mutex_unlock(&sdev->mutex);
if (!wait)

View File

@ -218,20 +218,20 @@ struct srpt_send_ioctx {
/**
* enum rdma_ch_state - SRP channel state.
* @CH_CONNECTING: QP is in RTR state; waiting for RTU.
* @CH_LIVE: QP is in RTS state.
* @CH_DISCONNECTING: DREQ has been received; waiting for DREP
* or DREQ has been send and waiting for DREP
* or .
* @CH_DRAINING: QP is in ERR state; waiting for last WQE event.
* @CH_RELEASING: Last WQE event has been received; releasing resources.
* @CH_CONNECTING: QP is in RTR state; waiting for RTU.
* @CH_LIVE: QP is in RTS state.
* @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has
* been received.
* @CH_DRAINING: DREP has been received or waiting for DREP timed out
* and last work request has been queued.
* @CH_DISCONNECTED: Last completion has been received.
*/
enum rdma_ch_state {
CH_CONNECTING,
CH_LIVE,
CH_DISCONNECTING,
CH_DRAINING,
CH_RELEASING
CH_DISCONNECTED,
};
/**
@ -267,6 +267,8 @@ struct srpt_rdma_ch {
struct ib_cm_id *cm_id;
struct ib_qp *qp;
struct ib_cq *cq;
struct ib_cqe zw_cqe;
struct kref kref;
int rq_size;
u32 rsp_size;
atomic_t sq_wr_avail;