mirror of https://gitee.com/openkylin/linux.git
drbd: fix race when forcefully disconnecting
If a forced disconnect hits a restarting receiver right after it passed its final "if (C_DISCONNECTING)" test in drbdd_init(), but before it was actually restarted by drbd_thread_setup, we could be left with a connection stuck in C_DISCONNECTING, never reaching C_STANDALONE, which would be necessary to take it down or reconfigure it. Move the last cleanup into w_after_conn_state_ch(), and do an additional state change request in conn_try_disconnect(), just in case. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
parent
88104ca458
commit
f3dfa40a67
|
@ -2115,37 +2115,54 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
|
||||||
static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force)
|
static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force)
|
||||||
{
|
{
|
||||||
enum drbd_state_rv rv;
|
enum drbd_state_rv rv;
|
||||||
if (force) {
|
|
||||||
spin_lock_irq(&tconn->req_lock);
|
|
||||||
rv = _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
|
|
||||||
spin_unlock_irq(&tconn->req_lock);
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0);
|
rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING),
|
||||||
|
force ? CS_HARD : 0);
|
||||||
|
|
||||||
switch (rv) {
|
switch (rv) {
|
||||||
case SS_NOTHING_TO_DO:
|
case SS_NOTHING_TO_DO:
|
||||||
|
break;
|
||||||
case SS_ALREADY_STANDALONE:
|
case SS_ALREADY_STANDALONE:
|
||||||
return SS_SUCCESS;
|
return SS_SUCCESS;
|
||||||
case SS_PRIMARY_NOP:
|
case SS_PRIMARY_NOP:
|
||||||
/* Our state checking code wants to see the peer outdated. */
|
/* Our state checking code wants to see the peer outdated. */
|
||||||
rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
|
rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
|
||||||
pdsk, D_OUTDATED), CS_VERBOSE);
|
pdsk, D_OUTDATED), CS_VERBOSE);
|
||||||
break;
|
break;
|
||||||
case SS_CW_FAILED_BY_PEER:
|
case SS_CW_FAILED_BY_PEER:
|
||||||
/* The peer probably wants to see us outdated. */
|
/* The peer probably wants to see us outdated. */
|
||||||
rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
|
rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
|
||||||
disk, D_OUTDATED), 0);
|
disk, D_OUTDATED), 0);
|
||||||
if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
|
if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
|
||||||
conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
|
rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING),
|
||||||
rv = SS_SUCCESS;
|
CS_HARD);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:;
|
default:;
|
||||||
/* no special handling necessary */
|
/* no special handling necessary */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rv >= SS_SUCCESS) {
|
||||||
|
enum drbd_state_rv rv2;
|
||||||
|
/* No one else can reconfigure the network while I am here.
|
||||||
|
* The state handling only uses drbd_thread_stop_nowait(),
|
||||||
|
* we want to really wait here until the receiver is no more.
|
||||||
|
*/
|
||||||
|
drbd_thread_stop(&adm_ctx.tconn->receiver);
|
||||||
|
|
||||||
|
/* Race breaker. This additional state change request may be
|
||||||
|
* necessary, if this was a forced disconnect during a receiver
|
||||||
|
* restart. We may have "killed" the receiver thread just
|
||||||
|
* after drbdd_init() returned. Typically, we should be
|
||||||
|
* C_STANDALONE already, now, and this becomes a no-op.
|
||||||
|
*/
|
||||||
|
rv2 = conn_request_state(tconn, NS(conn, C_STANDALONE),
|
||||||
|
CS_VERBOSE | CS_HARD);
|
||||||
|
if (rv2 < SS_SUCCESS)
|
||||||
|
conn_err(tconn,
|
||||||
|
"unexpected rv2=%d in conn_try_disconnect()\n",
|
||||||
|
rv2);
|
||||||
|
}
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2176,19 +2193,9 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
|
||||||
|
|
||||||
rv = conn_try_disconnect(tconn, parms.force_disconnect);
|
rv = conn_try_disconnect(tconn, parms.force_disconnect);
|
||||||
if (rv < SS_SUCCESS)
|
if (rv < SS_SUCCESS)
|
||||||
goto fail;
|
retcode = rv; /* FIXME: Type mismatch. */
|
||||||
|
else
|
||||||
/* No one else can reconfigure the network while I am here.
|
retcode = NO_ERROR;
|
||||||
* The state handling only uses drbd_thread_stop_nowait(),
|
|
||||||
* we want to really wait here until the receiver is no more. */
|
|
||||||
drbd_thread_stop(&tconn->receiver);
|
|
||||||
if (wait_event_interruptible(tconn->ping_wait,
|
|
||||||
tconn->cstate == C_STANDALONE)) {
|
|
||||||
retcode = ERR_INTR;
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
retcode = NO_ERROR;
|
|
||||||
fail:
|
fail:
|
||||||
drbd_adm_finish(info, retcode);
|
drbd_adm_finish(info, retcode);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -3049,8 +3056,7 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
|
||||||
|
|
||||||
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
|
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
|
||||||
{
|
{
|
||||||
enum drbd_ret_code retcode;
|
int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
|
||||||
enum drbd_state_rv rv;
|
|
||||||
struct drbd_conf *mdev;
|
struct drbd_conf *mdev;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
|
@ -3074,30 +3080,35 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
up_read(&drbd_cfg_rwsem);
|
||||||
|
|
||||||
/* disconnect */
|
/* disconnect; may stop the receiver;
|
||||||
rv = conn_try_disconnect(adm_ctx.tconn, 0);
|
* must not hold the drbd_cfg_rwsem */
|
||||||
if (rv < SS_SUCCESS) {
|
retcode = conn_try_disconnect(adm_ctx.tconn, 0);
|
||||||
retcode = rv; /* enum type mismatch! */
|
if (retcode < SS_SUCCESS) {
|
||||||
drbd_msg_put_info("failed to disconnect");
|
drbd_msg_put_info("failed to disconnect");
|
||||||
goto out_unlock;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Make sure the network threads have actually stopped,
|
down_read(&drbd_cfg_rwsem);
|
||||||
* state handling only does drbd_thread_stop_nowait(). */
|
|
||||||
drbd_thread_stop(&adm_ctx.tconn->receiver);
|
|
||||||
|
|
||||||
/* detach */
|
/* detach */
|
||||||
idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
|
idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
|
||||||
rv = adm_detach(mdev);
|
retcode = adm_detach(mdev);
|
||||||
if (rv < SS_SUCCESS) {
|
if (retcode < SS_SUCCESS) {
|
||||||
retcode = rv; /* enum type mismatch! */
|
|
||||||
drbd_msg_put_info("failed to detach");
|
drbd_msg_put_info("failed to detach");
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
up_read(&drbd_cfg_rwsem);
|
up_read(&drbd_cfg_rwsem);
|
||||||
|
|
||||||
|
/* If we reach this, all volumes (of this tconn) are Secondary,
|
||||||
|
* Disconnected, Diskless, aka Unconfigured. Make sure all threads have
|
||||||
|
* actually stopped, state handling only does drbd_thread_stop_nowait().
|
||||||
|
* This needs to be done without holding drbd_cfg_rwsem. */
|
||||||
|
drbd_thread_stop(&adm_ctx.tconn->worker);
|
||||||
|
|
||||||
|
/* Now, nothing can fail anymore */
|
||||||
|
|
||||||
/* delete volumes */
|
/* delete volumes */
|
||||||
down_write(&drbd_cfg_rwsem);
|
down_write(&drbd_cfg_rwsem);
|
||||||
idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
|
idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
|
||||||
|
|
|
@ -4213,20 +4213,8 @@ static void drbd_disconnect(struct drbd_tconn *tconn)
|
||||||
|
|
||||||
spin_unlock_irq(&tconn->req_lock);
|
spin_unlock_irq(&tconn->req_lock);
|
||||||
|
|
||||||
if (oc == C_DISCONNECTING) {
|
if (oc == C_DISCONNECTING)
|
||||||
struct net_conf *old_conf;
|
|
||||||
|
|
||||||
mutex_lock(&tconn->net_conf_update);
|
|
||||||
old_conf = tconn->net_conf;
|
|
||||||
rcu_assign_pointer(tconn->net_conf, NULL);
|
|
||||||
conn_free_crypto(tconn);
|
|
||||||
mutex_unlock(&tconn->net_conf_update);
|
|
||||||
|
|
||||||
synchronize_rcu();
|
|
||||||
kfree(old_conf);
|
|
||||||
|
|
||||||
conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
|
conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int drbd_disconnected(int vnr, void *p, void *data)
|
static int drbd_disconnected(int vnr, void *p, void *data)
|
||||||
|
|
|
@ -1416,6 +1416,19 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
|
||||||
if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED)
|
if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED)
|
||||||
drbd_thread_start(&tconn->receiver);
|
drbd_thread_start(&tconn->receiver);
|
||||||
|
|
||||||
|
if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
|
||||||
|
struct net_conf *old_conf;
|
||||||
|
|
||||||
|
mutex_lock(&tconn->net_conf_update);
|
||||||
|
old_conf = tconn->net_conf;
|
||||||
|
rcu_assign_pointer(tconn->net_conf, NULL);
|
||||||
|
conn_free_crypto(tconn);
|
||||||
|
mutex_unlock(&tconn->net_conf_update);
|
||||||
|
|
||||||
|
synchronize_rcu();
|
||||||
|
kfree(old_conf);
|
||||||
|
}
|
||||||
|
|
||||||
if (ns_max.susp_fen) {
|
if (ns_max.susp_fen) {
|
||||||
/* case1: The outdate peer handler is successful: */
|
/* case1: The outdate peer handler is successful: */
|
||||||
if (ns_max.pdsk <= D_OUTDATED) {
|
if (ns_max.pdsk <= D_OUTDATED) {
|
||||||
|
|
Loading…
Reference in New Issue