drbd: fix potential deadlock when trying to detach during handshake
When requesting a detach, we first suspend IO, and also inhibit meta-data IO by means of drbd_md_get_buffer(), because we don't want to "fail" the disk while there is IO in-flight: the transition into D_FAILED for detach purposes may get misinterpreted as actual IO error in a confused endio function. We wrap it all into wait_event(), to retry in case the drbd_req_state() returns SS_IN_TRANSIENT_STATE, as it does for example during an ongoing connection handshake. In that example, the receiver thread may need to grab drbd_md_get_buffer() during the handshake to make progress. To avoid potential deadlock with detach, detach needs to grab and release the meta data buffer inside of that wait_event retry loop. To avoid lock inversion between mutex_lock(&device->state_mutex) and drbd_md_get_buffer(device), introduce a new enum chg_state_flag CS_INHIBIT_MD_IO, and move the call to drbd_md_get_buffer() inside the state_mutex grabbed in drbd_req_state(). Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
427fd2bee0
commit
33d32fa712
|
@ -2149,34 +2149,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
|
|||
|
||||
static int adm_detach(struct drbd_device *device, int force)
|
||||
{
|
||||
enum drbd_state_rv retcode;
|
||||
void *buffer;
|
||||
int ret;
|
||||
|
||||
if (force) {
|
||||
set_bit(FORCE_DETACH, &device->flags);
|
||||
drbd_force_state(device, NS(disk, D_FAILED));
|
||||
retcode = SS_SUCCESS;
|
||||
goto out;
|
||||
return SS_SUCCESS;
|
||||
}
|
||||
|
||||
drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
|
||||
buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
|
||||
if (buffer) {
|
||||
retcode = drbd_request_state(device, NS(disk, D_FAILED));
|
||||
drbd_md_put_buffer(device);
|
||||
} else /* already <= D_FAILED */
|
||||
retcode = SS_NOTHING_TO_DO;
|
||||
/* D_FAILED will transition to DISKLESS. */
|
||||
drbd_resume_io(device);
|
||||
ret = wait_event_interruptible(device->misc_wait,
|
||||
device->state.disk != D_FAILED);
|
||||
if ((int)retcode == (int)SS_IS_DISKLESS)
|
||||
retcode = SS_NOTHING_TO_DO;
|
||||
if (ret)
|
||||
retcode = ERR_INTR;
|
||||
out:
|
||||
return retcode;
|
||||
return drbd_request_detach_interruptible(device);
|
||||
}
|
||||
|
||||
/* Detaching the disk is a process in multiple stages. First we need to lock
|
||||
|
|
|
@ -579,11 +579,14 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
|
|||
unsigned long flags;
|
||||
union drbd_state os, ns;
|
||||
enum drbd_state_rv rv;
|
||||
void *buffer = NULL;
|
||||
|
||||
init_completion(&done);
|
||||
|
||||
if (f & CS_SERIALIZE)
|
||||
mutex_lock(device->state_mutex);
|
||||
if (f & CS_INHIBIT_MD_IO)
|
||||
buffer = drbd_md_get_buffer(device, __func__);
|
||||
|
||||
spin_lock_irqsave(&device->resource->req_lock, flags);
|
||||
os = drbd_read_state(device);
|
||||
|
@ -636,6 +639,8 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
|
|||
}
|
||||
|
||||
abort:
|
||||
if (buffer)
|
||||
drbd_md_put_buffer(device);
|
||||
if (f & CS_SERIALIZE)
|
||||
mutex_unlock(device->state_mutex);
|
||||
|
||||
|
@ -664,6 +669,47 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask,
|
|||
return rv;
|
||||
}
|
||||
|
||||
/*
|
||||
* We grab drbd_md_get_buffer(), because we don't want to "fail" the disk while
|
||||
* there is IO in-flight: the transition into D_FAILED for detach purposes
|
||||
* may get misinterpreted as actual IO error in a confused endio function.
|
||||
*
|
||||
* We wrap it all into wait_event(), to retry in case the drbd_req_state()
|
||||
* returns SS_IN_TRANSIENT_STATE.
|
||||
*
|
||||
* To avoid potential deadlock with e.g. the receiver thread trying to grab
|
||||
* drbd_md_get_buffer() while trying to get out of the "transient state", we
|
||||
* need to grab and release the meta data buffer inside of that wait_event loop.
|
||||
*/
|
||||
static enum drbd_state_rv
|
||||
request_detach(struct drbd_device *device)
|
||||
{
|
||||
return drbd_req_state(device, NS(disk, D_FAILED),
|
||||
CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO);
|
||||
}
|
||||
|
||||
enum drbd_state_rv
|
||||
drbd_request_detach_interruptible(struct drbd_device *device)
|
||||
{
|
||||
enum drbd_state_rv rv;
|
||||
int ret;
|
||||
|
||||
drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
|
||||
wait_event_interruptible(device->state_wait,
|
||||
(rv = request_detach(device)) != SS_IN_TRANSIENT_STATE);
|
||||
drbd_resume_io(device);
|
||||
|
||||
ret = wait_event_interruptible(device->misc_wait,
|
||||
device->state.disk != D_FAILED);
|
||||
|
||||
if (rv == SS_IS_DISKLESS)
|
||||
rv = SS_NOTHING_TO_DO;
|
||||
if (ret)
|
||||
rv = ERR_INTR;
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
enum drbd_state_rv
|
||||
_drbd_request_state_holding_state_mutex(struct drbd_device *device, union drbd_state mask,
|
||||
union drbd_state val, enum chg_state_flags f)
|
||||
|
|
|
@ -71,6 +71,10 @@ enum chg_state_flags {
|
|||
CS_DC_SUSP = 1 << 10,
|
||||
CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK,
|
||||
CS_IGN_OUTD_FAIL = 1 << 11,
|
||||
|
||||
/* Make sure no meta data IO is in flight, by calling
|
||||
* drbd_md_get_buffer(). Used for graceful detach. */
|
||||
CS_INHIBIT_MD_IO = 1 << 12,
|
||||
};
|
||||
|
||||
/* drbd_dev_state and drbd_state are different types. This is to stress the
|
||||
|
@ -156,6 +160,10 @@ static inline int drbd_request_state(struct drbd_device *device,
|
|||
return _drbd_request_state(device, mask, val, CS_VERBOSE + CS_ORDERED);
|
||||
}
|
||||
|
||||
/* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */
|
||||
enum drbd_state_rv
|
||||
drbd_request_detach_interruptible(struct drbd_device *device);
|
||||
|
||||
enum drbd_role conn_highest_role(struct drbd_connection *connection);
|
||||
enum drbd_role conn_highest_peer(struct drbd_connection *connection);
|
||||
enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection);
|
||||
|
|
Loading…
Reference in New Issue