drbd: fix potential kernel BUG (NULL deref)

BUG trace would look like:
 lc_find
 drbd_rs_complete_io
 got_OVResult
 drbd_asender

Can be triggered by a detach during online-verify, whether the detach
is explicit or caused by the IO-error policy.

We may only dereference mdev->resync after a successful get_ldev(), as
the disk may break at any time, causing mdev->resync to disappear once
all ldev references have been returned.
Online-verify requests or replies that are already in flight may still
come in; we then need to ignore them.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
Lars Ellenberg 2010-09-05 01:13:24 +02:00 committed by Philipp Reisner
parent 435f07402b
commit 1d53f09e17
2 changed files with 19 additions and 6 deletions

View File

@ -4241,10 +4241,13 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h)
update_peer_seq(mdev, be32_to_cpu(p->seq_num));
drbd_rs_complete_io(mdev, sector);
drbd_set_in_sync(mdev, sector, blksize);
/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
if (get_ldev(mdev)) {
drbd_rs_complete_io(mdev, sector);
drbd_set_in_sync(mdev, sector, blksize);
/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
put_ldev(mdev);
}
dec_rs_pending(mdev);
atomic_add(blksize >> 9, &mdev->rs_sect_in);
@ -4423,6 +4426,9 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
else
ov_oos_print(mdev);
if (!get_ldev(mdev))
return TRUE;
drbd_rs_complete_io(mdev, sector);
dec_rs_pending(mdev);
@ -4437,6 +4443,7 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
drbd_resync_finished(mdev);
}
}
put_ldev(mdev);
return TRUE;
}

View File

@ -1027,7 +1027,10 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return 1;
}
drbd_rs_complete_io(mdev, e->sector);
if (get_ldev(mdev)) {
drbd_rs_complete_io(mdev, e->sector);
put_ldev(mdev);
}
di = e->digest;
@ -1134,7 +1137,10 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
* the resync lru has been cleaned up already */
drbd_rs_complete_io(mdev, e->sector);
if (get_ldev(mdev)) {
drbd_rs_complete_io(mdev, e->sector);
put_ldev(mdev);
}
di = e->digest;