mirror of https://gitee.com/openkylin/linux.git
md update for 3.3

Big change is new hot-replacement.  A slot in an array can hold 2 devices -
one that wants-replacement and one that is the replacement.  Once the
replacement is built - either from the original or (in the case of errors)
from elsewhere, the wants-replacement device will be removed.

Merge tag 'md-3.3' of git://neil.brown.name/md

* tag 'md-3.3' of git://neil.brown.name/md: (36 commits)
  md/raid1: Mark device want_replacement when we see a write error.
  md/raid1: If there is a spare and a want_replacement device, start replacement.
  md/raid1: recognise replacements when assembling arrays.
  md/raid1: handle activation of replacement device when recovery completes.
  md/raid1: Allow a failed replacement device to be removed.
  md/raid1: Allocate spare to store replacement devices and their bios.
  md/raid1: Replace use of mddev->raid_disks with conf->raid_disks.
  md/raid10: If there is a spare and a want_replacement device, start replacement.
  md/raid10: recognise replacements when assembling array.
  md/raid10: Allow replacement device to be replace old drive.
  md/raid10: handle recovery of replacement devices.
  md/raid10: Handle replacement devices during resync.
  md/raid10: writes should get directed to replacement as well as original.
  md/raid10: allow removal of failed replacement devices.
  md/raid10: preferentially read from replacement device if possible.
  md/raid10: change read_balance to return an rdev
  md/raid10: prepare data structures for handling replacement.
  md/raid5: Mark device want_replacement when we see a write error.
  md/raid5: If there is a spare and a want_replacement device, start replacement.
  md/raid5: recognise replacements when assembling array.
  ...

This commit is contained in: commit 2943c83322
@@ -357,14 +357,14 @@ Each directory contains:
     written to, that device.

   state
     A file recording the current state of the device in the array
     which can be a comma separated list of
         faulty   - device has been kicked from active use due to
-                   a detected fault or it has unacknowledged bad
+                   a detected fault, or it has unacknowledged bad
                    blocks
         in_sync  - device is a fully in-sync member of the array
         writemostly - device will only be subject to read
                    requests if there are no other options.
                    This applies only to raid1 arrays.
         blocked  - device has failed, and the failure hasn't been
                    acknowledged yet by the metadata handler.

@@ -374,6 +374,13 @@ Each directory contains:
                    This includes spares that are in the process
                    of being recovered to
         write_error - device has ever seen a write error.
+        want_replacement - device is (mostly) working but probably
+                   should be replaced, either due to errors or
+                   due to user request.
+        replacement - device is a replacement for another active
+                   device with same raid_disk.
+
+
     This list may grow in future.
     This can be written to.
     Writing "faulty" simulates a failure on the device.

@@ -386,6 +393,13 @@ Each directory contains:
     Writing "in_sync" sets the in_sync flag.
     Writing "write_error" sets writeerrorseen flag.
     Writing "-write_error" clears writeerrorseen flag.
+    Writing "want_replacement" is allowed at any time except to a
+        replacement device or a spare.  It sets the flag.
+    Writing "-want_replacement" is allowed at any time.  It clears
+        the flag.
+    Writing "replacement" or "-replacement" is only allowed before
+        starting the array.  It sets or clears the flag.
+

     This file responds to select/poll.  Any change to 'faulty'
     or 'blocked' causes an event.
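As a rough illustration of the interface documented above, the sketch below marks one member of an array for hot-replacement by writing "want_replacement" to its state file. The array name (md0) and component directory (dev-sdb) are assumptions for the example only; the general layout is /sys/block/<array>/md/dev-<component>/state.

/* Minimal sketch: ask md to hot-replace one member of an array.
 * The array and component names below are assumed examples, not
 * values taken from this patch.
 */
#include <stdio.h>

int main(void)
{
    const char *path = "/sys/block/md0/md/dev-sdb/state";  /* assumed path */
    FILE *f = fopen(path, "w");

    if (!f) {
        perror(path);
        return 1;
    }
    /* Per the documentation above: allowed at any time except on a
     * replacement device or a spare; recovery onto a spare (or onto a
     * device written with "replacement" before assembly) follows.
     */
    if (fputs("want_replacement", f) == EOF)
        perror("write");
    fclose(f);
    return 0;
}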
@@ -1149,12 +1149,12 @@ void bitmap_daemon_work(struct mddev *mddev)
         return;
     }
     if (time_before(jiffies, bitmap->daemon_lastrun
-            + bitmap->mddev->bitmap_info.daemon_sleep))
+            + mddev->bitmap_info.daemon_sleep))
         goto done;

     bitmap->daemon_lastrun = jiffies;
     if (bitmap->allclean) {
-        bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
+        mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
         goto done;
     }
     bitmap->allclean = 1;

@@ -1206,7 +1206,7 @@ void bitmap_daemon_work(struct mddev *mddev)
      * sure that events_cleared is up-to-date.
      */
     if (bitmap->need_sync &&
-        bitmap->mddev->bitmap_info.external == 0) {
+        mddev->bitmap_info.external == 0) {
         bitmap_super_t *sb;
         bitmap->need_sync = 0;
         sb = kmap_atomic(bitmap->sb_page, KM_USER0);

@@ -1270,8 +1270,8 @@ void bitmap_daemon_work(struct mddev *mddev)

  done:
     if (bitmap->allclean == 0)
-        bitmap->mddev->thread->timeout =
-            bitmap->mddev->bitmap_info.daemon_sleep;
+        mddev->thread->timeout =
+            mddev->bitmap_info.daemon_sleep;
     mutex_unlock(&mddev->bitmap_info.mutex);
 }

@@ -1587,7 +1587,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
     }
     if (!*bmc) {
         struct page *page;
-        *bmc = 1 | (needed ? NEEDED_MASK : 0);
+        *bmc = 2 | (needed ? NEEDED_MASK : 0);
         bitmap_count_page(bitmap, offset, 1);
         page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
         set_page_attr(bitmap, page, BITMAP_PAGE_PENDING);
drivers/md/md.c | 107
@@ -1713,6 +1713,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
         }
         if (sb->devflags & WriteMostly1)
             set_bit(WriteMostly, &rdev->flags);
+        if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
+            set_bit(Replacement, &rdev->flags);
     } else /* MULTIPATH are always insync */
         set_bit(In_sync, &rdev->flags);

@@ -1766,6 +1768,9 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
             sb->recovery_offset =
                 cpu_to_le64(rdev->recovery_offset);
     }
+    if (test_bit(Replacement, &rdev->flags))
+        sb->feature_map |=
+            cpu_to_le32(MD_FEATURE_REPLACEMENT);

     if (mddev->reshape_position != MaxSector) {
         sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);

@@ -2559,6 +2564,15 @@ state_show(struct md_rdev *rdev, char *page)
         len += sprintf(page+len, "%swrite_error", sep);
         sep = ",";
     }
+    if (test_bit(WantReplacement, &rdev->flags)) {
+        len += sprintf(page+len, "%swant_replacement", sep);
+        sep = ",";
+    }
+    if (test_bit(Replacement, &rdev->flags)) {
+        len += sprintf(page+len, "%sreplacement", sep);
+        sep = ",";
+    }
+
     return len+sprintf(page+len, "\n");
 }

@@ -2627,6 +2641,42 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
     } else if (cmd_match(buf, "-write_error")) {
         clear_bit(WriteErrorSeen, &rdev->flags);
         err = 0;
+    } else if (cmd_match(buf, "want_replacement")) {
+        /* Any non-spare device that is not a replacement can
+         * become want_replacement at any time, but we then need to
+         * check if recovery is needed.
+         */
+        if (rdev->raid_disk >= 0 &&
+            !test_bit(Replacement, &rdev->flags))
+            set_bit(WantReplacement, &rdev->flags);
+        set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+        md_wakeup_thread(rdev->mddev->thread);
+        err = 0;
+    } else if (cmd_match(buf, "-want_replacement")) {
+        /* Clearing 'want_replacement' is always allowed.
+         * Once replacements starts it is too late though.
+         */
+        err = 0;
+        clear_bit(WantReplacement, &rdev->flags);
+    } else if (cmd_match(buf, "replacement")) {
+        /* Can only set a device as a replacement when array has not
+         * yet been started.  Once running, replacement is automatic
+         * from spares, or by assigning 'slot'.
+         */
+        if (rdev->mddev->pers)
+            err = -EBUSY;
+        else {
+            set_bit(Replacement, &rdev->flags);
+            err = 0;
+        }
+    } else if (cmd_match(buf, "-replacement")) {
+        /* Similarly, can only clear Replacement before start */
+        if (rdev->mddev->pers)
+            err = -EBUSY;
+        else {
+            clear_bit(Replacement, &rdev->flags);
+            err = 0;
+        }
     }
     if (!err)
         sysfs_notify_dirent_safe(rdev->sysfs_state);
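To complement the write-side example earlier, the sketch below reads the same (assumed) state file and scans the comma separated list that state_show() above produces for the two new flags; the path is an example only.

/* Sketch: report whether a member device carries the new replacement flags. */
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[256];
    FILE *f = fopen("/sys/block/md0/md/dev-sdb/state", "r");  /* assumed path */

    if (!f) {
        perror("open state");
        return 1;
    }
    if (!fgets(buf, sizeof(buf), f)) {
        fclose(f);
        return 1;
    }
    fclose(f);
    for (char *tok = strtok(buf, ",\n"); tok; tok = strtok(NULL, ",\n")) {
        if (!strcmp(tok, "want_replacement"))
            printf("device is marked for replacement\n");
        else if (!strcmp(tok, "replacement"))
            printf("device is acting as a replacement\n");
    }
    return 0;
}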
@@ -2688,7 +2738,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
         if (rdev->mddev->pers->hot_remove_disk == NULL)
             return -EINVAL;
         err = rdev->mddev->pers->
-            hot_remove_disk(rdev->mddev, rdev->raid_disk);
+            hot_remove_disk(rdev->mddev, rdev);
         if (err)
             return err;
         sysfs_unlink_rdev(rdev->mddev, rdev);

@@ -2696,7 +2746,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
         set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
         md_wakeup_thread(rdev->mddev->thread);
     } else if (rdev->mddev->pers) {
-        struct md_rdev *rdev2;
         /* Activating a spare .. or possibly reactivating
          * if we ever get bitmaps working here.
          */

@@ -2710,10 +2759,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
         if (rdev->mddev->pers->hot_add_disk == NULL)
             return -EINVAL;

-        list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
-            if (rdev2->raid_disk == slot)
-                return -EEXIST;
-
         if (slot >= rdev->mddev->raid_disks &&
             slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
             return -ENOSPC;

@@ -6053,8 +6098,15 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
     struct mddev *mddev = NULL;
     int ro;

-    if (!capable(CAP_SYS_ADMIN))
-        return -EACCES;
+    switch (cmd) {
+    case RAID_VERSION:
+    case GET_ARRAY_INFO:
+    case GET_DISK_INFO:
+        break;
+    default:
+        if (!capable(CAP_SYS_ADMIN))
+            return -EACCES;
+    }

     /*
      * Commands dealing with the RAID driver but not any

@@ -6714,8 +6766,11 @@ static int md_seq_show(struct seq_file *seq, void *v)
             if (test_bit(Faulty, &rdev->flags)) {
                 seq_printf(seq, "(F)");
                 continue;
-            } else if (rdev->raid_disk < 0)
+            }
+            if (rdev->raid_disk < 0)
                 seq_printf(seq, "(S)"); /* spare */
+            if (test_bit(Replacement, &rdev->flags))
+                seq_printf(seq, "(R)");
             sectors += rdev->sectors;
         }

@@ -7337,29 +7392,27 @@ static int remove_and_add_spares(struct mddev *mddev)
             ! test_bit(In_sync, &rdev->flags)) &&
             atomic_read(&rdev->nr_pending)==0) {
             if (mddev->pers->hot_remove_disk(
-                    mddev, rdev->raid_disk)==0) {
+                    mddev, rdev) == 0) {
                 sysfs_unlink_rdev(mddev, rdev);
                 rdev->raid_disk = -1;
             }
         }

-    if (mddev->degraded) {
-        list_for_each_entry(rdev, &mddev->disks, same_set) {
-            if (rdev->raid_disk >= 0 &&
-                !test_bit(In_sync, &rdev->flags) &&
-                !test_bit(Faulty, &rdev->flags))
-                spares++;
-            if (rdev->raid_disk < 0
-                && !test_bit(Faulty, &rdev->flags)) {
-                rdev->recovery_offset = 0;
-                if (mddev->pers->
-                    hot_add_disk(mddev, rdev) == 0) {
-                    if (sysfs_link_rdev(mddev, rdev))
-                        /* failure here is OK */;
-                    spares++;
-                    md_new_event(mddev);
-                    set_bit(MD_CHANGE_DEVS, &mddev->flags);
-                }
+    list_for_each_entry(rdev, &mddev->disks, same_set) {
+        if (rdev->raid_disk >= 0 &&
+            !test_bit(In_sync, &rdev->flags) &&
+            !test_bit(Faulty, &rdev->flags))
+            spares++;
+        if (rdev->raid_disk < 0
+            && !test_bit(Faulty, &rdev->flags)) {
+            rdev->recovery_offset = 0;
+            if (mddev->pers->
+                hot_add_disk(mddev, rdev) == 0) {
+                if (sysfs_link_rdev(mddev, rdev))
+                    /* failure here is OK */;
+                spares++;
+                md_new_event(mddev);
+                set_bit(MD_CHANGE_DEVS, &mddev->flags);
             }
         }
     }

@@ -7474,7 +7527,7 @@ void md_check_recovery(struct mddev *mddev)
             test_bit(Faulty, &rdev->flags) &&
             atomic_read(&rdev->nr_pending)==0) {
             if (mddev->pers->hot_remove_disk(
-                    mddev, rdev->raid_disk)==0) {
+                    mddev, rdev) == 0) {
                 sysfs_unlink_rdev(mddev, rdev);
                 rdev->raid_disk = -1;
             }
@@ -72,34 +72,7 @@ struct md_rdev {
      * This reduces the burden of testing multiple flags in many cases
      */

-    unsigned long flags;
-#define Faulty           1    /* device is known to have a fault */
-#define In_sync          2    /* device is in_sync with rest of array */
-#define WriteMostly      4    /* Avoid reading if at all possible */
-#define AutoDetected     7    /* added by auto-detect */
-#define Blocked          8    /* An error occurred but has not yet
-                               * been acknowledged by the metadata
-                               * handler, so don't allow writes
-                               * until it is cleared */
-#define WriteErrorSeen   9    /* A write error has been seen on this
-                               * device
-                               */
-#define FaultRecorded    10   /* Intermediate state for clearing
-                               * Blocked.  The Fault is/will-be
-                               * recorded in the metadata, but that
-                               * metadata hasn't been stored safely
-                               * on disk yet.
-                               */
-#define BlockedBadBlocks 11   /* A writer is blocked because they
-                               * found an unacknowledged bad-block.
-                               * This can safely be cleared at any
-                               * time, and the writer will re-check.
-                               * It may be set at any time, and at
-                               * worst the writer will timeout and
-                               * re-check.  So setting it as
-                               * accurately as possible is good, but
-                               * not absolutely critical.
-                               */
+    unsigned long flags;      /* bit set of 'enum flag_bits' bits. */
     wait_queue_head_t blocked_wait;

     int desc_nr;              /* descriptor index in the superblock */

@@ -152,6 +125,44 @@ struct md_rdev {
         sector_t size;        /* in sectors */
     } badblocks;
 };
+
+enum flag_bits {
+    Faulty,            /* device is known to have a fault */
+    In_sync,           /* device is in_sync with rest of array */
+    WriteMostly,       /* Avoid reading if at all possible */
+    AutoDetected,      /* added by auto-detect */
+    Blocked,           /* An error occurred but has not yet
+                        * been acknowledged by the metadata
+                        * handler, so don't allow writes
+                        * until it is cleared */
+    WriteErrorSeen,    /* A write error has been seen on this
+                        * device
+                        */
+    FaultRecorded,     /* Intermediate state for clearing
+                        * Blocked.  The Fault is/will-be
+                        * recorded in the metadata, but that
+                        * metadata hasn't been stored safely
+                        * on disk yet.
+                        */
+    BlockedBadBlocks,  /* A writer is blocked because they
+                        * found an unacknowledged bad-block.
+                        * This can safely be cleared at any
+                        * time, and the writer will re-check.
+                        * It may be set at any time, and at
+                        * worst the writer will timeout and
+                        * re-check.  So setting it as
+                        * accurately as possible is good, but
+                        * not absolutely critical.
+                        */
+    WantReplacement,   /* This device is a candidate to be
+                        * hot-replaced, either because it has
+                        * reported some faults, or because
+                        * of explicit request.
+                        */
+    Replacement,       /* This device is a replacement for
+                        * a want_replacement device with same
+                        * raid_disk number.
+                        */
+};

 #define BB_LEN_MASK    (0x00000000000001FFULL)
 #define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL)

@@ -428,7 +439,7 @@ struct md_personality
      */
     void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev);
     int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
-    int (*hot_remove_disk) (struct mddev *mddev, int number);
+    int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
     int (*spare_active) (struct mddev *mddev);
     sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster);
     int (*resize) (struct mddev *mddev, sector_t sectors);

@@ -482,15 +493,20 @@ static inline char * mdname (struct mddev * mddev)
 static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
 {
     char nm[20];
-    sprintf(nm, "rd%d", rdev->raid_disk);
-    return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+    if (!test_bit(Replacement, &rdev->flags)) {
+        sprintf(nm, "rd%d", rdev->raid_disk);
+        return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+    } else
+        return 0;
 }

 static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
 {
     char nm[20];
-    sprintf(nm, "rd%d", rdev->raid_disk);
-    sysfs_remove_link(&mddev->kobj, nm);
+    if (!test_bit(Replacement, &rdev->flags)) {
+        sprintf(nm, "rd%d", rdev->raid_disk);
+        sysfs_remove_link(&mddev->kobj, nm);
+    }
 }

 /*
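The hunks above replace the old #define bit numbers with enum flag_bits: each enumerator is a bit index into rdev->flags, which the kernel manipulates with set_bit()/clear_bit()/test_bit(). The following stand-alone user-space sketch mirrors that idiom with plain shifts; it is illustrative only and uses none of the kernel's types or helpers.

/* Illustrative analog of the flag_bits idiom: each enum value is a bit
 * number (not a mask) inside one unsigned long word of flags.
 */
#include <stdio.h>

enum flag_bits { Faulty, In_sync, WriteMostly, WantReplacement, Replacement };

static void set_flag(unsigned long *flags, int bit)   { *flags |=  (1UL << bit); }
static void clear_flag(unsigned long *flags, int bit) { *flags &= ~(1UL << bit); }
static int  test_flag(unsigned long flags, int bit)   { return !!(flags & (1UL << bit)); }

int main(void)
{
    unsigned long flags = 0;

    set_flag(&flags, In_sync);
    set_flag(&flags, WantReplacement);   /* e.g. after a write error */
    if (test_flag(flags, WantReplacement) && !test_flag(flags, Replacement))
        printf("device is a candidate for hot-replacement\n");
    clear_flag(&flags, WantReplacement);
    return 0;
}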
|
@ -292,17 +292,16 @@ static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int multipath_remove_disk(struct mddev *mddev, int number)
|
static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
{
|
{
|
||||||
struct mpconf *conf = mddev->private;
|
struct mpconf *conf = mddev->private;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
struct md_rdev *rdev;
|
int number = rdev->raid_disk;
|
||||||
struct multipath_info *p = conf->multipaths + number;
|
struct multipath_info *p = conf->multipaths + number;
|
||||||
|
|
||||||
print_multipath_conf(conf);
|
print_multipath_conf(conf);
|
||||||
|
|
||||||
rdev = p->rdev;
|
if (rdev == p->rdev) {
|
||||||
if (rdev) {
|
|
||||||
if (test_bit(In_sync, &rdev->flags) ||
|
if (test_bit(In_sync, &rdev->flags) ||
|
||||||
atomic_read(&rdev->nr_pending)) {
|
atomic_read(&rdev->nr_pending)) {
|
||||||
printk(KERN_ERR "hot-remove-disk, slot %d is identified"
|
printk(KERN_ERR "hot-remove-disk, slot %d is identified"
|
||||||
|
|
|
@ -135,7 +135,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
|
||||||
put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
|
put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
|
||||||
j = -1;
|
j = -1;
|
||||||
out_free_bio:
|
out_free_bio:
|
||||||
while ( ++j < pi->raid_disks )
|
while (++j < pi->raid_disks)
|
||||||
bio_put(r1_bio->bios[j]);
|
bio_put(r1_bio->bios[j]);
|
||||||
r1bio_pool_free(r1_bio, data);
|
r1bio_pool_free(r1_bio, data);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -164,7 +164,7 @@ static void put_all_bios(struct r1conf *conf, struct r1bio *r1_bio)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks * 2; i++) {
|
||||||
struct bio **bio = r1_bio->bios + i;
|
struct bio **bio = r1_bio->bios + i;
|
||||||
if (!BIO_SPECIAL(*bio))
|
if (!BIO_SPECIAL(*bio))
|
||||||
bio_put(*bio);
|
bio_put(*bio);
|
||||||
|
@ -185,7 +185,7 @@ static void put_buf(struct r1bio *r1_bio)
|
||||||
struct r1conf *conf = r1_bio->mddev->private;
|
struct r1conf *conf = r1_bio->mddev->private;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i=0; i<conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks * 2; i++) {
|
||||||
struct bio *bio = r1_bio->bios[i];
|
struct bio *bio = r1_bio->bios[i];
|
||||||
if (bio->bi_end_io)
|
if (bio->bi_end_io)
|
||||||
rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
|
rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
|
||||||
|
@ -277,13 +277,14 @@ static inline void update_head_pos(int disk, struct r1bio *r1_bio)
|
||||||
static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
|
static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
|
||||||
{
|
{
|
||||||
int mirror;
|
int mirror;
|
||||||
int raid_disks = r1_bio->mddev->raid_disks;
|
struct r1conf *conf = r1_bio->mddev->private;
|
||||||
|
int raid_disks = conf->raid_disks;
|
||||||
|
|
||||||
for (mirror = 0; mirror < raid_disks; mirror++)
|
for (mirror = 0; mirror < raid_disks * 2; mirror++)
|
||||||
if (r1_bio->bios[mirror] == bio)
|
if (r1_bio->bios[mirror] == bio)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
BUG_ON(mirror == raid_disks);
|
BUG_ON(mirror == raid_disks * 2);
|
||||||
update_head_pos(mirror, r1_bio);
|
update_head_pos(mirror, r1_bio);
|
||||||
|
|
||||||
return mirror;
|
return mirror;
|
||||||
|
@ -390,6 +391,11 @@ static void raid1_end_write_request(struct bio *bio, int error)
|
||||||
if (!uptodate) {
|
if (!uptodate) {
|
||||||
set_bit(WriteErrorSeen,
|
set_bit(WriteErrorSeen,
|
||||||
&conf->mirrors[mirror].rdev->flags);
|
&conf->mirrors[mirror].rdev->flags);
|
||||||
|
if (!test_and_set_bit(WantReplacement,
|
||||||
|
&conf->mirrors[mirror].rdev->flags))
|
||||||
|
set_bit(MD_RECOVERY_NEEDED, &
|
||||||
|
conf->mddev->recovery);
|
||||||
|
|
||||||
set_bit(R1BIO_WriteError, &r1_bio->state);
|
set_bit(R1BIO_WriteError, &r1_bio->state);
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
|
@ -505,7 +511,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
||||||
start_disk = conf->last_used;
|
start_disk = conf->last_used;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0 ; i < conf->raid_disks ; i++) {
|
for (i = 0 ; i < conf->raid_disks * 2 ; i++) {
|
||||||
sector_t dist;
|
sector_t dist;
|
||||||
sector_t first_bad;
|
sector_t first_bad;
|
||||||
int bad_sectors;
|
int bad_sectors;
|
||||||
|
@ -609,7 +615,7 @@ int md_raid1_congested(struct mddev *mddev, int bits)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
for (i = 0; i < mddev->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks; i++) {
|
||||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
|
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
|
||||||
if (rdev && !test_bit(Faulty, &rdev->flags)) {
|
if (rdev && !test_bit(Faulty, &rdev->flags)) {
|
||||||
struct request_queue *q = bdev_get_queue(rdev->bdev);
|
struct request_queue *q = bdev_get_queue(rdev->bdev);
|
||||||
|
@ -974,7 +980,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
|
||||||
*/
|
*/
|
||||||
plugged = mddev_check_plugged(mddev);
|
plugged = mddev_check_plugged(mddev);
|
||||||
|
|
||||||
disks = conf->raid_disks;
|
disks = conf->raid_disks * 2;
|
||||||
retry_write:
|
retry_write:
|
||||||
blocked_rdev = NULL;
|
blocked_rdev = NULL;
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
@ -988,7 +994,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
|
||||||
}
|
}
|
||||||
r1_bio->bios[i] = NULL;
|
r1_bio->bios[i] = NULL;
|
||||||
if (!rdev || test_bit(Faulty, &rdev->flags)) {
|
if (!rdev || test_bit(Faulty, &rdev->flags)) {
|
||||||
set_bit(R1BIO_Degraded, &r1_bio->state);
|
if (i < conf->raid_disks)
|
||||||
|
set_bit(R1BIO_Degraded, &r1_bio->state);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1263,6 +1270,25 @@ static int raid1_spare_active(struct mddev *mddev)
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks; i++) {
|
||||||
struct md_rdev *rdev = conf->mirrors[i].rdev;
|
struct md_rdev *rdev = conf->mirrors[i].rdev;
|
||||||
|
struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev;
|
||||||
|
if (repl
|
||||||
|
&& repl->recovery_offset == MaxSector
|
||||||
|
&& !test_bit(Faulty, &repl->flags)
|
||||||
|
&& !test_and_set_bit(In_sync, &repl->flags)) {
|
||||||
|
/* replacement has just become active */
|
||||||
|
if (!rdev ||
|
||||||
|
!test_and_clear_bit(In_sync, &rdev->flags))
|
||||||
|
count++;
|
||||||
|
if (rdev) {
|
||||||
|
/* Replaced device not technically
|
||||||
|
* faulty, but we need to be sure
|
||||||
|
* it gets removed and never re-added
|
||||||
|
*/
|
||||||
|
set_bit(Faulty, &rdev->flags);
|
||||||
|
sysfs_notify_dirent_safe(
|
||||||
|
rdev->sysfs_state);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (rdev
|
if (rdev
|
||||||
&& !test_bit(Faulty, &rdev->flags)
|
&& !test_bit(Faulty, &rdev->flags)
|
||||||
&& !test_and_set_bit(In_sync, &rdev->flags)) {
|
&& !test_and_set_bit(In_sync, &rdev->flags)) {
|
||||||
|
@ -1286,7 +1312,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
int mirror = 0;
|
int mirror = 0;
|
||||||
struct mirror_info *p;
|
struct mirror_info *p;
|
||||||
int first = 0;
|
int first = 0;
|
||||||
int last = mddev->raid_disks - 1;
|
int last = conf->raid_disks - 1;
|
||||||
|
|
||||||
if (mddev->recovery_disabled == conf->recovery_disabled)
|
if (mddev->recovery_disabled == conf->recovery_disabled)
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
|
@ -1294,8 +1320,9 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
if (rdev->raid_disk >= 0)
|
if (rdev->raid_disk >= 0)
|
||||||
first = last = rdev->raid_disk;
|
first = last = rdev->raid_disk;
|
||||||
|
|
||||||
for (mirror = first; mirror <= last; mirror++)
|
for (mirror = first; mirror <= last; mirror++) {
|
||||||
if ( !(p=conf->mirrors+mirror)->rdev) {
|
p = conf->mirrors+mirror;
|
||||||
|
if (!p->rdev) {
|
||||||
|
|
||||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||||
rdev->data_offset << 9);
|
rdev->data_offset << 9);
|
||||||
|
@ -1322,21 +1349,35 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
rcu_assign_pointer(p->rdev, rdev);
|
rcu_assign_pointer(p->rdev, rdev);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (test_bit(WantReplacement, &p->rdev->flags) &&
|
||||||
|
p[conf->raid_disks].rdev == NULL) {
|
||||||
|
/* Add this device as a replacement */
|
||||||
|
clear_bit(In_sync, &rdev->flags);
|
||||||
|
set_bit(Replacement, &rdev->flags);
|
||||||
|
rdev->raid_disk = mirror;
|
||||||
|
err = 0;
|
||||||
|
conf->fullsync = 1;
|
||||||
|
rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
md_integrity_add_rdev(rdev, mddev);
|
md_integrity_add_rdev(rdev, mddev);
|
||||||
print_conf(conf);
|
print_conf(conf);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int raid1_remove_disk(struct mddev *mddev, int number)
|
static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
{
|
{
|
||||||
struct r1conf *conf = mddev->private;
|
struct r1conf *conf = mddev->private;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
struct md_rdev *rdev;
|
int number = rdev->raid_disk;
|
||||||
struct mirror_info *p = conf->mirrors+ number;
|
struct mirror_info *p = conf->mirrors+ number;
|
||||||
|
|
||||||
|
if (rdev != p->rdev)
|
||||||
|
p = conf->mirrors + conf->raid_disks + number;
|
||||||
|
|
||||||
print_conf(conf);
|
print_conf(conf);
|
||||||
rdev = p->rdev;
|
if (rdev == p->rdev) {
|
||||||
if (rdev) {
|
|
||||||
if (test_bit(In_sync, &rdev->flags) ||
|
if (test_bit(In_sync, &rdev->flags) ||
|
||||||
atomic_read(&rdev->nr_pending)) {
|
atomic_read(&rdev->nr_pending)) {
|
||||||
err = -EBUSY;
|
err = -EBUSY;
|
||||||
|
@ -1358,7 +1399,21 @@ static int raid1_remove_disk(struct mddev *mddev, int number)
|
||||||
err = -EBUSY;
|
err = -EBUSY;
|
||||||
p->rdev = rdev;
|
p->rdev = rdev;
|
||||||
goto abort;
|
goto abort;
|
||||||
}
|
} else if (conf->mirrors[conf->raid_disks + number].rdev) {
|
||||||
|
/* We just removed a device that is being replaced.
|
||||||
|
* Move down the replacement. We drain all IO before
|
||||||
|
* doing this to avoid confusion.
|
||||||
|
*/
|
||||||
|
struct md_rdev *repl =
|
||||||
|
conf->mirrors[conf->raid_disks + number].rdev;
|
||||||
|
raise_barrier(conf);
|
||||||
|
clear_bit(Replacement, &repl->flags);
|
||||||
|
p->rdev = repl;
|
||||||
|
conf->mirrors[conf->raid_disks + number].rdev = NULL;
|
||||||
|
lower_barrier(conf);
|
||||||
|
clear_bit(WantReplacement, &rdev->flags);
|
||||||
|
} else
|
||||||
|
clear_bit(WantReplacement, &rdev->flags);
|
||||||
err = md_integrity_register(mddev);
|
err = md_integrity_register(mddev);
|
||||||
}
|
}
|
||||||
abort:
|
abort:
|
||||||
|
@ -1411,6 +1466,10 @@ static void end_sync_write(struct bio *bio, int error)
|
||||||
} while (sectors_to_go > 0);
|
} while (sectors_to_go > 0);
|
||||||
set_bit(WriteErrorSeen,
|
set_bit(WriteErrorSeen,
|
||||||
&conf->mirrors[mirror].rdev->flags);
|
&conf->mirrors[mirror].rdev->flags);
|
||||||
|
if (!test_and_set_bit(WantReplacement,
|
||||||
|
&conf->mirrors[mirror].rdev->flags))
|
||||||
|
set_bit(MD_RECOVERY_NEEDED, &
|
||||||
|
mddev->recovery);
|
||||||
set_bit(R1BIO_WriteError, &r1_bio->state);
|
set_bit(R1BIO_WriteError, &r1_bio->state);
|
||||||
} else if (is_badblock(conf->mirrors[mirror].rdev,
|
} else if (is_badblock(conf->mirrors[mirror].rdev,
|
||||||
r1_bio->sector,
|
r1_bio->sector,
|
||||||
|
@ -1441,8 +1500,13 @@ static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
|
||||||
if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
|
if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
|
||||||
/* success */
|
/* success */
|
||||||
return 1;
|
return 1;
|
||||||
if (rw == WRITE)
|
if (rw == WRITE) {
|
||||||
set_bit(WriteErrorSeen, &rdev->flags);
|
set_bit(WriteErrorSeen, &rdev->flags);
|
||||||
|
if (!test_and_set_bit(WantReplacement,
|
||||||
|
&rdev->flags))
|
||||||
|
set_bit(MD_RECOVERY_NEEDED, &
|
||||||
|
rdev->mddev->recovery);
|
||||||
|
}
|
||||||
/* need to record an error - either for the block or the device */
|
/* need to record an error - either for the block or the device */
|
||||||
if (!rdev_set_badblocks(rdev, sector, sectors, 0))
|
if (!rdev_set_badblocks(rdev, sector, sectors, 0))
|
||||||
md_error(rdev->mddev, rdev);
|
md_error(rdev->mddev, rdev);
|
||||||
|
@ -1493,7 +1557,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
d++;
|
d++;
|
||||||
if (d == conf->raid_disks)
|
if (d == conf->raid_disks * 2)
|
||||||
d = 0;
|
d = 0;
|
||||||
} while (!success && d != r1_bio->read_disk);
|
} while (!success && d != r1_bio->read_disk);
|
||||||
|
|
||||||
|
@ -1510,7 +1574,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
|
||||||
mdname(mddev),
|
mdname(mddev),
|
||||||
bdevname(bio->bi_bdev, b),
|
bdevname(bio->bi_bdev, b),
|
||||||
(unsigned long long)r1_bio->sector);
|
(unsigned long long)r1_bio->sector);
|
||||||
for (d = 0; d < conf->raid_disks; d++) {
|
for (d = 0; d < conf->raid_disks * 2; d++) {
|
||||||
rdev = conf->mirrors[d].rdev;
|
rdev = conf->mirrors[d].rdev;
|
||||||
if (!rdev || test_bit(Faulty, &rdev->flags))
|
if (!rdev || test_bit(Faulty, &rdev->flags))
|
||||||
continue;
|
continue;
|
||||||
|
@ -1536,7 +1600,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
|
||||||
/* write it back and re-read */
|
/* write it back and re-read */
|
||||||
while (d != r1_bio->read_disk) {
|
while (d != r1_bio->read_disk) {
|
||||||
if (d == 0)
|
if (d == 0)
|
||||||
d = conf->raid_disks;
|
d = conf->raid_disks * 2;
|
||||||
d--;
|
d--;
|
||||||
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
||||||
continue;
|
continue;
|
||||||
|
@ -1551,7 +1615,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
|
||||||
d = start;
|
d = start;
|
||||||
while (d != r1_bio->read_disk) {
|
while (d != r1_bio->read_disk) {
|
||||||
if (d == 0)
|
if (d == 0)
|
||||||
d = conf->raid_disks;
|
d = conf->raid_disks * 2;
|
||||||
d--;
|
d--;
|
||||||
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
||||||
continue;
|
continue;
|
||||||
|
@ -1584,7 +1648,7 @@ static int process_checks(struct r1bio *r1_bio)
|
||||||
int primary;
|
int primary;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (primary = 0; primary < conf->raid_disks; primary++)
|
for (primary = 0; primary < conf->raid_disks * 2; primary++)
|
||||||
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
|
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
|
||||||
test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
|
test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
|
||||||
r1_bio->bios[primary]->bi_end_io = NULL;
|
r1_bio->bios[primary]->bi_end_io = NULL;
|
||||||
|
@ -1592,7 +1656,7 @@ static int process_checks(struct r1bio *r1_bio)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
r1_bio->read_disk = primary;
|
r1_bio->read_disk = primary;
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks * 2; i++) {
|
||||||
int j;
|
int j;
|
||||||
int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
|
int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
|
||||||
struct bio *pbio = r1_bio->bios[primary];
|
struct bio *pbio = r1_bio->bios[primary];
|
||||||
|
@ -1656,7 +1720,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
|
||||||
{
|
{
|
||||||
struct r1conf *conf = mddev->private;
|
struct r1conf *conf = mddev->private;
|
||||||
int i;
|
int i;
|
||||||
int disks = conf->raid_disks;
|
int disks = conf->raid_disks * 2;
|
||||||
struct bio *bio, *wbio;
|
struct bio *bio, *wbio;
|
||||||
|
|
||||||
bio = r1_bio->bios[r1_bio->read_disk];
|
bio = r1_bio->bios[r1_bio->read_disk];
|
||||||
|
@ -1737,7 +1801,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||||
success = 1;
|
success = 1;
|
||||||
else {
|
else {
|
||||||
d++;
|
d++;
|
||||||
if (d == conf->raid_disks)
|
if (d == conf->raid_disks * 2)
|
||||||
d = 0;
|
d = 0;
|
||||||
}
|
}
|
||||||
} while (!success && d != read_disk);
|
} while (!success && d != read_disk);
|
||||||
|
@ -1753,7 +1817,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||||
start = d;
|
start = d;
|
||||||
while (d != read_disk) {
|
while (d != read_disk) {
|
||||||
if (d==0)
|
if (d==0)
|
||||||
d = conf->raid_disks;
|
d = conf->raid_disks * 2;
|
||||||
d--;
|
d--;
|
||||||
rdev = conf->mirrors[d].rdev;
|
rdev = conf->mirrors[d].rdev;
|
||||||
if (rdev &&
|
if (rdev &&
|
||||||
|
@ -1765,7 +1829,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||||
while (d != read_disk) {
|
while (d != read_disk) {
|
||||||
char b[BDEVNAME_SIZE];
|
char b[BDEVNAME_SIZE];
|
||||||
if (d==0)
|
if (d==0)
|
||||||
d = conf->raid_disks;
|
d = conf->raid_disks * 2;
|
||||||
d--;
|
d--;
|
||||||
rdev = conf->mirrors[d].rdev;
|
rdev = conf->mirrors[d].rdev;
|
||||||
if (rdev &&
|
if (rdev &&
|
||||||
|
@ -1887,7 +1951,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
|
||||||
{
|
{
|
||||||
int m;
|
int m;
|
||||||
int s = r1_bio->sectors;
|
int s = r1_bio->sectors;
|
||||||
for (m = 0; m < conf->raid_disks ; m++) {
|
for (m = 0; m < conf->raid_disks * 2 ; m++) {
|
||||||
struct md_rdev *rdev = conf->mirrors[m].rdev;
|
struct md_rdev *rdev = conf->mirrors[m].rdev;
|
||||||
struct bio *bio = r1_bio->bios[m];
|
struct bio *bio = r1_bio->bios[m];
|
||||||
if (bio->bi_end_io == NULL)
|
if (bio->bi_end_io == NULL)
|
||||||
|
@ -1909,7 +1973,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
|
||||||
static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
|
static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
|
||||||
{
|
{
|
||||||
int m;
|
int m;
|
||||||
for (m = 0; m < conf->raid_disks ; m++)
|
for (m = 0; m < conf->raid_disks * 2 ; m++)
|
||||||
if (r1_bio->bios[m] == IO_MADE_GOOD) {
|
if (r1_bio->bios[m] == IO_MADE_GOOD) {
|
||||||
struct md_rdev *rdev = conf->mirrors[m].rdev;
|
struct md_rdev *rdev = conf->mirrors[m].rdev;
|
||||||
rdev_clear_badblocks(rdev,
|
rdev_clear_badblocks(rdev,
|
||||||
|
@ -2184,7 +2248,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
|
||||||
r1_bio->state = 0;
|
r1_bio->state = 0;
|
||||||
set_bit(R1BIO_IsSync, &r1_bio->state);
|
set_bit(R1BIO_IsSync, &r1_bio->state);
|
||||||
|
|
||||||
for (i=0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks * 2; i++) {
|
||||||
struct md_rdev *rdev;
|
struct md_rdev *rdev;
|
||||||
bio = r1_bio->bios[i];
|
bio = r1_bio->bios[i];
|
||||||
|
|
||||||
|
@ -2203,7 +2267,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
|
||||||
rdev = rcu_dereference(conf->mirrors[i].rdev);
|
rdev = rcu_dereference(conf->mirrors[i].rdev);
|
||||||
if (rdev == NULL ||
|
if (rdev == NULL ||
|
||||||
test_bit(Faulty, &rdev->flags)) {
|
test_bit(Faulty, &rdev->flags)) {
|
||||||
still_degraded = 1;
|
if (i < conf->raid_disks)
|
||||||
|
still_degraded = 1;
|
||||||
} else if (!test_bit(In_sync, &rdev->flags)) {
|
} else if (!test_bit(In_sync, &rdev->flags)) {
|
||||||
bio->bi_rw = WRITE;
|
bio->bi_rw = WRITE;
|
||||||
bio->bi_end_io = end_sync_write;
|
bio->bi_end_io = end_sync_write;
|
||||||
|
@ -2254,7 +2319,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
|
||||||
* need to mark them bad on all write targets
|
* need to mark them bad on all write targets
|
||||||
*/
|
*/
|
||||||
int ok = 1;
|
int ok = 1;
|
||||||
for (i = 0 ; i < conf->raid_disks ; i++)
|
for (i = 0 ; i < conf->raid_disks * 2 ; i++)
|
||||||
if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
|
if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
|
||||||
struct md_rdev *rdev =
|
struct md_rdev *rdev =
|
||||||
rcu_dereference(conf->mirrors[i].rdev);
|
rcu_dereference(conf->mirrors[i].rdev);
|
||||||
|
@ -2323,7 +2388,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
|
||||||
len = sync_blocks<<9;
|
len = sync_blocks<<9;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i=0 ; i < conf->raid_disks; i++) {
|
for (i = 0 ; i < conf->raid_disks * 2; i++) {
|
||||||
bio = r1_bio->bios[i];
|
bio = r1_bio->bios[i];
|
||||||
if (bio->bi_end_io) {
|
if (bio->bi_end_io) {
|
||||||
page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
|
page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
|
||||||
|
@ -2356,7 +2421,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
|
||||||
*/
|
*/
|
||||||
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
||||||
atomic_set(&r1_bio->remaining, read_targets);
|
atomic_set(&r1_bio->remaining, read_targets);
|
||||||
for (i=0; i<conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks * 2; i++) {
|
||||||
bio = r1_bio->bios[i];
|
bio = r1_bio->bios[i];
|
||||||
if (bio->bi_end_io == end_sync_read) {
|
if (bio->bi_end_io == end_sync_read) {
|
||||||
md_sync_acct(bio->bi_bdev, nr_sectors);
|
md_sync_acct(bio->bi_bdev, nr_sectors);
|
||||||
|
@ -2393,7 +2458,8 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
||||||
if (!conf)
|
if (!conf)
|
||||||
goto abort;
|
goto abort;
|
||||||
|
|
||||||
conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
|
conf->mirrors = kzalloc(sizeof(struct mirror_info)
|
||||||
|
* mddev->raid_disks * 2,
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
if (!conf->mirrors)
|
if (!conf->mirrors)
|
||||||
goto abort;
|
goto abort;
|
||||||
|
@ -2405,7 +2471,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
||||||
conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
|
conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
|
||||||
if (!conf->poolinfo)
|
if (!conf->poolinfo)
|
||||||
goto abort;
|
goto abort;
|
||||||
conf->poolinfo->raid_disks = mddev->raid_disks;
|
conf->poolinfo->raid_disks = mddev->raid_disks * 2;
|
||||||
conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
|
conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
|
||||||
r1bio_pool_free,
|
r1bio_pool_free,
|
||||||
conf->poolinfo);
|
conf->poolinfo);
|
||||||
|
@ -2414,14 +2480,20 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
||||||
|
|
||||||
conf->poolinfo->mddev = mddev;
|
conf->poolinfo->mddev = mddev;
|
||||||
|
|
||||||
|
err = -EINVAL;
|
||||||
spin_lock_init(&conf->device_lock);
|
spin_lock_init(&conf->device_lock);
|
||||||
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
||||||
int disk_idx = rdev->raid_disk;
|
int disk_idx = rdev->raid_disk;
|
||||||
if (disk_idx >= mddev->raid_disks
|
if (disk_idx >= mddev->raid_disks
|
||||||
|| disk_idx < 0)
|
|| disk_idx < 0)
|
||||||
continue;
|
continue;
|
||||||
disk = conf->mirrors + disk_idx;
|
if (test_bit(Replacement, &rdev->flags))
|
||||||
|
disk = conf->mirrors + conf->raid_disks + disk_idx;
|
||||||
|
else
|
||||||
|
disk = conf->mirrors + disk_idx;
|
||||||
|
|
||||||
|
if (disk->rdev)
|
||||||
|
goto abort;
|
||||||
disk->rdev = rdev;
|
disk->rdev = rdev;
|
||||||
|
|
||||||
disk->head_position = 0;
|
disk->head_position = 0;
|
||||||
|
@ -2437,11 +2509,27 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
||||||
conf->pending_count = 0;
|
conf->pending_count = 0;
|
||||||
conf->recovery_disabled = mddev->recovery_disabled - 1;
|
conf->recovery_disabled = mddev->recovery_disabled - 1;
|
||||||
|
|
||||||
|
err = -EIO;
|
||||||
conf->last_used = -1;
|
conf->last_used = -1;
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks * 2; i++) {
|
||||||
|
|
||||||
disk = conf->mirrors + i;
|
disk = conf->mirrors + i;
|
||||||
|
|
||||||
|
if (i < conf->raid_disks &&
|
||||||
|
disk[conf->raid_disks].rdev) {
|
||||||
|
/* This slot has a replacement. */
|
||||||
|
if (!disk->rdev) {
|
||||||
|
/* No original, just make the replacement
|
||||||
|
* a recovering spare
|
||||||
|
*/
|
||||||
|
disk->rdev =
|
||||||
|
disk[conf->raid_disks].rdev;
|
||||||
|
disk[conf->raid_disks].rdev = NULL;
|
||||||
|
} else if (!test_bit(In_sync, &disk->rdev->flags))
|
||||||
|
/* Original is not in_sync - bad */
|
||||||
|
goto abort;
|
||||||
|
}
|
||||||
|
|
||||||
if (!disk->rdev ||
|
if (!disk->rdev ||
|
||||||
!test_bit(In_sync, &disk->rdev->flags)) {
|
!test_bit(In_sync, &disk->rdev->flags)) {
|
||||||
disk->head_position = 0;
|
disk->head_position = 0;
|
||||||
|
@ -2455,7 +2543,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
||||||
conf->last_used = i;
|
conf->last_used = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = -EIO;
|
|
||||||
if (conf->last_used < 0) {
|
if (conf->last_used < 0) {
|
||||||
printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",
|
printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",
|
||||||
mdname(mddev));
|
mdname(mddev));
|
||||||
|
@ -2665,7 +2752,7 @@ static int raid1_reshape(struct mddev *mddev)
|
||||||
if (!newpoolinfo)
|
if (!newpoolinfo)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
newpoolinfo->mddev = mddev;
|
newpoolinfo->mddev = mddev;
|
||||||
newpoolinfo->raid_disks = raid_disks;
|
newpoolinfo->raid_disks = raid_disks * 2;
|
||||||
|
|
||||||
newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
|
newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
|
||||||
r1bio_pool_free, newpoolinfo);
|
r1bio_pool_free, newpoolinfo);
|
||||||
|
@ -2673,7 +2760,8 @@ static int raid1_reshape(struct mddev *mddev)
|
||||||
kfree(newpoolinfo);
|
kfree(newpoolinfo);
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL);
|
newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks * 2,
|
||||||
|
GFP_KERNEL);
|
||||||
if (!newmirrors) {
|
if (!newmirrors) {
|
||||||
kfree(newpoolinfo);
|
kfree(newpoolinfo);
|
||||||
mempool_destroy(newpool);
|
mempool_destroy(newpool);
|
||||||
|
|
|
@@ -12,6 +12,9 @@ struct mirror_info {
  * pool was allocated for, so they know how much to allocate and free.
  * mddev->raid_disks cannot be used, as it can change while a pool is active
  * These two datums are stored in a kmalloced struct.
+ * The 'raid_disks' here is twice the raid_disks in r1conf.
+ * This allows space for each 'real' device can have a replacement in the
+ * second half of the array.
  */

 struct pool_info {

@@ -21,7 +24,9 @@ struct pool_info {

 struct r1conf {
     struct mddev        *mddev;
-    struct mirror_info  *mirrors;
+    struct mirror_info  *mirrors;   /* twice 'raid_disks' to
+                                     * allow for replacements.
+                                     */
     int                 raid_disks;

     /* When choose the best device for a read (read_balance())
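The comments added above describe the key data-structure change in raid1: the mirrors array is allocated with 2 * raid_disks entries, so the replacement for slot i, when present, lives at index raid_disks + i. The sketch below shows only that indexing convention; the struct and values are stand-ins for illustration, not the kernel's.

/* Illustrative layout only: with raid_disks == N, the mirrors array holds
 * 2*N entries; entry i is the original for slot i and entry N + i is its
 * (optional) replacement.
 */
#include <stdio.h>
#include <stdlib.h>

struct fake_mirror { int present; };   /* stand-in for struct mirror_info */

int main(void)
{
    int raid_disks = 2;                /* example array size */
    struct fake_mirror *mirrors =
        calloc((size_t)raid_disks * 2, sizeof(*mirrors));

    if (!mirrors)
        return 1;
    mirrors[0].present = 1;                   /* original in slot 0 */
    mirrors[raid_disks + 0].present = 1;      /* its replacement */
    for (int i = 0; i < raid_disks; i++)
        printf("slot %d: original=%d replacement=%d\n",
               i, mirrors[i].present, mirrors[raid_disks + i].present);
    free(mirrors);
    return 0;
}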
File diff suppressed because it is too large
@ -2,7 +2,7 @@
|
||||||
#define _RAID10_H
|
#define _RAID10_H
|
||||||
|
|
||||||
struct mirror_info {
|
struct mirror_info {
|
||||||
struct md_rdev *rdev;
|
struct md_rdev *rdev, *replacement;
|
||||||
sector_t head_position;
|
sector_t head_position;
|
||||||
int recovery_disabled; /* matches
|
int recovery_disabled; /* matches
|
||||||
* mddev->recovery_disabled
|
* mddev->recovery_disabled
|
||||||
|
@ -18,12 +18,13 @@ struct r10conf {
|
||||||
spinlock_t device_lock;
|
spinlock_t device_lock;
|
||||||
|
|
||||||
/* geometry */
|
/* geometry */
|
||||||
int near_copies; /* number of copies laid out raid0 style */
|
int near_copies; /* number of copies laid out
|
||||||
|
* raid0 style */
|
||||||
int far_copies; /* number of copies laid out
|
int far_copies; /* number of copies laid out
|
||||||
* at large strides across drives
|
* at large strides across drives
|
||||||
*/
|
*/
|
||||||
int far_offset; /* far_copies are offset by 1 stripe
|
int far_offset; /* far_copies are offset by 1
|
||||||
* instead of many
|
* stripe instead of many
|
||||||
*/
|
*/
|
||||||
int copies; /* near_copies * far_copies.
|
int copies; /* near_copies * far_copies.
|
||||||
* must be <= raid_disks
|
* must be <= raid_disks
|
||||||
|
@ -34,10 +35,11 @@ struct r10conf {
|
||||||
* 1 stripe.
|
* 1 stripe.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
sector_t dev_sectors; /* temp copy of mddev->dev_sectors */
|
sector_t dev_sectors; /* temp copy of
|
||||||
|
* mddev->dev_sectors */
|
||||||
|
|
||||||
int chunk_shift; /* shift from chunks to sectors */
|
int chunk_shift; /* shift from chunks to sectors */
|
||||||
sector_t chunk_mask;
|
sector_t chunk_mask;
|
||||||
|
|
||||||
struct list_head retry_list;
|
struct list_head retry_list;
|
||||||
/* queue pending writes and submit them on unplug */
|
/* queue pending writes and submit them on unplug */
|
||||||
|
@ -45,20 +47,22 @@ struct r10conf {
|
||||||
int pending_count;
|
int pending_count;
|
||||||
|
|
||||||
spinlock_t resync_lock;
|
spinlock_t resync_lock;
|
||||||
int nr_pending;
|
int nr_pending;
|
||||||
int nr_waiting;
|
int nr_waiting;
|
||||||
int nr_queued;
|
int nr_queued;
|
||||||
int barrier;
|
int barrier;
|
||||||
sector_t next_resync;
|
sector_t next_resync;
|
||||||
int fullsync; /* set to 1 if a full sync is needed,
|
int fullsync; /* set to 1 if a full sync is needed,
|
||||||
* (fresh device added).
|
* (fresh device added).
|
||||||
* Cleared when a sync completes.
|
* Cleared when a sync completes.
|
||||||
*/
|
*/
|
||||||
|
int have_replacement; /* There is at least one
|
||||||
|
* replacement device.
|
||||||
|
*/
|
||||||
wait_queue_head_t wait_barrier;
|
wait_queue_head_t wait_barrier;
|
||||||
|
|
||||||
mempool_t *r10bio_pool;
|
mempool_t *r10bio_pool;
|
||||||
mempool_t *r10buf_pool;
|
mempool_t *r10buf_pool;
|
||||||
struct page *tmppage;
|
struct page *tmppage;
|
||||||
|
|
||||||
/* When taking over an array from a different personality, we store
|
/* When taking over an array from a different personality, we store
|
||||||
|
@ -98,11 +102,18 @@ struct r10bio {
|
||||||
* When resyncing we also use one for each copy.
|
* When resyncing we also use one for each copy.
|
||||||
* When reconstructing, we use 2 bios, one for read, one for write.
|
* When reconstructing, we use 2 bios, one for read, one for write.
|
||||||
* We choose the number when they are allocated.
|
* We choose the number when they are allocated.
|
||||||
|
* We sometimes need an extra bio to write to the replacement.
|
||||||
*/
|
*/
|
||||||
struct {
|
struct {
|
||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
sector_t addr;
|
union {
|
||||||
int devnum;
|
struct bio *repl_bio; /* used for resync and
|
||||||
|
* writes */
|
||||||
|
struct md_rdev *rdev; /* used for reads
|
||||||
|
* (read_slot >= 0) */
|
||||||
|
};
|
||||||
|
sector_t addr;
|
||||||
|
int devnum;
|
||||||
} devs[0];
|
} devs[0];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -121,17 +132,19 @@ struct r10bio {
|
||||||
#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
|
#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
|
||||||
|
|
||||||
/* bits for r10bio.state */
|
/* bits for r10bio.state */
|
||||||
#define R10BIO_Uptodate 0
|
enum r10bio_state {
|
||||||
#define R10BIO_IsSync 1
|
R10BIO_Uptodate,
|
||||||
#define R10BIO_IsRecover 2
|
R10BIO_IsSync,
|
||||||
#define R10BIO_Degraded 3
|
R10BIO_IsRecover,
|
||||||
|
R10BIO_Degraded,
|
||||||
/* Set ReadError on bios that experience a read error
|
/* Set ReadError on bios that experience a read error
|
||||||
* so that raid10d knows what to do with them.
|
* so that raid10d knows what to do with them.
|
||||||
*/
|
*/
|
||||||
#define R10BIO_ReadError 4
|
R10BIO_ReadError,
|
||||||
/* If a write for this request means we can clear some
|
/* If a write for this request means we can clear some
|
||||||
* known-bad-block records, we set this flag.
|
* known-bad-block records, we set this flag.
|
||||||
*/
|
*/
|
||||||
#define R10BIO_MadeGood 5
|
R10BIO_MadeGood,
|
||||||
#define R10BIO_WriteError 6
|
R10BIO_WriteError,
|
||||||
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@@ -370,12 +370,10 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
  * of the two sections, and some non-in_sync devices may
  * be insync in the section most affected by failed devices.
  */
-static int has_failed(struct r5conf *conf)
+static int calc_degraded(struct r5conf *conf)
 {
-    int degraded;
+    int degraded, degraded2;
     int i;
-    if (conf->mddev->reshape_position == MaxSector)
-        return conf->mddev->degraded > conf->max_degraded;
 
     rcu_read_lock();
     degraded = 0;
@@ -399,14 +397,14 @@ static int has_failed(struct r5conf *conf)
             degraded++;
     }
     rcu_read_unlock();
-    if (degraded > conf->max_degraded)
-        return 1;
+    if (conf->raid_disks == conf->previous_raid_disks)
+        return degraded;
     rcu_read_lock();
-    degraded = 0;
+    degraded2 = 0;
     for (i = 0; i < conf->raid_disks; i++) {
         struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
         if (!rdev || test_bit(Faulty, &rdev->flags))
-            degraded++;
+            degraded2++;
         else if (test_bit(In_sync, &rdev->flags))
             ;
         else
@@ -416,9 +414,22 @@ static int has_failed(struct r5conf *conf)
          * almost certainly hasn't.
          */
         if (conf->raid_disks <= conf->previous_raid_disks)
-            degraded++;
+            degraded2++;
     }
     rcu_read_unlock();
+    if (degraded2 > degraded)
+        return degraded2;
+    return degraded;
+}
+
+static int has_failed(struct r5conf *conf)
+{
+    int degraded;
+
+    if (conf->mddev->reshape_position == MaxSector)
+        return conf->mddev->degraded > conf->max_degraded;
+
+    degraded = calc_degraded(conf);
     if (degraded > conf->max_degraded)
         return 1;
     return 0;
@@ -492,8 +503,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 
     for (i = disks; i--; ) {
         int rw;
-        struct bio *bi;
-        struct md_rdev *rdev;
+        int replace_only = 0;
+        struct bio *bi, *rbi;
+        struct md_rdev *rdev, *rrdev = NULL;
         if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
             if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
                 rw = WRITE_FUA;
@@ -501,27 +513,57 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                 rw = WRITE;
         } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
             rw = READ;
-        else
+        else if (test_and_clear_bit(R5_WantReplace,
+                                    &sh->dev[i].flags)) {
+            rw = WRITE;
+            replace_only = 1;
+        } else
             continue;
 
         bi = &sh->dev[i].req;
+        rbi = &sh->dev[i].rreq; /* For writing to replacement */
 
         bi->bi_rw = rw;
-        if (rw & WRITE)
+        rbi->bi_rw = rw;
+        if (rw & WRITE) {
             bi->bi_end_io = raid5_end_write_request;
-        else
+            rbi->bi_end_io = raid5_end_write_request;
+        } else
             bi->bi_end_io = raid5_end_read_request;
 
         rcu_read_lock();
+        rrdev = rcu_dereference(conf->disks[i].replacement);
+        smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
         rdev = rcu_dereference(conf->disks[i].rdev);
+        if (!rdev) {
+            rdev = rrdev;
+            rrdev = NULL;
+        }
+        if (rw & WRITE) {
+            if (replace_only)
+                rdev = NULL;
+            if (rdev == rrdev)
+                /* We raced and saw duplicates */
+                rrdev = NULL;
+        } else {
+            if (test_bit(R5_ReadRepl, &sh->dev[i].flags) && rrdev)
+                rdev = rrdev;
+            rrdev = NULL;
+        }
+
         if (rdev && test_bit(Faulty, &rdev->flags))
             rdev = NULL;
         if (rdev)
            atomic_inc(&rdev->nr_pending);
+        if (rrdev && test_bit(Faulty, &rrdev->flags))
+            rrdev = NULL;
+        if (rrdev)
+            atomic_inc(&rrdev->nr_pending);
         rcu_read_unlock();
 
         /* We have already checked bad blocks for reads. Now
-         * need to check for writes.
+         * need to check for writes. We never accept write errors
+         * on the replacement, so we don't to check rrdev.
          */
         while ((rw & WRITE) && rdev &&
                test_bit(WriteErrorSeen, &rdev->flags)) {
@@ -551,7 +593,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
         }
 
         if (rdev) {
-            if (s->syncing || s->expanding || s->expanded)
+            if (s->syncing || s->expanding || s->expanded
+                || s->replacing)
                 md_sync_acct(rdev->bdev, STRIPE_SECTORS);
 
             set_bit(STRIPE_IO_STARTED, &sh->state);
@@ -563,16 +606,38 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
             atomic_inc(&sh->count);
             bi->bi_sector = sh->sector + rdev->data_offset;
             bi->bi_flags = 1 << BIO_UPTODATE;
-            bi->bi_vcnt = 1;
-            bi->bi_max_vecs = 1;
             bi->bi_idx = 0;
-            bi->bi_io_vec = &sh->dev[i].vec;
             bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
             bi->bi_io_vec[0].bv_offset = 0;
             bi->bi_size = STRIPE_SIZE;
             bi->bi_next = NULL;
+            if (rrdev)
+                set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
             generic_make_request(bi);
-        } else {
+        }
+        if (rrdev) {
+            if (s->syncing || s->expanding || s->expanded
+                || s->replacing)
+                md_sync_acct(rrdev->bdev, STRIPE_SECTORS);
+
+            set_bit(STRIPE_IO_STARTED, &sh->state);
+
+            rbi->bi_bdev = rrdev->bdev;
+            pr_debug("%s: for %llu schedule op %ld on "
+                     "replacement disc %d\n",
+                     __func__, (unsigned long long)sh->sector,
+                     rbi->bi_rw, i);
+            atomic_inc(&sh->count);
+            rbi->bi_sector = sh->sector + rrdev->data_offset;
+            rbi->bi_flags = 1 << BIO_UPTODATE;
+            rbi->bi_idx = 0;
+            rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
+            rbi->bi_io_vec[0].bv_offset = 0;
+            rbi->bi_size = STRIPE_SIZE;
+            rbi->bi_next = NULL;
+            generic_make_request(rbi);
+        }
+        if (!rdev && !rrdev) {
             if (rw & WRITE)
                 set_bit(STRIPE_DEGRADED, &sh->state);
             pr_debug("skip op %ld on disc %d for sector %llu\n",
@@ -1583,7 +1648,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
     int disks = sh->disks, i;
     int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
     char b[BDEVNAME_SIZE];
-    struct md_rdev *rdev;
+    struct md_rdev *rdev = NULL;
 
 
     for (i=0 ; i<disks; i++)
@@ -1597,11 +1662,23 @@ static void raid5_end_read_request(struct bio * bi, int error)
         BUG();
         return;
     }
+    if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
+        /* If replacement finished while this request was outstanding,
+         * 'replacement' might be NULL already.
+         * In that case it moved down to 'rdev'.
+         * rdev is not removed until all requests are finished.
+         */
+        rdev = conf->disks[i].replacement;
+    if (!rdev)
+        rdev = conf->disks[i].rdev;
 
     if (uptodate) {
         set_bit(R5_UPTODATE, &sh->dev[i].flags);
         if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
-            rdev = conf->disks[i].rdev;
+            /* Note that this cannot happen on a
+             * replacement device. We just fail those on
+             * any error
+             */
             printk_ratelimited(
                 KERN_INFO
                 "md/raid:%s: read error corrected"
@@ -1614,16 +1691,24 @@ static void raid5_end_read_request(struct bio * bi, int error)
             clear_bit(R5_ReadError, &sh->dev[i].flags);
             clear_bit(R5_ReWrite, &sh->dev[i].flags);
         }
-        if (atomic_read(&conf->disks[i].rdev->read_errors))
-            atomic_set(&conf->disks[i].rdev->read_errors, 0);
+        if (atomic_read(&rdev->read_errors))
+            atomic_set(&rdev->read_errors, 0);
     } else {
-        const char *bdn = bdevname(conf->disks[i].rdev->bdev, b);
+        const char *bdn = bdevname(rdev->bdev, b);
         int retry = 0;
-        rdev = conf->disks[i].rdev;
 
         clear_bit(R5_UPTODATE, &sh->dev[i].flags);
         atomic_inc(&rdev->read_errors);
-        if (conf->mddev->degraded >= conf->max_degraded)
+        if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
+            printk_ratelimited(
+                KERN_WARNING
+                "md/raid:%s: read error on replacement device "
+                "(sector %llu on %s).\n",
+                mdname(conf->mddev),
+                (unsigned long long)(sh->sector
+                                     + rdev->data_offset),
+                bdn);
+        else if (conf->mddev->degraded >= conf->max_degraded)
             printk_ratelimited(
                 KERN_WARNING
                 "md/raid:%s: read error not correctable "
@@ -1657,7 +1742,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
             md_error(conf->mddev, rdev);
         }
     }
-    rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
+    rdev_dec_pending(rdev, conf->mddev);
     clear_bit(R5_LOCKED, &sh->dev[i].flags);
     set_bit(STRIPE_HANDLE, &sh->state);
     release_stripe(sh);
@@ -1668,14 +1753,30 @@ static void raid5_end_write_request(struct bio *bi, int error)
     struct stripe_head *sh = bi->bi_private;
     struct r5conf *conf = sh->raid_conf;
     int disks = sh->disks, i;
+    struct md_rdev *uninitialized_var(rdev);
     int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
     sector_t first_bad;
     int bad_sectors;
+    int replacement = 0;
 
-    for (i=0 ; i<disks; i++)
-        if (bi == &sh->dev[i].req)
+    for (i = 0 ; i < disks; i++) {
+        if (bi == &sh->dev[i].req) {
+            rdev = conf->disks[i].rdev;
             break;
+        }
+        if (bi == &sh->dev[i].rreq) {
+            rdev = conf->disks[i].replacement;
+            if (rdev)
+                replacement = 1;
+            else
+                /* rdev was removed and 'replacement'
+                 * replaced it. rdev is not removed
+                 * until all requests are finished.
+                 */
+                rdev = conf->disks[i].rdev;
+            break;
+        }
+    }
     pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
              (unsigned long long)sh->sector, i, atomic_read(&sh->count),
              uptodate);
@@ -1684,21 +1785,33 @@ static void raid5_end_write_request(struct bio *bi, int error)
         return;
     }
 
-    if (!uptodate) {
-        set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags);
-        set_bit(R5_WriteError, &sh->dev[i].flags);
-    } else if (is_badblock(conf->disks[i].rdev, sh->sector, STRIPE_SECTORS,
-                           &first_bad, &bad_sectors))
-        set_bit(R5_MadeGood, &sh->dev[i].flags);
+    if (replacement) {
+        if (!uptodate)
+            md_error(conf->mddev, rdev);
+        else if (is_badblock(rdev, sh->sector,
+                             STRIPE_SECTORS,
+                             &first_bad, &bad_sectors))
+            set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
+    } else {
+        if (!uptodate) {
+            set_bit(WriteErrorSeen, &rdev->flags);
+            set_bit(R5_WriteError, &sh->dev[i].flags);
+            if (!test_and_set_bit(WantReplacement, &rdev->flags))
+                set_bit(MD_RECOVERY_NEEDED,
+                        &rdev->mddev->recovery);
+        } else if (is_badblock(rdev, sh->sector,
+                               STRIPE_SECTORS,
+                               &first_bad, &bad_sectors))
+            set_bit(R5_MadeGood, &sh->dev[i].flags);
+    }
+    rdev_dec_pending(rdev, conf->mddev);
 
-    rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
-    clear_bit(R5_LOCKED, &sh->dev[i].flags);
+    if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
+        clear_bit(R5_LOCKED, &sh->dev[i].flags);
     set_bit(STRIPE_HANDLE, &sh->state);
     release_stripe(sh);
 }
 
 
 static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
 
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
|
||||||
dev->req.bi_io_vec = &dev->vec;
|
dev->req.bi_io_vec = &dev->vec;
|
||||||
dev->req.bi_vcnt++;
|
dev->req.bi_vcnt++;
|
||||||
dev->req.bi_max_vecs++;
|
dev->req.bi_max_vecs++;
|
||||||
dev->vec.bv_page = dev->page;
|
|
||||||
dev->vec.bv_len = STRIPE_SIZE;
|
|
||||||
dev->vec.bv_offset = 0;
|
|
||||||
|
|
||||||
dev->req.bi_sector = sh->sector;
|
|
||||||
dev->req.bi_private = sh;
|
dev->req.bi_private = sh;
|
||||||
|
dev->vec.bv_page = dev->page;
|
||||||
|
|
||||||
|
bio_init(&dev->rreq);
|
||||||
|
dev->rreq.bi_io_vec = &dev->rvec;
|
||||||
|
dev->rreq.bi_vcnt++;
|
||||||
|
dev->rreq.bi_max_vecs++;
|
||||||
|
dev->rreq.bi_private = sh;
|
||||||
|
dev->rvec.bv_page = dev->page;
|
||||||
|
|
||||||
dev->flags = 0;
|
dev->flags = 0;
|
||||||
dev->sector = compute_blocknr(sh, i, previous);
|
dev->sector = compute_blocknr(sh, i, previous);
|
||||||
|
@ -1724,18 +1840,15 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
{
|
{
|
||||||
char b[BDEVNAME_SIZE];
|
char b[BDEVNAME_SIZE];
|
||||||
struct r5conf *conf = mddev->private;
|
struct r5conf *conf = mddev->private;
|
||||||
|
unsigned long flags;
|
||||||
pr_debug("raid456: error called\n");
|
pr_debug("raid456: error called\n");
|
||||||
|
|
||||||
if (test_and_clear_bit(In_sync, &rdev->flags)) {
|
spin_lock_irqsave(&conf->device_lock, flags);
|
||||||
unsigned long flags;
|
clear_bit(In_sync, &rdev->flags);
|
||||||
spin_lock_irqsave(&conf->device_lock, flags);
|
mddev->degraded = calc_degraded(conf);
|
||||||
mddev->degraded++;
|
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||||
/*
|
|
||||||
* if recovery was running, make sure it aborts.
|
|
||||||
*/
|
|
||||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
|
||||||
}
|
|
||||||
set_bit(Blocked, &rdev->flags);
|
set_bit(Blocked, &rdev->flags);
|
||||||
set_bit(Faulty, &rdev->flags);
|
set_bit(Faulty, &rdev->flags);
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
|
@ -2362,8 +2475,9 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
|
||||||
md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
|
md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
|
||||||
clear_bit(STRIPE_SYNCING, &sh->state);
|
clear_bit(STRIPE_SYNCING, &sh->state);
|
||||||
s->syncing = 0;
|
s->syncing = 0;
|
||||||
|
s->replacing = 0;
|
||||||
/* There is nothing more to do for sync/check/repair.
|
/* There is nothing more to do for sync/check/repair.
|
||||||
* For recover we need to record a bad block on all
|
* For recover/replace we need to record a bad block on all
|
||||||
* non-sync devices, or abort the recovery
|
* non-sync devices, or abort the recovery
|
||||||
*/
|
*/
|
||||||
if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
|
if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
|
||||||
|
@ -2373,12 +2487,18 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks; i++) {
|
||||||
struct md_rdev *rdev = conf->disks[i].rdev;
|
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||||
if (!rdev
|
if (rdev
|
||||||
|| test_bit(Faulty, &rdev->flags)
|
&& !test_bit(Faulty, &rdev->flags)
|
||||||
|| test_bit(In_sync, &rdev->flags))
|
&& !test_bit(In_sync, &rdev->flags)
|
||||||
continue;
|
&& !rdev_set_badblocks(rdev, sh->sector,
|
||||||
if (!rdev_set_badblocks(rdev, sh->sector,
|
STRIPE_SECTORS, 0))
|
||||||
STRIPE_SECTORS, 0))
|
abort = 1;
|
||||||
|
rdev = conf->disks[i].replacement;
|
||||||
|
if (rdev
|
||||||
|
&& !test_bit(Faulty, &rdev->flags)
|
||||||
|
&& !test_bit(In_sync, &rdev->flags)
|
||||||
|
&& !rdev_set_badblocks(rdev, sh->sector,
|
||||||
|
STRIPE_SECTORS, 0))
|
||||||
abort = 1;
|
abort = 1;
|
||||||
}
|
}
|
||||||
if (abort) {
|
if (abort) {
|
||||||
|
@ -2387,6 +2507,22 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int want_replace(struct stripe_head *sh, int disk_idx)
|
||||||
|
{
|
||||||
|
struct md_rdev *rdev;
|
||||||
|
int rv = 0;
|
||||||
|
/* Doing recovery so rcu locking not required */
|
||||||
|
rdev = sh->raid_conf->disks[disk_idx].replacement;
|
||||||
|
if (rdev
|
||||||
|
&& !test_bit(Faulty, &rdev->flags)
|
||||||
|
&& !test_bit(In_sync, &rdev->flags)
|
||||||
|
&& (rdev->recovery_offset <= sh->sector
|
||||||
|
|| rdev->mddev->recovery_cp <= sh->sector))
|
||||||
|
rv = 1;
|
||||||
|
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
/* fetch_block - checks the given member device to see if its data needs
|
/* fetch_block - checks the given member device to see if its data needs
|
||||||
* to be read or computed to satisfy a request.
|
* to be read or computed to satisfy a request.
|
||||||
*
|
*
|
||||||
|
@ -2406,6 +2542,7 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
|
||||||
(dev->toread ||
|
(dev->toread ||
|
||||||
(dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
|
(dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
|
||||||
s->syncing || s->expanding ||
|
s->syncing || s->expanding ||
|
||||||
|
(s->replacing && want_replace(sh, disk_idx)) ||
|
||||||
(s->failed >= 1 && fdev[0]->toread) ||
|
(s->failed >= 1 && fdev[0]->toread) ||
|
||||||
(s->failed >= 2 && fdev[1]->toread) ||
|
(s->failed >= 2 && fdev[1]->toread) ||
|
||||||
(sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
|
(sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
|
||||||
|
@@ -2959,22 +3096,18 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
         }
     }
 
 
 /*
  * handle_stripe - do things to a stripe.
  *
- * We lock the stripe and then examine the state of various bits
- * to see what needs to be done.
+ * We lock the stripe by setting STRIPE_ACTIVE and then examine the
+ * state of various bits to see what needs to be done.
  * Possible results:
- * return some read request which now have data
- * return some write requests which are safely on disc
+ * return some read requests which now have data
+ * return some write requests which are safely on storage
  * schedule a read on some buffers
  * schedule a write of some buffers
  * return confirmation of parity correctness
  *
- * buffers are taken off read_list or write_list, and bh_cache buffers
- * get BH_Lock set before the stripe lock is released.
- *
  */
 
 static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
@@ -2983,10 +3116,10 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
     int disks = sh->disks;
     struct r5dev *dev;
     int i;
+    int do_recovery = 0;
 
     memset(s, 0, sizeof(*s));
 
-    s->syncing = test_bit(STRIPE_SYNCING, &sh->state);
     s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
     s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
     s->failed_num[0] = -1;
@@ -3004,7 +3137,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
         dev = &sh->dev[i];
 
         pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
-                 i, dev->flags, dev->toread, dev->towrite, dev->written);
+                 i, dev->flags,
+                 dev->toread, dev->towrite, dev->written);
         /* maybe we can reply to a read
          *
          * new wantfill requests are only permitted while
@@ -3035,7 +3169,21 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
         }
         if (dev->written)
             s->written++;
-        rdev = rcu_dereference(conf->disks[i].rdev);
+        /* Prefer to use the replacement for reads, but only
+         * if it is recovered enough and has no bad blocks.
+         */
+        rdev = rcu_dereference(conf->disks[i].replacement);
+        if (rdev && !test_bit(Faulty, &rdev->flags) &&
+            rdev->recovery_offset >= sh->sector + STRIPE_SECTORS &&
+            !is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+                         &first_bad, &bad_sectors))
+            set_bit(R5_ReadRepl, &dev->flags);
+        else {
+            if (rdev)
+                set_bit(R5_NeedReplace, &dev->flags);
+            rdev = rcu_dereference(conf->disks[i].rdev);
+            clear_bit(R5_ReadRepl, &dev->flags);
+        }
         if (rdev && test_bit(Faulty, &rdev->flags))
             rdev = NULL;
         if (rdev) {
@@ -3077,20 +3225,38 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
             set_bit(R5_Insync, &dev->flags);
 
         if (rdev && test_bit(R5_WriteError, &dev->flags)) {
-            clear_bit(R5_Insync, &dev->flags);
-            if (!test_bit(Faulty, &rdev->flags)) {
+            /* This flag does not apply to '.replacement'
+             * only to .rdev, so make sure to check that*/
+            struct md_rdev *rdev2 = rcu_dereference(
+                conf->disks[i].rdev);
+            if (rdev2 == rdev)
+                clear_bit(R5_Insync, &dev->flags);
+            if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
                 s->handle_bad_blocks = 1;
-                atomic_inc(&rdev->nr_pending);
+                atomic_inc(&rdev2->nr_pending);
             } else
                 clear_bit(R5_WriteError, &dev->flags);
         }
         if (rdev && test_bit(R5_MadeGood, &dev->flags)) {
-            if (!test_bit(Faulty, &rdev->flags)) {
+            /* This flag does not apply to '.replacement'
+             * only to .rdev, so make sure to check that*/
+            struct md_rdev *rdev2 = rcu_dereference(
+                conf->disks[i].rdev);
+            if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
                 s->handle_bad_blocks = 1;
-                atomic_inc(&rdev->nr_pending);
+                atomic_inc(&rdev2->nr_pending);
             } else
                 clear_bit(R5_MadeGood, &dev->flags);
         }
+        if (test_bit(R5_MadeGoodRepl, &dev->flags)) {
+            struct md_rdev *rdev2 = rcu_dereference(
+                conf->disks[i].replacement);
+            if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
+                s->handle_bad_blocks = 1;
+                atomic_inc(&rdev2->nr_pending);
+            } else
+                clear_bit(R5_MadeGoodRepl, &dev->flags);
+        }
         if (!test_bit(R5_Insync, &dev->flags)) {
             /* The ReadError flag will just be confusing now */
             clear_bit(R5_ReadError, &dev->flags);
@@ -3102,9 +3268,25 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
             if (s->failed < 2)
                 s->failed_num[s->failed] = i;
             s->failed++;
+            if (rdev && !test_bit(Faulty, &rdev->flags))
+                do_recovery = 1;
         }
     }
     spin_unlock_irq(&conf->device_lock);
+    if (test_bit(STRIPE_SYNCING, &sh->state)) {
+        /* If there is a failed device being replaced,
+         * we must be recovering.
+         * else if we are after recovery_cp, we must be syncing
+         * else we can only be replacing
+         * sync and recovery both need to read all devices, and so
+         * use the same flag.
+         */
+        if (do_recovery ||
+            sh->sector >= conf->mddev->recovery_cp)
+            s->syncing = 1;
+        else
+            s->replacing = 1;
+    }
     rcu_read_unlock();
 }
@@ -3146,7 +3328,7 @@ static void handle_stripe(struct stripe_head *sh)
 
     if (unlikely(s.blocked_rdev)) {
         if (s.syncing || s.expanding || s.expanded ||
-            s.to_write || s.written) {
+            s.replacing || s.to_write || s.written) {
             set_bit(STRIPE_HANDLE, &sh->state);
             goto finish;
         }
@@ -3172,7 +3354,7 @@ static void handle_stripe(struct stripe_head *sh)
         sh->reconstruct_state = 0;
         if (s.to_read+s.to_write+s.written)
             handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
-        if (s.syncing)
+        if (s.syncing + s.replacing)
             handle_failed_sync(conf, sh, &s);
     }
 
@@ -3203,7 +3385,9 @@ static void handle_stripe(struct stripe_head *sh)
      */
     if (s.to_read || s.non_overwrite
         || (conf->level == 6 && s.to_write && s.failed)
-        || (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
+        || (s.syncing && (s.uptodate + s.compute < disks))
+        || s.replacing
+        || s.expanding)
         handle_stripe_fill(sh, &s, disks);
 
     /* Now we check to see if any write operations have recently
@@ -3265,7 +3449,20 @@ static void handle_stripe(struct stripe_head *sh)
             handle_parity_checks5(conf, sh, &s, disks);
     }
 
-    if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
+    if (s.replacing && s.locked == 0
+        && !test_bit(STRIPE_INSYNC, &sh->state)) {
+        /* Write out to replacement devices where possible */
+        for (i = 0; i < conf->raid_disks; i++)
+            if (test_bit(R5_UPTODATE, &sh->dev[i].flags) &&
+                test_bit(R5_NeedReplace, &sh->dev[i].flags)) {
+                set_bit(R5_WantReplace, &sh->dev[i].flags);
+                set_bit(R5_LOCKED, &sh->dev[i].flags);
+                s.locked++;
+            }
+        set_bit(STRIPE_INSYNC, &sh->state);
+    }
+    if ((s.syncing || s.replacing) && s.locked == 0 &&
+        test_bit(STRIPE_INSYNC, &sh->state)) {
         md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
         clear_bit(STRIPE_SYNCING, &sh->state);
     }
@@ -3363,6 +3560,15 @@ static void handle_stripe(struct stripe_head *sh)
                                  STRIPE_SECTORS);
             rdev_dec_pending(rdev, conf->mddev);
         }
+        if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
+            rdev = conf->disks[i].replacement;
+            if (!rdev)
+                /* rdev have been moved down */
+                rdev = conf->disks[i].rdev;
+            rdev_clear_badblocks(rdev, sh->sector,
+                                 STRIPE_SECTORS);
+            rdev_dec_pending(rdev, conf->mddev);
+        }
     }
 
     if (s.ops_request)
@@ -3586,6 +3792,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
     int dd_idx;
     struct bio* align_bi;
     struct md_rdev *rdev;
+    sector_t end_sector;
 
     if (!in_chunk_boundary(mddev, raid_bio)) {
         pr_debug("chunk_aligned_read : non aligned\n");
@@ -3610,9 +3817,19 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
                              0,
                              &dd_idx, NULL);
 
+    end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9);
     rcu_read_lock();
-    rdev = rcu_dereference(conf->disks[dd_idx].rdev);
-    if (rdev && test_bit(In_sync, &rdev->flags)) {
+    rdev = rcu_dereference(conf->disks[dd_idx].replacement);
+    if (!rdev || test_bit(Faulty, &rdev->flags) ||
+        rdev->recovery_offset < end_sector) {
+        rdev = rcu_dereference(conf->disks[dd_idx].rdev);
+        if (rdev &&
+            (test_bit(Faulty, &rdev->flags) ||
+             !(test_bit(In_sync, &rdev->flags) ||
+               rdev->recovery_offset >= end_sector)))
+            rdev = NULL;
+    }
+    if (rdev) {
         sector_t first_bad;
         int bad_sectors;
 
@@ -4137,7 +4354,6 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
         return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
     }
 
-
     bitmap_cond_end_sync(mddev->bitmap, sector_nr);
 
     sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
@@ -4208,7 +4424,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
             return handled;
         }
 
-        set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
         if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
             release_stripe(sh);
             raid5_set_bi_hw_segments(raid_bio, scnt);
@@ -4635,7 +4850,15 @@ static struct r5conf *setup_conf(struct mddev *mddev)
             continue;
         disk = conf->disks + raid_disk;
 
-        disk->rdev = rdev;
+        if (test_bit(Replacement, &rdev->flags)) {
+            if (disk->replacement)
+                goto abort;
+            disk->replacement = rdev;
+        } else {
+            if (disk->rdev)
+                goto abort;
+            disk->rdev = rdev;
+        }
 
         if (test_bit(In_sync, &rdev->flags)) {
             char b[BDEVNAME_SIZE];
@@ -4724,6 +4947,7 @@ static int run(struct mddev *mddev)
     int dirty_parity_disks = 0;
     struct md_rdev *rdev;
     sector_t reshape_offset = 0;
+    int i;
 
     if (mddev->recovery_cp != MaxSector)
         printk(KERN_NOTICE "md/raid:%s: not clean"
@@ -4813,12 +5037,25 @@ static int run(struct mddev *mddev)
     conf->thread = NULL;
     mddev->private = conf;
 
-    /*
-     * 0 for a fully functional array, 1 or 2 for a degraded array.
-     */
-    list_for_each_entry(rdev, &mddev->disks, same_set) {
-        if (rdev->raid_disk < 0)
+    for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
+         i++) {
+        rdev = conf->disks[i].rdev;
+        if (!rdev && conf->disks[i].replacement) {
+            /* The replacement is all we have yet */
+            rdev = conf->disks[i].replacement;
+            conf->disks[i].replacement = NULL;
+            clear_bit(Replacement, &rdev->flags);
+            conf->disks[i].rdev = rdev;
+        }
+        if (!rdev)
             continue;
+        if (conf->disks[i].replacement &&
+            conf->reshape_progress != MaxSector) {
+            /* replacements and reshape simply do not mix. */
+            printk(KERN_ERR "md: cannot handle concurrent "
+                   "replacement and reshape.\n");
+            goto abort;
+        }
         if (test_bit(In_sync, &rdev->flags)) {
             working_disks++;
             continue;
@@ -4852,8 +5089,10 @@ static int run(struct mddev *mddev)
             dirty_parity_disks++;
     }
 
-    mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
-                       - working_disks);
+    /*
+     * 0 for a fully functional array, 1 or 2 for a degraded array.
+     */
+    mddev->degraded = calc_degraded(conf);
 
     if (has_failed(conf)) {
         printk(KERN_ERR "md/raid:%s: not enough operational devices"
@@ -5016,7 +5255,25 @@ static int raid5_spare_active(struct mddev *mddev)
 
     for (i = 0; i < conf->raid_disks; i++) {
         tmp = conf->disks + i;
-        if (tmp->rdev
+        if (tmp->replacement
+            && tmp->replacement->recovery_offset == MaxSector
+            && !test_bit(Faulty, &tmp->replacement->flags)
+            && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
+            /* Replacement has just become active. */
+            if (!tmp->rdev
+                || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
+                count++;
+            if (tmp->rdev) {
+                /* Replaced device not technically faulty,
+                 * but we need to be sure it gets removed
+                 * and never re-added.
+                 */
+                set_bit(Faulty, &tmp->rdev->flags);
+                sysfs_notify_dirent_safe(
+                    tmp->rdev->sysfs_state);
+            }
+            sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
+        } else if (tmp->rdev
             && tmp->rdev->recovery_offset == MaxSector
             && !test_bit(Faulty, &tmp->rdev->flags)
             && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
@@ -5025,49 +5282,68 @@ static int raid5_spare_active(struct mddev *mddev)
         }
     }
     spin_lock_irqsave(&conf->device_lock, flags);
-    mddev->degraded -= count;
+    mddev->degraded = calc_degraded(conf);
     spin_unlock_irqrestore(&conf->device_lock, flags);
     print_raid5_conf(conf);
     return count;
 }
 
-static int raid5_remove_disk(struct mddev *mddev, int number)
+static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 {
     struct r5conf *conf = mddev->private;
     int err = 0;
-    struct md_rdev *rdev;
+    int number = rdev->raid_disk;
+    struct md_rdev **rdevp;
     struct disk_info *p = conf->disks + number;
 
     print_raid5_conf(conf);
-    rdev = p->rdev;
-    if (rdev) {
-        if (number >= conf->raid_disks &&
-            conf->reshape_progress == MaxSector)
-            clear_bit(In_sync, &rdev->flags);
-
-        if (test_bit(In_sync, &rdev->flags) ||
-            atomic_read(&rdev->nr_pending)) {
-            err = -EBUSY;
-            goto abort;
-        }
-        /* Only remove non-faulty devices if recovery
-         * isn't possible.
-         */
-        if (!test_bit(Faulty, &rdev->flags) &&
-            mddev->recovery_disabled != conf->recovery_disabled &&
-            !has_failed(conf) &&
-            number < conf->raid_disks) {
-            err = -EBUSY;
-            goto abort;
-        }
-        p->rdev = NULL;
-        synchronize_rcu();
-        if (atomic_read(&rdev->nr_pending)) {
-            /* lost the race, try later */
-            err = -EBUSY;
-            p->rdev = rdev;
-        }
-    }
+    if (rdev == p->rdev)
+        rdevp = &p->rdev;
+    else if (rdev == p->replacement)
+        rdevp = &p->replacement;
+    else
+        return 0;
+
+    if (number >= conf->raid_disks &&
+        conf->reshape_progress == MaxSector)
+        clear_bit(In_sync, &rdev->flags);
+    if (test_bit(In_sync, &rdev->flags) ||
+        atomic_read(&rdev->nr_pending)) {
+        err = -EBUSY;
+        goto abort;
+    }
+    /* Only remove non-faulty devices if recovery
+     * isn't possible.
+     */
+    if (!test_bit(Faulty, &rdev->flags) &&
+        mddev->recovery_disabled != conf->recovery_disabled &&
+        !has_failed(conf) &&
+        (!p->replacement || p->replacement == rdev) &&
+        number < conf->raid_disks) {
+        err = -EBUSY;
+        goto abort;
+    }
+    *rdevp = NULL;
+    synchronize_rcu();
+    if (atomic_read(&rdev->nr_pending)) {
+        /* lost the race, try later */
+        err = -EBUSY;
+        *rdevp = rdev;
+    } else if (p->replacement) {
+        /* We must have just cleared 'rdev' */
+        p->rdev = p->replacement;
+        clear_bit(Replacement, &p->replacement->flags);
+        smp_mb(); /* Make sure other CPUs may see both as identical
+                   * but will never see neither - if they are careful
+                   */
+        p->replacement = NULL;
+        clear_bit(WantReplacement, &rdev->flags);
+    } else
+        /* We might have just removed the Replacement as faulty-
+         * clear the bit just in case
+         */
+        clear_bit(WantReplacement, &rdev->flags);
 abort:
 
     print_raid5_conf(conf);
@@ -5103,8 +5379,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
         disk = rdev->saved_raid_disk;
     else
         disk = first;
-    for ( ; disk <= last ; disk++)
-        if ((p=conf->disks + disk)->rdev == NULL) {
+    for ( ; disk <= last ; disk++) {
+        p = conf->disks + disk;
+        if (p->rdev == NULL) {
             clear_bit(In_sync, &rdev->flags);
             rdev->raid_disk = disk;
             err = 0;
@@ -5113,6 +5390,17 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
             rcu_assign_pointer(p->rdev, rdev);
             break;
         }
+        if (test_bit(WantReplacement, &p->rdev->flags) &&
+            p->replacement == NULL) {
+            clear_bit(In_sync, &rdev->flags);
+            set_bit(Replacement, &rdev->flags);
+            rdev->raid_disk = disk;
+            err = 0;
+            conf->fullsync = 1;
+            rcu_assign_pointer(p->replacement, rdev);
+            break;
+        }
+    }
     print_raid5_conf(conf);
     return err;
 }
@@ -5286,8 +5574,7 @@ static int raid5_start_reshape(struct mddev *mddev)
          * pre and post number of devices.
          */
         spin_lock_irqsave(&conf->device_lock, flags);
-        mddev->degraded += (conf->raid_disks - conf->previous_raid_disks)
-            - added_devices;
+        mddev->degraded = calc_degraded(conf);
         spin_unlock_irqrestore(&conf->device_lock, flags);
     }
     mddev->raid_disks = conf->raid_disks;
@@ -5356,17 +5643,15 @@ static void raid5_finish_reshape(struct mddev *mddev)
             revalidate_disk(mddev->gendisk);
         } else {
             int d;
-            mddev->degraded = conf->raid_disks;
-            for (d = 0; d < conf->raid_disks ; d++)
-                if (conf->disks[d].rdev &&
-                    test_bit(In_sync,
-                             &conf->disks[d].rdev->flags))
-                    mddev->degraded--;
+            spin_lock_irq(&conf->device_lock);
+            mddev->degraded = calc_degraded(conf);
+            spin_unlock_irq(&conf->device_lock);
             for (d = conf->raid_disks ;
                  d < conf->raid_disks - mddev->delta_disks;
                  d++) {
                 struct md_rdev *rdev = conf->disks[d].rdev;
-                if (rdev && raid5_remove_disk(mddev, d) == 0) {
+                if (rdev &&
+                    raid5_remove_disk(mddev, rdev) == 0) {
                     sysfs_unlink_rdev(mddev, rdev);
                     rdev->raid_disk = -1;
                 }

@@ -27,7 +27,7 @@
  * The possible state transitions are:
  *
  * Empty -> Want - on read or write to get old data for parity calc
- * Empty -> Dirty - on compute_parity to satisfy write/sync request.(RECONSTRUCT_WRITE)
+ * Empty -> Dirty - on compute_parity to satisfy write/sync request.
  * Empty -> Clean - on compute_block when computing a block for failed drive
 * Want -> Empty - on failed read
 * Want -> Clean - on successful completion of read request
@@ -226,8 +226,11 @@ struct stripe_head {
 #endif
     } ops;
     struct r5dev {
-        struct bio req;
-        struct bio_vec vec;
+        /* rreq and rvec are used for the replacement device when
+         * writing data to both devices.
+         */
+        struct bio req, rreq;
+        struct bio_vec vec, rvec;
         struct page *page;
         struct bio *toread, *read, *towrite, *written;
         sector_t sector; /* sector of this page */
@@ -239,7 +242,13 @@ struct stripe_head {
  * for handle_stripe.
  */
 struct stripe_head_state {
-    int syncing, expanding, expanded;
+    /* 'syncing' means that we need to read all devices, either
+     * to check/correct parity, or to reconstruct a missing device.
+     * 'replacing' means we are replacing one or more drives and
+     * the source is valid at this point so we don't need to
+     * read all devices, just the replacement targets.
+     */
+    int syncing, expanding, expanded, replacing;
     int locked, uptodate, to_read, to_write, failed, written;
     int to_fill, compute, req_compute, non_overwrite;
     int failed_num[2];
@@ -252,38 +261,41 @@ struct stripe_head_state {
     int handle_bad_blocks;
 };
 
-/* Flags */
-#define R5_UPTODATE 0   /* page contains current data */
-#define R5_LOCKED 1     /* IO has been submitted on "req" */
-#define R5_OVERWRITE 2  /* towrite covers whole page */
+/* Flags for struct r5dev.flags */
+enum r5dev_flags {
+    R5_UPTODATE,    /* page contains current data */
+    R5_LOCKED,      /* IO has been submitted on "req" */
+    R5_DOUBLE_LOCKED,/* Cannot clear R5_LOCKED until 2 writes complete */
+    R5_OVERWRITE,   /* towrite covers whole page */
 /* and some that are internal to handle_stripe */
-#define R5_Insync 3     /* rdev && rdev->in_sync at start */
-#define R5_Wantread 4   /* want to schedule a read */
-#define R5_Wantwrite 5
-#define R5_Overlap 7    /* There is a pending overlapping request on this block */
-#define R5_ReadError 8  /* seen a read error here recently */
-#define R5_ReWrite 9    /* have tried to over-write the readerror */
+    R5_Insync,      /* rdev && rdev->in_sync at start */
+    R5_Wantread,    /* want to schedule a read */
+    R5_Wantwrite,
+    R5_Overlap,     /* There is a pending overlapping request
+                     * on this block */
+    R5_ReadError,   /* seen a read error here recently */
+    R5_ReWrite,     /* have tried to over-write the readerror */
 
-#define R5_Expanded 10   /* This block now has post-expand data */
-#define R5_Wantcompute 11 /* compute_block in progress treat as
+    R5_Expanded,    /* This block now has post-expand data */
+    R5_Wantcompute, /* compute_block in progress treat as
                      * uptodate
                      */
-#define R5_Wantfill 12  /* dev->toread contains a bio that needs
+    R5_Wantfill,    /* dev->toread contains a bio that needs
                      * filling
                      */
-#define R5_Wantdrain 13  /* dev->towrite needs to be drained */
-#define R5_WantFUA 14    /* Write should be FUA */
-#define R5_WriteError 15 /* got a write error - need to record it */
-#define R5_MadeGood 16   /* A bad block has been fixed by writing to it*/
-/*
- * Write method
- */
-#define RECONSTRUCT_WRITE 1
-#define READ_MODIFY_WRITE 2
-/* not a write method, but a compute_parity mode */
-#define CHECK_PARITY 3
-/* Additional compute_parity mode -- updates the parity w/o LOCKING */
-#define UPDATE_PARITY 4
+    R5_Wantdrain,   /* dev->towrite needs to be drained */
+    R5_WantFUA,     /* Write should be FUA */
+    R5_WriteError,  /* got a write error - need to record it */
+    R5_MadeGood,    /* A bad block has been fixed by writing to it */
+    R5_ReadRepl,    /* Will/did read from replacement rather than orig */
+    R5_MadeGoodRepl,/* A bad block on the replacement device has been
+                     * fixed by writing to it */
+    R5_NeedReplace, /* This device has a replacement which is not
+                     * up-to-date at this stripe. */
+    R5_WantReplace, /* We need to update the replacement, we have read
+                     * data in, and now is a good time to write it out.
+                     */
+};
 
 /*
  * Stripe state
@@ -311,13 +323,14 @@ enum {
 /*
  * Operation request flags
  */
-#define STRIPE_OP_BIOFILL 0
-#define STRIPE_OP_COMPUTE_BLK 1
-#define STRIPE_OP_PREXOR 2
-#define STRIPE_OP_BIODRAIN 3
-#define STRIPE_OP_RECONSTRUCT 4
-#define STRIPE_OP_CHECK 5
+enum {
+    STRIPE_OP_BIOFILL,
+    STRIPE_OP_COMPUTE_BLK,
+    STRIPE_OP_PREXOR,
+    STRIPE_OP_BIODRAIN,
+    STRIPE_OP_RECONSTRUCT,
+    STRIPE_OP_CHECK,
+};
 /*
  * Plugging:
  *
@@ -344,13 +357,12 @@ enum {
 
 
 struct disk_info {
-    struct md_rdev *rdev;
+    struct md_rdev *rdev, *replacement;
 };
 
 struct r5conf {
     struct hlist_head *stripe_hashtbl;
     struct mddev *mddev;
-    struct disk_info *spare;
     int chunk_sectors;
     int level, algorithm;
     int max_degraded;

@@ -277,7 +277,7 @@ struct mdp_superblock_1 {
          */
 #define MD_FEATURE_RESHAPE_ACTIVE 4
 #define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */
-#define MD_FEATURE_ALL (1|2|4|8)
+#define MD_FEATURE_REPLACEMENT 16 /* This device is replacing an
+                                   * active device with same 'role'.
+                                   * 'recovery_offset' is also set.
+                                   */
+#define MD_FEATURE_ALL (1|2|4|8|16)
 
 #endif

@@ -132,7 +132,7 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
                       PROT_READ|PROT_WRITE, \
                       MAP_PRIVATE|MAP_ANONYMOUS,\
                       0, 0))
-# define free_pages(x, y) munmap((void *)(x), (y)*PAGE_SIZE)
+# define free_pages(x, y) munmap((void *)(x), PAGE_SIZE << (y))
 
 static inline void cpu_relax(void)
 {