mirror of https://gitee.com/openkylin/linux.git
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: md: Use revalidate_disk to effect changes in size of device. md: allow raid5_quiesce to work properly when reshape is happening. md/raid5: set reshape_position correctly when reshape starts. md: Handle growth of v1.x metadata correctly. md: avoid array overflow with bad v1.x metadata. md: when a level change reduces the number of devices, remove the excess. md: Push down data integrity code to personalities. md/raid6: release spare page at ->stop()
This commit is contained in:
commit
a33a052f19
|
@ -220,6 +220,7 @@ static int linear_run (mddev_t *mddev)
|
|||
mddev->queue->unplug_fn = linear_unplug;
|
||||
mddev->queue->backing_dev_info.congested_fn = linear_congested;
|
||||
mddev->queue->backing_dev_info.congested_data = mddev;
|
||||
md_integrity_register(mddev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -256,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
rcu_assign_pointer(mddev->private, newconf);
|
||||
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
|
||||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
revalidate_disk(mddev->gendisk);
|
||||
call_rcu(&oldconf->rcu, free_conf);
|
||||
return 0;
|
||||
}
|
||||
|
|
146
drivers/md/md.c
146
drivers/md/md.c
|
@ -1308,7 +1308,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
}
|
||||
if (mddev->level != LEVEL_MULTIPATH) {
|
||||
int role;
|
||||
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
|
||||
if (rdev->desc_nr < 0 ||
|
||||
rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
|
||||
role = 0xffff;
|
||||
rdev->desc_nr = -1;
|
||||
} else
|
||||
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
|
||||
switch(role) {
|
||||
case 0xffff: /* spare */
|
||||
break;
|
||||
|
@ -1394,8 +1399,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
if (rdev2->desc_nr+1 > max_dev)
|
||||
max_dev = rdev2->desc_nr+1;
|
||||
|
||||
if (max_dev > le32_to_cpu(sb->max_dev))
|
||||
if (max_dev > le32_to_cpu(sb->max_dev)) {
|
||||
int bmask;
|
||||
sb->max_dev = cpu_to_le32(max_dev);
|
||||
rdev->sb_size = max_dev * 2 + 256;
|
||||
bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
|
||||
if (rdev->sb_size & bmask)
|
||||
rdev->sb_size = (rdev->sb_size | bmask) + 1;
|
||||
}
|
||||
for (i=0; i<max_dev;i++)
|
||||
sb->dev_roles[i] = cpu_to_le16(0xfffe);
|
||||
|
||||
|
@ -1487,37 +1498,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
|
|||
|
||||
static LIST_HEAD(pending_raid_disks);
|
||||
|
||||
static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev)
|
||||
/*
|
||||
* Try to register data integrity profile for an mddev
|
||||
*
|
||||
* This is called when an array is started and after a disk has been kicked
|
||||
* from the array. It only succeeds if all working and active component devices
|
||||
* are integrity capable with matching profiles.
|
||||
*/
|
||||
int md_integrity_register(mddev_t *mddev)
|
||||
{
|
||||
struct mdk_personality *pers = mddev->pers;
|
||||
struct gendisk *disk = mddev->gendisk;
|
||||
struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
|
||||
struct blk_integrity *bi_mddev = blk_get_integrity(disk);
|
||||
mdk_rdev_t *rdev, *reference = NULL;
|
||||
|
||||
/* Data integrity passthrough not supported on RAID 4, 5 and 6 */
|
||||
if (pers && pers->level >= 4 && pers->level <= 6)
|
||||
return;
|
||||
|
||||
/* If rdev is integrity capable, register profile for mddev */
|
||||
if (!bi_mddev && bi_rdev) {
|
||||
if (blk_integrity_register(disk, bi_rdev))
|
||||
printk(KERN_ERR "%s: %s Could not register integrity!\n",
|
||||
__func__, disk->disk_name);
|
||||
else
|
||||
printk(KERN_NOTICE "Enabling data integrity on %s\n",
|
||||
disk->disk_name);
|
||||
return;
|
||||
if (list_empty(&mddev->disks))
|
||||
return 0; /* nothing to do */
|
||||
if (blk_get_integrity(mddev->gendisk))
|
||||
return 0; /* already registered */
|
||||
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
||||
/* skip spares and non-functional disks */
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
if (rdev->raid_disk < 0)
|
||||
continue;
|
||||
/*
|
||||
* If at least one rdev is not integrity capable, we can not
|
||||
* enable data integrity for the md device.
|
||||
*/
|
||||
if (!bdev_get_integrity(rdev->bdev))
|
||||
return -EINVAL;
|
||||
if (!reference) {
|
||||
/* Use the first rdev as the reference */
|
||||
reference = rdev;
|
||||
continue;
|
||||
}
|
||||
/* does this rdev's profile match the reference profile? */
|
||||
if (blk_integrity_compare(reference->bdev->bd_disk,
|
||||
rdev->bdev->bd_disk) < 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Check that mddev and rdev have matching profiles */
|
||||
if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) {
|
||||
printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__,
|
||||
disk->disk_name, rdev->bdev->bd_disk->disk_name);
|
||||
printk(KERN_NOTICE "Disabling data integrity on %s\n",
|
||||
disk->disk_name);
|
||||
blk_integrity_unregister(disk);
|
||||
/*
|
||||
* All component devices are integrity capable and have matching
|
||||
* profiles, register the common profile for the md device.
|
||||
*/
|
||||
if (blk_integrity_register(mddev->gendisk,
|
||||
bdev_get_integrity(reference->bdev)) != 0) {
|
||||
printk(KERN_ERR "md: failed to register integrity for %s\n",
|
||||
mdname(mddev));
|
||||
return -EINVAL;
|
||||
}
|
||||
printk(KERN_NOTICE "md: data integrity on %s enabled\n",
|
||||
mdname(mddev));
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(md_integrity_register);
|
||||
|
||||
/*
 * Disable data integrity if a non-capable/non-matching disk is being added.
 *
 * @rdev:  component device being added to the array
 * @mddev: array the device is being added to
 *
 * Does nothing when the array's gendisk has no integrity profile, or when
 * rdev is a spare (raid_disk < 0).  Otherwise the array's profile is kept
 * only if rdev has a profile of its own and blk_integrity_compare() of the
 * array's gendisk against rdev's disk returns >= 0; in every other case the
 * array's integrity profile is unregistered.
 */
|
||||
void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
|
||||
{
|
||||
struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
|
||||
struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
|
||||
|
||||
if (!bi_mddev) /* no profile registered on the array: nothing to do */
|
||||
return;
|
||||
if (rdev->raid_disk < 0) /* skip spares */
|
||||
return;
|
||||
/* rdev is integrity capable and its profile compares as matching: keep it */
if (bi_rdev && blk_integrity_compare(mddev->gendisk,
|
||||
rdev->bdev->bd_disk) >= 0)
|
||||
return;
|
||||
/* rdev has no profile, or the profiles differ: drop the array's profile */
printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
|
||||
blk_integrity_unregister(mddev->gendisk);
|
||||
}
|
||||
EXPORT_SYMBOL(md_integrity_add_rdev);
|
||||
|
||||
static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
|
||||
{
|
||||
|
@ -1591,7 +1641,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
|
|||
/* May as well allow recovery to be retried once */
|
||||
mddev->recovery_disabled = 0;
|
||||
|
||||
md_integrity_check(rdev, mddev);
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
|
@ -2657,6 +2706,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
|
|||
ssize_t rv = len;
|
||||
struct mdk_personality *pers;
|
||||
void *priv;
|
||||
mdk_rdev_t *rdev;
|
||||
|
||||
if (mddev->pers == NULL) {
|
||||
if (len == 0)
|
||||
|
@ -2736,6 +2786,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
|
|||
mddev_suspend(mddev);
|
||||
mddev->pers->stop(mddev);
|
||||
module_put(mddev->pers->owner);
|
||||
/* Invalidate devices that are now superfluous */
|
||||
list_for_each_entry(rdev, &mddev->disks, same_set)
|
||||
if (rdev->raid_disk >= mddev->raid_disks) {
|
||||
rdev->raid_disk = -1;
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
}
|
||||
mddev->pers = pers;
|
||||
mddev->private = priv;
|
||||
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
|
@ -3685,17 +3741,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
|
|||
|
||||
mddev->array_sectors = sectors;
|
||||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
if (mddev->pers) {
|
||||
struct block_device *bdev = bdget_disk(mddev->gendisk, 0);
|
||||
|
||||
if (bdev) {
|
||||
mutex_lock(&bdev->bd_inode->i_mutex);
|
||||
i_size_write(bdev->bd_inode,
|
||||
(loff_t)mddev->array_sectors << 9);
|
||||
mutex_unlock(&bdev->bd_inode->i_mutex);
|
||||
bdput(bdev);
|
||||
}
|
||||
}
|
||||
if (mddev->pers)
|
||||
revalidate_disk(mddev->gendisk);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
@ -4048,10 +4095,6 @@ static int do_md_run(mddev_t * mddev)
|
|||
}
|
||||
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
|
||||
if (pers->level >= 4 && pers->level <= 6)
|
||||
/* Cannot support integrity (yet) */
|
||||
blk_integrity_unregister(mddev->gendisk);
|
||||
|
||||
if (mddev->reshape_position != MaxSector &&
|
||||
pers->start_reshape == NULL) {
|
||||
/* This personality cannot handle reshaping... */
|
||||
|
@ -4189,6 +4232,7 @@ static int do_md_run(mddev_t * mddev)
|
|||
md_wakeup_thread(mddev->thread);
|
||||
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
|
||||
|
||||
revalidate_disk(mddev->gendisk);
|
||||
mddev->changed = 1;
|
||||
md_new_event(mddev);
|
||||
sysfs_notify_dirent(mddev->sysfs_state);
|
||||
|
@ -5087,18 +5131,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
|
|||
return -ENOSPC;
|
||||
}
|
||||
rv = mddev->pers->resize(mddev, num_sectors);
|
||||
if (!rv) {
|
||||
struct block_device *bdev;
|
||||
|
||||
bdev = bdget_disk(mddev->gendisk, 0);
|
||||
if (bdev) {
|
||||
mutex_lock(&bdev->bd_inode->i_mutex);
|
||||
i_size_write(bdev->bd_inode,
|
||||
(loff_t)mddev->array_sectors << 9);
|
||||
mutex_unlock(&bdev->bd_inode->i_mutex);
|
||||
bdput(bdev);
|
||||
}
|
||||
}
|
||||
if (!rv)
|
||||
revalidate_disk(mddev->gendisk);
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
|
|
@ -431,5 +431,7 @@ extern int md_allow_write(mddev_t *mddev);
|
|||
extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
|
||||
extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
|
||||
extern int md_check_no_bitmap(mddev_t *mddev);
|
||||
extern int md_integrity_register(mddev_t *mddev);
|
||||
void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
|
||||
|
||||
#endif /* _MD_MD_H */
|
||||
|
|
|
@ -313,6 +313,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
set_bit(In_sync, &rdev->flags);
|
||||
rcu_assign_pointer(p->rdev, rdev);
|
||||
err = 0;
|
||||
md_integrity_add_rdev(rdev, mddev);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -345,7 +346,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
|
|||
/* lost the race, try later */
|
||||
err = -EBUSY;
|
||||
p->rdev = rdev;
|
||||
goto abort;
|
||||
}
|
||||
md_integrity_register(mddev);
|
||||
}
|
||||
abort:
|
||||
|
||||
|
@ -519,7 +522,7 @@ static int multipath_run (mddev_t *mddev)
|
|||
mddev->queue->unplug_fn = multipath_unplug;
|
||||
mddev->queue->backing_dev_info.congested_fn = multipath_congested;
|
||||
mddev->queue->backing_dev_info.congested_data = mddev;
|
||||
|
||||
md_integrity_register(mddev);
|
||||
return 0;
|
||||
|
||||
out_free_conf:
|
||||
|
|
|
@ -351,6 +351,7 @@ static int raid0_run(mddev_t *mddev)
|
|||
|
||||
blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
|
||||
dump_zones(mddev);
|
||||
md_integrity_register(mddev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -1144,7 +1144,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
rcu_assign_pointer(p->rdev, rdev);
|
||||
break;
|
||||
}
|
||||
|
||||
md_integrity_add_rdev(rdev, mddev);
|
||||
print_conf(conf);
|
||||
return err;
|
||||
}
|
||||
|
@ -1178,7 +1178,9 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
|
|||
/* lost the race, try later */
|
||||
err = -EBUSY;
|
||||
p->rdev = rdev;
|
||||
goto abort;
|
||||
}
|
||||
md_integrity_register(mddev);
|
||||
}
|
||||
abort:
|
||||
|
||||
|
@ -2067,7 +2069,7 @@ static int run(mddev_t *mddev)
|
|||
mddev->queue->unplug_fn = raid1_unplug;
|
||||
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
|
||||
mddev->queue->backing_dev_info.congested_data = mddev;
|
||||
|
||||
md_integrity_register(mddev);
|
||||
return 0;
|
||||
|
||||
out_no_mem:
|
||||
|
@ -2132,6 +2134,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
|
|||
return -EINVAL;
|
||||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
mddev->changed = 1;
|
||||
revalidate_disk(mddev->gendisk);
|
||||
if (sectors > mddev->dev_sectors &&
|
||||
mddev->recovery_cp == MaxSector) {
|
||||
mddev->recovery_cp = mddev->dev_sectors;
|
||||
|
|
|
@ -1170,6 +1170,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
break;
|
||||
}
|
||||
|
||||
md_integrity_add_rdev(rdev, mddev);
|
||||
print_conf(conf);
|
||||
return err;
|
||||
}
|
||||
|
@ -1203,7 +1204,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
|
|||
/* lost the race, try later */
|
||||
err = -EBUSY;
|
||||
p->rdev = rdev;
|
||||
goto abort;
|
||||
}
|
||||
md_integrity_register(mddev);
|
||||
}
|
||||
abort:
|
||||
|
||||
|
@ -2225,6 +2228,7 @@ static int run(mddev_t *mddev)
|
|||
|
||||
if (conf->near_copies < mddev->raid_disks)
|
||||
blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
|
||||
md_integrity_register(mddev);
|
||||
return 0;
|
||||
|
||||
out_free_conf:
|
||||
|
|
|
@ -3999,6 +3999,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Allow raid5_quiesce to complete */
|
||||
wait_event(conf->wait_for_overlap, conf->quiesce != 2);
|
||||
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
|
||||
return reshape_request(mddev, sector_nr, skipped);
|
||||
|
||||
|
@ -4316,6 +4319,15 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
|
|||
return sectors * (raid_disks - conf->max_degraded);
|
||||
}
|
||||
|
||||
/*
 * Release everything hanging off a raid5 conf, then the conf itself.
 *
 * Frees the stripes (shrink_stripes), drops the spare page, and kfree()s the
 * disks array, the stripe hash table and finally conf.  conf must not be
 * dereferenced after this returns.
 */
static void free_conf(raid5_conf_t *conf)
|
||||
{
|
||||
shrink_stripes(conf);
|
||||
safe_put_page(conf->spare_page);
|
||||
kfree(conf->disks);
|
||||
kfree(conf->stripe_hashtbl);
|
||||
/* conf goes last: the calls above still read its members */
kfree(conf);
|
||||
}
|
||||
|
||||
static raid5_conf_t *setup_conf(mddev_t *mddev)
|
||||
{
|
||||
raid5_conf_t *conf;
|
||||
|
@ -4447,11 +4459,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
|
|||
|
||||
abort:
|
||||
if (conf) {
|
||||
shrink_stripes(conf);
|
||||
safe_put_page(conf->spare_page);
|
||||
kfree(conf->disks);
|
||||
kfree(conf->stripe_hashtbl);
|
||||
kfree(conf);
|
||||
free_conf(conf);
|
||||
return ERR_PTR(-EIO);
|
||||
} else
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
@ -4629,12 +4637,8 @@ static int run(mddev_t *mddev)
|
|||
md_unregister_thread(mddev->thread);
|
||||
mddev->thread = NULL;
|
||||
if (conf) {
|
||||
shrink_stripes(conf);
|
||||
print_raid5_conf(conf);
|
||||
safe_put_page(conf->spare_page);
|
||||
kfree(conf->disks);
|
||||
kfree(conf->stripe_hashtbl);
|
||||
kfree(conf);
|
||||
free_conf(conf);
|
||||
}
|
||||
mddev->private = NULL;
|
||||
printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev));
|
||||
|
@ -4649,13 +4653,10 @@ static int stop(mddev_t *mddev)
|
|||
|
||||
md_unregister_thread(mddev->thread);
|
||||
mddev->thread = NULL;
|
||||
shrink_stripes(conf);
|
||||
kfree(conf->stripe_hashtbl);
|
||||
mddev->queue->backing_dev_info.congested_fn = NULL;
|
||||
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
|
||||
sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
|
||||
kfree(conf->disks);
|
||||
kfree(conf);
|
||||
free_conf(conf);
|
||||
mddev->private = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
@ -4857,6 +4858,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
|
|||
return -EINVAL;
|
||||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
mddev->changed = 1;
|
||||
revalidate_disk(mddev->gendisk);
|
||||
if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
|
||||
mddev->recovery_cp = mddev->dev_sectors;
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
|
@ -5002,7 +5004,7 @@ static int raid5_start_reshape(mddev_t *mddev)
|
|||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
}
|
||||
mddev->raid_disks = conf->raid_disks;
|
||||
mddev->reshape_position = 0;
|
||||
mddev->reshape_position = conf->reshape_progress;
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
|
||||
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
|
@ -5057,7 +5059,6 @@ static void end_reshape(raid5_conf_t *conf)
|
|||
*/
|
||||
static void raid5_finish_reshape(mddev_t *mddev)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
raid5_conf_t *conf = mddev->private;
|
||||
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
||||
|
@ -5066,15 +5067,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
|
|||
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
|
||||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
mddev->changed = 1;
|
||||
|
||||
bdev = bdget_disk(mddev->gendisk, 0);
|
||||
if (bdev) {
|
||||
mutex_lock(&bdev->bd_inode->i_mutex);
|
||||
i_size_write(bdev->bd_inode,
|
||||
(loff_t)mddev->array_sectors << 9);
|
||||
mutex_unlock(&bdev->bd_inode->i_mutex);
|
||||
bdput(bdev);
|
||||
}
|
||||
revalidate_disk(mddev->gendisk);
|
||||
} else {
|
||||
int d;
|
||||
mddev->degraded = conf->raid_disks;
|
||||
|
@ -5106,12 +5099,18 @@ static void raid5_quiesce(mddev_t *mddev, int state)
|
|||
|
||||
case 1: /* stop all writes */
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
conf->quiesce = 1;
|
||||
/* '2' tells resync/reshape to pause so that all
|
||||
* active stripes can drain
|
||||
*/
|
||||
conf->quiesce = 2;
|
||||
wait_event_lock_irq(conf->wait_for_stripe,
|
||||
atomic_read(&conf->active_stripes) == 0 &&
|
||||
atomic_read(&conf->active_aligned_reads) == 0,
|
||||
conf->device_lock, /* nothing */);
|
||||
conf->quiesce = 1;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
/* allow reshape to continue */
|
||||
wake_up(&conf->wait_for_overlap);
|
||||
break;
|
||||
|
||||
case 0: /* re-enable writes */
|
||||
|
|
Loading…
Reference in New Issue