Merge tag 'md/4.10-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull md fixes from Shaohua Li:
 "Basically one fix for raid5 cache which is merged in this cycle,
  others are trivial fixes"

* tag 'md/4.10-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/raid5: Use correct IS_ERR() variation on pointer check
  md: cleanup mddev flag clear for takeover
  md/r5cache: fix spelling mistake on "recoverying"
  md/r5cache: assign conf->log before r5l_load_log()
  md/r5cache: simplify handling of sh->log_start in recovery
  md/raid5-cache: removes unnecessary write-through mode judgments
  md/raid10: Refactor raid10_make_request
  md/raid1: Refactor raid1_make_request
commit 607ae5f269
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -212,6 +212,7 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 				int is_new);
 struct md_cluster_info;
 
+/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
 enum mddev_flags {
 	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
 	MD_CLOSING,		/* If set, we are closing the array, do not open
@@ -702,4 +703,11 @@ static inline int mddev_is_clustered(struct mddev *mddev)
 {
 	return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
 }
+
+/* clear unsupported mddev_flags */
+static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
+	unsigned long unsupported_flags)
+{
+	mddev->flags &= ~unsupported_flags;
+}
 #endif /* _MD_MD_H */
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -26,6 +26,11 @@
 #include "raid0.h"
 #include "raid5.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS		\
+	((1L << MD_HAS_JOURNAL) |	\
+	 (1L << MD_JOURNAL_CLEAN) |	\
+	 (1L << MD_FAILFAST_SUPPORTED))
+
 static int raid0_congested(struct mddev *mddev, int bits)
 {
 	struct r0conf *conf = mddev->private;
@@ -539,8 +544,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
 	mddev->delta_disks = -1;
 	/* make sure it will be not marked as dirty */
 	mddev->recovery_cp = MaxSector;
-	clear_bit(MD_HAS_JOURNAL, &mddev->flags);
-	clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
+	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
 	create_strip_zones(mddev, &priv_conf);
 
@@ -583,7 +587,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
 	mddev->degraded = 0;
 	/* make sure it will be not marked as dirty */
 	mddev->recovery_cp = MaxSector;
-	clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
 	create_strip_zones(mddev, &priv_conf);
 	return priv_conf;
@@ -626,7 +630,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
 	mddev->raid_disks = 1;
 	/* make sure it will be not marked as dirty */
 	mddev->recovery_cp = MaxSector;
-	clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
 	create_strip_zones(mddev, &priv_conf);
 	return priv_conf;
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -42,6 +42,10 @@
 #include "raid1.h"
 #include "bitmap.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS		\
+	((1L << MD_HAS_JOURNAL) |	\
+	 (1L << MD_JOURNAL_CLEAN))
+
 /*
  * Number of guaranteed r1bios in case of extreme VM load:
  */
@@ -1066,17 +1070,107 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void raid1_make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_read_request(struct mddev *mddev, struct bio *bio,
+				 struct r1bio *r1_bio)
 {
 	struct r1conf *conf = mddev->private;
 	struct raid1_info *mirror;
-	struct r1bio *r1_bio;
 	struct bio *read_bio;
+	struct bitmap *bitmap = mddev->bitmap;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	int sectors_handled;
+	int max_sectors;
+	int rdisk;
+
+	wait_barrier(conf, bio);
+
+read_again:
+	rdisk = read_balance(conf, r1_bio, &max_sectors);
+
+	if (rdisk < 0) {
+		/* couldn't find anywhere to read from */
+		raid_end_bio_io(r1_bio);
+		return;
+	}
+	mirror = conf->mirrors + rdisk;
+
+	if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+	    bitmap) {
+		/*
+		 * Reading from a write-mostly device must take care not to
+		 * over-take any writes that are 'behind'
+		 */
+		raid1_log(mddev, "wait behind writes");
+		wait_event(bitmap->behind_wait,
+			   atomic_read(&bitmap->behind_writes) == 0);
+	}
+	r1_bio->read_disk = rdisk;
+	r1_bio->start_next_window = 0;
+
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
+
+	r1_bio->bios[rdisk] = read_bio;
+
+	read_bio->bi_iter.bi_sector = r1_bio->sector +
+		mirror->rdev->data_offset;
+	read_bio->bi_bdev = mirror->rdev->bdev;
+	read_bio->bi_end_io = raid1_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	if (test_bit(FailFast, &mirror->rdev->flags) &&
+	    test_bit(R1BIO_FailFast, &r1_bio->state))
+		read_bio->bi_opf |= MD_FAILFAST;
+	read_bio->bi_private = r1_bio;
+
+	if (mddev->gendisk)
+		trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+				      read_bio, disk_devt(mddev->gendisk),
+				      r1_bio->sector);
+
+	if (max_sectors < r1_bio->sectors) {
+		/*
+		 * could not read all from this device, so we will need another
+		 * r1_bio.
+		 */
+		sectors_handled = (r1_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r1_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+
+		/*
+		 * Cannot call generic_make_request directly as that will be
+		 * queued in __make_request and subsequent mempool_alloc might
+		 * block waiting for it.  So hand bio over to raid1d.
+		 */
+		reschedule_retry(r1_bio);
+
+		r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+		r1_bio->master_bio = bio;
+		r1_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r1_bio->state = 0;
+		r1_bio->mddev = mddev;
+		r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+}
+
+static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+				struct r1bio *r1_bio)
+{
+	struct r1conf *conf = mddev->private;
 	int i, disks;
-	struct bitmap *bitmap;
+	struct bitmap *bitmap = mddev->bitmap;
 	unsigned long flags;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_opf &
 						(REQ_PREFLUSH | REQ_FUA));
@@ -1096,15 +1190,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
-	if (bio_data_dir(bio) == WRITE &&
-	    ((bio_end_sector(bio) > mddev->suspend_lo &&
+	if ((bio_end_sector(bio) > mddev->suspend_lo &&
 	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
 	    (mddev_is_clustered(mddev) &&
 	     md_cluster_ops->area_resyncing(mddev, WRITE,
-	     bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
-		/* As the suspend_* range is controlled by
-		 * userspace, we want an interruptible
-		 * wait.
+		    bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
+		/*
+		 * As the suspend_* range is controlled by userspace, we want
+		 * an interruptible wait.
 		 */
 		DEFINE_WAIT(w);
 		for (;;) {
@@ -1115,128 +1209,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 				bio->bi_iter.bi_sector >= mddev->suspend_hi ||
 				(mddev_is_clustered(mddev) &&
 				 !md_cluster_ops->area_resyncing(mddev, WRITE,
-					 bio->bi_iter.bi_sector, bio_end_sector(bio))))
+					bio->bi_iter.bi_sector,
+					bio_end_sector(bio))))
 				break;
 			schedule();
 		}
 		finish_wait(&conf->wait_barrier, &w);
 	}
 
 	start_next_window = wait_barrier(conf, bio);
 
-	bitmap = mddev->bitmap;
-
-	/*
-	 * make_request() can abort the operation when read-ahead is being
-	 * used and no empty request is available.
-	 *
-	 */
-	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-	r1_bio->master_bio = bio;
-	r1_bio->sectors = bio_sectors(bio);
-	r1_bio->state = 0;
-	r1_bio->mddev = mddev;
-	r1_bio->sector = bio->bi_iter.bi_sector;
-
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r1_bio and no locking
-	 * will be needed when requests complete.  If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
-
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
-		 */
-		int rdisk;
-
-read_again:
-		rdisk = read_balance(conf, r1_bio, &max_sectors);
-
-		if (rdisk < 0) {
-			/* couldn't find anywhere to read from */
-			raid_end_bio_io(r1_bio);
-			return;
-		}
-		mirror = conf->mirrors + rdisk;
-
-		if (test_bit(WriteMostly, &mirror->rdev->flags) &&
-		    bitmap) {
-			/* Reading from a write-mostly device must
-			 * take care not to over-take any writes
-			 * that are 'behind'
-			 */
-			raid1_log(mddev, "wait behind writes");
-			wait_event(bitmap->behind_wait,
-				   atomic_read(&bitmap->behind_writes) == 0);
-		}
-		r1_bio->read_disk = rdisk;
-		r1_bio->start_next_window = 0;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r1_bio->bios[rdisk] = read_bio;
-
-		read_bio->bi_iter.bi_sector = r1_bio->sector +
-			mirror->rdev->data_offset;
-		read_bio->bi_bdev = mirror->rdev->bdev;
-		read_bio->bi_end_io = raid1_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		if (test_bit(FailFast, &mirror->rdev->flags) &&
-		    test_bit(R1BIO_FailFast, &r1_bio->state))
-			read_bio->bi_opf |= MD_FAILFAST;
-		read_bio->bi_private = r1_bio;
-
-		if (mddev->gendisk)
-			trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-					      read_bio, disk_devt(mddev->gendisk),
-					      r1_bio->sector);
-
-		if (max_sectors < r1_bio->sectors) {
-			/* could not read all from this device, so we will
-			 * need another r1_bio.
-			 */
-
-			sectors_handled = (r1_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r1_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __make_request
-			 * and subsequent mempool_alloc might block waiting
-			 * for it.  So hand bio over to raid1d.
-			 */
-			reschedule_retry(r1_bio);
-
-			r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-			r1_bio->master_bio = bio;
-			r1_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r1_bio->state = 0;
-			r1_bio->mddev = mddev;
-			r1_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
-
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		raid1_log(mddev, "wait queued");
@@ -1280,8 +1261,7 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, r1_bio->sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* mustn't write here until the bad block is
@@ -1370,7 +1350,8 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 			continue;
 
 		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
+		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
+			 max_sectors);
 
 		if (first_clone) {
 			/* do behind I/O ?
@@ -1464,6 +1445,40 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 		wake_up(&conf->wait_barrier);
 }
 
+static void raid1_make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r1conf *conf = mddev->private;
+	struct r1bio *r1_bio;
+
+	/*
+	 * make_request() can abort the operation when read-ahead is being
+	 * used and no empty request is available.
+	 *
+	 */
+	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+	r1_bio->master_bio = bio;
+	r1_bio->sectors = bio_sectors(bio);
+	r1_bio->state = 0;
+	r1_bio->mddev = mddev;
+	r1_bio->sector = bio->bi_iter.bi_sector;
+
+	/*
+	 * We might need to issue multiple reads to different devices if there
+	 * are bad blocks around, so we keep track of the number of reads in
+	 * bio->bi_phys_segments.  If this is 0, there is only one r1_bio and
+	 * no locking will be needed when requests complete.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ)
+		raid1_read_request(mddev, bio, r1_bio);
+	else
+		raid1_write_request(mddev, bio, r1_bio);
+}
+
 static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 {
 	struct r1conf *conf = mddev->private;
@@ -3246,8 +3261,8 @@ static void *raid1_takeover(struct mddev *mddev)
 	if (!IS_ERR(conf)) {
 		/* Array must appear to be quiesced */
 		conf->array_frozen = 1;
-		clear_bit(MD_HAS_JOURNAL, &mddev->flags);
-		clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
+		mddev_clear_unsupported_flags(mddev,
+			UNSUPPORTED_MDDEV_FLAGS);
 	}
 	return conf;
 }
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1087,23 +1087,122 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+				struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
-	struct r10bio *r10_bio;
 	struct bio *read_bio;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	int sectors_handled;
+	int max_sectors;
+	sector_t sectors;
+	struct md_rdev *rdev;
+	int slot;
+
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
+	wait_barrier(conf);
+
+	sectors = bio_sectors(bio);
+	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+		/*
+		 * IO spans the reshape position.  Need to wait for reshape to
+		 * pass
+		 */
+		raid10_log(conf->mddev, "wait reshape");
+		allow_barrier(conf);
+		wait_event(conf->wait_barrier,
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
+		wait_barrier(conf);
+	}
+
+read_again:
+	rdev = read_balance(conf, r10_bio, &max_sectors);
+	if (!rdev) {
+		raid_end_bio_io(r10_bio);
+		return;
+	}
+	slot = r10_bio->read_slot;
+
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
+
+	r10_bio->devs[slot].bio = read_bio;
+	r10_bio->devs[slot].rdev = rdev;
+
+	read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+		choose_data_offset(r10_bio, rdev);
+	read_bio->bi_bdev = rdev->bdev;
+	read_bio->bi_end_io = raid10_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	if (test_bit(FailFast, &rdev->flags) &&
+	    test_bit(R10BIO_FailFast, &r10_bio->state))
+		read_bio->bi_opf |= MD_FAILFAST;
+	read_bio->bi_private = r10_bio;
+
+	if (mddev->gendisk)
+		trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+				      read_bio, disk_devt(mddev->gendisk),
+				      r10_bio->sector);
+	if (max_sectors < r10_bio->sectors) {
+		/*
+		 * Could not read all from this device, so we will need another
+		 * r10_bio.
+		 */
+		sectors_handled = (r10_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r10_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+		/*
+		 * Cannot call generic_make_request directly as that will be
+		 * queued in __generic_make_request and subsequent
+		 * mempool_alloc might block waiting for it.  so hand bio over
+		 * to raid10d.
+		 */
+		reschedule_retry(r10_bio);
+
+		r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+		r10_bio->master_bio = bio;
+		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r10_bio->state = 0;
+		r10_bio->mddev = mddev;
+		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+	return;
+}
+
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+				 struct r10bio *r10_bio)
+{
+	struct r10conf *conf = mddev->private;
 	int i;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
 	unsigned long flags;
 	struct md_rdev *blocked_rdev;
 	struct blk_plug_cb *cb;
 	struct raid10_plug_cb *plug = NULL;
+	sector_t sectors;
 	int sectors_handled;
 	int max_sectors;
-	int sectors;
 
 	md_write_start(mddev, bio);
 
@@ -1118,8 +1217,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    bio->bi_iter.bi_sector < conf->reshape_progress &&
 	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-		/* IO spans the reshape position.  Need to wait for
-		 * reshape to pass
+		/*
+		 * IO spans the reshape position.  Need to wait for reshape to
+		 * pass
 		 */
 		raid10_log(conf->mddev, "wait reshape");
 		allow_barrier(conf);
@@ -1129,8 +1229,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 			   sectors);
 		wait_barrier(conf);
 	}
+
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio_data_dir(bio) == WRITE &&
 	    (mddev->reshape_backwards
 	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
 		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
@@ -1148,98 +1248,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 		conf->reshape_safe = mddev->reshape_position;
 	}
 
-	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-	r10_bio->master_bio = bio;
-	r10_bio->sectors = sectors;
-
-	r10_bio->mddev = mddev;
-	r10_bio->sector = bio->bi_iter.bi_sector;
-	r10_bio->state = 0;
-
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r10_bio and no locking
-	 * will be needed when the request completes.  If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
-
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
-		 */
-		struct md_rdev *rdev;
-		int slot;
-
-read_again:
-		rdev = read_balance(conf, r10_bio, &max_sectors);
-		if (!rdev) {
-			raid_end_bio_io(r10_bio);
-			return;
-		}
-		slot = r10_bio->read_slot;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r10_bio->devs[slot].bio = read_bio;
-		r10_bio->devs[slot].rdev = rdev;
-
-		read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
-			choose_data_offset(r10_bio, rdev);
-		read_bio->bi_bdev = rdev->bdev;
-		read_bio->bi_end_io = raid10_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		if (test_bit(FailFast, &rdev->flags) &&
-		    test_bit(R10BIO_FailFast, &r10_bio->state))
-			read_bio->bi_opf |= MD_FAILFAST;
-		read_bio->bi_private = r10_bio;
-
-		if (mddev->gendisk)
-			trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-					      read_bio, disk_devt(mddev->gendisk),
-					      r10_bio->sector);
-		if (max_sectors < r10_bio->sectors) {
-			/* Could not read all from this device, so we will
-			 * need another r10_bio.
-			 */
-			sectors_handled = (r10_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r10_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __generic_make_request
-			 * and subsequent mempool_alloc might block
-			 * waiting for it.  so hand bio over to raid10d.
-			 */
-			reschedule_retry(r10_bio);
-
-			r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-			r10_bio->master_bio = bio;
-			r10_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r10_bio->state = 0;
-			r10_bio->mddev = mddev;
-			r10_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
-
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		raid10_log(mddev, "wait queued");
@@ -1300,8 +1308,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, dev_sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, dev_sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* Mustn't write here until the bad block
@@ -1405,8 +1412,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 			r10_bio->devs[i].bio = mbio;
 
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
-					   choose_data_offset(r10_bio,
-							      rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1457,8 +1463,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 			r10_bio->devs[i].repl_bio = mbio;
 
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
-					   choose_data_offset(
-						r10_bio, rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1503,6 +1508,36 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 		one_write_done(r10_bio);
 }
 
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	struct r10bio *r10_bio;
+
+	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+	r10_bio->master_bio = bio;
+	r10_bio->sectors = bio_sectors(bio);
+
+	r10_bio->mddev = mddev;
+	r10_bio->sector = bio->bi_iter.bi_sector;
+	r10_bio->state = 0;
+
+	/*
+	 * We might need to issue multiple reads to different devices if there
+	 * are bad blocks around, so we keep track of the number of reads in
+	 * bio->bi_phys_segments.  If this is 0, there is only one r10_bio and
+	 * no locking will be needed when the request completes.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ)
+		raid10_read_request(mddev, bio, r10_bio);
+	else
+		raid10_write_request(mddev, bio, r10_bio);
+}
+
 static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1682,8 +1682,7 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
 
 static struct stripe_head *
 r5c_recovery_alloc_stripe(struct r5conf *conf,
-			  sector_t stripe_sect,
-			  sector_t log_start)
+			  sector_t stripe_sect)
 {
 	struct stripe_head *sh;
 
@@ -1692,7 +1691,6 @@ r5c_recovery_alloc_stripe(struct r5conf *conf,
 		return NULL; /* no more stripe available */
 
 	r5l_recovery_reset_stripe(sh);
-	sh->log_start = log_start;
 
 	return sh;
 }
@@ -1862,7 +1860,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 						  stripe_sect);
 
 		if (!sh) {
-			sh = r5c_recovery_alloc_stripe(conf, stripe_sect, ctx->pos);
+			sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
 			/*
 			 * cannot get stripe from raid5_get_active_stripe
 			 * try replay some stripes
@@ -1871,7 +1869,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 				r5c_recovery_replay_stripes(
 					cached_stripe_list, ctx);
 				sh = r5c_recovery_alloc_stripe(
-					conf, stripe_sect, ctx->pos);
+					conf, stripe_sect);
 			}
 			if (!sh) {
 				pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
@@ -1879,8 +1877,8 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 					 conf->min_nr_stripes * 2);
 				raid5_set_cache_size(mddev,
 						     conf->min_nr_stripes * 2);
-				sh = r5c_recovery_alloc_stripe(
-					conf, stripe_sect, ctx->pos);
+				sh = r5c_recovery_alloc_stripe(conf,
+							       stripe_sect);
 			}
 			if (!sh) {
 				pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
@@ -1894,7 +1892,6 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 		if (!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
 		    test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags)) {
 			r5l_recovery_replay_one_stripe(conf, sh, ctx);
-			sh->log_start = ctx->pos;
 			list_move_tail(&sh->lru, cached_stripe_list);
 		}
 		r5l_recovery_load_data(log, sh, ctx, payload,
@@ -1933,8 +1930,6 @@ static void r5c_recovery_load_one_stripe(struct r5l_log *log,
 			set_bit(R5_UPTODATE, &dev->flags);
 		}
 	}
-	list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
-	atomic_inc(&log->stripe_in_journal_count);
 }
 
 /*
@@ -2070,6 +2065,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 	struct stripe_head *sh, *next;
 	struct mddev *mddev = log->rdev->mddev;
 	struct page *page;
+	sector_t next_checkpoint = MaxSector;
 
 	page = alloc_page(GFP_KERNEL);
 	if (!page) {
@@ -2078,6 +2074,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 		return -ENOMEM;
 	}
 
+	WARN_ON(list_empty(&ctx->cached_list));
+
 	list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
 		struct r5l_meta_block *mb;
 		int i;
@@ -2123,12 +2121,15 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 		sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
 			     REQ_OP_WRITE, REQ_FUA, false);
 		sh->log_start = ctx->pos;
+		list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
+		atomic_inc(&log->stripe_in_journal_count);
 		ctx->pos = write_pos;
 		ctx->seq += 1;
+		next_checkpoint = sh->log_start;
 
 		list_del_init(&sh->lru);
 		raid5_release_stripe(sh);
 	}
+	log->next_checkpoint = next_checkpoint;
 	__free_page(page);
 	return 0;
 }
@@ -2139,7 +2140,6 @@ static int r5l_recovery_log(struct r5l_log *log)
 	struct r5l_recovery_ctx ctx;
 	int ret;
 	sector_t pos;
-	struct stripe_head *sh;
 
 	ctx.pos = log->last_checkpoint;
 	ctx.seq = log->last_cp_seq;
@@ -2164,16 +2164,13 @@ static int r5l_recovery_log(struct r5l_log *log)
 		log->next_checkpoint = ctx.pos;
 		r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
 		ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
-	} else {
-		sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru);
-		log->next_checkpoint = sh->log_start;
 	}
 
 	if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
 		pr_debug("md/raid:%s: starting from clean shutdown\n",
 			 mdname(mddev));
 	else {
-		pr_debug("md/raid:%s: recoverying %d data-only stripes and %d data-parity stripes\n",
+		pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
 			 mdname(mddev), ctx.data_only_stripes,
 			 ctx.data_parity_stripes);
 
@@ -2418,9 +2415,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
 	if (do_wakeup)
 		wake_up(&conf->wait_for_overlap);
 
-	if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
-		return;
-
 	spin_lock_irq(&conf->log->stripe_in_journal_lock);
 	list_del_init(&sh->r5c);
 	spin_unlock_irq(&conf->log->stripe_in_journal_lock);
@@ -2639,14 +2633,16 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	spin_lock_init(&log->stripe_in_journal_lock);
 	atomic_set(&log->stripe_in_journal_count, 0);
 
+	rcu_assign_pointer(conf->log, log);
+
 	if (r5l_load_log(log))
 		goto error;
 
-	rcu_assign_pointer(conf->log, log);
 	set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
 	return 0;
 
 error:
+	rcu_assign_pointer(conf->log, NULL);
 	md_unregister_thread(&log->reclaim_thread);
 reclaim_thread:
 	mempool_destroy(log->meta_pool);
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -62,6 +62,8 @@
 #include "raid0.h"
 #include "bitmap.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS	(1L << MD_FAILFAST_SUPPORTED)
+
 #define cpu_to_group(cpu) cpu_to_node(cpu)
 #define ANY_GROUP NUMA_NO_NODE
 
@@ -7829,8 +7831,9 @@ static void *raid5_takeover_raid1(struct mddev *mddev)
 	mddev->new_chunk_sectors = chunksect;
 
 	ret = setup_conf(mddev);
-	if (!IS_ERR_VALUE(ret))
-		clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+	if (!IS_ERR(ret))
+		mddev_clear_unsupported_flags(mddev,
+			UNSUPPORTED_MDDEV_FLAGS);
 	return ret;
 }
 