mirror of https://gitee.com/openkylin/linux.git
drbd: The new, smarter resync speed controller
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
parent
8e26f9ccb9
commit
778f271dfe
|
@ -928,6 +928,12 @@ enum write_ordering_e {
|
||||||
WO_bio_barrier
|
WO_bio_barrier
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Fixed-size ring of integer values.
 *
 * The resync speed controller keeps one of these per device (rs_plan_s) to
 * hold the corrections it has planned for upcoming turns.
 */
struct fifo_buffer {
	int *values;             /* backing array holding @size entries */
	unsigned int head_index; /* slot that the next push reads and replaces */
	unsigned int size;       /* number of entries in @values; 0 means no plan */
};
|
||||||
|
|
||||||
struct drbd_conf {
|
struct drbd_conf {
|
||||||
/* things that are stored as / read from meta data on disk */
|
/* things that are stored as / read from meta data on disk */
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
@ -1068,6 +1074,11 @@ struct drbd_conf {
|
||||||
u64 ed_uuid; /* UUID of the exposed data */
|
u64 ed_uuid; /* UUID of the exposed data */
|
||||||
struct mutex state_mutex;
|
struct mutex state_mutex;
|
||||||
char congestion_reason; /* Why we where congested... */
|
char congestion_reason; /* Why we where congested... */
|
||||||
|
atomic_t rs_sect_in; /* counter to measure the incoming resync data rate */
|
||||||
|
int c_sync_rate; /* current resync rate after delay_probe magic */
|
||||||
|
struct fifo_buffer rs_plan_s; /* correction values of resync planer */
|
||||||
|
int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
|
||||||
|
int rs_planed; /* resync sectors already planed */
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
|
static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
|
||||||
|
|
|
@ -2734,6 +2734,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
|
||||||
atomic_set(&mdev->net_cnt, 0);
|
atomic_set(&mdev->net_cnt, 0);
|
||||||
atomic_set(&mdev->packet_seq, 0);
|
atomic_set(&mdev->packet_seq, 0);
|
||||||
atomic_set(&mdev->pp_in_use, 0);
|
atomic_set(&mdev->pp_in_use, 0);
|
||||||
|
atomic_set(&mdev->rs_sect_in, 0);
|
||||||
|
|
||||||
mutex_init(&mdev->md_io_mutex);
|
mutex_init(&mdev->md_io_mutex);
|
||||||
mutex_init(&mdev->data.mutex);
|
mutex_init(&mdev->data.mutex);
|
||||||
|
|
|
@ -1587,6 +1587,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
|
||||||
struct crypto_hash *csums_tfm = NULL;
|
struct crypto_hash *csums_tfm = NULL;
|
||||||
struct syncer_conf sc;
|
struct syncer_conf sc;
|
||||||
cpumask_var_t new_cpu_mask;
|
cpumask_var_t new_cpu_mask;
|
||||||
|
int *rs_plan_s = NULL;
|
||||||
|
int fifo_size;
|
||||||
|
|
||||||
if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
|
if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
|
||||||
retcode = ERR_NOMEM;
|
retcode = ERR_NOMEM;
|
||||||
|
@ -1687,6 +1689,16 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
|
||||||
if (retcode != NO_ERROR)
|
if (retcode != NO_ERROR)
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
|
fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
|
||||||
|
if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
|
||||||
|
rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
|
||||||
|
if (!rs_plan_s) {
|
||||||
|
dev_err(DEV, "kmalloc of fifo_buffer failed");
|
||||||
|
retcode = ERR_NOMEM;
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* ok, assign the rest of it as well.
|
/* ok, assign the rest of it as well.
|
||||||
* lock against receive_SyncParam() */
|
* lock against receive_SyncParam() */
|
||||||
spin_lock(&mdev->peer_seq_lock);
|
spin_lock(&mdev->peer_seq_lock);
|
||||||
|
@ -1703,6 +1715,15 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
|
||||||
mdev->verify_tfm = verify_tfm;
|
mdev->verify_tfm = verify_tfm;
|
||||||
verify_tfm = NULL;
|
verify_tfm = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (fifo_size != mdev->rs_plan_s.size) {
|
||||||
|
kfree(mdev->rs_plan_s.values);
|
||||||
|
mdev->rs_plan_s.values = rs_plan_s;
|
||||||
|
mdev->rs_plan_s.size = fifo_size;
|
||||||
|
mdev->rs_planed = 0;
|
||||||
|
rs_plan_s = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
spin_unlock(&mdev->peer_seq_lock);
|
spin_unlock(&mdev->peer_seq_lock);
|
||||||
|
|
||||||
if (get_ldev(mdev)) {
|
if (get_ldev(mdev)) {
|
||||||
|
@ -1734,6 +1755,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
|
||||||
|
|
||||||
kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
|
kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
|
||||||
fail:
|
fail:
|
||||||
|
kfree(rs_plan_s);
|
||||||
free_cpumask_var(new_cpu_mask);
|
free_cpumask_var(new_cpu_mask);
|
||||||
crypto_free_hash(csums_tfm);
|
crypto_free_hash(csums_tfm);
|
||||||
crypto_free_hash(verify_tfm);
|
crypto_free_hash(verify_tfm);
|
||||||
|
|
|
@ -1640,6 +1640,8 @@ static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h)
|
||||||
drbd_send_ack_dp(mdev, P_NEG_ACK, p);
|
drbd_send_ack_dp(mdev, P_NEG_ACK, p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
atomic_add(data_size >> 9, &mdev->rs_sect_in);
|
||||||
|
|
||||||
return ok;
|
return ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2810,6 +2812,8 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
|
||||||
struct crypto_hash *verify_tfm = NULL;
|
struct crypto_hash *verify_tfm = NULL;
|
||||||
struct crypto_hash *csums_tfm = NULL;
|
struct crypto_hash *csums_tfm = NULL;
|
||||||
const int apv = mdev->agreed_pro_version;
|
const int apv = mdev->agreed_pro_version;
|
||||||
|
int *rs_plan_s = NULL;
|
||||||
|
int fifo_size = 0;
|
||||||
|
|
||||||
exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
|
exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
|
||||||
: apv == 88 ? sizeof(struct p_rs_param)
|
: apv == 88 ? sizeof(struct p_rs_param)
|
||||||
|
@ -2904,6 +2908,15 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
|
||||||
mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
|
mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
|
||||||
mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
|
mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
|
||||||
mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
|
mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
|
||||||
|
|
||||||
|
fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
|
||||||
|
if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
|
||||||
|
rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
|
||||||
|
if (!rs_plan_s) {
|
||||||
|
dev_err(DEV, "kmalloc of fifo_buffer failed");
|
||||||
|
goto disconnect;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_lock(&mdev->peer_seq_lock);
|
spin_lock(&mdev->peer_seq_lock);
|
||||||
|
@ -2922,6 +2935,12 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
|
||||||
mdev->csums_tfm = csums_tfm;
|
mdev->csums_tfm = csums_tfm;
|
||||||
dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
|
dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
|
||||||
}
|
}
|
||||||
|
if (fifo_size != mdev->rs_plan_s.size) {
|
||||||
|
kfree(mdev->rs_plan_s.values);
|
||||||
|
mdev->rs_plan_s.values = rs_plan_s;
|
||||||
|
mdev->rs_plan_s.size = fifo_size;
|
||||||
|
mdev->rs_planed = 0;
|
||||||
|
}
|
||||||
spin_unlock(&mdev->peer_seq_lock);
|
spin_unlock(&mdev->peer_seq_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4202,6 +4221,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h)
|
||||||
/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
|
/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
|
||||||
mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
|
mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
|
||||||
dec_rs_pending(mdev);
|
dec_rs_pending(mdev);
|
||||||
|
atomic_add(blksize >> 9, &mdev->rs_sect_in);
|
||||||
|
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
|
@ -422,6 +422,89 @@ void resync_timer_fn(unsigned long data)
|
||||||
drbd_queue_work(&mdev->data.work, &mdev->resync_work);
|
drbd_queue_work(&mdev->data.work, &mdev->resync_work);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void fifo_set(struct fifo_buffer *fb, int value)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < fb->size; i++)
|
||||||
|
fb->values[i] += value;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int fifo_push(struct fifo_buffer *fb, int value)
|
||||||
|
{
|
||||||
|
int ov;
|
||||||
|
|
||||||
|
ov = fb->values[fb->head_index];
|
||||||
|
fb->values[fb->head_index++] = value;
|
||||||
|
|
||||||
|
if (fb->head_index >= fb->size)
|
||||||
|
fb->head_index = 0;
|
||||||
|
|
||||||
|
return ov;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fifo_add_val(struct fifo_buffer *fb, int value)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < fb->size; i++)
|
||||||
|
fb->values[i] += value;
|
||||||
|
}
|
||||||
|
|
||||||
|
int drbd_rs_controller(struct drbd_conf *mdev)
|
||||||
|
{
|
||||||
|
unsigned int sect_in; /* Number of sectors that came in since the last turn */
|
||||||
|
unsigned int want; /* The number of sectors we want in the proxy */
|
||||||
|
int req_sect; /* Number of sectors to request in this turn */
|
||||||
|
int correction; /* Number of sectors more we need in the proxy*/
|
||||||
|
int cps; /* correction per invocation of drbd_rs_controller() */
|
||||||
|
int steps; /* Number of time steps to plan ahead */
|
||||||
|
int curr_corr;
|
||||||
|
int max_sect;
|
||||||
|
|
||||||
|
sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
|
||||||
|
mdev->rs_in_flight -= sect_in;
|
||||||
|
|
||||||
|
spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
|
||||||
|
|
||||||
|
steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
|
||||||
|
|
||||||
|
if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
|
||||||
|
want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
|
||||||
|
} else { /* normal path */
|
||||||
|
want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
|
||||||
|
sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
correction = want - mdev->rs_in_flight - mdev->rs_planed;
|
||||||
|
|
||||||
|
/* Plan ahead */
|
||||||
|
cps = correction / steps;
|
||||||
|
fifo_add_val(&mdev->rs_plan_s, cps);
|
||||||
|
mdev->rs_planed += cps * steps;
|
||||||
|
|
||||||
|
/* What we do in this step */
|
||||||
|
curr_corr = fifo_push(&mdev->rs_plan_s, 0);
|
||||||
|
spin_unlock(&mdev->peer_seq_lock);
|
||||||
|
mdev->rs_planed -= curr_corr;
|
||||||
|
|
||||||
|
req_sect = sect_in + curr_corr;
|
||||||
|
if (req_sect < 0)
|
||||||
|
req_sect = 0;
|
||||||
|
|
||||||
|
max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
|
||||||
|
if (req_sect > max_sect)
|
||||||
|
req_sect = max_sect;
|
||||||
|
|
||||||
|
/*
|
||||||
|
dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
|
||||||
|
sect_in, mdev->rs_in_flight, want, correction,
|
||||||
|
steps, cps, mdev->rs_planed, curr_corr, req_sect);
|
||||||
|
*/
|
||||||
|
|
||||||
|
return req_sect;
|
||||||
|
}
|
||||||
|
|
||||||
int w_make_resync_request(struct drbd_conf *mdev,
|
int w_make_resync_request(struct drbd_conf *mdev,
|
||||||
struct drbd_work *w, int cancel)
|
struct drbd_work *w, int cancel)
|
||||||
{
|
{
|
||||||
|
@ -459,7 +542,13 @@ int w_make_resync_request(struct drbd_conf *mdev,
|
||||||
max_segment_size = mdev->agreed_pro_version < 94 ?
|
max_segment_size = mdev->agreed_pro_version < 94 ?
|
||||||
queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
|
queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
|
||||||
|
|
||||||
number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE / 1024) * HZ);
|
if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
|
||||||
|
number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
|
||||||
|
mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
|
||||||
|
} else {
|
||||||
|
mdev->c_sync_rate = mdev->sync_conf.rate;
|
||||||
|
number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
|
||||||
|
}
|
||||||
pe = atomic_read(&mdev->rs_pending_cnt);
|
pe = atomic_read(&mdev->rs_pending_cnt);
|
||||||
|
|
||||||
mutex_lock(&mdev->data.mutex);
|
mutex_lock(&mdev->data.mutex);
|
||||||
|
@ -593,6 +682,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
|
||||||
}
|
}
|
||||||
|
|
||||||
requeue:
|
requeue:
|
||||||
|
mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
|
||||||
mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
|
mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
|
||||||
put_ldev(mdev);
|
put_ldev(mdev);
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -1419,6 +1509,12 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
|
||||||
drbd_resync_finished(mdev);
|
drbd_resync_finished(mdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
atomic_set(&mdev->rs_sect_in, 0);
|
||||||
|
mdev->rs_in_flight = 0;
|
||||||
|
mdev->rs_planed = 0;
|
||||||
|
spin_lock(&mdev->peer_seq_lock);
|
||||||
|
fifo_set(&mdev->rs_plan_s, 0);
|
||||||
|
spin_unlock(&mdev->peer_seq_lock);
|
||||||
/* ns.conn may already be != mdev->state.conn,
|
/* ns.conn may already be != mdev->state.conn,
|
||||||
* we may have been paused in between, or become paused until
|
* we may have been paused in between, or become paused until
|
||||||
* the timer triggers.
|
* the timer triggers.
|
||||||
|
|
Loading…
Reference in New Issue