2016-07-27 15:01:50 +08:00
|
|
|
/*
 * Replication Block filter
 *
 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
 * Copyright (c) 2016 Intel Corporation
 * Copyright (c) 2016 FUJITSU LIMITED
 *
 * Author:
 *   Wen Congyang <wency@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
|
|
|
|
|
|
|
|
#include "qemu/osdep.h"
|
2019-05-23 22:35:07 +08:00
|
|
|
#include "qemu/module.h"
|
2018-02-01 19:18:46 +08:00
|
|
|
#include "qemu/option.h"
|
2016-07-27 15:01:50 +08:00
|
|
|
#include "block/nbd.h"
|
|
|
|
#include "block/blockjob.h"
|
|
|
|
#include "block/block_int.h"
|
|
|
|
#include "block/block_backup.h"
|
|
|
|
#include "sysemu/block-backend.h"
|
|
|
|
#include "qapi/error.h"
|
2018-11-12 22:00:43 +08:00
|
|
|
#include "qapi/qmp/qdict.h"
|
2021-05-17 19:36:56 +08:00
|
|
|
#include "block/replication.h"
|
2016-07-27 15:01:50 +08:00
|
|
|
|
2017-03-17 10:17:39 +08:00
|
|
|
/*
 * Life-cycle stage of a replication filter node.  The stage decides how
 * guest I/O is treated; see replication_get_io_status().
 */
typedef enum {
    /* block replication is not started */
    BLOCK_REPLICATION_NONE,
    /* block replication is running */
    BLOCK_REPLICATION_RUNNING,
    /* failover is running in background */
    BLOCK_REPLICATION_FAILOVER,
    /* failover failed */
    BLOCK_REPLICATION_FAILOVER_FAILED,
    /* block replication is done */
    BLOCK_REPLICATION_DONE,
} ReplicationStage;
|
|
|
|
|
2016-07-27 15:01:50 +08:00
|
|
|
typedef struct BDRVReplicationState {
|
|
|
|
ReplicationMode mode;
|
2017-03-17 10:17:39 +08:00
|
|
|
ReplicationStage stage;
|
2016-07-27 15:01:50 +08:00
|
|
|
BdrvChild *active_disk;
|
2019-06-06 23:41:29 +08:00
|
|
|
BlockJob *commit_job;
|
2016-07-27 15:01:50 +08:00
|
|
|
BdrvChild *hidden_disk;
|
|
|
|
BdrvChild *secondary_disk;
|
2019-06-06 23:41:29 +08:00
|
|
|
BlockJob *backup_job;
|
2016-07-27 15:01:50 +08:00
|
|
|
char *top_id;
|
|
|
|
ReplicationState *rs;
|
|
|
|
Error *blocker;
|
2018-11-12 22:00:43 +08:00
|
|
|
bool orig_hidden_read_only;
|
|
|
|
bool orig_secondary_read_only;
|
2016-07-27 15:01:50 +08:00
|
|
|
int error;
|
|
|
|
} BDRVReplicationState;
|
|
|
|
|
|
|
|
static void replication_start(ReplicationState *rs, ReplicationMode mode,
|
|
|
|
Error **errp);
|
|
|
|
static void replication_do_checkpoint(ReplicationState *rs, Error **errp);
|
|
|
|
static void replication_get_error(ReplicationState *rs, Error **errp);
|
|
|
|
static void replication_stop(ReplicationState *rs, bool failover,
|
|
|
|
Error **errp);
|
|
|
|
|
|
|
|
#define REPLICATION_MODE "mode"
|
|
|
|
#define REPLICATION_TOP_ID "top-id"
|
|
|
|
static QemuOptsList replication_runtime_opts = {
|
|
|
|
.name = "replication",
|
|
|
|
.head = QTAILQ_HEAD_INITIALIZER(replication_runtime_opts.head),
|
|
|
|
.desc = {
|
|
|
|
{
|
|
|
|
.name = REPLICATION_MODE,
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = REPLICATION_TOP_ID,
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
},
|
|
|
|
{ /* end of list */ }
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
static ReplicationOps replication_ops = {
|
|
|
|
.start = replication_start,
|
|
|
|
.checkpoint = replication_do_checkpoint,
|
|
|
|
.get_error = replication_get_error,
|
|
|
|
.stop = replication_stop,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Open a replication filter node.
 *
 * Opens the "file" child, then parses the "mode" and "top-id" runtime
 * options:
 *   - mode=primary   : "top-id" must not be given
 *   - mode=secondary : "top-id" is mandatory and is copied into the state
 * On success the node is registered through replication_new().
 *
 * Returns 0 on success, -EINVAL (with @errp set) on any failure.
 */
static int replication_open(BlockDriverState *bs, QDict *options,
                            int flags, Error **errp)
{
    BDRVReplicationState *s = bs->opaque;
    QemuOpts *opts = NULL;
    const char *mode_str;
    const char *top_id_str;
    int ret = -EINVAL;

    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
                               false, errp);
    if (bs->file == NULL) {
        return -EINVAL;
    }

    opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
        goto out;
    }

    mode_str = qemu_opt_get(opts, REPLICATION_MODE);
    if (mode_str == NULL) {
        error_setg(errp, "Missing the option mode");
        goto out;
    }

    if (strcmp(mode_str, "primary") == 0) {
        s->mode = REPLICATION_MODE_PRIMARY;
        top_id_str = qemu_opt_get(opts, REPLICATION_TOP_ID);
        if (top_id_str != NULL) {
            error_setg(errp,
                       "The primary side does not support option top-id");
            goto out;
        }
    } else if (strcmp(mode_str, "secondary") == 0) {
        s->mode = REPLICATION_MODE_SECONDARY;
        top_id_str = qemu_opt_get(opts, REPLICATION_TOP_ID);
        /* g_strdup(NULL) yields NULL, so this also detects a missing id */
        s->top_id = g_strdup(top_id_str);
        if (s->top_id == NULL) {
            error_setg(errp, "Missing the option top-id");
            goto out;
        }
    } else {
        error_setg(errp,
                   "The option mode's value should be primary or secondary");
        goto out;
    }

    s->rs = replication_new(bs, &replication_ops);
    ret = 0;

out:
    /* The parsed options are released on success and failure alike. */
    qemu_opts_del(opts);
    return ret;
}
|
|
|
|
|
|
|
|
/*
 * Tear down a replication filter node: stop a running replication,
 * cancel a commit job that is still in flight, free the copied top-id
 * and unregister the node.
 */
static void replication_close(BlockDriverState *bs)
{
    BDRVReplicationState *s = bs->opaque;

    if (s->stage == BLOCK_REPLICATION_RUNNING) {
        replication_stop(s->rs, false, NULL);
    }

    /*
     * The stage is read again after the stop call above.
     * NOTE(review): presumably replication_stop() can move the node into
     * the FAILOVER stage -- confirm against its implementation.
     */
    if (s->stage == BLOCK_REPLICATION_FAILOVER) {
        Job *job = &s->commit_job->job;

        assert(job->aio_context == qemu_get_current_aio_context());
        job_cancel_sync(job);
    }

    /* top_id is only populated for the secondary side */
    if (s->mode == REPLICATION_MODE_SECONDARY) {
        g_free(s->top_id);
    }

    replication_remove(s->rs);
}
|
|
|
|
|
2017-03-14 19:46:52 +08:00
|
|
|
static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
|
2020-05-13 19:05:16 +08:00
|
|
|
BdrvChildRole role,
|
2017-09-14 18:47:11 +08:00
|
|
|
BlockReopenQueue *reopen_queue,
|
2017-03-14 19:46:52 +08:00
|
|
|
uint64_t perm, uint64_t shared,
|
|
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
|
|
{
|
2017-10-25 14:51:23 +08:00
|
|
|
*nperm = BLK_PERM_CONSISTENT_READ;
|
|
|
|
if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) {
|
|
|
|
*nperm |= BLK_PERM_WRITE;
|
|
|
|
}
|
2020-04-13 06:35:56 +08:00
|
|
|
*nshared = BLK_PERM_CONSISTENT_READ
|
|
|
|
| BLK_PERM_WRITE
|
2017-10-25 14:51:23 +08:00
|
|
|
| BLK_PERM_WRITE_UNCHANGED;
|
2017-03-14 19:46:52 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2016-07-27 15:01:50 +08:00
|
|
|
/* The filter has no size of its own: report the length of the file child. */
static int64_t replication_getlength(BlockDriverState *bs)
{
    BlockDriverState *file_bs = bs->file->bs;

    return bdrv_getlength(file_bs);
}
|
|
|
|
|
|
|
|
/*
 * Map the current stage and mode to an I/O disposition.
 *
 * Returns:
 *   -EIO  I/O must be refused (replication not started, or primary side
 *         in any stage other than RUNNING)
 *    0    I/O may go straight to bs->file
 *    1    secondary side after a failed failover; the write path then
 *         picks the target per allocation status
 */
static int replication_get_io_status(BDRVReplicationState *s)
{
    const bool is_primary = (s->mode == REPLICATION_MODE_PRIMARY);

    switch (s->stage) {
    case BLOCK_REPLICATION_NONE:
        return -EIO;

    case BLOCK_REPLICATION_RUNNING:
        return 0;

    case BLOCK_REPLICATION_FAILOVER_FAILED:
        return is_primary ? -EIO : 1;

    case BLOCK_REPLICATION_FAILOVER:
    case BLOCK_REPLICATION_DONE:
        /*
         * For DONE: active commit job completes, and active disk and
         * secondary_disk is swapped, so we can operate bs->file directly
         */
        return is_primary ? -EIO : 0;

    default:
        abort();
    }
}
|
|
|
|
|
|
|
|
/*
 * Post-process the result of a forwarded request.
 *
 * On the secondary side @ret is passed through untouched.  On any other
 * side a negative @ret is stored in s->error and reported as success (0).
 */
static int replication_return_value(BDRVReplicationState *s, int ret)
{
    if (s->mode != REPLICATION_MODE_SECONDARY && ret < 0) {
        s->error = ret;
        return 0;
    }

    return ret;
}
|
|
|
|
|
|
|
|
static coroutine_fn int replication_co_readv(BlockDriverState *bs,
|
|
|
|
int64_t sector_num,
|
|
|
|
int remaining_sectors,
|
|
|
|
QEMUIOVector *qiov)
|
|
|
|
{
|
|
|
|
BDRVReplicationState *s = bs->opaque;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (s->mode == REPLICATION_MODE_PRIMARY) {
|
|
|
|
/* We only use it to forward primary write requests */
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = replication_get_io_status(s);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-06-29 04:15:23 +08:00
|
|
|
ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE,
|
|
|
|
remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0);
|
2018-09-19 20:43:42 +08:00
|
|
|
|
2016-07-27 15:01:50 +08:00
|
|
|
return replication_return_value(s, ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
static coroutine_fn int replication_co_writev(BlockDriverState *bs,
|
|
|
|
int64_t sector_num,
|
|
|
|
int remaining_sectors,
|
2018-04-25 06:01:57 +08:00
|
|
|
QEMUIOVector *qiov,
|
|
|
|
int flags)
|
2016-07-27 15:01:50 +08:00
|
|
|
{
|
|
|
|
BDRVReplicationState *s = bs->opaque;
|
|
|
|
QEMUIOVector hd_qiov;
|
|
|
|
uint64_t bytes_done = 0;
|
|
|
|
BdrvChild *top = bs->file;
|
|
|
|
BdrvChild *base = s->secondary_disk;
|
|
|
|
BdrvChild *target;
|
block: Make bdrv_is_allocated_above() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, for the most part this patch is just the
addition of scaling at the callers followed by inverse scaling at
bdrv_is_allocated(). But some code, particularly stream_run(),
gets a lot simpler because it no longer has to mess with sectors.
Leave comments where we can further simplify by switching to
byte-based iterations, once later patches eliminate the need for
sector-aligned operations.
For ease of review, bdrv_is_allocated() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 20:44:59 +08:00
|
|
|
int ret;
|
|
|
|
int64_t n;
|
2016-07-27 15:01:50 +08:00
|
|
|
|
2018-04-25 06:01:57 +08:00
|
|
|
assert(!flags);
|
2016-07-27 15:01:50 +08:00
|
|
|
ret = replication_get_io_status(s);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret == 0) {
|
2018-06-29 04:15:23 +08:00
|
|
|
ret = bdrv_co_pwritev(top, sector_num * BDRV_SECTOR_SIZE,
|
|
|
|
remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0);
|
2016-07-27 15:01:50 +08:00
|
|
|
return replication_return_value(s, ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Failover failed, only write to active disk if the sectors
|
|
|
|
* have already been allocated in active disk/hidden disk.
|
|
|
|
*/
|
|
|
|
qemu_iovec_init(&hd_qiov, qiov->niov);
|
|
|
|
while (remaining_sectors > 0) {
|
block: Make bdrv_is_allocated_above() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, for the most part this patch is just the
addition of scaling at the callers followed by inverse scaling at
bdrv_is_allocated(). But some code, particularly stream_run(),
gets a lot simpler because it no longer has to mess with sectors.
Leave comments where we can further simplify by switching to
byte-based iterations, once later patches eliminate the need for
sector-aligned operations.
For ease of review, bdrv_is_allocated() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 20:44:59 +08:00
|
|
|
int64_t count;
|
|
|
|
|
2019-05-30 01:56:14 +08:00
|
|
|
ret = bdrv_is_allocated_above(top->bs, base->bs, false,
|
block: Make bdrv_is_allocated_above() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, for the most part this patch is just the
addition of scaling at the callers followed by inverse scaling at
bdrv_is_allocated(). But some code, particularly stream_run(),
gets a lot simpler because it no longer has to mess with sectors.
Leave comments where we can further simplify by switching to
byte-based iterations, once later patches eliminate the need for
sector-aligned operations.
For ease of review, bdrv_is_allocated() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 20:44:59 +08:00
|
|
|
sector_num * BDRV_SECTOR_SIZE,
|
|
|
|
remaining_sectors * BDRV_SECTOR_SIZE,
|
|
|
|
&count);
|
2016-07-27 15:01:50 +08:00
|
|
|
if (ret < 0) {
|
|
|
|
goto out1;
|
|
|
|
}
|
|
|
|
|
block: Make bdrv_is_allocated_above() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, for the most part this patch is just the
addition of scaling at the callers followed by inverse scaling at
bdrv_is_allocated(). But some code, particularly stream_run(),
gets a lot simpler because it no longer has to mess with sectors.
Leave comments where we can further simplify by switching to
byte-based iterations, once later patches eliminate the need for
sector-aligned operations.
For ease of review, bdrv_is_allocated() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 20:44:59 +08:00
|
|
|
assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
|
|
|
|
n = count >> BDRV_SECTOR_BITS;
|
2016-07-27 15:01:50 +08:00
|
|
|
qemu_iovec_reset(&hd_qiov);
|
block: Make bdrv_is_allocated_above() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, for the most part this patch is just the
addition of scaling at the callers followed by inverse scaling at
bdrv_is_allocated(). But some code, particularly stream_run(),
gets a lot simpler because it no longer has to mess with sectors.
Leave comments where we can further simplify by switching to
byte-based iterations, once later patches eliminate the need for
sector-aligned operations.
For ease of review, bdrv_is_allocated() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 20:44:59 +08:00
|
|
|
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count);
|
2016-07-27 15:01:50 +08:00
|
|
|
|
|
|
|
target = ret ? top : base;
|
2018-06-29 04:15:23 +08:00
|
|
|
ret = bdrv_co_pwritev(target, sector_num * BDRV_SECTOR_SIZE,
|
|
|
|
n * BDRV_SECTOR_SIZE, &hd_qiov, 0);
|
2016-07-27 15:01:50 +08:00
|
|
|
if (ret < 0) {
|
|
|
|
goto out1;
|
|
|
|
}
|
|
|
|
|
|
|
|
remaining_sectors -= n;
|
|
|
|
sector_num += n;
|
block: Make bdrv_is_allocated_above() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, for the most part this patch is just the
addition of scaling at the callers followed by inverse scaling at
bdrv_is_allocated(). But some code, particularly stream_run(),
gets a lot simpler because it no longer has to mess with sectors.
Leave comments where we can further simplify by switching to
byte-based iterations, once later patches eliminate the need for
sector-aligned operations.
For ease of review, bdrv_is_allocated() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 20:44:59 +08:00
|
|
|
bytes_done += count;
|
2016-07-27 15:01:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
out1:
|
|
|
|
qemu_iovec_destroy(&hd_qiov);
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Checkpoint on the secondary side: checkpoint the backup job, then
 * empty the active disk and the hidden disk.  The first failure is
 * reported through @errp and aborts the remaining steps.
 */
static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
{
    Error *err = NULL;
    BlockBackend *hidden_blk;

    if (s->backup_job == NULL) {
        error_setg(errp, "Backup job was cancelled unexpectedly");
        return;
    }

    backup_do_checkpoint(s->backup_job, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        return;
    }

    if (s->active_disk->bs->drv == NULL) {
        error_setg(errp, "Active disk %s is ejected",
                   s->active_disk->bs->node_name);
        return;
    }

    if (bdrv_make_empty(s->active_disk, errp) < 0) {
        return;
    }

    if (s->hidden_disk->bs->drv == NULL) {
        error_setg(errp, "Hidden disk %s is ejected",
                   s->hidden_disk->bs->node_name);
        return;
    }

    /* The hidden disk is emptied through a temporary BlockBackend */
    hidden_blk = blk_new(qemu_get_current_aio_context(),
                         BLK_PERM_WRITE, BLK_PERM_ALL);
    blk_insert_bs(hidden_blk, s->hidden_disk->bs, &err);
    if (err != NULL) {
        error_propagate(errp, err);
    } else {
        /* Any failure is already reported through @errp */
        blk_make_empty(hidden_blk, errp);
    }
    blk_unref(hidden_blk);
}
|
|
|
|
|
2018-11-12 22:00:43 +08:00
|
|
|
/* This function is supposed to be called twice:
|
|
|
|
* first with writable = true, then with writable = false.
|
|
|
|
* The first call puts s->hidden_disk and s->secondary_disk in
|
|
|
|
* r/w mode, and the second puts them back in their original state.
|
|
|
|
*/
|
2016-10-27 18:49:01 +08:00
|
|
|
static void reopen_backing_file(BlockDriverState *bs, bool writable,
|
2016-07-27 15:01:50 +08:00
|
|
|
Error **errp)
|
|
|
|
{
|
2016-10-27 18:49:01 +08:00
|
|
|
BDRVReplicationState *s = bs->opaque;
|
2016-07-27 15:01:50 +08:00
|
|
|
BlockReopenQueue *reopen_queue = NULL;
|
|
|
|
|
|
|
|
if (writable) {
|
2018-11-12 22:00:43 +08:00
|
|
|
s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs);
|
|
|
|
s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs);
|
2016-07-27 15:01:50 +08:00
|
|
|
}
|
|
|
|
|
2017-12-07 03:24:44 +08:00
|
|
|
bdrv_subtree_drained_begin(s->hidden_disk->bs);
|
|
|
|
bdrv_subtree_drained_begin(s->secondary_disk->bs);
|
|
|
|
|
2018-11-12 22:00:43 +08:00
|
|
|
if (s->orig_hidden_read_only) {
|
|
|
|
QDict *opts = qdict_new();
|
|
|
|
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
|
|
|
|
reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs,
|
2019-03-13 00:48:44 +08:00
|
|
|
opts, true);
|
2016-07-27 15:01:50 +08:00
|
|
|
}
|
|
|
|
|
2018-11-12 22:00:43 +08:00
|
|
|
if (s->orig_secondary_read_only) {
|
|
|
|
QDict *opts = qdict_new();
|
|
|
|
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
|
2016-07-27 15:01:50 +08:00
|
|
|
reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs,
|
2019-03-13 00:48:44 +08:00
|
|
|
opts, true);
|
2016-07-27 15:01:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (reopen_queue) {
|
2020-07-08 00:06:04 +08:00
|
|
|
bdrv_reopen_multiple(reopen_queue, errp);
|
2016-07-27 15:01:50 +08:00
|
|
|
}
|
2017-12-07 03:24:44 +08:00
|
|
|
|
|
|
|
bdrv_subtree_drained_end(s->hidden_disk->bs);
|
|
|
|
bdrv_subtree_drained_end(s->secondary_disk->bs);
|
2016-07-27 15:01:50 +08:00
|
|
|
}
|
|
|
|
|
2016-10-27 18:49:01 +08:00
|
|
|
/*
 * Release everything that was set up for the backup job: forget the job,
 * lift the op blocker from the top node, free the blocker and put the
 * backing files back into their original read-only state.
 *
 * If the top node named by s->top_id cannot be found, only the job
 * pointer is cleared.
 * NOTE(review): in that case the blocker is not freed and the backing
 * files are not reopened; this pre-existing behaviour is kept as is.
 */
static void backup_job_cleanup(BlockDriverState *bs)
{
    BDRVReplicationState *s = bs->opaque;
    BlockDriverState *top_bs;

    s->backup_job = NULL;

    top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
    if (!top_bs) {
        return;
    }

    bdrv_op_unblock_all(top_bs, s->blocker);
    error_free(s->blocker);
    /*
     * Do not leave a dangling pointer behind: a stale non-NULL blocker
     * could be freed twice, or trip the "*errp must be NULL" requirement
     * of error_setg() if the blocker is ever created again.
     */
    s->blocker = NULL;

    reopen_backing_file(bs, false, NULL);
}
|
|
|
|
|
|
|
|
static void backup_job_completed(void *opaque, int ret)
|
|
|
|
{
|
2016-10-27 18:49:01 +08:00
|
|
|
BlockDriverState *bs = opaque;
|
|
|
|
BDRVReplicationState *s = bs->opaque;
|
2016-07-27 15:01:50 +08:00
|
|
|
|
2017-03-17 10:17:39 +08:00
|
|
|
if (s->stage != BLOCK_REPLICATION_FAILOVER) {
|
2016-07-27 15:01:50 +08:00
|
|
|
/* The backup job is cancelled unexpectedly */
|
|
|
|
s->error = -EIO;
|
|
|
|
}
|
|
|
|
|
2016-10-27 18:49:01 +08:00
|
|
|
backup_job_cleanup(bs);
|
2016-07-27 15:01:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return true if @bs is @top_bs itself or can be reached from @top_bs by
 * following child links (depth-first search).
 */
static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs)
{
    BdrvChild *edge;
    bool found = (top_bs == bs);

    if (!found) {
        /*
         * The recursive call starts with the identity test, so a direct
         * child equal to @bs is detected without a separate comparison.
         */
        QLIST_FOREACH(edge, &top_bs->children, next) {
            if (check_top_bs(edge->bs, bs)) {
                found = true;
                break;
            }
        }
    }

    return found;
}
|
|
|
|
|
|
|
|
static void replication_start(ReplicationState *rs, ReplicationMode mode,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
BlockDriverState *bs = rs->opaque;
|
|
|
|
BDRVReplicationState *s;
|
|
|
|
BlockDriverState *top_bs;
|
|
|
|
int64_t active_length, hidden_length, disk_length;
|
|
|
|
AioContext *aio_context;
|
|
|
|
Error *local_err = NULL;
|
2021-01-17 05:46:52 +08:00
|
|
|
BackupPerf perf = { .use_copy_range = true, .max_workers = 1 };
|
2016-07-27 15:01:50 +08:00
|
|
|
|
|
|
|
aio_context = bdrv_get_aio_context(bs);
|
|
|
|
aio_context_acquire(aio_context);
|
|
|
|
s = bs->opaque;
|
|
|
|
|
2019-10-24 22:25:35 +08:00
|
|
|
if (s->stage == BLOCK_REPLICATION_DONE ||
|
|
|
|
s->stage == BLOCK_REPLICATION_FAILOVER) {
|
|
|
|
/*
|
|
|
|
* This case happens when a secondary is promoted to primary.
|
|
|
|
* Ignore the request because the secondary side of replication
|
|
|
|
* doesn't have to do anything anymore.
|
|
|
|
*/
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-03-17 10:17:39 +08:00
|
|
|
if (s->stage != BLOCK_REPLICATION_NONE) {
|
2016-07-27 15:01:50 +08:00
|
|
|
error_setg(errp, "Block replication is running or done");
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s->mode != mode) {
|
|
|
|
error_setg(errp, "The parameter mode's value is invalid, needs %d,"
|
|
|
|
" but got %d", s->mode, mode);
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (s->mode) {
|
|
|
|
case REPLICATION_MODE_PRIMARY:
|
|
|
|
break;
|
|
|
|
case REPLICATION_MODE_SECONDARY:
|
|
|
|
s->active_disk = bs->file;
|
|
|
|
if (!s->active_disk || !s->active_disk->bs ||
|
|
|
|
!s->active_disk->bs->backing) {
|
|
|
|
error_setg(errp, "Active disk doesn't have backing file");
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->hidden_disk = s->active_disk->bs->backing;
|
|
|
|
if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) {
|
|
|
|
error_setg(errp, "Hidden disk doesn't have backing file");
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->secondary_disk = s->hidden_disk->bs->backing;
|
|
|
|
if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) {
|
|
|
|
error_setg(errp, "The secondary disk doesn't have block backend");
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* verify the length */
|
|
|
|
active_length = bdrv_getlength(s->active_disk->bs);
|
|
|
|
hidden_length = bdrv_getlength(s->hidden_disk->bs);
|
|
|
|
disk_length = bdrv_getlength(s->secondary_disk->bs);
|
|
|
|
if (active_length < 0 || hidden_length < 0 || disk_length < 0 ||
|
|
|
|
active_length != hidden_length || hidden_length != disk_length) {
|
|
|
|
error_setg(errp, "Active disk, hidden disk, secondary disk's length"
|
|
|
|
" are not the same");
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-11-11 04:31:09 +08:00
|
|
|
/* Must be true, or the bdrv_getlength() calls would have failed */
|
|
|
|
assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv);
|
|
|
|
|
2016-07-27 15:01:50 +08:00
|
|
|
if (!s->active_disk->bs->drv->bdrv_make_empty ||
|
|
|
|
!s->hidden_disk->bs->drv->bdrv_make_empty) {
|
|
|
|
error_setg(errp,
|
|
|
|
"Active disk or hidden disk doesn't support make_empty");
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* reopen the backing file in r/w mode */
|
2016-10-27 18:49:01 +08:00
|
|
|
reopen_backing_file(bs, true, &local_err);
|
2016-07-27 15:01:50 +08:00
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* start backup job now */
|
|
|
|
error_setg(&s->blocker,
|
|
|
|
"Block device is in use by internal backup job");
|
|
|
|
|
|
|
|
top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
|
|
|
|
if (!top_bs || !bdrv_is_root_node(top_bs) ||
|
|
|
|
!check_top_bs(top_bs, bs)) {
|
|
|
|
error_setg(errp, "No top_bs or it is invalid");
|
2016-10-27 18:49:01 +08:00
|
|
|
reopen_backing_file(bs, false, NULL);
|
2016-07-27 15:01:50 +08:00
|
|
|
aio_context_release(aio_context);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
bdrv_op_block_all(top_bs, s->blocker);
|
|
|
|
bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);
|
|
|
|
|
2019-06-06 23:41:29 +08:00
|
|
|
s->backup_job = backup_job_create(
|
|
|
|
NULL, s->secondary_disk->bs, s->hidden_disk->bs,
|
block/backup: use backup-top instead of write notifiers
Drop write notifiers and use filter node instead.
= Changes =
1. Add filter-node-name argument for backup qmp api. We have to do it
in this commit, as 257 needs to be fixed.
2. There are no more write notifiers here, so is_write_notifier
parameter is dropped from block-copy paths.
3. To sync with in-flight requests at job finish we now have drained
removing of the filter, we don't need rw-lock.
4. Block-copy is now using BdrvChildren instead of BlockBackends
5. As backup-top owns these children, we also move block-copy state
into backup-top's ownership.
= Iotest changes =
56: op-blocker doesn't shoot now, as we set it on source, but then
check on filter, when trying to start second backup.
To keep the test we instead can catch another collision: both jobs will
get 'drive0' job-id, as job-id parameter is unspecified. To prevent
interleaving with file-posix locks (as they are dependent on config)
let's use another target for second backup.
Also, it's obvious now that we'd like to drop this op-blocker at all
and add a test-case for two backups from one node (to different
destinations) actually works. But not in these series.
141: Output changed: prepatch, "Node is in use" comes from bdrv_has_blk
check inside qmp_blockdev_del. But we've dropped block-copy blk
objects, so no more blk objects on source bs (job blk is on backup-top
filter bs). New message is from op-blocker, which is the next check in
qmp_blockdev_add.
257: The test wants to emulate guest write during backup. They should
go to filter node, not to original source node, of course. Therefore we
need to specify filter node name and use it.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20191001131409.14202-6-vsementsov@virtuozzo.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2019-10-01 21:14:09 +08:00
|
|
|
0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
|
qapi: backup: add perf.use-copy-range parameter
Experiments show, that copy_range is not always making things faster.
So, to make experimentation simpler, let's add a parameter. Some more
perf parameters will be added soon, so here is a new struct.
For now, add new backup qmp parameter with x- prefix for the following
reasons:
- We are going to add more performance parameters, some will be
related to the whole block-copy process, some only to background
copying in backup (ignored for copy-before-write operations).
- On the other hand, we are going to use block-copy interface in other
block jobs, which will need performance options as well.. And it
should be the same structure or at least somehow related.
So, there are too much unclean things about how the interface and now
we need the new options mostly for testing. Let's keep them
experimental for a while.
In do_backup_common() new x-perf parameter handled in a way to
make further options addition simpler.
We add use-copy-range with default=true, and we'll change the default
in further patch, after moving backup to use block-copy.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20210116214705.822267-2-vsementsov@virtuozzo.com>
[mreitz: s/5\.2/6.0/]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2021-01-17 05:46:43 +08:00
|
|
|
&perf,
|
2016-11-08 14:50:38 +08:00
|
|
|
BLOCKDEV_ON_ERROR_REPORT,
|
2018-04-19 23:54:56 +08:00
|
|
|
BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
|
2016-11-08 14:50:38 +08:00
|
|
|
backup_job_completed, bs, NULL, &local_err);
|
2016-07-27 15:01:50 +08:00
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
2016-10-27 18:49:01 +08:00
|
|
|
backup_job_cleanup(bs);
|
2016-07-27 15:01:50 +08:00
|
|
|
aio_context_release(aio_context);
|
|
|
|
return;
|
|
|
|
}
|
2019-06-06 23:41:29 +08:00
|
|
|
job_start(&s->backup_job->job);
|
2016-07-27 15:01:50 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2017-03-17 10:17:39 +08:00
|
|
|
s->stage = BLOCK_REPLICATION_RUNNING;
|
2016-07-27 15:01:50 +08:00
|
|
|
|
|
|
|
if (s->mode == REPLICATION_MODE_SECONDARY) {
|
|
|
|
secondary_do_checkpoint(s, errp);
|
|
|
|
}
|
|
|
|
|
|
|
|
s->error = 0;
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Take a checkpoint on the node stored in @rs->opaque.
 *
 * Only a secondary node does any work here, and only while its stage is
 * neither DONE nor FAILOVER; errors from the checkpoint are reported
 * through @errp.  The node's AioContext is held around the operation.
 */
static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
{
    BlockDriverState *bs = rs->opaque;
    BDRVReplicationState *s;
    AioContext *ctx;
    bool promoted;

    ctx = bdrv_get_aio_context(bs);
    aio_context_acquire(ctx);
    s = bs->opaque;

    /*
     * DONE or FAILOVER happens when a secondary was promoted to primary.
     * The request is then ignored because the secondary side of
     * replication doesn't have to do anything anymore.
     */
    promoted = (s->stage == BLOCK_REPLICATION_DONE ||
                s->stage == BLOCK_REPLICATION_FAILOVER);

    if (!promoted && s->mode == REPLICATION_MODE_SECONDARY) {
        secondary_do_checkpoint(s, errp);
    }

    aio_context_release(ctx);
}
|
|
|
|
|
|
|
|
/*
 * Report the replication status of the node stored in @rs->opaque.
 *
 * Sets @errp when replication has not been started (stage NONE) or when an
 * error has been recorded in s->error; otherwise leaves @errp untouched.
 * The node's AioContext is held while the state is inspected.
 */
static void replication_get_error(ReplicationState *rs, Error **errp)
{
    BlockDriverState *bs = rs->opaque;
    BDRVReplicationState *s;
    AioContext *ctx;

    ctx = bdrv_get_aio_context(bs);
    aio_context_acquire(ctx);
    s = bs->opaque;

    if (s->stage == BLOCK_REPLICATION_NONE) {
        error_setg(errp, "Block replication is not running");
    } else if (s->error) {
        error_setg(errp, "I/O error occurred");
    }

    aio_context_release(ctx);
}
|
|
|
|
|
|
|
|
static void replication_done(void *opaque, int ret)
|
|
|
|
{
|
|
|
|
BlockDriverState *bs = opaque;
|
|
|
|
BDRVReplicationState *s = bs->opaque;
|
|
|
|
|
|
|
|
if (ret == 0) {
|
2017-03-17 10:17:39 +08:00
|
|
|
s->stage = BLOCK_REPLICATION_DONE;
|
2016-07-27 15:01:50 +08:00
|
|
|
|
|
|
|
s->active_disk = NULL;
|
|
|
|
s->secondary_disk = NULL;
|
|
|
|
s->hidden_disk = NULL;
|
|
|
|
s->error = 0;
|
|
|
|
} else {
|
2017-03-17 10:17:39 +08:00
|
|
|
s->stage = BLOCK_REPLICATION_FAILOVER_FAILED;
|
2016-07-27 15:01:50 +08:00
|
|
|
s->error = -EIO;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Stop block replication on the node stored in @rs->opaque.
 *
 * The request is ignored when the stage is already DONE or FAILOVER and
 * rejected with an error unless the stage is RUNNING.
 *
 * Primary mode: the stage becomes DONE and the error is cleared.
 * Secondary mode: the backup job, if any, is cancelled synchronously.
 * Without @failover a final checkpoint is taken and the stage becomes DONE.
 * With @failover the stage becomes FAILOVER and an internal active commit
 * job is started, with replication_done() as its completion callback.
 *
 * The node's AioContext is held for the whole call.
 */
static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
{
    BlockDriverState *bs = rs->opaque;
    BDRVReplicationState *s;
    AioContext *ctx;

    ctx = bdrv_get_aio_context(bs);
    aio_context_acquire(ctx);
    s = bs->opaque;

    if (s->stage == BLOCK_REPLICATION_DONE ||
        s->stage == BLOCK_REPLICATION_FAILOVER) {
        /*
         * This case happens when a secondary was promoted to primary.
         * Ignore the request because the secondary side of replication
         * doesn't have to do anything anymore.
         */
        goto unlock;
    }

    if (s->stage != BLOCK_REPLICATION_RUNNING) {
        error_setg(errp, "Block replication is not running");
        goto unlock;
    }

    switch (s->mode) {
    case REPLICATION_MODE_PRIMARY:
        s->stage = BLOCK_REPLICATION_DONE;
        s->error = 0;
        break;
    case REPLICATION_MODE_SECONDARY:
        /*
         * This BDS will be closed, and the job should be completed
         * before the BDS is closed, because we will access hidden
         * disk, secondary disk in backup_job_completed().
         */
        if (s->backup_job) {
            job_cancel_sync(&s->backup_job->job);
        }

        if (failover) {
            s->stage = BLOCK_REPLICATION_FAILOVER;
            s->commit_job = commit_active_start(
                                NULL, s->active_disk->bs, s->secondary_disk->bs,
                                JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
                                NULL, replication_done, bs, true, errp);
        } else {
            secondary_do_checkpoint(s, errp);
            s->stage = BLOCK_REPLICATION_DONE;
        }
        break;
    default:
        aio_context_release(ctx);
        abort();
    }

unlock:
    aio_context_release(ctx);
}
|
|
|
|
|
2019-02-02 03:29:25 +08:00
|
|
|
static const char *const replication_strong_runtime_opts[] = {
|
|
|
|
REPLICATION_MODE,
|
|
|
|
REPLICATION_TOP_ID,
|
|
|
|
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2019-03-18 23:48:01 +08:00
|
|
|
static BlockDriver bdrv_replication = {
|
2016-07-27 15:01:50 +08:00
|
|
|
.format_name = "replication",
|
|
|
|
.instance_size = sizeof(BDRVReplicationState),
|
|
|
|
|
|
|
|
.bdrv_open = replication_open,
|
|
|
|
.bdrv_close = replication_close,
|
2017-03-14 19:46:52 +08:00
|
|
|
.bdrv_child_perm = replication_child_perm,
|
2016-07-27 15:01:50 +08:00
|
|
|
|
|
|
|
.bdrv_getlength = replication_getlength,
|
|
|
|
.bdrv_co_readv = replication_co_readv,
|
|
|
|
.bdrv_co_writev = replication_co_writev,
|
|
|
|
|
|
|
|
.is_filter = true,
|
|
|
|
|
|
|
|
.has_variable_length = true,
|
2019-02-02 03:29:25 +08:00
|
|
|
.strong_runtime_opts = replication_strong_runtime_opts,
|
2016-07-27 15:01:50 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static void bdrv_replication_init(void)
|
|
|
|
{
|
|
|
|
bdrv_register(&bdrv_replication);
|
|
|
|
}
|
|
|
|
|
|
|
|
block_init(bdrv_replication_init);
|