From 50ba5b2d994853b38fed10e0841b119da0f8b8e5 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 10 May 2019 23:12:44 +0200 Subject: [PATCH 01/24] block/file-posix: Truncate in xfs_write_zeroes() XFS_IOC_ZERO_RANGE does not increase the file length: $ touch foo $ xfs_io -c 'zero 0 65536' foo $ stat -c "size=%s, blocks=%b" foo size=0, blocks=128 We do want writes beyond the EOF to automatically increase the file length, however. This is evidenced by the fact that iotest 061 is broken on XFS since qcow2's check implementation checks for blocks beyond the EOF. Reported-by: Kevin Wolf Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/file-posix.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c index 1cf4ee49eb..e09e15bbf8 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1444,9 +1444,22 @@ out: #ifdef CONFIG_XFS static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes) { + int64_t len; struct xfs_flock64 fl; int err; + len = lseek(s->fd, 0, SEEK_END); + if (len < 0) { + return -errno; + } + + if (offset + bytes > len) { + /* XFS_IOC_ZERO_RANGE does not increase the file length */ + if (ftruncate(s->fd, offset + bytes) < 0) { + return -errno; + } + } + memset(&fl, 0, sizeof(fl)); fl.l_whence = SEEK_SET; fl.l_start = offset; From b6c246942b14d3e0dec46a6c5868ed84e7dbea19 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 10 May 2019 19:22:54 +0300 Subject: [PATCH 02/24] qcow2: Define and use QCOW2_COMPRESSED_SECTOR_SIZE When an L2 table entry points to a compressed cluster the space used by the data is specified in 512-byte sectors. This size is independent from BDRV_SECTOR_SIZE and is specific to the qcow2 file format. The QCOW2_COMPRESSED_SECTOR_SIZE constant defined in this patch makes this explicit. 
Signed-off-by: Alberto Garcia Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 5 +++-- block/qcow2-refcount.c | 25 ++++++++++++++----------- block/qcow2.c | 3 ++- block/qcow2.h | 4 ++++ 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 974a4e8656..b36f4aa84a 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -796,8 +796,9 @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return cluster_offset; } - nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) - - (cluster_offset >> 9); + nb_csectors = + (cluster_offset + compressed_size - 1) / QCOW2_COMPRESSED_SECTOR_SIZE - + (cluster_offset / QCOW2_COMPRESSED_SECTOR_SIZE); cluster_offset |= QCOW_OFLAG_COMPRESSED | ((uint64_t)nb_csectors << s->csize_shift); diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 7481903396..0b09d6838b 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1172,12 +1172,11 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry, switch (ctype) { case QCOW2_CLUSTER_COMPRESSED: { - int nb_csectors; - nb_csectors = ((l2_entry >> s->csize_shift) & - s->csize_mask) + 1; - qcow2_free_clusters(bs, - (l2_entry & s->cluster_offset_mask) & ~511, - nb_csectors * 512, type); + int64_t offset = (l2_entry & s->cluster_offset_mask) + & QCOW2_COMPRESSED_SECTOR_MASK; + int size = QCOW2_COMPRESSED_SECTOR_SIZE * + (((l2_entry >> s->csize_shift) & s->csize_mask) + 1); + qcow2_free_clusters(bs, offset, size, type); } break; case QCOW2_CLUSTER_NORMAL: @@ -1317,9 +1316,12 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, nb_csectors = ((entry >> s->csize_shift) & s->csize_mask) + 1; if (addend != 0) { + uint64_t coffset = (entry & s->cluster_offset_mask) + & QCOW2_COMPRESSED_SECTOR_MASK; ret = update_refcount( - bs, (entry & s->cluster_offset_mask) & ~511, - nb_csectors * 512, abs(addend), addend < 0, + bs, coffset, + nb_csectors * 
QCOW2_COMPRESSED_SECTOR_SIZE, + abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); if (ret < 0) { goto fail; @@ -1635,9 +1637,10 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1; l2_entry &= s->cluster_offset_mask; - ret = qcow2_inc_refcounts_imrt(bs, res, - refcount_table, refcount_table_size, - l2_entry & ~511, nb_csectors * 512); + ret = qcow2_inc_refcounts_imrt( + bs, res, refcount_table, refcount_table_size, + l2_entry & QCOW2_COMPRESSED_SECTOR_MASK, + nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE); if (ret < 0) { goto fail; } diff --git a/block/qcow2.c b/block/qcow2.c index 8e024007db..d39882785d 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -4187,7 +4187,8 @@ qcow2_co_preadv_compressed(BlockDriverState *bs, coffset = file_cluster_offset & s->cluster_offset_mask; nb_csectors = ((file_cluster_offset >> s->csize_shift) & s->csize_mask) + 1; - csize = nb_csectors * 512 - (coffset & 511); + csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE - + (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK); buf = g_try_malloc(csize); if (!buf) { diff --git a/block/qcow2.h b/block/qcow2.h index e62508d1ce..8d92ef1fee 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -74,6 +74,10 @@ #define MIN_CLUSTER_BITS 9 #define MAX_CLUSTER_BITS 21 +/* Defined in the qcow2 spec (compressed cluster descriptor) */ +#define QCOW2_COMPRESSED_SECTOR_SIZE 512U +#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1)) + /* Must be at least 2 to cover COW */ #define MIN_L2_CACHE_SIZE 2 /* cache entries */ From 41ae31e3d7696ec639b8fd6c162846244eeaa511 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Tue, 14 May 2019 16:57:35 +0300 Subject: [PATCH 03/24] block: Use BDRV_REQUEST_MAX_BYTES instead of BDRV_REQUEST_MAX_SECTORS There are a few places in which we turn a number of bytes into sectors in order to compare the result against BDRV_REQUEST_MAX_SECTORS instead of using BDRV_REQUEST_MAX_BYTES 
directly. Signed-off-by: Alberto Garcia Reviewed-by: Stefano Garzarella Signed-off-by: Kevin Wolf --- block/io.c | 6 +++--- qemu-io-cmds.c | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/block/io.c b/block/io.c index aeebc9c23c..3134a60a48 100644 --- a/block/io.c +++ b/block/io.c @@ -769,7 +769,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, size_t size) { - if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) { + if (size > BDRV_REQUEST_MAX_BYTES) { return -EIO; } @@ -1017,7 +1017,7 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS); + assert(bytes <= BDRV_REQUEST_MAX_BYTES); assert(drv->bdrv_co_readv); return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); @@ -1070,7 +1070,7 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS); + assert(bytes <= BDRV_REQUEST_MAX_BYTES); assert(drv->bdrv_co_writev); ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 8826bebaf6..30a7d9a13b 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -538,7 +538,7 @@ static int do_write_compressed(BlockBackend *blk, char *buf, int64_t offset, { int ret; - if (bytes >> 9 > BDRV_REQUEST_MAX_SECTORS) { + if (bytes > BDRV_REQUEST_MAX_BYTES) { return -ERANGE; } @@ -1781,10 +1781,9 @@ static int discard_f(BlockBackend *blk, int argc, char **argv) if (bytes < 0) { print_cvtnum_err(bytes, argv[optind]); return bytes; - } else if (bytes >> BDRV_SECTOR_BITS > BDRV_REQUEST_MAX_SECTORS) { + } else if (bytes > BDRV_REQUEST_MAX_BYTES) { printf("length cannot 
exceed %"PRIu64", given %s\n", - (uint64_t)BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS, - argv[optind]); + (uint64_t)BDRV_REQUEST_MAX_BYTES, argv[optind]); return -EINVAL; } From 9183dd15334f1eee46051bbdafc6ab9fbfc2efd5 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 24 Jan 2019 15:25:24 +0300 Subject: [PATCH 04/24] qmp: forbid qmp_cont in RUN_STATE_FINISH_MIGRATE qmp_cont in RUN_STATE_FINISH_MIGRATE may lead to moving vm to RUN_STATE_RUNNING, before actual migration finish. So, when migration thread will try to go to RUN_STATE_POSTMIGRATE, assuming transition RUN_STATE_FINISH_MIGRATE->RUN_STATE_POSTMIGRATE, it will crash, as current state is RUN_STATE_RUNNING, and transition RUN_STATE_RUNNING->RUN_STATE_POSTMIGRATE is forbidden. Reported-by: Max Reitz Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Kevin Wolf --- qmp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qmp.c b/qmp.c index b92d62cd5f..fa1b3c1577 100644 --- a/qmp.c +++ b/qmp.c @@ -156,6 +156,9 @@ void qmp_cont(Error **errp) return; } else if (runstate_check(RUN_STATE_SUSPENDED)) { return; + } else if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { + error_setg(errp, "Migration is not finalized yet"); + return; } for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { From 832d78caa51e7ac3c17a37f2c0ed9d904e423a32 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 24 Jan 2019 15:25:25 +0300 Subject: [PATCH 05/24] iotest: fix 169: do not run qmp_cont in RUN_STATE_FINISH_MIGRATE qmp_cont fails if vm in RUN_STATE_FINISH_MIGRATE, so let's wait for final RUN_STATE_POSTMIGRATE. Also, while being here, check qmp_cont result. 
Reported-by: Max Reitz Signed-off-by: Vladimir Sementsov-Ogievskiy Tested-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/169 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/169 b/tests/qemu-iotests/169 index 527aebd0cb..7e06cc1145 100755 --- a/tests/qemu-iotests/169 +++ b/tests/qemu-iotests/169 @@ -102,12 +102,17 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase): event = self.vm_a.event_wait('MIGRATION') if event['data']['status'] == 'completed': break + while True: + result = self.vm_a.qmp('query-status') + if (result['return']['status'] == 'postmigrate'): + break # test that bitmap is still here removed = (not migrate_bitmaps) and persistent self.check_bitmap(self.vm_a, False if removed else sha256) - self.vm_a.qmp('cont') + result = self.vm_a.qmp('cont') + self.assert_qmp(result, 'return', {}) # test that bitmap is still here after invalidation self.check_bitmap(self.vm_a, sha256) From 25349e8250e67e7d126fbf0ad213811b62e7e529 Mon Sep 17 00:00:00 2001 From: Klaus Birkelund Jensen Date: Sat, 18 May 2019 09:39:05 +0200 Subject: [PATCH 06/24] nvme: fix copy direction in DMA reads going to CMB `nvme_dma_read_prp` erroneously used `qemu_iovec_*to*_buf` instead of `qemu_iovec_*from*_buf` when the request involved the controller memory buffer. 
Signed-off-by: Klaus Birkelund Jensen Reviewed-by: Kenneth Heitke Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 7caf92532a..63a5b58849 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -238,7 +238,7 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, } qemu_sglist_destroy(&qsg); } else { - if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { + if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) { trace_nvme_err_invalid_dma(); status = NVME_INVALID_FIELD | NVME_DNR; } From 5d2318499fe980542fbc21d9ccbc127a6106017b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 6 May 2019 19:17:56 +0200 Subject: [PATCH 07/24] block: Add bdrv_try_set_aio_context() Eventually, we want to make sure that all parents and all children of a node are in the same AioContext as the node itself. This means that changing the AioContext may fail because one of the other involved parties (e.g. a guest device that was configured with an iothread) cannot allow switching to a different AioContext. Introduce a set of functions that allow first checking whether all involved nodes can switch to a new context and only then doing the actual switch. The check recursively covers children and parents. 
Signed-off-by: Kevin Wolf --- block.c | 92 +++++++++++++++++++++++++++++++++++++++ include/block/block.h | 8 ++++ include/block/block_int.h | 3 ++ 3 files changed, 103 insertions(+) diff --git a/block.c b/block.c index 6999aad446..8ff6ab1152 100644 --- a/block.c +++ b/block.c @@ -936,6 +936,13 @@ static int bdrv_child_cb_inactivate(BdrvChild *child) return 0; } +static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BlockDriverState *bs = child->opaque; + return bdrv_can_set_aio_context(bs, ctx, ignore, errp); +} + /* * Returns the options and flags that a temporary snapshot should get, based on * the originally requested flags (the originally requested image will have @@ -1003,6 +1010,7 @@ const BdrvChildRole child_file = { .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, }; /* @@ -1029,6 +1037,7 @@ const BdrvChildRole child_format = { .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, }; static void bdrv_backing_attach(BdrvChild *c) @@ -1152,6 +1161,7 @@ const BdrvChildRole child_backing = { .drained_end = bdrv_child_cb_drained_end, .inactivate = bdrv_child_cb_inactivate, .update_filename = bdrv_backing_update_filename, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, }; static int bdrv_open_flags(BlockDriverState *bs, int flags) @@ -5750,6 +5760,88 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) aio_context_release(new_context); } +static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + if (g_slist_find(*ignore, c)) { + return true; + } + *ignore = g_slist_prepend(*ignore, c); + + /* A BdrvChildRole that doesn't handle AioContext changes cannot + * tolerate any AioContext changes */ + if 
(!c->role->can_set_aio_ctx) { + char *user = bdrv_child_user_desc(c); + error_setg(errp, "Changing iothreads is not supported by %s", user); + g_free(user); + return false; + } + if (!c->role->can_set_aio_ctx(c, ctx, ignore, errp)) { + assert(!errp || *errp); + return false; + } + return true; +} + +bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + if (g_slist_find(*ignore, c)) { + return true; + } + *ignore = g_slist_prepend(*ignore, c); + return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp); +} + +/* @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. */ +bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BdrvChild *c; + + if (bdrv_get_aio_context(bs) == ctx) { + return true; + } + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { + return false; + } + } + QLIST_FOREACH(c, &bs->children, next) { + if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) { + return false; + } + } + + return true; +} + +int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) +{ + GSList *ignore; + bool ret; + + ignore = ignore_child ? 
g_slist_prepend(NULL, ignore_child) : NULL; + ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); + g_slist_free(ignore); + + if (!ret) { + return -EPERM; + } + + bdrv_set_aio_context(bs, ctx); + return 0; +} + +int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + Error **errp) +{ + return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); +} + void bdrv_add_aio_context_notifier(BlockDriverState *bs, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque) diff --git a/include/block/block.h b/include/block/block.h index 5e2b98b0ee..fc0239a887 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -586,6 +586,14 @@ void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); * This function must be called with iothread lock held. */ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context); +int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + Error **errp); +int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); +bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp); +bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, + GSList **ignore, Error **errp); int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); diff --git a/include/block/block_int.h b/include/block/block_int.h index 94d45c9708..b150c5f047 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -691,6 +691,9 @@ struct BdrvChildRole { * can update its reference. 
*/ int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, const char *filename, Error **errp); + + bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp); }; extern const BdrvChildRole child_file; From a3a683c33d38fb29c7a78903e88dda12b84cc88d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 6 May 2019 19:17:57 +0200 Subject: [PATCH 08/24] block: Make bdrv_attach/detach_aio_context() static Since commit b97511c7bc8, there is no reason for block drivers any more to call these functions (see the function comment in block_int.h). They are now just internal helper functions for bdrv_set_aio_context() and can be made static. Signed-off-by: Kevin Wolf --- block.c | 6 +++--- include/block/block_int.h | 21 --------------------- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/block.c b/block.c index 8ff6ab1152..b2f71142a5 100644 --- a/block.c +++ b/block.c @@ -5676,7 +5676,7 @@ static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) g_free(ban); } -void bdrv_detach_aio_context(BlockDriverState *bs) +static void bdrv_detach_aio_context(BlockDriverState *bs) { BdrvAioNotifier *baf, *baf_tmp; BdrvChild *child; @@ -5708,8 +5708,8 @@ void bdrv_detach_aio_context(BlockDriverState *bs) bs->aio_context = NULL; } -void bdrv_attach_aio_context(BlockDriverState *bs, - AioContext *new_context) +static void bdrv_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) { BdrvAioNotifier *ban, *ban_tmp; BdrvChild *child; diff --git a/include/block/block_int.h b/include/block/block_int.h index b150c5f047..aa2c638b02 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -965,27 +965,6 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, void bdrv_add_before_write_notifier(BlockDriverState *bs, NotifierWithReturn *notifier); -/** - * bdrv_detach_aio_context: - * - * May be called from .bdrv_detach_aio_context() to detach children from the - * current 
#AioContext. This is only needed by block drivers that manage their - * own children. Both ->file and ->backing are automatically handled and - * block drivers should not call this function on them explicitly. - */ -void bdrv_detach_aio_context(BlockDriverState *bs); - -/** - * bdrv_attach_aio_context: - * - * May be called from .bdrv_attach_aio_context() to attach children to the new - * #AioContext. This is only needed by block drivers that manage their own - * children. Both ->file and ->backing are automatically handled and block - * drivers should not call this function on them explicitly. - */ -void bdrv_attach_aio_context(BlockDriverState *bs, - AioContext *new_context); - /** * bdrv_add_aio_context_notifier: * From 0d83708a1d2effc5d905cd1d61d0ef47d310ad88 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 6 May 2019 19:17:58 +0200 Subject: [PATCH 09/24] block: Move recursion to bdrv_set_aio_context() Instead of having two recursions, in bdrv_attach_aio_context() and in bdrv_detach_aio_context(), just having one recursion is enough. Said functions are only about a single node now. It is important that the recursion doesn't happen between detaching and attaching a context to the current node because the nested call will drain the node, and draining with a NULL context would segfault. 
Signed-off-by: Kevin Wolf --- block.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/block.c b/block.c index b2f71142a5..0ace673925 100644 --- a/block.c +++ b/block.c @@ -5679,7 +5679,6 @@ static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) static void bdrv_detach_aio_context(BlockDriverState *bs) { BdrvAioNotifier *baf, *baf_tmp; - BdrvChild *child; assert(!bs->walking_aio_notifiers); bs->walking_aio_notifiers = true; @@ -5698,9 +5697,6 @@ static void bdrv_detach_aio_context(BlockDriverState *bs) if (bs->drv && bs->drv->bdrv_detach_aio_context) { bs->drv->bdrv_detach_aio_context(bs); } - QLIST_FOREACH(child, &bs->children, next) { - bdrv_detach_aio_context(child->bs); - } if (bs->quiesce_counter) { aio_enable_external(bs->aio_context); @@ -5712,7 +5708,6 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, AioContext *new_context) { BdrvAioNotifier *ban, *ban_tmp; - BdrvChild *child; if (bs->quiesce_counter) { aio_disable_external(new_context); @@ -5720,9 +5715,6 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, bs->aio_context = new_context; - QLIST_FOREACH(child, &bs->children, next) { - bdrv_attach_aio_context(child->bs, new_context); - } if (bs->drv && bs->drv->bdrv_attach_aio_context) { bs->drv->bdrv_attach_aio_context(bs, new_context); } @@ -5744,11 +5736,18 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, * the same as the current context of bs). 
*/ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) { + BdrvChild *child; + if (bdrv_get_aio_context(bs) == new_context) { return; } bdrv_drained_begin(bs); + + QLIST_FOREACH(child, &bs->children, next) { + bdrv_set_aio_context(child->bs, new_context); + } + bdrv_detach_aio_context(bs); /* This function executes in the old AioContext so acquire the new one in From 53a7d041858a6787a43012fd04ccf0258389a95d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 6 May 2019 19:17:59 +0200 Subject: [PATCH 10/24] block: Propagate AioContext change to parents All block nodes and users in any connected component of the block graph must be in the same AioContext, so changing the AioContext of one node must not only change all of its children, but all of its parents (and in turn their children etc.) as well. Signed-off-by: Kevin Wolf --- block.c | 48 ++++++++++++++++++++++++++++++++++----- include/block/block.h | 2 ++ include/block/block_int.h | 1 + 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index 0ace673925..1e5230f98e 100644 --- a/block.c +++ b/block.c @@ -943,6 +943,13 @@ static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, return bdrv_can_set_aio_context(bs, ctx, ignore, errp); } +static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore) +{ + BlockDriverState *bs = child->opaque; + return bdrv_set_aio_context_ignore(bs, ctx, ignore); +} + /* * Returns the options and flags that a temporary snapshot should get, based on * the originally requested flags (the originally requested image will have @@ -1011,6 +1018,7 @@ const BdrvChildRole child_file = { .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; /* @@ -1038,6 +1046,7 @@ const BdrvChildRole child_format = { .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, 
.can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; static void bdrv_backing_attach(BdrvChild *c) @@ -1162,6 +1171,7 @@ const BdrvChildRole child_backing = { .inactivate = bdrv_child_cb_inactivate, .update_filename = bdrv_backing_update_filename, .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; static int bdrv_open_flags(BlockDriverState *bs, int flags) @@ -5731,10 +5741,10 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, bs->walking_aio_notifiers = false; } -/* The caller must own the AioContext lock for the old AioContext of bs, but it - * must not own the AioContext lock for new_context (unless new_context is - * the same as the current context of bs). */ -void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) +/* @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. */ +void bdrv_set_aio_context_ignore(BlockDriverState *bs, + AioContext *new_context, GSList **ignore) { BdrvChild *child; @@ -5745,7 +5755,20 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) bdrv_drained_begin(bs); QLIST_FOREACH(child, &bs->children, next) { - bdrv_set_aio_context(child->bs, new_context); + if (g_slist_find(*ignore, child)) { + continue; + } + *ignore = g_slist_prepend(*ignore, child); + bdrv_set_aio_context_ignore(child->bs, new_context, ignore); + } + QLIST_FOREACH(child, &bs->parents, next_parent) { + if (g_slist_find(*ignore, child)) { + continue; + } + if (child->role->set_aio_ctx) { + *ignore = g_slist_prepend(*ignore, child); + child->role->set_aio_ctx(child, new_context, ignore); + } } bdrv_detach_aio_context(bs); @@ -5759,6 +5782,16 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) aio_context_release(new_context); } +/* The caller must own the AioContext lock for the old AioContext of bs, but it + * must not own the AioContext lock for 
new_context (unless new_context is + * the same as the current context of bs). */ +void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) +{ + GSList *ignore_list = NULL; + bdrv_set_aio_context_ignore(bs, new_context, &ignore_list); + g_slist_free(ignore_list); +} + static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, GSList **ignore, Error **errp) { @@ -5831,7 +5864,10 @@ int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, return -EPERM; } - bdrv_set_aio_context(bs, ctx); + ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; + bdrv_set_aio_context_ignore(bs, ctx, &ignore); + g_slist_free(ignore); + return 0; } diff --git a/include/block/block.h b/include/block/block.h index fc0239a887..9b083e2bca 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -586,6 +586,8 @@ void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); * This function must be called with iothread lock held. */ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context); +void bdrv_set_aio_context_ignore(BlockDriverState *bs, + AioContext *new_context, GSList **ignore); int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, Error **errp); int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, diff --git a/include/block/block_int.h b/include/block/block_int.h index aa2c638b02..1eebc7c8f3 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -694,6 +694,7 @@ struct BdrvChildRole { bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore, Error **errp); + void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); }; extern const BdrvChildRole child_file; From 7e2f096a823f75ca3f1df1811c406b46254587fd Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 6 May 2019 19:18:00 +0200 Subject: [PATCH 11/24] test-block-iothread: Test AioContext propagation through the tree Signed-off-by: Kevin Wolf --- 
tests/test-block-iothread.c | 131 ++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index 036ed9a3b3..938831c9bd 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -27,6 +27,7 @@ #include "block/blockjob_int.h" #include "sysemu/block-backend.h" #include "qapi/error.h" +#include "qapi/qmp/qdict.h" #include "iothread.h" static int coroutine_fn bdrv_test_co_prwv(BlockDriverState *bs, @@ -459,6 +460,134 @@ static void test_attach_blockjob(void) blk_unref(blk); } +/* + * Test that changing the AioContext for one node in a tree (here through blk) + * changes all other nodes as well: + * + * blk + * | + * | bs_verify [blkverify] + * | / \ + * | / \ + * bs_a [bdrv_test] bs_b [bdrv_test] + * + */ +static void test_propagate_basic(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + BlockBackend *blk; + BlockDriverState *bs_a, *bs_b, *bs_verify; + QDict *options; + + /* Create bs_a and its BlockBackend */ + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs_a = bdrv_new_open_driver(&bdrv_test, "bs_a", BDRV_O_RDWR, &error_abort); + blk_insert_bs(blk, bs_a, &error_abort); + + /* Create bs_b */ + bs_b = bdrv_new_open_driver(&bdrv_test, "bs_b", BDRV_O_RDWR, &error_abort); + + /* Create blkverify filter that references both bs_a and bs_b */ + options = qdict_new(); + qdict_put_str(options, "driver", "blkverify"); + qdict_put_str(options, "test", "bs_a"); + qdict_put_str(options, "raw", "bs_b"); + + bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); + + /* Switch the AioContext */ + blk_set_aio_context(blk, ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs_a) == ctx); + g_assert(bdrv_get_aio_context(bs_verify) == ctx); + g_assert(bdrv_get_aio_context(bs_b) == ctx); + + /* Switch the AioContext back */ + ctx = qemu_get_aio_context(); + blk_set_aio_context(blk, 
ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs_a) == ctx); + g_assert(bdrv_get_aio_context(bs_verify) == ctx); + g_assert(bdrv_get_aio_context(bs_b) == ctx); + + bdrv_unref(bs_verify); + bdrv_unref(bs_b); + bdrv_unref(bs_a); + blk_unref(blk); +} + +/* + * Test that diamonds in the graph don't lead to endless recursion: + * + * blk + * | + * bs_verify [blkverify] + * / \ + * / \ + * bs_b [raw] bs_c[raw] + * \ / + * \ / + * bs_a [bdrv_test] + */ +static void test_propagate_diamond(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + BlockBackend *blk; + BlockDriverState *bs_a, *bs_b, *bs_c, *bs_verify; + QDict *options; + + /* Create bs_a */ + bs_a = bdrv_new_open_driver(&bdrv_test, "bs_a", BDRV_O_RDWR, &error_abort); + + /* Create bs_b and bc_c */ + options = qdict_new(); + qdict_put_str(options, "driver", "raw"); + qdict_put_str(options, "file", "bs_a"); + qdict_put_str(options, "node-name", "bs_b"); + bs_b = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); + + options = qdict_new(); + qdict_put_str(options, "driver", "raw"); + qdict_put_str(options, "file", "bs_a"); + qdict_put_str(options, "node-name", "bs_c"); + bs_c = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); + + /* Create blkverify filter that references both bs_b and bs_c */ + options = qdict_new(); + qdict_put_str(options, "driver", "blkverify"); + qdict_put_str(options, "test", "bs_b"); + qdict_put_str(options, "raw", "bs_c"); + + bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_insert_bs(blk, bs_verify, &error_abort); + + /* Switch the AioContext */ + blk_set_aio_context(blk, ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs_verify) == ctx); + g_assert(bdrv_get_aio_context(bs_a) == ctx); + g_assert(bdrv_get_aio_context(bs_b) == ctx); + g_assert(bdrv_get_aio_context(bs_c) == ctx); + 
+ /* Switch the AioContext back */ + ctx = qemu_get_aio_context(); + blk_set_aio_context(blk, ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs_verify) == ctx); + g_assert(bdrv_get_aio_context(bs_a) == ctx); + g_assert(bdrv_get_aio_context(bs_b) == ctx); + g_assert(bdrv_get_aio_context(bs_c) == ctx); + + blk_unref(blk); + bdrv_unref(bs_verify); + bdrv_unref(bs_c); + bdrv_unref(bs_b); + bdrv_unref(bs_a); +} + int main(int argc, char **argv) { int i; @@ -474,6 +603,8 @@ int main(int argc, char **argv) } g_test_add_func("/attach/blockjob", test_attach_blockjob); + g_test_add_func("/propagate/basic", test_propagate_basic); + g_test_add_func("/propagate/diamond", test_propagate_diamond); return g_test_run(); } From 38475269d4d9efcc5f1a5aae4834d165248ea13d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 6 May 2019 19:18:01 +0200 Subject: [PATCH 12/24] block: Implement .(can_)set_aio_ctx for BlockBackend bdrv_try_set_aio_context() currently fails if a BlockBackend is attached to a node because it doesn't implement the BdrvChildRole callbacks for AioContext management. We can allow changing the AioContext of monitor-owned BlockBackends as long as no device is attached to them. When setting the AioContext of the root node of a BlockBackend, we now need to pass blk->root as an ignored child because we don't want the root node to recursively call back into BlockBackend and execute blk_do_set_aio_context() a second time. 
Signed-off-by: Kevin Wolf --- block/block-backend.c | 45 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index f78e82a707..0e75fc8849 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -124,6 +124,11 @@ static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); +static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp); +static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore); + static char *blk_root_get_parent_desc(BdrvChild *child) { BlockBackend *blk = child->opaque; @@ -300,6 +305,9 @@ static const BdrvChildRole child_root = { .attach = blk_root_attach, .detach = blk_root_detach, + + .can_set_aio_ctx = blk_root_can_set_aio_ctx, + .set_aio_ctx = blk_root_set_aio_ctx, }; /* @@ -1852,7 +1860,8 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) return blk_get_aio_context(blk_acb->blk); } -void blk_set_aio_context(BlockBackend *blk, AioContext *new_context) +static void blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + bool update_root_node) { BlockDriverState *bs = blk_bs(blk); ThrottleGroupMember *tgm = &blk->public.throttle_group_member; @@ -1864,10 +1873,42 @@ void blk_set_aio_context(BlockBackend *blk, AioContext *new_context) throttle_group_attach_aio_context(tgm, new_context); bdrv_drained_end(bs); } - bdrv_set_aio_context(bs, new_context); + if (update_root_node) { + GSList *ignore = g_slist_prepend(NULL, blk->root); + bdrv_set_aio_context_ignore(bs, new_context, &ignore); + g_slist_free(ignore); + } } } +void blk_set_aio_context(BlockBackend *blk, AioContext *new_context) +{ + blk_do_set_aio_context(blk, new_context, true); +} + +static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error 
**errp) +{ + BlockBackend *blk = child->opaque; + + /* Only manually created BlockBackends that are not attached to anything + * can change their AioContext without updating their user. */ + if (!blk->name || blk->dev) { + /* TODO Add BB name/QOM path */ + error_setg(errp, "Cannot change iothread of active block backend"); + return false; + } + + return true; +} + +static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore) +{ + BlockBackend *blk = child->opaque; + blk_do_set_aio_context(blk, ctx, false); +} + void blk_add_aio_context_notifier(BlockBackend *blk, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque) From 980b0f943aad9240f276d7e48e2cf92ae4eb61ca Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 6 May 2019 19:18:02 +0200 Subject: [PATCH 13/24] block: Add blk_set_allow_aio_context_change() Some users (like block jobs) can tolerate an AioContext change for their BlockBackend. Add a function that tells the BlockBackend that it can allow changes. 
Signed-off-by: Kevin Wolf --- block/block-backend.c | 10 ++++++++++ include/sysemu/block-backend.h | 1 + 2 files changed, 11 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 0e75fc8849..4c0a8ef88d 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -71,6 +71,7 @@ struct BlockBackend { uint64_t shared_perm; bool disable_perm; + bool allow_aio_context_change; bool allow_write_beyond_eof; NotifierList remove_bs_notifiers, insert_bs_notifiers; @@ -1092,6 +1093,11 @@ void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow) blk->allow_write_beyond_eof = allow; } +void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow) +{ + blk->allow_aio_context_change = allow; +} + static int blk_check_byte_request(BlockBackend *blk, int64_t offset, size_t size) { @@ -1891,6 +1897,10 @@ static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, { BlockBackend *blk = child->opaque; + if (blk->allow_aio_context_change) { + return true; + } + /* Only manually created BlockBackends that are not attached to anything * can change their AioContext without updating their user. 
*/ if (!blk->name || blk->dev) { diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 5be6224226..938de34fe9 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -103,6 +103,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); +void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow); void blk_iostatus_enable(BlockBackend *blk); bool blk_iostatus_is_enabled(const BlockBackend *blk); BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); From 9ff7f0df8757c1c6ca582935c27db377a1eaeb22 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 6 May 2019 19:18:03 +0200 Subject: [PATCH 14/24] blockjob: Propagate AioContext change to all job nodes Block jobs require that all of the nodes the job is using are in the same AioContext. Therefore all BdrvChild objects of the job propagate .(can_)set_aio_context to all other job nodes, so that the switch is checked and performed consistently even if both nodes are in different subtrees. 
Signed-off-by: Kevin Wolf --- block/backup.c | 8 -------- block/mirror.c | 10 +--------- blockjob.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/block/backup.c b/block/backup.c index 910ed764aa..916817d8b1 100644 --- a/block/backup.c +++ b/block/backup.c @@ -300,13 +300,6 @@ static void backup_clean(Job *job) s->target = NULL; } -static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context) -{ - BackupBlockJob *s = container_of(job, BackupBlockJob, common); - - blk_set_aio_context(s->target, aio_context); -} - void backup_do_checkpoint(BlockJob *job, Error **errp) { BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); @@ -558,7 +551,6 @@ static const BlockJobDriver backup_job_driver = { .abort = backup_abort, .clean = backup_clean, }, - .attached_aio_context = backup_attached_aio_context, .drain = backup_drain, }; diff --git a/block/mirror.c b/block/mirror.c index ff15cfb197..ec4bd9f404 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1142,13 +1142,6 @@ static bool mirror_drained_poll(BlockJob *job) return !!s->in_flight; } -static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context) -{ - MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); - - blk_set_aio_context(s->target, new_context); -} - static void mirror_drain(BlockJob *job) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); @@ -1178,7 +1171,6 @@ static const BlockJobDriver mirror_job_driver = { .complete = mirror_complete, }, .drained_poll = mirror_drained_poll, - .attached_aio_context = mirror_attached_aio_context, .drain = mirror_drain, }; @@ -1196,7 +1188,6 @@ static const BlockJobDriver commit_active_job_driver = { .complete = mirror_complete, }, .drained_poll = mirror_drained_poll, - .attached_aio_context = mirror_attached_aio_context, .drain = mirror_drain, }; @@ -1612,6 +1603,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, * 
ensure that. */ blk_set_force_allow_inactivate(s->target); } + blk_set_allow_aio_context_change(s->target, true); s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; diff --git a/blockjob.c b/blockjob.c index 730101d282..24e6093a9c 100644 --- a/blockjob.c +++ b/blockjob.c @@ -183,11 +183,44 @@ static void child_job_drained_end(BdrvChild *c) job_resume(&job->job); } +static bool child_job_can_set_aio_ctx(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BlockJob *job = c->opaque; + GSList *l; + + for (l = job->nodes; l; l = l->next) { + BdrvChild *sibling = l->data; + if (!bdrv_child_can_set_aio_context(sibling, ctx, ignore, errp)) { + return false; + } + } + return true; +} + +static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx, + GSList **ignore) +{ + BlockJob *job = c->opaque; + GSList *l; + + for (l = job->nodes; l; l = l->next) { + BdrvChild *sibling = l->data; + if (g_slist_find(*ignore, sibling)) { + continue; + } + *ignore = g_slist_prepend(*ignore, sibling); + bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore); + } +} + static const BdrvChildRole child_job = { .get_parent_desc = child_job_get_parent_desc, .drained_begin = child_job_drained_begin, .drained_poll = child_job_drained_poll, .drained_end = child_job_drained_end, + .can_set_aio_ctx = child_job_can_set_aio_ctx, + .set_aio_ctx = child_job_set_aio_ctx, .stay_at_node = true, }; @@ -440,6 +473,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, blk_add_aio_context_notifier(blk, block_job_attached_aio_context, block_job_detach_aio_context, job); + blk_set_allow_aio_context_change(blk, true); /* Only set speed when necessary to avoid NotSupported error */ if (speed != 0) { From 657e1203203ccb48cb041ecb3f1799bec8349744 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 6 May 2019 19:18:04 +0200 Subject: [PATCH 15/24] blockjob: Remove AioContext notifiers The notifiers made sure that the job is quiesced and that the 
job->aio_context field is updated. The first part is unnecessary today since bdrv_set_aio_context_ignore() drains the block node, and this means draining the block job, too. The second part can be done in the .set_aio_ctx callback of the block job BdrvChildRole. The notifiers were problematic because they poll the AioContext while the graph is in an inconsistent state with some nodes already in the new context, but others still in the old context. So removing the notifiers not only simplifies the code, but actually makes the code safer. Signed-off-by: Kevin Wolf --- blockjob.c | 43 ++----------------------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) diff --git a/blockjob.c b/blockjob.c index 24e6093a9c..9ca942ba01 100644 --- a/blockjob.c +++ b/blockjob.c @@ -81,10 +81,6 @@ BlockJob *block_job_get(const char *id) } } -static void block_job_attached_aio_context(AioContext *new_context, - void *opaque); -static void block_job_detach_aio_context(void *opaque); - void block_job_free(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); @@ -92,28 +88,10 @@ void block_job_free(Job *job) bs->job = NULL; block_job_remove_all_bdrv(bjob); - blk_remove_aio_context_notifier(bjob->blk, - block_job_attached_aio_context, - block_job_detach_aio_context, bjob); blk_unref(bjob->blk); error_free(bjob->blocker); } -static void block_job_attached_aio_context(AioContext *new_context, - void *opaque) -{ - BlockJob *job = opaque; - const JobDriver *drv = job->job.driver; - BlockJobDriver *bjdrv = container_of(drv, BlockJobDriver, job_driver); - - job->job.aio_context = new_context; - if (bjdrv->attached_aio_context) { - bjdrv->attached_aio_context(job, new_context); - } - - job_resume(&job->job); -} - void block_job_drain(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); @@ -126,23 +104,6 @@ void block_job_drain(Job *job) } } -static void block_job_detach_aio_context(void *opaque) -{ - BlockJob *job = opaque; - - /* In case the job terminates 
during aio_poll()... */ - job_ref(&job->job); - - job_pause(&job->job); - - while (!job->job.paused && !job_is_completed(&job->job)) { - job_drain(&job->job); - } - - job->job.aio_context = NULL; - job_unref(&job->job); -} - static char *child_job_get_parent_desc(BdrvChild *c) { BlockJob *job = c->opaque; @@ -212,6 +173,8 @@ static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx, *ignore = g_slist_prepend(*ignore, sibling); bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore); } + + job->job.aio_context = ctx; } static const BdrvChildRole child_job = { @@ -471,8 +434,6 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); - blk_add_aio_context_notifier(blk, block_job_attached_aio_context, - block_job_detach_aio_context, job); blk_set_allow_aio_context_change(blk, true); /* Only set speed when necessary to avoid NotSupported error */ From 012056f48d2669685e8695561602f852b0d34ff0 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 6 May 2019 19:18:05 +0200 Subject: [PATCH 16/24] test-block-iothread: Test AioContext propagation for block jobs Signed-off-by: Kevin Wolf --- tests/test-block-iothread.c | 71 +++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index 938831c9bd..59f692892e 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -588,6 +588,76 @@ static void test_propagate_diamond(void) bdrv_unref(bs_a); } +static void test_propagate_mirror(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + AioContext *main_ctx = qemu_get_aio_context(); + BlockDriverState *src, *target; + BlockBackend *blk; + Job *job; + Error *local_err = NULL; + + /* Create src and target*/ + src = bdrv_new_open_driver(&bdrv_test, "src", BDRV_O_RDWR, &error_abort); + target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, + 
&error_abort); + + /* Start a mirror job */ + mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0, + MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, + BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, + false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, + &error_abort); + job = job_get("job0"); + + /* Change the AioContext of src */ + bdrv_try_set_aio_context(src, ctx, &error_abort); + g_assert(bdrv_get_aio_context(src) == ctx); + g_assert(bdrv_get_aio_context(target) == ctx); + g_assert(job->aio_context == ctx); + + /* Change the AioContext of target */ + aio_context_acquire(ctx); + bdrv_try_set_aio_context(target, main_ctx, &error_abort); + aio_context_release(ctx); + g_assert(bdrv_get_aio_context(src) == main_ctx); + g_assert(bdrv_get_aio_context(target) == main_ctx); + + /* With a BlockBackend on src, changing target must fail */ + blk = blk_new(0, BLK_PERM_ALL); + blk_insert_bs(blk, src, &error_abort); + + bdrv_try_set_aio_context(target, ctx, &local_err); + g_assert(local_err); + error_free(local_err); + + g_assert(blk_get_aio_context(blk) == main_ctx); + g_assert(bdrv_get_aio_context(src) == main_ctx); + g_assert(bdrv_get_aio_context(target) == main_ctx); + + /* ...unless we explicitly allow it */ + aio_context_acquire(ctx); + blk_set_allow_aio_context_change(blk, true); + bdrv_try_set_aio_context(target, ctx, &error_abort); + aio_context_release(ctx); + + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(src) == ctx); + g_assert(bdrv_get_aio_context(target) == ctx); + + job_cancel_sync_all(); + + aio_context_acquire(ctx); + blk_set_aio_context(blk, main_ctx); + bdrv_try_set_aio_context(target, main_ctx, &error_abort); + aio_context_release(ctx); + + blk_unref(blk); + bdrv_unref(src); + bdrv_unref(target); +} + int main(int argc, char **argv) { int i; @@ -605,6 +675,7 @@ int main(int argc, char **argv) g_test_add_func("/attach/blockjob", test_attach_blockjob); g_test_add_func("/propagate/basic", test_propagate_basic); 
g_test_add_func("/propagate/diamond", test_propagate_diamond); + g_test_add_func("/propagate/mirror", test_propagate_mirror); return g_test_run(); } From 9c3db310ff0b7473272ae8dce5e04e2f8a825390 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 15 May 2019 06:15:40 +0200 Subject: [PATCH 17/24] block/file-posix: Unaligned O_DIRECT block-status Currently, qemu crashes whenever someone queries the block status of an unaligned image tail of an O_DIRECT image: $ echo > foo $ qemu-img map --image-opts driver=file,filename=foo,cache.direct=on Offset Length Mapped to File qemu-img: block/io.c:2093: bdrv_co_block_status: Assertion `*pnum && QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset' failed. This is because bdrv_co_block_status() checks that the result returned by the driver's implementation is aligned to the request_alignment, but file-posix can fail to do so, which is actually mentioned in a comment there: "[...] possibly including a partial sector at EOF". Fix this by rounding up those partial sectors. There are two possible alternative fixes: (1) We could refuse to open unaligned image files with O_DIRECT altogether. That sounds reasonable until you realize that qcow2 does not necessarily fill up its metadata clusters, and that nobody runs qemu-img create with O_DIRECT. Therefore, unpreallocated qcow2 files usually have an unaligned image tail. (2) bdrv_co_block_status() could ignore unaligned tails. It actually throws away everything past the EOF already, so that sounds reasonable. Unfortunately, the block layer knows file lengths only with a granularity of BDRV_SECTOR_SIZE, so bdrv_co_block_status() usually would have to guess whether its file length information is inexact or whether the driver is broken. Fixing what raw_co_block_status() returns is the safest thing to do. There seems to be no other block driver that sets request_alignment and does not make sure that it always returns aligned values. 
Cc: qemu-stable@nongnu.org Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/file-posix.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c index e09e15bbf8..d018429672 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2488,6 +2488,8 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, off_t data = 0, hole = 0; int ret; + assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment)); + ret = fd_open(bs); if (ret < 0) { return ret; @@ -2513,6 +2515,20 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, /* On a data extent, compute bytes to the end of the extent, * possibly including a partial sector at EOF. */ *pnum = MIN(bytes, hole - offset); + + /* + * We are not allowed to return partial sectors, though, so + * round up if necessary. + */ + if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) { + int64_t file_length = raw_getlength(bs); + if (file_length > 0) { + /* Ignore errors, this is just a safeguard */ + assert(hole == file_length); + } + *pnum = ROUND_UP(*pnum, bs->bl.request_alignment); + } + ret = BDRV_BLOCK_DATA; } else { /* On a hole, compute bytes to the beginning of the next extent. */ From 2fab30c80b33cdc6157c7efe6207e54b6835cf92 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 15 May 2019 06:15:41 +0200 Subject: [PATCH 18/24] iotests: Test unaligned raw images with O_DIRECT We already have 221 for accesses through the page cache, but it is better to create a new file for O_DIRECT instead of integrating those test cases into 221. This way, we can make use of _supported_cache_modes (and _default_cache_mode) so the test is automatically skipped on filesystems that do not support O_DIRECT. As part of the split, add _supported_cache_modes to 221. With that, it no longer fails when run with -c none or -c directsync. 
Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/221 | 4 ++ tests/qemu-iotests/253 | 84 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/253.out | 14 +++++++ tests/qemu-iotests/group | 1 + 4 files changed, 103 insertions(+) create mode 100755 tests/qemu-iotests/253 create mode 100644 tests/qemu-iotests/253.out diff --git a/tests/qemu-iotests/221 b/tests/qemu-iotests/221 index 25dd47bcfe..0e9096fec7 100755 --- a/tests/qemu-iotests/221 +++ b/tests/qemu-iotests/221 @@ -1,6 +1,7 @@ #!/usr/bin/env bash # # Test qemu-img vs. unaligned images +# (See also 253, which is the O_DIRECT version) # # Copyright (C) 2018-2019 Red Hat, Inc. # @@ -37,6 +38,9 @@ _supported_fmt raw _supported_proto file _supported_os Linux +_default_cache_mode writeback +_supported_cache_modes writeback writethrough unsafe + echo echo "=== Check mapping of unaligned raw image ===" echo diff --git a/tests/qemu-iotests/253 b/tests/qemu-iotests/253 new file mode 100755 index 0000000000..d88d5afa45 --- /dev/null +++ b/tests/qemu-iotests/253 @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# +# Test qemu-img vs. unaligned images; O_DIRECT version +# (Originates from 221) +# +# Copyright (C) 2019 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +seq="$(basename $0)" +echo "QA output created by $seq" + +status=1 # failure is the default! 
+ +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt raw +_supported_proto file +_supported_os Linux + +_default_cache_mode none +_supported_cache_modes none directsync + +echo +echo "=== Check mapping of unaligned raw image ===" +echo + +# We do not know how large a physical sector is, but it is certainly +# going to be a factor of 1 MB +size=$((1 * 1024 * 1024 - 1)) + +# qemu-img create rounds size up to BDRV_SECTOR_SIZE +_make_test_img $size +$QEMU_IMG map --output=json --image-opts \ + "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ + | _filter_qemu_img_map + +# so we resize it and check again +truncate --size=$size "$TEST_IMG" +$QEMU_IMG map --output=json --image-opts \ + "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ + | _filter_qemu_img_map + +# qemu-io with O_DIRECT always writes whole physical sectors. Again, +# we do not know how large a physical sector is, so we just start +# writing from a 64 kB boundary, which should always be aligned. +offset=$((1 * 1024 * 1024 - 64 * 1024)) +$QEMU_IO -c "w $offset $((size - offset))" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json --image-opts \ + "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ + | _filter_qemu_img_map + +# Resize it and check again -- contrary to 221, we may not get partial +# sectors here, so there should be only two areas (one zero, one +# data). 
+truncate --size=$size "$TEST_IMG" +$QEMU_IMG map --output=json --image-opts \ + "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ + | _filter_qemu_img_map + +# success, all done +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/253.out b/tests/qemu-iotests/253.out new file mode 100644 index 0000000000..607c0baa0b --- /dev/null +++ b/tests/qemu-iotests/253.out @@ -0,0 +1,14 @@ +QA output created by 253 + +=== Check mapping of unaligned raw image === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048575 +[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +wrote 65535/65535 bytes at offset 983040 +63.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, +{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, +{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 00e474ab0a..52b7c16e15 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -250,3 +250,4 @@ 248 rw auto quick 249 rw auto quick 252 rw auto backing quick +253 rw auto quick From 987402c5c2c777cf3a1faf26f9a5a82f587bd3a4 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 15 May 2019 09:59:16 +0200 Subject: [PATCH 19/24] qemu-img.texi: Be specific about JSON object types Just writing that --output=json outputs JSON information does not really help; we should also make a note of what QAPI type the result object has. (The map subcommand does not emit a QAPI-typed object, but its section already describes the object structure well enough.) 
Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- qemu-img.texi | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/qemu-img.texi b/qemu-img.texi index 724f244ba1..39562317ee 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -230,6 +230,7 @@ overridden with a pattern byte specified by @var{pattern}. Perform a consistency check on the disk image @var{filename}. The command can output in the format @var{ofmt} which is either @code{human} or @code{json}. +The JSON output is an object of QAPI type @code{ImageCheck}. If @code{-r} is specified, qemu-img tries to repair any inconsistencies found during the check. @code{-r leaks} repairs only cluster leaks, whereas @@ -406,8 +407,7 @@ The size syntax is similar to dd(1)'s size syntax. Give information about the disk image @var{filename}. Use it in particular to know the size reserved on disk which can be different from the displayed size. If VM snapshots are stored in the disk image, -they are displayed too. The command can output in the format @var{ofmt} -which is either @code{human} or @code{json}. +they are displayed too. If a disk image has a backing file chain, information about each disk image in the chain can be recursively enumerated by using the option @code{--backing-chain}. @@ -424,6 +424,10 @@ To enumerate information about each disk image in the above chain, starting from qemu-img info --backing-chain snap2.qcow2 @end example +The command can output in the format @var{ofmt} which is either @code{human} or +@code{json}. The JSON output is an object of QAPI type @code{ImageInfo}; with +@code{--backing-chain}, it is an array of @code{ImageInfo} objects. + @item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [-U] @var{filename} Dump the metadata of image @var{filename} and its backing file chain. @@ -485,7 +489,8 @@ Calculate the file size required for a new image. 
This information can be used to size logical volumes or SAN LUNs appropriately for the image that will be placed in them. The values reported are guaranteed to be large enough to fit the image. The command can output in the format @var{ofmt} which is either -@code{human} or @code{json}. +@code{human} or @code{json}. The JSON output is an object of QAPI type +@code{BlockMeasureInfo}. If the size @var{N} is given then act as if creating a new empty image file using @command{qemu-img create}. If @var{filename} is given then act as if From 4db4390ef31648be28626dff51e87e3647485a46 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 15 May 2019 09:59:17 +0200 Subject: [PATCH 20/24] qemu-img.texi: Describe human-readable info output Ideally, it should be self-explanatory. However, keys like "disk size" arguably really are not self-explanatory. In any case, there is no harm in going into some more detail here. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- qemu-img.texi | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/qemu-img.texi b/qemu-img.texi index 39562317ee..e8bc0fd7a2 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -428,6 +428,47 @@ The command can output in the format @var{ofmt} which is either @code{human} or @code{json}. The JSON output is an object of QAPI type @code{ImageInfo}; with @code{--backing-chain}, it is an array of @code{ImageInfo} objects. +@code{--output=human} reports the following information (for every image in the +chain): +@table @var +@item image +The image file name + +@item file format +The image format + +@item virtual size +The size of the guest disk + +@item disk size +How much space the image file occupies on the host file system (may be shown as +0 if this information is unavailable, e.g. 
because there is no file system) + +@item cluster_size +Cluster size of the image format, if applicable + +@item encrypted +Whether the image is encrypted (only present if so) + +@item cleanly shut down +This is shown as @code{no} if the image is dirty and will have to be +auto-repaired the next time it is opened in qemu. + +@item backing file +The backing file name, if present + +@item backing file format +The format of the backing file, if the image enforces it + +@item Snapshot list +A list of all internal snapshots + +@item Format specific information +Further information whose structure depends on the image format. This section +is a textual representation of the respective @code{ImageInfoSpecific*} QAPI +object (e.g. @code{ImageInfoSpecificQCow2} for qcow2 images). +@end table + @item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [-U] @var{filename} Dump the metadata of image @var{filename} and its backing file chain. From 481e0eeef4fdf7e2ed42425e38d0a30ffd0e9b54 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 15 May 2019 22:15:00 +0200 Subject: [PATCH 21/24] block: Improve "Block node is read-only" message This message does not make any sense when it appears as the response to making an R/W node read-only. We should detect that case and emit a different message, then. 
Signed-off-by: Max Reitz Reviewed-by: Alberto Garcia Signed-off-by: Kevin Wolf --- block.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 1e5230f98e..cb11537029 100644 --- a/block.c +++ b/block.c @@ -1709,6 +1709,8 @@ static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, GSList *ignore_children, Error **errp); static void bdrv_child_abort_perm_update(BdrvChild *c); static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm); typedef struct BlockReopenQueueEntry { bool prepared; @@ -1795,7 +1797,20 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && !bdrv_is_writable_after_reopen(bs, q)) { - error_setg(errp, "Block node is read-only"); + if (!bdrv_is_writable_after_reopen(bs, NULL)) { + error_setg(errp, "Block node is read-only"); + } else { + uint64_t current_perms, current_shared; + bdrv_get_cumulative_perm(bs, &current_perms, &current_shared); + if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { + error_setg(errp, "Cannot make block node read-only, there is " + "a writer on it"); + } else { + error_setg(errp, "Cannot make block node read-only and create " + "a writer on it"); + } + } + return -EPERM; } From a93a42bd918fdbc3e64f6b88c87ccbd6cc09c971 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 15 May 2019 22:15:01 +0200 Subject: [PATCH 22/24] iotests.py: Let assert_qmp() accept an array Sometimes we cannot tell which error message qemu will emit, and we do not care. With this change, we can then just pass an array of all possible messages to assert_qmp() and it will choose the right one. 
Signed-off-by: Max Reitz Reviewed-by: Alberto Garcia Signed-off-by: Kevin Wolf --- tests/qemu-iotests/iotests.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index f811f69135..d96ba1f63c 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -596,9 +596,23 @@ def assert_qmp_absent(self, d, path): self.fail('path "%s" has value "%s"' % (path, str(result))) def assert_qmp(self, d, path, value): - '''Assert that the value for a specific path in a QMP dict matches''' + '''Assert that the value for a specific path in a QMP dict + matches. When given a list of values, assert that any of + them matches.''' + result = self.dictpath(d, path) - self.assertEqual(result, value, 'values not equal "%s" and "%s"' % (str(result), str(value))) + + # [] makes no sense as a list of valid values, so treat it as + # an actual single value. + if isinstance(value, list) and value != []: + for v in value: + if result == v: + return + self.fail('no match for "%s" in %s' % (str(result), str(value))) + else: + self.assertEqual(result, value, + 'values not equal "%s" and "%s"' + % (str(result), str(value))) def assert_no_active_block_jobs(self): result = self.vm.qmp('query-block-jobs') From 86a4f599a67b9b709109c7a7c8b7eb91d21c21fd Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 15 May 2019 22:15:02 +0200 Subject: [PATCH 23/24] iotests.py: Fix VM.run_job log() is in the current module, there is no need to prefix it. In fact, doing so may make VM.run_job() unusable in tests that never use iotests.log() themselves. 
Signed-off-by: Max Reitz Reviewed-by: Alberto Garcia Signed-off-by: Kevin Wolf --- tests/qemu-iotests/iotests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index d96ba1f63c..7bde380d96 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -552,7 +552,7 @@ def run_job(self, job, auto_finalize=True, auto_dismiss=False): elif status == 'null': return error else: - iotests.log(ev) + log(ev) def node_info(self, node_name): nodes = self.qmp('query-named-block-nodes') From c423a6af592cf36b4f149c54e2966dd0016b7e96 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 15 May 2019 22:15:03 +0200 Subject: [PATCH 24/24] iotests: Make 245 faster and more reliable Sometimes, 245 fails for me because some stream job has already finished while the test expects it to still be active. (With -c none, it fails basically every time.) The most reliable way to fix this is to simply set auto_finalize=false so the job will remain in the block graph as long as we need it. This allows us to drop the rate limiting, too, which makes the test faster. The only problem with this is that there is a single place that yields a different error message depending on whether the stream job is still copying data (so COR is enabled) or not (COR has been disabled, but the job still has the WRITE_UNCHANGED permission on the target node). We can easily address that by expecting either error message. Note that we do not need auto_finalize=false (or rate limiting) for the active commit job, because it never completes without an explicit block-job-complete anyway. 
Signed-off-by: Max Reitz Reviewed-by: Alberto Garcia Signed-off-by: Kevin Wolf --- tests/qemu-iotests/245 | 22 ++++++++++++++-------- tests/qemu-iotests/245.out | 12 ++++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 index a04c6235c1..349b94aace 100644 --- a/tests/qemu-iotests/245 +++ b/tests/qemu-iotests/245 @@ -862,7 +862,8 @@ class TestBlockdevReopen(iotests.QMPTestCase): # hd2 <- hd0 result = self.vm.qmp('block-stream', conv_keys = True, job_id = 'stream0', - device = 'hd0', base_node = 'hd2', speed = 512 * 1024) + device = 'hd0', base_node = 'hd2', + auto_finalize = False) self.assert_qmp(result, 'return', {}) # We can't remove hd2 while the stream job is ongoing @@ -873,7 +874,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): opts['backing'] = None self.reopen(opts, {}, "Cannot change 'backing' link from 'hd0' to 'hd1'") - self.wait_until_completed(drive = 'stream0') + self.vm.run_job('stream0', auto_finalize = False, auto_dismiss = True) # Reopen the chain during a block-stream job (from hd2 to hd1) def test_block_stream_4(self): @@ -886,12 +887,16 @@ class TestBlockdevReopen(iotests.QMPTestCase): # hd1 <- hd0 result = self.vm.qmp('block-stream', conv_keys = True, job_id = 'stream0', - device = 'hd1', speed = 512 * 1024) + device = 'hd1', auto_finalize = False) self.assert_qmp(result, 'return', {}) # We can't reopen with the original options because that would # make hd1 read-only and block-stream requires it to be read-write - self.reopen(opts, {}, "Can't set node 'hd1' to r/o with copy-on-read enabled") + # (Which error message appears depends on whether the stream job is + # already done with copying at this point.) 
+ self.reopen(opts, {}, + ["Can't set node 'hd1' to r/o with copy-on-read enabled", + "Cannot make block node read-only, there is a writer on it"]) # We can't remove hd2 while the stream job is ongoing opts['backing']['backing'] = None @@ -901,7 +906,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): opts['backing'] = None self.reopen(opts) - self.wait_until_completed(drive = 'stream0') + self.vm.run_job('stream0', auto_finalize = False, auto_dismiss = True) # Reopen the chain during a block-commit job (from hd0 to hd2) def test_block_commit_1(self): @@ -913,7 +918,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-commit', conv_keys = True, job_id = 'commit0', - device = 'hd0', speed = 1024 * 1024) + device = 'hd0') self.assert_qmp(result, 'return', {}) # We can't remove hd2 while the commit job is ongoing @@ -944,7 +949,8 @@ class TestBlockdevReopen(iotests.QMPTestCase): self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-commit', conv_keys = True, job_id = 'commit0', - device = 'hd0', top_node = 'hd1', speed = 1024 * 1024) + device = 'hd0', top_node = 'hd1', + auto_finalize = False) self.assert_qmp(result, 'return', {}) # We can't remove hd2 while the commit job is ongoing @@ -956,7 +962,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): self.reopen(opts, {}, "Cannot change backing link if 'hd0' has an implicit backing file") # hd2 <- hd0 - self.wait_until_completed(drive = 'commit0') + self.vm.run_job('commit0', auto_finalize = False, auto_dismiss = True) self.assert_qmp(self.get_node('hd0'), 'ro', False) self.assertEqual(self.get_node('hd1'), None) diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out index 71009c239f..a19de5214d 100644 --- a/tests/qemu-iotests/245.out +++ b/tests/qemu-iotests/245.out @@ -3,3 +3,15 @@ Ran 18 tests OK +{"execute": "job-finalize", "arguments": {"id": "commit0"}} +{"return": {}} +{"data": {"id": "commit0", "type": "commit"}, "event": 
"BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "commit0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"execute": "job-finalize", "arguments": {"id": "stream0"}} +{"return": {}} +{"data": {"id": "stream0", "type": "stream"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "stream0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"execute": "job-finalize", "arguments": {"id": "stream0"}} +{"return": {}} +{"data": {"id": "stream0", "type": "stream"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "stream0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}