From d4d3e5a0d53a57282955e8a3ed7acc1ca90552d9 Mon Sep 17 00:00:00 2001
From: Fam Zheng
Date: Tue, 3 Jul 2018 10:37:56 +0800
Subject: [PATCH 1/3] block: Fix parameter checking in bdrv_co_copy_range_internal

src may be NULL if the BDRV_REQ_ZERO_WRITE flag is set; in that case
only check dst and dst->bs.

This bug was introduced when the request tracking code was moved in
from bdrv_co_copy_range, in 37aec7d75eb. In particular, this fixes the
possible segfault when initializing src_bs with a NULL src.

Signed-off-by: Fam Zheng
Message-id: 20180703023758.14422-2-famz@redhat.com
Reviewed-by: Jeff Cody
Signed-off-by: Jeff Cody
---
 block/io.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/block/io.c b/block/io.c
index 7035b78a20..b8845708d7 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2897,18 +2897,11 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
                                                     bool recurse_src)
 {
     BdrvTrackedRequest src_req, dst_req;
-    BlockDriverState *src_bs = src->bs;
-    BlockDriverState *dst_bs = dst->bs;
     int ret;
 
-    if (!src || !dst || !src->bs || !dst->bs) {
+    if (!dst || !dst->bs) {
         return -ENOMEDIUM;
     }
-    ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
-    if (ret) {
-        return ret;
-    }
-
     ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
     if (ret) {
         return ret;
@@ -2917,16 +2910,24 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
         return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
     }
 
+    if (!src || !src->bs) {
+        return -ENOMEDIUM;
+    }
+    ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
+    if (ret) {
+        return ret;
+    }
+
     if (!src->bs->drv->bdrv_co_copy_range_from
         || !dst->bs->drv->bdrv_co_copy_range_to
         || src->bs->encrypted || dst->bs->encrypted) {
         return -ENOTSUP;
     }
-    bdrv_inc_in_flight(src_bs);
-    bdrv_inc_in_flight(dst_bs);
-    tracked_request_begin(&src_req, src_bs, src_offset,
+    bdrv_inc_in_flight(src->bs);
+    bdrv_inc_in_flight(dst->bs);
+    tracked_request_begin(&src_req, src->bs, src_offset,
                           bytes, BDRV_TRACKED_READ);
-    tracked_request_begin(&dst_req, dst_bs, dst_offset,
+    tracked_request_begin(&dst_req, dst->bs, dst_offset,
                           bytes, BDRV_TRACKED_WRITE);
 
     wait_serialising_requests(&src_req);
@@ -2944,8 +2945,8 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
     }
     tracked_request_end(&src_req);
     tracked_request_end(&dst_req);
-    bdrv_dec_in_flight(src_bs);
-    bdrv_dec_in_flight(dst_bs);
+    bdrv_dec_in_flight(src->bs);
+    bdrv_dec_in_flight(dst->bs);
     return ret;
 }
 
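The crux of the fix above is ordering: dst is validated first, a
BDRV_REQ_ZERO_WRITE request returns before src is ever needed, and only then
is src required to be usable. Below is a minimal standalone C sketch of that
ordering; the types and the SKETCH_ZERO_WRITE constant are toy stand-ins
invented for illustration, not the real QEMU definitions.

#include <stddef.h>
#include <errno.h>

/* Toy stand-ins for the QEMU types; only the fields the sketch needs. */
typedef struct BlockDriverState { int dummy; } BlockDriverState;
typedef struct BdrvChild { BlockDriverState *bs; } BdrvChild;

#define SKETCH_ZERO_WRITE 0x1   /* stand-in for BDRV_REQ_ZERO_WRITE */

/* Buggy shape: src->bs is dereferenced in an initializer even though src may
 * legitimately be NULL for a zero-write request. */
static int copy_range_buggy(BdrvChild *src, BdrvChild *dst, int flags)
{
    BlockDriverState *src_bs = src->bs;    /* crashes when src == NULL */
    (void)src_bs;
    (void)flags;
    if (!src || !dst || !src->bs || !dst->bs) {
        return -ENOMEDIUM;
    }
    return 0;
}

/* Fixed shape: check dst, handle the zero-write case that never touches src,
 * then require a usable src for everything else. */
static int copy_range_fixed(BdrvChild *src, BdrvChild *dst, int flags)
{
    if (!dst || !dst->bs) {
        return -ENOMEDIUM;
    }
    if (flags & SKETCH_ZERO_WRITE) {
        return 0;                          /* src is never dereferenced */
    }
    if (!src || !src->bs) {
        return -ENOMEDIUM;
    }
    return 0;
}

int main(void)
{
    BlockDriverState bs;
    BdrvChild dst = { &bs };

    (void)copy_range_buggy;
    /* A zero-write request with no source child succeeds with the fixed
     * ordering; the buggy variant would crash before its own NULL check. */
    return copy_range_fixed(NULL, &dst, SKETCH_ZERO_WRITE) == 0 ? 0 : 1;
}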
From dee12de89380483656072f775f5ef99f4426f966 Mon Sep 17 00:00:00 2001
From: Fam Zheng
Date: Tue, 3 Jul 2018 10:37:57 +0800
Subject: [PATCH 2/3] block: Honour BDRV_REQ_NO_SERIALISING in copy range

These semantics are needed by drive-backup, so implement them before
using this API there.

Reviewed-by: Stefan Hajnoczi
Signed-off-by: Fam Zheng
Message-id: 20180703023758.14422-3-famz@redhat.com
Signed-off-by: Jeff Cody
---
 block/io.c            | 6 ++++--
 include/block/block.h | 5 +++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/block/io.c b/block/io.c
index b8845708d7..1a2272fad3 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2930,8 +2930,10 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
     tracked_request_begin(&dst_req, dst->bs, dst_offset,
                           bytes, BDRV_TRACKED_WRITE);
 
-    wait_serialising_requests(&src_req);
-    wait_serialising_requests(&dst_req);
+    if (!(flags & BDRV_REQ_NO_SERIALISING)) {
+        wait_serialising_requests(&src_req);
+        wait_serialising_requests(&dst_req);
+    }
 
     if (recurse_src) {
         ret = src->bs->drv->bdrv_co_copy_range_from(src->bs, src, src_offset,
diff --git a/include/block/block.h b/include/block/block.h
index 2ffc1c64c6..e5c7759a0c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -659,13 +659,14 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host);
  * @dst: Destination child to copy data to
  * @dst_offset: offset in @dst image to write data
  * @bytes: number of bytes to copy
- * @flags: request flags. Must be one of:
- *         0 - actually read data from src;
+ * @flags: request flags. Supported flags:
  *         BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
  *                               write on @dst as if bdrv_co_pwrite_zeroes is
  *                               called. Used to simplify caller code, or
  *                               during BlockDriver.bdrv_co_copy_range_from()
  *                               recursion.
+ *         BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
+ *                                   requests currently in flight.
  *
  * Returns: 0 if succeeded; negative error code if failed.
  **/
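Patch 3 below restructures backup_do_cow() around a simple pattern: try the
offloaded copy first, and on the first failure fall back to the bounce buffer
for the rest of the job, advancing each iteration by however many bytes were
actually copied. Reduced to a standalone sketch with made-up helper names
(the real helpers introduced by the patch are backup_cow_with_offload() and
backup_cow_with_bounce_buffer()):

#include <stdbool.h>
#include <stdint.h>
#include <errno.h>
#include <stdio.h>

/* Toy helpers standing in for the two copy paths; each returns the number of
 * bytes copied, or a negative errno value on failure. */
static int64_t copy_offloaded(int64_t start, int64_t end)
{
    (void)start; (void)end;
    return -ENOTSUP;                    /* pretend the driver cannot offload */
}

static int64_t copy_bounced(int64_t start, int64_t end, int64_t cluster)
{
    int64_t n = end - start;
    return n < cluster ? n : cluster;   /* one cluster per call */
}

/* Copy [start, end): try offloading once, then stick with the bounce buffer. */
static int copy_loop(int64_t start, int64_t end, int64_t cluster)
{
    bool use_copy_range = true;
    int64_t ret = 0;

    while (start < end) {
        if (use_copy_range) {
            ret = copy_offloaded(start, end);
            if (ret < 0) {
                use_copy_range = false; /* remember the failure for next time */
            }
        }
        if (!use_copy_range) {
            ret = copy_bounced(start, end, cluster);
        }
        if (ret < 0) {
            return (int)ret;            /* a real I/O error ends the loop */
        }
        start += ret;                   /* advance by what was actually copied */
    }
    return 0;
}

int main(void)
{
    /* 1 MiB range, 64 KiB clusters: the offload attempt fails once, the
     * bounce buffer copies everything, 16 iterations in total. */
    printf("ret = %d\n", copy_loop(0, 1 << 20, 64 * 1024));
    return 0;
}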
From 9ded4a0114968e98b41494fc035ba14f84cdf700 Mon Sep 17 00:00:00 2001
From: Fam Zheng
Date: Tue, 3 Jul 2018 10:37:58 +0800
Subject: [PATCH 3/3] backup: Use copy offloading

The implementation is similar to that of 'qemu-img convert'. At the
beginning of the job, an offloaded copy is attempted. If it fails,
further I/O goes through the existing bounce buffer code path.

As Kevin pointed out, both this and qemu-img convert could benefit from
a more local check: one request may fail only because, for example, its
offset is beyond EOF, while another may well be accepted by the
protocol layer. This will be implemented separately.

Reviewed-by: Stefan Hajnoczi
Signed-off-by: Fam Zheng
Message-id: 20180703023758.14422-4-famz@redhat.com
Signed-off-by: Jeff Cody
---
 block/backup.c     | 150 ++++++++++++++++++++++++++++++++-------
 block/trace-events |   1 +
 2 files changed, 110 insertions(+), 41 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index d18be40caf..81895ddbe2 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -45,6 +45,8 @@ typedef struct BackupBlockJob {
     QLIST_HEAD(, CowRequest) inflight_reqs;
 
     HBitmap *copy_bitmap;
+    bool use_copy_range;
+    int64_t copy_range_size;
 } BackupBlockJob;
 
 static const BlockJobDriver backup_job_driver;
@@ -86,19 +88,101 @@ static void cow_request_end(CowRequest *req)
     qemu_co_queue_restart_all(&req->wait_queue);
 }
 
+/* Copy range to target with a bounce buffer and return the bytes copied. If
+ * an error occurred, return a negative error number. */
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
+                                                      int64_t start,
+                                                      int64_t end,
+                                                      bool is_write_notifier,
+                                                      bool *error_is_read,
+                                                      void **bounce_buffer)
+{
+    int ret;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
+    nbytes = MIN(job->cluster_size, job->len - start);
+    if (!*bounce_buffer) {
+        *bounce_buffer = blk_blockalign(blk, job->cluster_size);
+    }
+    iov.iov_base = *bounce_buffer;
+    iov.iov_len = nbytes;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    ret = blk_co_preadv(blk, start, qiov.size, &qiov,
+                        is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_read_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = true;
+        }
+        goto fail;
+    }
+
+    if (qemu_iovec_is_zero(&qiov)) {
+        ret = blk_co_pwrite_zeroes(job->target, start,
+                                   qiov.size, BDRV_REQ_MAY_UNMAP);
+    } else {
+        ret = blk_co_pwritev(job->target, start,
+                             qiov.size, &qiov,
+                             job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+    }
+    if (ret < 0) {
+        trace_backup_do_cow_write_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = false;
+        }
+        goto fail;
+    }
+
+    return nbytes;
+fail:
+    hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
+    return ret;
+
+}
+
+/* Copy range to target and return the bytes copied. If an error occurred,
+ * return a negative error number. */
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
+                                                int64_t start,
+                                                int64_t end,
+                                                bool is_write_notifier)
+{
+    int ret;
+    int nr_clusters;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
+    nbytes = MIN(job->copy_range_size, end - start);
+    nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
+                  nr_clusters);
+    ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
+                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_copy_range_fail(job, start, ret);
+        hbitmap_set(job->copy_bitmap, start / job->cluster_size,
+                    nr_clusters);
+        return ret;
+    }
+
+    return nbytes;
+}
+
 static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                       int64_t offset, uint64_t bytes,
                                       bool *error_is_read,
                                       bool is_write_notifier)
 {
-    BlockBackend *blk = job->common.blk;
     CowRequest cow_request;
-    struct iovec iov;
-    QEMUIOVector bounce_qiov;
-    void *bounce_buffer = NULL;
     int ret = 0;
     int64_t start, end; /* bytes */
-    int n; /* bytes */
+    void *bounce_buffer = NULL;
 
     qemu_co_rwlock_rdlock(&job->flush_rwlock);
 
@@ -110,60 +194,38 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
     wait_for_overlapping_requests(job, start, end);
     cow_request_begin(&cow_request, job, start, end);
 
-    for (; start < end; start += job->cluster_size) {
+    while (start < end) {
         if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
             trace_backup_do_cow_skip(job, start);
+            start += job->cluster_size;
             continue; /* already copied */
         }
-        hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
 
         trace_backup_do_cow_process(job, start);
 
-        n = MIN(job->cluster_size, job->len - start);
-
-        if (!bounce_buffer) {
-            bounce_buffer = blk_blockalign(blk, job->cluster_size);
-        }
-        iov.iov_base = bounce_buffer;
-        iov.iov_len = n;
-        qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
-        ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
-                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
-        if (ret < 0) {
-            trace_backup_do_cow_read_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = true;
+        if (job->use_copy_range) {
+            ret = backup_cow_with_offload(job, start, end, is_write_notifier);
+            if (ret < 0) {
+                job->use_copy_range = false;
             }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
         }
-
-        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = blk_co_pwrite_zeroes(job->target, start,
-                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
-        } else {
-            ret = blk_co_pwritev(job->target, start,
-                                 bounce_qiov.size, &bounce_qiov,
-                                 job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+        if (!job->use_copy_range) {
+            ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
+                                                error_is_read, &bounce_buffer);
         }
         if (ret < 0) {
-            trace_backup_do_cow_write_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = false;
-            }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
+            break;
         }
 
         /* Publish progress, guest I/O counts as progress too. Note that the
          * offset field is an opaque progress value, it is not a disk offset.
          */
-        job->bytes_read += n;
-        job_progress_update(&job->common.job, n);
+        start += ret;
+        job->bytes_read += ret;
+        job_progress_update(&job->common.job, ret);
+        ret = 0;
     }
 
-out:
     if (bounce_buffer) {
         qemu_vfree(bounce_buffer);
     }
@@ -665,6 +727,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     } else {
         job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
     }
+    job->use_copy_range = true;
+    job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
+                                        blk_get_max_transfer(job->target));
+    job->copy_range_size = MAX(job->cluster_size,
+                               QEMU_ALIGN_UP(job->copy_range_size,
+                                             job->cluster_size));
 
     /* Required permissions are already taken with target's blk_new() */
     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
diff --git a/block/trace-events b/block/trace-events
index 2d59b53fd3..c35287b48a 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -42,6 +42,7 @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
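One detail worth spelling out is the copy_range_size computed in
backup_job_create() above: it is the per-call cap that
backup_cow_with_offload() applies via MIN(job->copy_range_size, end - start),
i.e. the smaller of the two non-zero max_transfer limits, rounded up to a
whole number of clusters and never smaller than one cluster. A small worked
example with hypothetical limits; the macros are re-stated locally for
illustration only, the real definitions live in QEMU's headers.

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

/* Local re-statements for illustration: MIN_NON_ZERO ignores a 0 ("no limit")
 * operand, QEMU_ALIGN_UP rounds up to a multiple of the alignment. */
#define MIN_NON_ZERO(a, b) \
    ((a) == 0 ? (b) : (((b) == 0 || (a) < (b)) ? (a) : (b)))
#define QEMU_ALIGN_UP(n, m) ((((n) + (m) - 1) / (m)) * (m))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
    uint64_t cluster_size = 64 * 1024;      /* backup job cluster size */
    uint64_t src_max = 2 * 1024 * 1024;     /* hypothetical source limit */
    uint64_t dst_max = 33 * 1024;           /* hypothetical target limit */

    /* Same two steps as in backup_job_create(). */
    uint64_t copy_range_size = MIN_NON_ZERO(src_max, dst_max);
    copy_range_size = MAX(cluster_size,
                          QEMU_ALIGN_UP(copy_range_size, cluster_size));

    /* 33 KiB is the tighter limit; rounded up to the cluster size: 65536. */
    printf("copy_range_size = %" PRIu64 "\n", copy_range_size);
    return 0;
}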