mirror of https://gitee.com/openkylin/qemu.git
blkverify: Add block driver for verifying I/O
The blkverify block driver makes investigating image format data corruption much easier. A raw image initialized with the same contents as the test image (e.g. qcow2 file) must be provided. The raw image mirrors read/write operations and is used to verify that data read from the test image is correct. See docs/blkverify.txt for more information. Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
parent
a5e3d9ef4d
commit
d9d334176c
|
@ -14,7 +14,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
|
|||
|
||||
block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
|
||||
block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
|
||||
block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o
|
||||
block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
|
||||
block-nested-$(CONFIG_WIN32) += raw-win32.o
|
||||
block-nested-$(CONFIG_POSIX) += raw-posix.o
|
||||
block-nested-$(CONFIG_CURL) += curl.o
|
||||
|
|
|
@ -0,0 +1,382 @@
|
|||
/*
|
||||
* Block protocol for block driver correctness testing
|
||||
*
|
||||
* Copyright (C) 2010 IBM, Corp.
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "qemu_socket.h" /* for EINPROGRESS on Windows */
|
||||
#include "block_int.h"
|
||||
|
||||
typedef struct {
|
||||
BlockDriverState *test_file;
|
||||
} BDRVBlkverifyState;
|
||||
|
||||
typedef struct BlkverifyAIOCB BlkverifyAIOCB;
|
||||
struct BlkverifyAIOCB {
|
||||
BlockDriverAIOCB common;
|
||||
QEMUBH *bh;
|
||||
|
||||
/* Request metadata */
|
||||
bool is_write;
|
||||
int64_t sector_num;
|
||||
int nb_sectors;
|
||||
|
||||
int ret; /* first completed request's result */
|
||||
unsigned int done; /* completion counter */
|
||||
bool *finished; /* completion signal for cancel */
|
||||
|
||||
QEMUIOVector *qiov; /* user I/O vector */
|
||||
QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */
|
||||
void *buf; /* buffer for raw file I/O */
|
||||
|
||||
void (*verify)(BlkverifyAIOCB *acb);
|
||||
};
|
||||
|
||||
static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
|
||||
{
|
||||
BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb;
|
||||
bool finished = false;
|
||||
|
||||
/* Wait until request completes, invokes its callback, and frees itself */
|
||||
acb->finished = &finished;
|
||||
while (!finished) {
|
||||
qemu_aio_wait();
|
||||
}
|
||||
}
|
||||
|
||||
static AIOPool blkverify_aio_pool = {
|
||||
.aiocb_size = sizeof(BlkverifyAIOCB),
|
||||
.cancel = blkverify_aio_cancel,
|
||||
};
|
||||
|
||||
static void blkverify_err(BlkverifyAIOCB *acb, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
fprintf(stderr, "blkverify: %s sector_num=%ld nb_sectors=%d ",
|
||||
acb->is_write ? "write" : "read", acb->sector_num,
|
||||
acb->nb_sectors);
|
||||
vfprintf(stderr, fmt, ap);
|
||||
fprintf(stderr, "\n");
|
||||
va_end(ap);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */
|
||||
static int blkverify_open(BlockDriverState *bs, const char *filename, int flags)
|
||||
{
|
||||
BDRVBlkverifyState *s = bs->opaque;
|
||||
int ret;
|
||||
char *raw, *c;
|
||||
|
||||
/* Parse the blkverify: prefix */
|
||||
if (strncmp(filename, "blkverify:", strlen("blkverify:"))) {
|
||||
return -EINVAL;
|
||||
}
|
||||
filename += strlen("blkverify:");
|
||||
|
||||
/* Parse the raw image filename */
|
||||
c = strchr(filename, ':');
|
||||
if (c == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
raw = strdup(filename);
|
||||
raw[c - filename] = '\0';
|
||||
ret = bdrv_file_open(&bs->file, raw, flags);
|
||||
free(raw);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
filename = c + 1;
|
||||
|
||||
/* Open the test file */
|
||||
s->test_file = bdrv_new("");
|
||||
ret = bdrv_open(s->test_file, filename, flags, NULL);
|
||||
if (ret < 0) {
|
||||
bdrv_delete(s->test_file);
|
||||
s->test_file = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blkverify_close(BlockDriverState *bs)
|
||||
{
|
||||
BDRVBlkverifyState *s = bs->opaque;
|
||||
|
||||
bdrv_delete(s->test_file);
|
||||
s->test_file = NULL;
|
||||
}
|
||||
|
||||
static void blkverify_flush(BlockDriverState *bs)
|
||||
{
|
||||
BDRVBlkverifyState *s = bs->opaque;
|
||||
|
||||
/* Only flush test file, the raw file is not important */
|
||||
bdrv_flush(s->test_file);
|
||||
}
|
||||
|
||||
static int64_t blkverify_getlength(BlockDriverState *bs)
|
||||
{
|
||||
BDRVBlkverifyState *s = bs->opaque;
|
||||
|
||||
return bdrv_getlength(s->test_file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that I/O vector contents are identical
|
||||
*
|
||||
* @a: I/O vector
|
||||
* @b: I/O vector
|
||||
* @ret: Offset to first mismatching byte or -1 if match
|
||||
*/
|
||||
static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
|
||||
{
|
||||
int i;
|
||||
ssize_t offset = 0;
|
||||
|
||||
assert(a->niov == b->niov);
|
||||
for (i = 0; i < a->niov; i++) {
|
||||
size_t len = 0;
|
||||
uint8_t *p = (uint8_t *)a->iov[i].iov_base;
|
||||
uint8_t *q = (uint8_t *)b->iov[i].iov_base;
|
||||
|
||||
assert(a->iov[i].iov_len == b->iov[i].iov_len);
|
||||
while (len < a->iov[i].iov_len && *p++ == *q++) {
|
||||
len++;
|
||||
}
|
||||
|
||||
offset += len;
|
||||
|
||||
if (len != a->iov[i].iov_len) {
|
||||
return offset;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
int src_index;
|
||||
struct iovec *src_iov;
|
||||
void *dest_base;
|
||||
} IOVectorSortElem;
|
||||
|
||||
static int sortelem_cmp_src_base(const void *a, const void *b)
|
||||
{
|
||||
const IOVectorSortElem *elem_a = a;
|
||||
const IOVectorSortElem *elem_b = b;
|
||||
|
||||
/* Don't overflow */
|
||||
if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
|
||||
return -1;
|
||||
} else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int sortelem_cmp_src_index(const void *a, const void *b)
|
||||
{
|
||||
const IOVectorSortElem *elem_a = a;
|
||||
const IOVectorSortElem *elem_b = b;
|
||||
|
||||
return elem_a->src_index - elem_b->src_index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy contents of I/O vector
|
||||
*
|
||||
* The relative relationships of overlapping iovecs are preserved. This is
|
||||
* necessary to ensure identical semantics in the cloned I/O vector.
|
||||
*/
|
||||
static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
|
||||
void *buf)
|
||||
{
|
||||
IOVectorSortElem sortelems[src->niov];
|
||||
void *last_end;
|
||||
int i;
|
||||
|
||||
/* Sort by source iovecs by base address */
|
||||
for (i = 0; i < src->niov; i++) {
|
||||
sortelems[i].src_index = i;
|
||||
sortelems[i].src_iov = &src->iov[i];
|
||||
}
|
||||
qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
|
||||
|
||||
/* Allocate buffer space taking into account overlapping iovecs */
|
||||
last_end = NULL;
|
||||
for (i = 0; i < src->niov; i++) {
|
||||
struct iovec *cur = sortelems[i].src_iov;
|
||||
ptrdiff_t rewind = 0;
|
||||
|
||||
/* Detect overlap */
|
||||
if (last_end && last_end > cur->iov_base) {
|
||||
rewind = last_end - cur->iov_base;
|
||||
}
|
||||
|
||||
sortelems[i].dest_base = buf - rewind;
|
||||
buf += cur->iov_len - MIN(rewind, cur->iov_len);
|
||||
last_end = MAX(cur->iov_base + cur->iov_len, last_end);
|
||||
}
|
||||
|
||||
/* Sort by source iovec index and build destination iovec */
|
||||
qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
|
||||
for (i = 0; i < src->niov; i++) {
|
||||
qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
|
||||
}
|
||||
}
|
||||
|
||||
static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
|
||||
int64_t sector_num, QEMUIOVector *qiov,
|
||||
int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb,
|
||||
void *opaque)
|
||||
{
|
||||
BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aio_pool, bs, cb, opaque);
|
||||
|
||||
acb->bh = NULL;
|
||||
acb->is_write = is_write;
|
||||
acb->sector_num = sector_num;
|
||||
acb->nb_sectors = nb_sectors;
|
||||
acb->ret = -EINPROGRESS;
|
||||
acb->done = 0;
|
||||
acb->qiov = qiov;
|
||||
acb->buf = NULL;
|
||||
acb->verify = NULL;
|
||||
acb->finished = NULL;
|
||||
return acb;
|
||||
}
|
||||
|
||||
static void blkverify_aio_bh(void *opaque)
|
||||
{
|
||||
BlkverifyAIOCB *acb = opaque;
|
||||
|
||||
qemu_bh_delete(acb->bh);
|
||||
if (acb->buf) {
|
||||
qemu_iovec_destroy(&acb->raw_qiov);
|
||||
qemu_vfree(acb->buf);
|
||||
}
|
||||
acb->common.cb(acb->common.opaque, acb->ret);
|
||||
if (acb->finished) {
|
||||
*acb->finished = true;
|
||||
}
|
||||
qemu_aio_release(acb);
|
||||
}
|
||||
|
||||
static void blkverify_aio_cb(void *opaque, int ret)
|
||||
{
|
||||
BlkverifyAIOCB *acb = opaque;
|
||||
|
||||
switch (++acb->done) {
|
||||
case 1:
|
||||
acb->ret = ret;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
if (acb->ret != ret) {
|
||||
blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
|
||||
}
|
||||
|
||||
if (acb->verify) {
|
||||
acb->verify(acb);
|
||||
}
|
||||
|
||||
acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
|
||||
qemu_bh_schedule(acb->bh);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void blkverify_verify_readv(BlkverifyAIOCB *acb)
|
||||
{
|
||||
ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
|
||||
if (offset != -1) {
|
||||
blkverify_err(acb, "contents mismatch in sector %ld",
|
||||
acb->sector_num + (offset / BDRV_SECTOR_SIZE));
|
||||
}
|
||||
}
|
||||
|
||||
static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
BDRVBlkverifyState *s = bs->opaque;
|
||||
BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
|
||||
nb_sectors, cb, opaque);
|
||||
|
||||
acb->verify = blkverify_verify_readv;
|
||||
acb->buf = qemu_blockalign(bs->file, qiov->size);
|
||||
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
|
||||
blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
|
||||
|
||||
if (!bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
|
||||
blkverify_aio_cb, acb)) {
|
||||
blkverify_aio_cb(acb, -EIO);
|
||||
}
|
||||
if (!bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
|
||||
blkverify_aio_cb, acb)) {
|
||||
blkverify_aio_cb(acb, -EIO);
|
||||
}
|
||||
return &acb->common;
|
||||
}
|
||||
|
||||
static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
BDRVBlkverifyState *s = bs->opaque;
|
||||
BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
|
||||
nb_sectors, cb, opaque);
|
||||
|
||||
if (!bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
|
||||
blkverify_aio_cb, acb)) {
|
||||
blkverify_aio_cb(acb, -EIO);
|
||||
}
|
||||
if (!bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
|
||||
blkverify_aio_cb, acb)) {
|
||||
blkverify_aio_cb(acb, -EIO);
|
||||
}
|
||||
return &acb->common;
|
||||
}
|
||||
|
||||
static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
|
||||
BlockDriverCompletionFunc *cb,
|
||||
void *opaque)
|
||||
{
|
||||
BDRVBlkverifyState *s = bs->opaque;
|
||||
|
||||
/* Only flush test file, the raw file is not important */
|
||||
return bdrv_aio_flush(s->test_file, cb, opaque);
|
||||
}
|
||||
|
||||
static BlockDriver bdrv_blkverify = {
|
||||
.format_name = "blkverify",
|
||||
.protocol_name = "blkverify",
|
||||
|
||||
.instance_size = sizeof(BDRVBlkverifyState),
|
||||
|
||||
.bdrv_getlength = blkverify_getlength,
|
||||
|
||||
.bdrv_file_open = blkverify_open,
|
||||
.bdrv_close = blkverify_close,
|
||||
.bdrv_flush = blkverify_flush,
|
||||
|
||||
.bdrv_aio_readv = blkverify_aio_readv,
|
||||
.bdrv_aio_writev = blkverify_aio_writev,
|
||||
.bdrv_aio_flush = blkverify_aio_flush,
|
||||
};
|
||||
|
||||
static void bdrv_blkverify_init(void)
|
||||
{
|
||||
bdrv_register(&bdrv_blkverify);
|
||||
}
|
||||
|
||||
block_init(bdrv_blkverify_init);
|
|
@ -0,0 +1,69 @@
|
|||
= Block driver correctness testing with blkverify =
|
||||
|
||||
== Introduction ==
|
||||
|
||||
This document describes how to use the blkverify protocol to test that a block
|
||||
driver is operating correctly.
|
||||
|
||||
It is difficult to test and debug block drivers against real guests. Often
|
||||
processes inside the guest will crash because corrupt sectors were read as part
|
||||
of the executable. Other times obscure errors are raised by a program inside
|
||||
the guest. These issues are extremely hard to trace back to bugs in the block
|
||||
driver.
|
||||
|
||||
Blkverify solves this problem by catching data corruption inside QEMU the first
|
||||
time bad data is read and reporting the disk sector that is corrupted.
|
||||
|
||||
== How it works ==
|
||||
|
||||
The blkverify protocol has two child block devices, the "test" device and the
|
||||
"raw" device. Read/write operations are mirrored to both devices so their
|
||||
state should always be in sync.
|
||||
|
||||
The "raw" device is a raw image, a flat file, that has identical starting
|
||||
contents to the "test" image. The idea is that the "raw" device will handle
|
||||
read/write operations correctly and not corrupt data. It can be used as a
|
||||
reference for comparison against the "test" device.
|
||||
|
||||
After a mirrored read operation completes, blkverify will compare the data and
|
||||
raise an error if it is not identical. This makes it possible to catch the
|
||||
first instance where corrupt data is read.
|
||||
|
||||
== Example ==
|
||||
|
||||
Imagine raw.img has 0xcd repeated throughout its first sector:
|
||||
|
||||
$ ./qemu-io -c 'read -v 0 512' raw.img
|
||||
00000000: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
|
||||
00000010: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
|
||||
[...]
|
||||
000001e0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
|
||||
000001f0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
|
||||
read 512/512 bytes at offset 0
|
||||
512.000000 bytes, 1 ops; 0.0000 sec (97.656 MiB/sec and 200000.0000 ops/sec)
|
||||
|
||||
And test.img is corrupt, its first sector is zeroed when it shouldn't be:
|
||||
|
||||
$ ./qemu-io -c 'read -v 0 512' test.img
|
||||
00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
|
||||
00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
|
||||
[...]
|
||||
000001e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
|
||||
000001f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
|
||||
read 512/512 bytes at offset 0
|
||||
512.000000 bytes, 1 ops; 0.0000 sec (81.380 MiB/sec and 166666.6667 ops/sec)
|
||||
|
||||
This error is caught by blkverify:
|
||||
|
||||
$ ./qemu-io -c 'read 0 512' blkverify:a.img:b.img
|
||||
blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0
|
||||
|
||||
A more realistic scenario is verifying the installation of a guest OS:
|
||||
|
||||
$ ./qemu-img create raw.img 16G
|
||||
$ ./qemu-img create -f qcow2 test.qcow2 16G
|
||||
$ x86_64-softmmu/qemu-system-x86_64 -cdrom debian.iso \
|
||||
-drive file=blkverify:raw.img:test.qcow2
|
||||
|
||||
If the installation is aborted when blkverify detects corruption, use qemu-io
|
||||
to explore the contents of the disk image at the sector in question.
|
Loading…
Reference in New Issue