Merge remote-tracking branch 'stefanha/block' into staging

# By Wenchao Xia (5) and others
# Via Stefan Hajnoczi
* stefanha/block:
  coroutine: stop using AioContext in CoQueue
  coroutine: protect global pool with a mutex
  qemu-iotests: Try creating huge qcow2 image
  qcow2.py: Subcommand for changing header fields
  qemu-io: Fix 'map' output
  blockdev: Rename BlockdevAction -> TransactionAction
  block: make all steps in qmp_transaction() as callback
  block: package rollback code in qmp_transaction()
  block: package committing code in qmp_transaction()
  block: move input parsing code in qmp_transaction()
  block: package preparation code in qmp_transaction()

Message-id: 1369405947-14818-1-git-send-email-stefanha@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
Anthony Liguori 2013-05-24 13:47:25 -05:00
commit 4c5dad040b
12 changed files with 362 additions and 160 deletions

View File

@ -750,8 +750,8 @@ void do_commit(Monitor *mon, const QDict *qdict)
static void blockdev_do_action(int kind, void *data, Error **errp)
{
BlockdevAction action;
BlockdevActionList list;
TransactionAction action;
TransactionActionList list;
action.kind = kind;
action.data = data;
@ -773,95 +773,102 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
.has_mode = has_mode,
.mode = mode,
};
blockdev_do_action(BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC, &snapshot,
errp);
blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC,
&snapshot, errp);
}
/* New and old BlockDriverState structs for group snapshots */
typedef struct BlkTransactionStates {
BlockDriverState *old_bs;
BlockDriverState *new_bs;
QSIMPLEQ_ENTRY(BlkTransactionStates) entry;
} BlkTransactionStates;
typedef struct BlkTransactionStates BlkTransactionStates;
/*
 * Callback table describing how one kind of transaction action is handled.
 *
 * Only prepare() may fail. In a single transaction, only one of commit() or
 * abort() will be called; clean() will always be called if it is present.
 */
typedef struct BdrvActionOps {
/* Size of state struct, in bytes. qmp_transaction() allocates this many
 * zero-filled bytes for every action of this kind. */
size_t instance_size;
/* Prepare the work, must NOT be NULL (it is invoked without a NULL check).
 * Failure is reported through errp. */
void (*prepare)(BlkTransactionStates *common, Error **errp);
/* Commit the changes, must NOT be NULL (it is invoked without a NULL
 * check). It has no way to report an error. */
void (*commit)(BlkTransactionStates *common);
/* Abort the changes on fail, can be NULL. */
void (*abort)(BlkTransactionStates *common);
/* Clean up resource in the end, can be NULL. Runs just before the state
 * struct itself is freed. */
void (*clean)(BlkTransactionStates *common);
} BdrvActionOps;
/*
* 'Atomic' group snapshots. The snapshots are taken as a set, and if any fail
* then we do not pivot any of the devices in the group, and abandon the
* snapshots
* This structure must be the first member of every child type, so that a
* pointer to the child instance and a pointer to this parent instance share
* the same address. qmp_transaction() relies on this when it frees the state.
*/
void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
struct BlkTransactionStates {
/* The action this state belongs to; set by qmp_transaction() from the
 * caller's action list. */
TransactionAction *action;
/* Callback table selected from actions[] by the action's kind. */
const BdrvActionOps *ops;
/* Link in the per-transaction queue of states. */
QSIMPLEQ_ENTRY(BlkTransactionStates) entry;
};
/* external snapshot private data */
typedef struct ExternalSnapshotStates {
/* Generic part; the callbacks receive a pointer to this member and convert
 * it back to the containing struct with DO_UPCAST(). */
BlkTransactionStates common;
/* Device being snapshotted, looked up by name in prepare(). */
BlockDriverState *old_bs;
/* Newly created snapshot image; NULL until it has been set up, which is
 * what external_snapshot_abort() checks before deleting it. */
BlockDriverState *new_bs;
} ExternalSnapshotStates;
static void external_snapshot_prepare(BlkTransactionStates *common,
Error **errp)
{
int ret = 0;
BlockdevActionList *dev_entry = dev_list;
BlkTransactionStates *states, *next;
Error *local_err = NULL;
QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states;
QSIMPLEQ_INIT(&snap_bdrv_states);
/* drain all i/o before any snapshots */
bdrv_drain_all();
/* We don't do anything in this loop that commits us to the snapshot */
while (NULL != dev_entry) {
BlockdevAction *dev_info = NULL;
BlockDriver *proto_drv;
BlockDriver *drv;
int flags;
enum NewImageMode mode;
const char *new_image_file;
int flags, ret;
Error *local_err = NULL;
const char *device;
const char *new_image_file;
const char *format = "qcow2";
enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
ExternalSnapshotStates *states =
DO_UPCAST(ExternalSnapshotStates, common, common);
TransactionAction *action = common->action;
dev_info = dev_entry->value;
dev_entry = dev_entry->next;
/* get parameters */
g_assert(action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC);
states = g_malloc0(sizeof(BlkTransactionStates));
QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);
switch (dev_info->kind) {
case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
device = dev_info->blockdev_snapshot_sync->device;
if (!dev_info->blockdev_snapshot_sync->has_mode) {
dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
device = action->blockdev_snapshot_sync->device;
new_image_file = action->blockdev_snapshot_sync->snapshot_file;
if (action->blockdev_snapshot_sync->has_format) {
format = action->blockdev_snapshot_sync->format;
}
new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file;
if (dev_info->blockdev_snapshot_sync->has_format) {
format = dev_info->blockdev_snapshot_sync->format;
}
mode = dev_info->blockdev_snapshot_sync->mode;
break;
default:
abort();
if (action->blockdev_snapshot_sync->has_mode) {
mode = action->blockdev_snapshot_sync->mode;
}
/* start processing */
drv = bdrv_find_format(format);
if (!drv) {
error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
goto delete_and_fail;
return;
}
states->old_bs = bdrv_find(device);
if (!states->old_bs) {
error_set(errp, QERR_DEVICE_NOT_FOUND, device);
goto delete_and_fail;
return;
}
if (!bdrv_is_inserted(states->old_bs)) {
error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
goto delete_and_fail;
return;
}
if (bdrv_in_use(states->old_bs)) {
error_set(errp, QERR_DEVICE_IN_USE, device);
goto delete_and_fail;
return;
}
if (!bdrv_is_read_only(states->old_bs)) {
if (bdrv_flush(states->old_bs)) {
error_set(errp, QERR_IO_ERROR);
goto delete_and_fail;
return;
}
}
@ -870,7 +877,7 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
proto_drv = bdrv_find_protocol(new_image_file);
if (!proto_drv) {
error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
goto delete_and_fail;
return;
}
/* create new image w/backing file */
@ -881,7 +888,7 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
NULL, -1, flags, &local_err, false);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
goto delete_and_fail;
return;
}
}
@ -893,14 +900,14 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
flags | BDRV_O_NO_BACKING, drv);
if (ret != 0) {
error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
goto delete_and_fail;
}
}
static void external_snapshot_commit(BlkTransactionStates *common)
{
ExternalSnapshotStates *states =
DO_UPCAST(ExternalSnapshotStates, common, common);
/* Now we are going to do the actual pivot. Everything up to this point
* is reversible, but we are committed at this point */
QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
/* This removes our old bs from the bdrv_states, and adds the new bs */
bdrv_append(states->new_bs, states->old_bs);
/* We don't need (or want) to use the transactional
@ -910,6 +917,68 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
NULL);
}
/*
 * Abort callback for an external snapshot action: throw away the new image
 * state, if one was recorded, so the original device is left as it was.
 */
static void external_snapshot_abort(BlkTransactionStates *common)
{
    ExternalSnapshotStates *snap;

    snap = DO_UPCAST(ExternalSnapshotStates, common, common);

    /* The state struct is zero-allocated, so a NULL new_bs means there is
     * nothing to delete. */
    if (!snap->new_bs) {
        return;
    }

    bdrv_delete(snap->new_bs);
}
/*
 * Dispatch table indexed by TransactionAction kind. qmp_transaction() looks
 * up the entry for each requested action, after asserting that the kind is
 * within the bounds of this array.
 */
static const BdrvActionOps actions[] = {
[TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = {
.instance_size = sizeof(ExternalSnapshotStates),
.prepare = external_snapshot_prepare,
.commit = external_snapshot_commit,
.abort = external_snapshot_abort,
/* .clean is left unset (NULL): no clean-up callback for this kind */
},
};
/*
* 'Atomic' group snapshots. The snapshots are taken as a set, and if any fail
* then we do not pivot any of the devices in the group, and abandon the
* snapshots
*/
void qmp_transaction(TransactionActionList *dev_list, Error **errp)
{
TransactionActionList *dev_entry = dev_list;
BlkTransactionStates *states, *next;
Error *local_err = NULL;
QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states;
QSIMPLEQ_INIT(&snap_bdrv_states);
/* drain all i/o before any snapshots */
bdrv_drain_all();
/* We don't do anything in this loop that commits us to the snapshot */
while (NULL != dev_entry) {
TransactionAction *dev_info = NULL;
const BdrvActionOps *ops;
dev_info = dev_entry->value;
dev_entry = dev_entry->next;
assert(dev_info->kind < ARRAY_SIZE(actions));
ops = &actions[dev_info->kind];
states = g_malloc0(ops->instance_size);
states->ops = ops;
states->action = dev_info;
QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);
states->ops->prepare(states, &local_err);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
goto delete_and_fail;
}
}
QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
states->ops->commit(states);
}
/* success */
goto exit;
@ -919,12 +988,15 @@ delete_and_fail:
* the original bs for all images
*/
QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
if (states->new_bs) {
bdrv_delete(states->new_bs);
if (states->ops->abort) {
states->ops->abort(states);
}
}
exit:
QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) {
if (states->ops->clean) {
states->ops->clean(states);
}
g_free(states);
}
}

View File

@ -38,6 +38,9 @@ struct Coroutine {
void *entry_arg;
Coroutine *caller;
QSLIST_ENTRY(Coroutine) pool_next;
/* Coroutines that should be woken up when we yield or terminate */
QTAILQ_HEAD(, Coroutine) co_queue_wakeup;
QTAILQ_ENTRY(Coroutine) co_queue_next;
};
@ -45,5 +48,6 @@ Coroutine *qemu_coroutine_new(void);
void qemu_coroutine_delete(Coroutine *co);
CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
CoroutineAction action);
void coroutine_fn qemu_co_queue_run_restart(Coroutine *co);
#endif

View File

@ -1609,12 +1609,12 @@
'*mode': 'NewImageMode' } }
##
# @BlockdevAction
# @TransactionAction
#
# A discriminated record of operations that can be performed with
# @transaction.
##
{ 'union': 'BlockdevAction',
{ 'union': 'TransactionAction',
'data': {
'blockdev-snapshot-sync': 'BlockdevSnapshot'
} }
@ -1622,25 +1622,24 @@
##
# @transaction
#
# Atomically operate on a group of one or more block devices. If
# any operation fails, then the entire set of actions will be
# abandoned and the appropriate error returned. The only operation
# supported is currently blockdev-snapshot-sync.
# Executes a number of transactionable QMP commands atomically. If any
# operation fails, then the entire set of actions will be abandoned and the
# appropriate error returned.
#
# List of:
# @BlockdevAction: information needed for the device snapshot
# @TransactionAction: information needed for the respective operation
#
# Returns: nothing on success
# If @device is not a valid block device, DeviceNotFound
# Errors depend on the operations of the transaction
#
# Note: The transaction aborts on the first failure. Therefore, there will
# be only one device or snapshot file returned in an error condition, and
# Note: The transaction aborts on the first failure. Therefore, there will be
# information on only one failed operation returned in an error condition, and
# subsequent actions will not have been attempted.
#
# Since 1.1
##
{ 'command': 'transaction',
'data': { 'actions': [ 'BlockdevAction' ] } }
'data': { 'actions': [ 'TransactionAction' ] } }
##
# @blockdev-snapshot-sync

View File

@ -26,39 +26,11 @@
#include "block/coroutine.h"
#include "block/coroutine_int.h"
#include "qemu/queue.h"
#include "block/aio.h"
#include "trace.h"
/* Coroutines are awoken from a BH to allow the current coroutine to complete
* its flow of execution. The BH may run after the CoQueue has been destroyed,
* so keep BH data in a separate heap-allocated struct.
*/
typedef struct {
QEMUBH *bh;
QTAILQ_HEAD(, Coroutine) entries;
} CoQueueNextData;
static void qemu_co_queue_next_bh(void *opaque)
{
CoQueueNextData *data = opaque;
Coroutine *next;
trace_qemu_co_queue_next_bh();
while ((next = QTAILQ_FIRST(&data->entries))) {
QTAILQ_REMOVE(&data->entries, next, co_queue_next);
qemu_coroutine_enter(next, NULL);
}
qemu_bh_delete(data->bh);
g_slice_free(CoQueueNextData, data);
}
void qemu_co_queue_init(CoQueue *queue)
{
QTAILQ_INIT(&queue->entries);
/* This will be exposed to callers once there are multiple AioContexts */
queue->ctx = qemu_get_aio_context();
}
void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
@ -77,23 +49,37 @@ void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue)
assert(qemu_in_coroutine());
}
static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
/**
* qemu_co_queue_run_restart:
*
* Enter each coroutine that was previously marked for restart by
* qemu_co_queue_next() or qemu_co_queue_restart_all(). This function is
* invoked by the core coroutine code when the current coroutine yields or
* terminates.
*/
void qemu_co_queue_run_restart(Coroutine *co)
{
Coroutine *next;
CoQueueNextData *data;
trace_qemu_co_queue_run_restart(co);
while ((next = QTAILQ_FIRST(&co->co_queue_wakeup))) {
QTAILQ_REMOVE(&co->co_queue_wakeup, next, co_queue_next);
qemu_coroutine_enter(next, NULL);
}
}
static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
{
Coroutine *self = qemu_coroutine_self();
Coroutine *next;
if (QTAILQ_EMPTY(&queue->entries)) {
return false;
}
data = g_slice_new(CoQueueNextData);
data->bh = aio_bh_new(queue->ctx, qemu_co_queue_next_bh, data);
QTAILQ_INIT(&data->entries);
qemu_bh_schedule(data->bh);
while ((next = QTAILQ_FIRST(&queue->entries)) != NULL) {
QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
QTAILQ_INSERT_TAIL(&data->entries, next, co_queue_next);
QTAILQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
trace_qemu_co_queue_next(next);
if (single) {
break;

View File

@ -14,6 +14,7 @@
#include "trace.h"
#include "qemu-common.h"
#include "qemu/thread.h"
#include "block/coroutine.h"
#include "block/coroutine_int.h"
@ -23,6 +24,7 @@ enum {
};
/** Free list to speed up creation */
static QemuMutex pool_lock;
static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool);
static unsigned int pool_size;
@ -30,31 +32,44 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
{
Coroutine *co;
qemu_mutex_lock(&pool_lock);
co = QSLIST_FIRST(&pool);
if (co) {
QSLIST_REMOVE_HEAD(&pool, pool_next);
pool_size--;
} else {
}
qemu_mutex_unlock(&pool_lock);
if (!co) {
co = qemu_coroutine_new();
}
co->entry = entry;
QTAILQ_INIT(&co->co_queue_wakeup);
return co;
}
/*
 * Dispose of a coroutine. While the free-list holds fewer than
 * POOL_MAX_SIZE entries the coroutine is pushed onto it for reuse;
 * otherwise it is really deleted. The pool is only touched with
 * pool_lock held, and the lock is dropped before the actual delete.
 */
static void coroutine_delete(Coroutine *co)
{
    int recycled = 0;

    qemu_mutex_lock(&pool_lock);
    if (pool_size < POOL_MAX_SIZE) {
        QSLIST_INSERT_HEAD(&pool, co, pool_next);
        co->caller = NULL;
        pool_size++;
        recycled = 1;
    }
    qemu_mutex_unlock(&pool_lock);

    if (!recycled) {
        qemu_coroutine_delete(co);
    }
}
static void __attribute__((destructor)) coroutine_cleanup(void)
/*
 * Initialise the mutex guarding the coroutine free-list. Marked as a
 * constructor so that it runs automatically at program start-up, before any
 * code here takes pool_lock.
 */
static void __attribute__((constructor)) coroutine_pool_init(void)
{
qemu_mutex_init(&pool_lock);
}
static void __attribute__((destructor)) coroutine_pool_cleanup(void)
{
Coroutine *co;
Coroutine *tmp;
@ -63,6 +78,8 @@ static void __attribute__((destructor)) coroutine_cleanup(void)
QSLIST_REMOVE_HEAD(&pool, pool_next);
qemu_coroutine_delete(co);
}
qemu_mutex_destroy(&pool_lock);
}
static void coroutine_swap(Coroutine *from, Coroutine *to)
@ -71,6 +88,8 @@ static void coroutine_swap(Coroutine *from, Coroutine *to)
ret = qemu_coroutine_switch(from, to, COROUTINE_YIELD);
qemu_co_queue_run_restart(to);
switch (ret) {
case COROUTINE_YIELD:
return;

View File

@ -1635,12 +1635,43 @@ static const cmdinfo_t alloc_cmd = {
.oneline = "checks if a sector is present in the file",
};
/*
 * Report the allocation status of the sectors starting at @sector_num and
 * how many contiguous sectors share that status.
 *
 * bdrv_is_allocated() takes an int sector count, so the range is probed in
 * chunks of at most INT_MAX sectors; consecutive chunks that report the
 * same status as the first one are merged into a single extent.
 *
 * @sector_num: first sector to examine
 * @nb_sectors: upper bound on the number of sectors to examine
 * @pnum:       receives the length of the extent, in sectors; left
 *              untouched when the first query fails
 *
 * Returns the status of the first chunk as reported by bdrv_is_allocated(),
 * or its negative error code if that first query fails. A failure or a
 * different status on a later chunk simply ends the extent.
 */
static int map_is_allocated(int64_t sector_num, int64_t nb_sectors, int64_t *pnum)
{
    int num, num_checked;
    int ret, firstret;

    num_checked = MIN(nb_sectors, INT_MAX);
    ret = bdrv_is_allocated(bs, sector_num, num_checked, &num);
    if (ret < 0) {
        return ret;
    }

    firstret = ret;
    *pnum = num;

    for (;;) {
        /* A chunk that made no progress would be queried again with the
         * very same arguments forever, so stop extending the extent. */
        if (num <= 0) {
            break;
        }

        sector_num += num;
        nb_sectors -= num;
        if (nb_sectors <= 0) {
            /* Whole range consumed; do not issue a zero-length query. */
            break;
        }

        num_checked = MIN(nb_sectors, INT_MAX);
        ret = bdrv_is_allocated(bs, sector_num, num_checked, &num);
        if (ret != firstret) {
            break;
        }
        *pnum += num;
    }

    return firstret;
}
static int map_f(int argc, char **argv)
{
int64_t offset;
int64_t nb_sectors;
char s1[64];
int num, num_checked;
int64_t num;
int ret;
const char *retstr;
@ -1648,12 +1679,17 @@ static int map_f(int argc, char **argv)
nb_sectors = bs->total_sectors;
do {
num_checked = MIN(nb_sectors, INT_MAX);
ret = bdrv_is_allocated(bs, offset, num_checked, &num);
ret = map_is_allocated(offset, nb_sectors, &num);
if (ret < 0) {
error_report("Failed to get allocation status: %s", strerror(-ret));
return 0;
}
retstr = ret ? " allocated" : "not allocated";
cvtstr(offset << 9ULL, s1, sizeof(s1));
printf("[% 24" PRId64 "] % 8d/% 8d sectors %s at offset %s (%d)\n",
offset << 9ULL, num, num_checked, retstr, s1, ret);
printf("[% 24" PRId64 "] % 8" PRId64 "/% 8" PRId64 " sectors %s "
"at offset %s (%d)\n",
offset << 9ULL, num, nb_sectors, retstr, s1, ret);
offset += num;
nb_sectors -= num;

58
tests/qemu-iotests/054 Executable file
View File

@ -0,0 +1,58 @@
#!/bin/bash
#
# Test huge qcow2 images
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# creator
owner=kwolf@redhat.com

seq=$(basename "$0")
echo "QA output created by $seq"

here=$(pwd)
tmp=/tmp/$$
status=1	# failure is the default!

# Remove the test image; installed below as the exit/signal handler.
_cleanup()
{
    _cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15

# get standard environment, filters and checks
. ./common.rc
. ./common.filter

_supported_fmt qcow2
_supported_proto generic
_supported_os Linux

# Case 1: ask for a 1 EB image directly (1024*1024 T = 2^60 bytes).
echo
echo "creating too large image (1 EB)"
_make_test_img $((1024*1024))T

# Case 2: create a valid 4G image, then overwrite the size field in its
# header with 2^60 and run the image check on the result.
echo
echo "creating too large image (1 EB) using qcow2.py"
_make_test_img 4G
./qcow2.py "$TEST_IMG" set-header size $((1024 ** 6))
_check_test_img

# success, all done
echo "*** done"
rm -f "$seq.full"
status=0

View File

@ -0,0 +1,10 @@
QA output created by 054
creating too large image (1 EB)
qemu-img: The image size is too large for file format 'qcow2'
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1152921504606846976
creating too large image (1 EB) using qcow2.py
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296
qemu-img: Could not open 'TEST_DIR/t.qcow2': File too large
*** done

View File

@ -167,7 +167,7 @@ _cleanup_test_img()
_check_test_img()
{
$QEMU_IMG check "$@" -f $IMGFMT $TEST_IMG 2>&1 | \
$QEMU_IMG check "$@" -f $IMGFMT $TEST_IMG 2>&1 | _filter_testdir | \
sed -e '/allocated.*fragmented.*compressed clusters/d' \
-e 's/qemu-img: This image format does not support checks/No errors were found on the image./' \
-e '/Image end offset: [0-9]\+/d'

View File

@ -60,3 +60,4 @@
#051 rw auto
052 rw auto backing
053 rw auto
054 rw auto

View File

@ -149,6 +149,22 @@ def cmd_dump_header(fd):
h.dump()
h.dump_extensions()
def cmd_set_header(fd, name, value):
    """Set a single field of the image header to a new numeric value.

    fd    -- open image file, passed to QcowHeader() and to h.update()
    name  -- header field to change; must match the third element of one
             of the entries in QcowHeader.fields
    value -- new value as a string; parsed with int(value, 0), so the
             usual prefixes (e.g. 0x for hex) are accepted

    Prints a message and exits with status 1 if value is not a number or
    name is not a known field.
    """
    try:
        value = int(value, 0)
    except ValueError:
        # Only a malformed number is a user error; anything else (Ctrl-C,
        # a programming mistake) must not be reported as "not a number".
        print("'%s' is not a valid number" % value)
        sys.exit(1)

    fields = [field[2] for field in QcowHeader.fields]
    if name not in fields:
        print("'%s' is not a known header field" % name)
        sys.exit(1)

    h = QcowHeader(fd)
    h.__dict__[name] = value
    # NOTE(review): update() presumably writes the modified header back to
    # the image -- confirm against QcowHeader.
    h.update(fd)
def cmd_add_header_ext(fd, magic, data):
try:
magic = int(magic, 0)
@ -205,6 +221,7 @@ def cmd_set_feature_bit(fd, group, bit):
cmds = [
[ 'dump-header', cmd_dump_header, 0, 'Dump image header and header extensions' ],
[ 'set-header', cmd_set_header, 2, 'Set a field in the header'],
[ 'add-header-ext', cmd_add_header_ext, 2, 'Add a header extension' ],
[ 'del-header-ext', cmd_del_header_ext, 1, 'Delete a header extension' ],
[ 'set-feature-bit', cmd_set_feature_bit, 2, 'Set a feature bit'],

View File

@ -825,7 +825,7 @@ qemu_coroutine_yield(void *from, void *to) "from %p to %p"
qemu_coroutine_terminate(void *co) "self %p"
# qemu-coroutine-lock.c
qemu_co_queue_next_bh(void) ""
qemu_co_queue_run_restart(void *co) "co %p"
qemu_co_queue_next(void *nxt) "next %p"
qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"