Merge remote-tracking branch 'stefanha/block' into staging

# By MORITA Kazutaka (5) and others
# Via Stefan Hajnoczi
* stefanha/block:
  block: for HMP commit() operations on 'all', skip non-COW drives
  sheepdog: add support for connecting to unix domain socket
  sheepdog: use inet_connect to simplify connect code
  sheepdog: accept URIs
  move socket_set_nodelay to osdep.c
  slirp/tcp_subr.c: fix coding style in tcp_connect
  dataplane: remove EventPoll in favor of AioContext
  virtio-blk: fix unplug + virsh reboot
  ide/macio: Fix macio DMA initialisation.
This commit is contained in:
Anthony Liguori 2013-03-04 08:22:48 -06:00
commit 71df81afc6
15 changed files with 330 additions and 390 deletions

View File

@ -1640,9 +1640,11 @@ int bdrv_commit_all(void)
BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, list) {
int ret = bdrv_commit(bs);
if (ret < 0) {
return ret;
if (bs->drv && bs->backing_hd) {
int ret = bdrv_commit(bs);
if (ret < 0) {
return ret;
}
}
}
return 0;

View File

@ -13,6 +13,7 @@
*/
#include "qemu-common.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "block/block_int.h"
@ -21,7 +22,7 @@
#define SD_PROTO_VER 0x01
#define SD_DEFAULT_ADDR "localhost"
#define SD_DEFAULT_PORT "7000"
#define SD_DEFAULT_PORT 7000
#define SD_OP_CREATE_AND_WRITE_OBJ 0x01
#define SD_OP_READ_OBJ 0x02
@ -297,8 +298,8 @@ typedef struct BDRVSheepdogState {
bool is_snapshot;
uint32_t cache_flags;
char *addr;
char *port;
char *host_spec;
bool is_unix;
int fd;
CoMutex lock;
@ -446,56 +447,29 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
return acb;
}
static int connect_to_sdog(const char *addr, const char *port)
static int connect_to_sdog(BDRVSheepdogState *s)
{
char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
int fd, ret;
struct addrinfo hints, *res, *res0;
int fd;
Error *err = NULL;
if (!addr) {
addr = SD_DEFAULT_ADDR;
port = SD_DEFAULT_PORT;
}
if (s->is_unix) {
fd = unix_connect(s->host_spec, &err);
} else {
fd = inet_connect(s->host_spec, &err);
memset(&hints, 0, sizeof(hints));
hints.ai_socktype = SOCK_STREAM;
ret = getaddrinfo(addr, port, &hints, &res0);
if (ret) {
error_report("unable to get address info %s, %s",
addr, strerror(errno));
return -errno;
}
for (res = res0; res; res = res->ai_next) {
ret = getnameinfo(res->ai_addr, res->ai_addrlen, hbuf, sizeof(hbuf),
sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV);
if (ret) {
continue;
}
fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
if (fd < 0) {
continue;
}
reconnect:
ret = connect(fd, res->ai_addr, res->ai_addrlen);
if (ret < 0) {
if (errno == EINTR) {
goto reconnect;
if (err == NULL) {
int ret = socket_set_nodelay(fd);
if (ret < 0) {
error_report("%s", strerror(errno));
}
close(fd);
break;
}
dprintf("connected to %s:%s\n", addr, port);
goto success;
}
fd = -errno;
error_report("failed connect to %s:%s", addr, port);
success:
freeaddrinfo(res0);
if (err != NULL) {
qerror_report_err(err);
error_free(err);
}
return fd;
}
@ -787,15 +761,6 @@ static int aio_flush_request(void *opaque)
!QLIST_EMPTY(&s->pending_aio_head);
}
static int set_nodelay(int fd)
{
int ret, opt;
opt = 1;
ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt));
return ret;
}
/*
* Return a socket descriptor to read/write objects.
*
@ -804,29 +769,88 @@ static int set_nodelay(int fd)
*/
static int get_sheep_fd(BDRVSheepdogState *s)
{
int ret, fd;
int fd;
fd = connect_to_sdog(s->addr, s->port);
fd = connect_to_sdog(s);
if (fd < 0) {
error_report("%s", strerror(errno));
return fd;
}
socket_set_nonblock(fd);
ret = set_nodelay(fd);
if (ret) {
error_report("%s", strerror(errno));
closesocket(fd);
return -errno;
}
qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s);
return fd;
}
/*
 * Parse a sheepdog URI and fill in the connection target and VDI identity.
 *
 * Accepted forms:
 *   sheepdog[+tcp]://[host:port]/vdiname[#snapid|#tag]
 *   sheepdog+unix:///vdiname?socket=path[#snapid|#tag]
 *
 * @s:        receives is_unix and a newly allocated host_spec (either
 *            "host:port" or the unix socket path); release it with g_free()
 * @filename: the URI string to parse
 * @vdi:      output buffer of SD_MAX_VDI_LEN bytes for the VDI name
 * @snapid:   receives the numeric snapshot id, 0 when the fragment is a tag,
 *            or CURRENT_VDI_ID when the URI has no fragment
 * @tag:      output buffer of SD_MAX_VDI_TAG_LEN bytes; written only when the
 *            fragment is not a valid non-zero snapshot id
 *
 * Returns 0 on success, -EINVAL if the URI is malformed or uses an
 * unsupported scheme or query.
 */
static int sd_parse_uri(BDRVSheepdogState *s, const char *filename,
                        char *vdi, uint32_t *snapid, char *tag)
{
    URI *uri;
    QueryParams *qp = NULL;
    int ret = 0;

    uri = uri_parse(filename);
    if (!uri) {
        return -EINVAL;
    }

    /*
     * transport
     *
     * A string that parses as a relative reference has no scheme, so
     * uri->scheme may be NULL; g_strcmp0() handles that without crashing
     * and the request falls through to -EINVAL.
     */
    if (!g_strcmp0(uri->scheme, "sheepdog")) {
        s->is_unix = false;
    } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) {
        s->is_unix = false;
    } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) {
        s->is_unix = true;
    } else {
        ret = -EINVAL;
        goto out;
    }

    /* the path is "/<vdiname>"; an empty name is not allowed */
    if (uri->path == NULL || !strcmp(uri->path, "/")) {
        ret = -EINVAL;
        goto out;
    }
    pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1);

    /* unix transport needs exactly one query parameter, tcp takes none */
    qp = query_params_parse(uri->query);
    if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
        ret = -EINVAL;
        goto out;
    }

    if (s->is_unix) {
        /* sheepdog+unix:///vdiname?socket=path */
        if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) {
            ret = -EINVAL;
            goto out;
        }
        s->host_spec = g_strdup(qp->p[0].value);
    } else {
        /* sheepdog[+tcp]://[host:port]/vdiname */
        s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR,
                                       uri->port ?: SD_DEFAULT_PORT);
    }

    /* snapshot tag */
    if (uri->fragment) {
        const char *frag = uri->fragment;
        char *end = NULL;
        unsigned long id = 0;
        bool is_id = false;

        /*
         * Treat the fragment as a snapshot id only when it is entirely a
         * decimal number that fits in 32 bits.  Anything else (including a
         * tag that merely starts with digits, or a signed value) is a tag.
         */
        if (frag[0] >= '0' && frag[0] <= '9') {
            errno = 0;
            id = strtoul(frag, &end, 10);
            is_id = errno == 0 && *end == '\0' && id != 0 && id <= UINT32_MAX;
        }

        if (is_id) {
            *snapid = id;
        } else {
            *snapid = 0;
            pstrcpy(tag, SD_MAX_VDI_TAG_LEN, frag);
        }
    } else {
        *snapid = CURRENT_VDI_ID; /* search current vdi */
    }

out:
    if (qp) {
        query_params_free(qp);
    }
    uri_free(uri);
    return ret;
}
/*
* Parse a filename
* Parse a filename (old syntax)
*
* filename must be one of the following formats:
* 1. [vdiname]
@ -845,9 +869,11 @@ static int get_sheep_fd(BDRVSheepdogState *s)
static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
char *vdi, uint32_t *snapid, char *tag)
{
char *p, *q;
int nr_sep;
char *p, *q, *uri;
const char *host_spec, *vdi_spec;
int nr_sep, ret;
strstart(filename, "sheepdog:", (const char **)&filename);
p = q = g_strdup(filename);
/* count the number of separators */
@ -860,38 +886,32 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
}
p = q;
/* use the first two tokens as hostname and port number. */
/* use the first two tokens as host_spec. */
if (nr_sep >= 2) {
s->addr = p;
host_spec = p;
p = strchr(p, ':');
*p++ = '\0';
s->port = p;
p++;
p = strchr(p, ':');
*p++ = '\0';
} else {
s->addr = NULL;
s->port = 0;
host_spec = "";
}
pstrcpy(vdi, SD_MAX_VDI_LEN, p);
vdi_spec = p;
p = strchr(vdi, ':');
p = strchr(vdi_spec, ':');
if (p) {
*p++ = '\0';
*snapid = strtoul(p, NULL, 10);
if (*snapid == 0) {
pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p);
}
} else {
*snapid = CURRENT_VDI_ID; /* search current vdi */
*p++ = '#';
}
if (s->addr == NULL) {
g_free(q);
}
uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec);
return 0;
ret = sd_parse_uri(s, uri, vdi, snapid, tag);
g_free(q);
g_free(uri);
return ret;
}
static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
@ -903,7 +923,7 @@ static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
unsigned int wlen, rlen = 0;
char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
fd = connect_to_sdog(s->addr, s->port);
fd = connect_to_sdog(s);
if (fd < 0) {
return fd;
}
@ -1106,16 +1126,19 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
uint32_t snapid;
char *buf = NULL;
strstart(filename, "sheepdog:", (const char **)&filename);
QLIST_INIT(&s->inflight_aio_head);
QLIST_INIT(&s->pending_aio_head);
s->fd = -1;
memset(vdi, 0, sizeof(vdi));
memset(tag, 0, sizeof(tag));
if (parse_vdiname(s, filename, vdi, &snapid, tag) < 0) {
ret = -EINVAL;
if (strstr(filename, "://")) {
ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
} else {
ret = parse_vdiname(s, filename, vdi, &snapid, tag);
}
if (ret < 0) {
goto out;
}
s->fd = get_sheep_fd(s);
@ -1143,9 +1166,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
s->is_snapshot = true;
}
fd = connect_to_sdog(s->addr, s->port);
fd = connect_to_sdog(s);
if (fd < 0) {
error_report("failed to connect");
ret = fd;
goto out;
}
@ -1178,9 +1200,8 @@ out:
return ret;
}
static int do_sd_create(char *filename, int64_t vdi_size,
uint32_t base_vid, uint32_t *vdi_id, int snapshot,
const char *addr, const char *port)
static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size,
uint32_t base_vid, uint32_t *vdi_id, int snapshot)
{
SheepdogVdiReq hdr;
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
@ -1188,7 +1209,7 @@ static int do_sd_create(char *filename, int64_t vdi_size,
unsigned int wlen, rlen = 0;
char buf[SD_MAX_VDI_LEN];
fd = connect_to_sdog(addr, port);
fd = connect_to_sdog(s);
if (fd < 0) {
return fd;
}
@ -1284,17 +1305,17 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid;
bool prealloc = false;
const char *vdiname;
s = g_malloc0(sizeof(BDRVSheepdogState));
strstart(filename, "sheepdog:", &vdiname);
memset(vdi, 0, sizeof(vdi));
memset(tag, 0, sizeof(tag));
if (parse_vdiname(s, vdiname, vdi, &snapid, tag) < 0) {
error_report("invalid filename");
ret = -EINVAL;
if (strstr(filename, "://")) {
ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
} else {
ret = parse_vdiname(s, filename, vdi, &snapid, tag);
}
if (ret < 0) {
goto out;
}
@ -1355,7 +1376,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
bdrv_delete(bs);
}
ret = do_sd_create(vdi, vdi_size, base_vid, &vid, 0, s->addr, s->port);
ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0);
if (!prealloc || ret) {
goto out;
}
@ -1376,7 +1397,7 @@ static void sd_close(BlockDriverState *bs)
dprintf("%s\n", s->name);
fd = connect_to_sdog(s->addr, s->port);
fd = connect_to_sdog(s);
if (fd < 0) {
return;
}
@ -1400,7 +1421,7 @@ static void sd_close(BlockDriverState *bs)
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
closesocket(s->fd);
g_free(s->addr);
g_free(s->host_spec);
}
static int64_t sd_getlength(BlockDriverState *bs)
@ -1424,7 +1445,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
return -EINVAL;
}
fd = connect_to_sdog(s->addr, s->port);
fd = connect_to_sdog(s);
if (fd < 0) {
return fd;
}
@ -1500,17 +1521,15 @@ static int sd_create_branch(BDRVSheepdogState *s)
buf = g_malloc(SD_INODE_SIZE);
ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1,
s->addr, s->port);
ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1);
if (ret) {
goto out;
}
dprintf("%" PRIx32 " is created.\n", vid);
fd = connect_to_sdog(s->addr, s->port);
fd = connect_to_sdog(s);
if (fd < 0) {
error_report("failed to connect");
ret = fd;
goto out;
}
@ -1769,7 +1788,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
/* refresh inode. */
fd = connect_to_sdog(s->addr, s->port);
fd = connect_to_sdog(s);
if (fd < 0) {
ret = fd;
goto cleanup;
@ -1782,8 +1801,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto cleanup;
}
ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, 1,
s->addr, s->port);
ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid,
1);
if (ret < 0) {
error_report("failed to create inode for snapshot. %s",
strerror(errno));
@ -1838,9 +1857,8 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto out;
}
fd = connect_to_sdog(s->addr, s->port);
fd = connect_to_sdog(s);
if (fd < 0) {
error_report("failed to connect");
ret = fd;
goto out;
}
@ -1902,7 +1920,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
vdi_inuse = g_malloc(max);
fd = connect_to_sdog(s->addr, s->port);
fd = connect_to_sdog(s);
if (fd < 0) {
ret = fd;
goto out;
@ -1929,9 +1947,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT);
start_nr = hval & (SD_NR_VDIS - 1);
fd = connect_to_sdog(s->addr, s->port);
fd = connect_to_sdog(s);
if (fd < 0) {
error_report("failed to connect");
ret = fd;
goto out;
}
@ -1988,7 +2005,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
uint32_t vdi_index;
uint64_t offset;
fd = connect_to_sdog(s->addr, s->port);
fd = connect_to_sdog(s);
if (fd < 0) {
return fd;
}
@ -2063,7 +2080,7 @@ static QEMUOptionParameter sd_create_options[] = {
{ NULL }
};
BlockDriver bdrv_sheepdog = {
static BlockDriver bdrv_sheepdog = {
.format_name = "sheepdog",
.protocol_name = "sheepdog",
.instance_size = sizeof(BDRVSheepdogState),
@ -2088,8 +2105,60 @@ BlockDriver bdrv_sheepdog = {
.create_options = sd_create_options,
};
/*
 * BlockDriver entry registered under the "sheepdog+tcp" protocol name.
 * Every callback is one of the common sd_* handlers; the transport is
 * chosen from the URI scheme when the filename is parsed.
 */
static BlockDriver bdrv_sheepdog_tcp = {
.format_name = "sheepdog",
.protocol_name = "sheepdog+tcp",
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_file_open = sd_open,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_getlength = sd_getlength,
.bdrv_truncate = sd_truncate,
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
.bdrv_snapshot_delete = sd_snapshot_delete,
.bdrv_snapshot_list = sd_snapshot_list,
.bdrv_save_vmstate = sd_save_vmstate,
.bdrv_load_vmstate = sd_load_vmstate,
.create_options = sd_create_options,
};
/*
 * BlockDriver entry registered under the "sheepdog+unix" protocol name.
 * Every callback is one of the common sd_* handlers; the transport is
 * chosen from the URI scheme when the filename is parsed.
 */
static BlockDriver bdrv_sheepdog_unix = {
.format_name = "sheepdog",
.protocol_name = "sheepdog+unix",
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_file_open = sd_open,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_getlength = sd_getlength,
.bdrv_truncate = sd_truncate,
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
.bdrv_snapshot_delete = sd_snapshot_delete,
.bdrv_snapshot_list = sd_snapshot_list,
.bdrv_save_vmstate = sd_save_vmstate,
.bdrv_load_vmstate = sd_load_vmstate,
.create_options = sd_create_options,
};
/*
 * Register the sheepdog block drivers: one BlockDriver entry per accepted
 * protocol name ("sheepdog", "sheepdog+tcp" and "sheepdog+unix").
 */
static void bdrv_sheepdog_init(void)
{
bdrv_register(&bdrv_sheepdog);
bdrv_register(&bdrv_sheepdog_tcp);
bdrv_register(&bdrv_sheepdog_unix);
}
block_init(bdrv_sheepdog_init);

View File

@ -2841,7 +2841,7 @@ static void gdb_accept(void)
GDBState *s;
struct sockaddr_in sockaddr;
socklen_t len;
int val, fd;
int fd;
for(;;) {
len = sizeof(sockaddr);
@ -2858,8 +2858,7 @@ static void gdb_accept(void)
}
/* set short latency */
val = 1;
setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val));
socket_set_nodelay(fd);
s = g_malloc0(sizeof(GDBState));
s->c_cpu = first_cpu;

View File

@ -1 +1 @@
obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o event-poll.o ioq.o virtio-blk.o
obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o ioq.o virtio-blk.o

View File

@ -1,100 +0,0 @@
/*
* Event loop with file descriptor polling
*
* Copyright 2012 IBM, Corp.
* Copyright 2012 Red Hat, Inc. and/or its affiliates
*
* Authors:
* Stefan Hajnoczi <stefanha@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include <sys/epoll.h>
#include "hw/dataplane/event-poll.h"
/* Add an event notifier and its callback for polling */
void event_poll_add(EventPoll *poll, EventHandler *handler,
EventNotifier *notifier, EventCallback *callback)
{
struct epoll_event event = {
.events = EPOLLIN,
.data.ptr = handler,
};
handler->notifier = notifier;
handler->callback = callback;
if (epoll_ctl(poll->epoll_fd, EPOLL_CTL_ADD,
event_notifier_get_fd(notifier), &event) != 0) {
fprintf(stderr, "failed to add event handler to epoll: %m\n");
exit(1);
}
}
/* Event callback for stopping event_poll() */
static void handle_stop(EventHandler *handler)
{
/* Do nothing */
}
void event_poll_init(EventPoll *poll)
{
/* Create epoll file descriptor */
poll->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
if (poll->epoll_fd < 0) {
fprintf(stderr, "epoll_create1 failed: %m\n");
exit(1);
}
/* Set up stop notifier */
if (event_notifier_init(&poll->stop_notifier, 0) < 0) {
fprintf(stderr, "failed to init stop notifier\n");
exit(1);
}
event_poll_add(poll, &poll->stop_handler,
&poll->stop_notifier, handle_stop);
}
void event_poll_cleanup(EventPoll *poll)
{
event_notifier_cleanup(&poll->stop_notifier);
close(poll->epoll_fd);
poll->epoll_fd = -1;
}
/* Block until the next event and invoke its callback */
void event_poll(EventPoll *poll)
{
EventHandler *handler;
struct epoll_event event;
int nevents;
/* Wait for the next event. Only do one event per call to keep the
* function simple, this could be changed later. */
do {
nevents = epoll_wait(poll->epoll_fd, &event, 1, -1);
} while (nevents < 0 && errno == EINTR);
if (unlikely(nevents != 1)) {
fprintf(stderr, "epoll_wait failed: %m\n");
exit(1); /* should never happen */
}
/* Find out which event handler has become active */
handler = event.data.ptr;
/* Clear the eventfd */
event_notifier_test_and_clear(handler->notifier);
/* Handle the event */
handler->callback(handler);
}
/* Stop event_poll()
*
* This function can be used from another thread.
*/
void event_poll_notify(EventPoll *poll)
{
event_notifier_set(&poll->stop_notifier);
}

View File

@ -1,40 +0,0 @@
/*
* Event loop with file descriptor polling
*
* Copyright 2012 IBM, Corp.
* Copyright 2012 Red Hat, Inc. and/or its affiliates
*
* Authors:
* Stefan Hajnoczi <stefanha@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#ifndef EVENT_POLL_H
#define EVENT_POLL_H
#include "qemu/event_notifier.h"
typedef struct EventHandler EventHandler;
typedef void EventCallback(EventHandler *handler);
struct EventHandler {
EventNotifier *notifier; /* eventfd */
EventCallback *callback; /* callback function */
};
typedef struct {
int epoll_fd; /* epoll(2) file descriptor */
EventNotifier stop_notifier; /* stop poll notifier */
EventHandler stop_handler; /* stop poll handler */
} EventPoll;
void event_poll_add(EventPoll *poll, EventHandler *handler,
EventNotifier *notifier, EventCallback *callback);
void event_poll_init(EventPoll *poll);
void event_poll_cleanup(EventPoll *poll);
void event_poll(EventPoll *poll);
void event_poll_notify(EventPoll *poll);
#endif /* EVENT_POLL_H */

View File

@ -14,13 +14,13 @@
#include "trace.h"
#include "qemu/iov.h"
#include "event-poll.h"
#include "qemu/thread.h"
#include "vring.h"
#include "ioq.h"
#include "migration/migration.h"
#include "hw/virtio-blk.h"
#include "hw/dataplane/virtio-blk.h"
#include "block/aio.h"
enum {
SEG_MAX = 126, /* maximum number of I/O segments */
@ -51,9 +51,14 @@ struct VirtIOBlockDataPlane {
Vring vring; /* virtqueue vring */
EventNotifier *guest_notifier; /* irq */
EventPoll event_poll; /* event poller */
EventHandler io_handler; /* Linux AIO completion handler */
EventHandler notify_handler; /* virtqueue notify handler */
/* Note that these EventNotifiers are assigned by value. This is
* fine as long as you do not call event_notifier_cleanup on them
* (because you don't own the file descriptor or handle; you just
* use it).
*/
AioContext *ctx;
EventNotifier io_notifier; /* Linux AIO completion */
EventNotifier host_notifier; /* doorbell */
IOQueue ioqueue; /* Linux AIO queue (should really be per
dataplane thread) */
@ -256,10 +261,10 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
}
}
static void handle_notify(EventHandler *handler)
static void handle_notify(EventNotifier *e)
{
VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane,
notify_handler);
VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
host_notifier);
/* There is one array of iovecs into which all new requests are extracted
* from the vring. Requests are read from the vring and the translated
@ -286,6 +291,7 @@ static void handle_notify(EventHandler *handler)
unsigned int out_num = 0, in_num = 0;
unsigned int num_queued;
event_notifier_test_and_clear(&s->host_notifier);
for (;;) {
/* Disable guest->host notifies to avoid unnecessary vmexits */
vring_disable_notification(s->vdev, &s->vring);
@ -334,11 +340,12 @@ static void handle_notify(EventHandler *handler)
}
}
static void handle_io(EventHandler *handler)
static void handle_io(EventNotifier *e)
{
VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane,
io_handler);
VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
io_notifier);
event_notifier_test_and_clear(&s->io_notifier);
if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
notify_guest(s);
}
@ -348,7 +355,7 @@ static void handle_io(EventHandler *handler)
* requests.
*/
if (unlikely(vring_more_avail(&s->vring))) {
handle_notify(&s->notify_handler);
handle_notify(&s->host_notifier);
}
}
@ -357,7 +364,7 @@ static void *data_plane_thread(void *opaque)
VirtIOBlockDataPlane *s = opaque;
do {
event_poll(&s->event_poll);
aio_poll(s->ctx, true);
} while (!s->stopping || s->num_reqs > 0);
return NULL;
}
@ -445,7 +452,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
return;
}
event_poll_init(&s->event_poll);
s->ctx = aio_context_new();
/* Set up guest notifier (irq) */
if (s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1,
@ -462,17 +469,16 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
fprintf(stderr, "virtio-blk failed to set host notifier\n");
exit(1);
}
event_poll_add(&s->event_poll, &s->notify_handler,
virtio_queue_get_host_notifier(vq),
handle_notify);
s->host_notifier = *virtio_queue_get_host_notifier(vq);
aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify, NULL);
/* Set up ioqueue */
ioq_init(&s->ioqueue, s->fd, REQ_MAX);
for (i = 0; i < ARRAY_SIZE(s->requests); i++) {
ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb);
}
event_poll_add(&s->event_poll, &s->io_handler,
ioq_get_notifier(&s->ioqueue), handle_io);
s->io_notifier = *ioq_get_notifier(&s->ioqueue);
aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io, NULL);
s->started = true;
trace_virtio_blk_data_plane_start(s);
@ -498,15 +504,17 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
qemu_bh_delete(s->start_bh);
s->start_bh = NULL;
} else {
event_poll_notify(&s->event_poll);
aio_notify(s->ctx);
qemu_thread_join(&s->thread);
}
aio_set_event_notifier(s->ctx, &s->io_notifier, NULL, NULL);
ioq_cleanup(&s->ioqueue);
aio_set_event_notifier(s->ctx, &s->host_notifier, NULL, NULL);
s->vdev->binding->set_host_notifier(s->vdev->binding_opaque, 0, false);
event_poll_cleanup(&s->event_poll);
aio_context_unref(s->ctx);
/* Clean up guest notifier (irq) */
s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1, false);

View File

@ -188,7 +188,7 @@ static int macio_newworld_initfn(PCIDevice *d)
sysbus_dev = SYS_BUS_DEVICE(&ns->ide[1]);
sysbus_connect_irq(sysbus_dev, 0, ns->irqs[3]);
sysbus_connect_irq(sysbus_dev, 1, ns->irqs[4]);
macio_ide_register_dma(&ns->ide[0], s->dbdma, 0x1a);
macio_ide_register_dma(&ns->ide[1], s->dbdma, 0x1a);
ret = qdev_init(DEVICE(&ns->ide[1]));
if (ret < 0) {
return ret;

View File

@ -36,6 +36,7 @@ typedef struct VirtIOBlock
VirtIOBlkConf *blk;
unsigned short sector_mask;
DeviceState *qdev;
VMChangeStateEntry *change;
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
VirtIOBlockDataPlane *dataplane;
#endif
@ -681,7 +682,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
}
#endif
qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
s->qdev = dev;
register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
virtio_blk_save, virtio_blk_load, s);
@ -702,6 +703,7 @@ void virtio_blk_exit(VirtIODevice *vdev)
virtio_blk_data_plane_destroy(s->dataplane);
s->dataplane = NULL;
#endif
qemu_del_vm_change_state_handler(s->change);
unregister_savevm(s->qdev, "virtio-blk", s);
blockdev_mark_auto_del(s->bs);
virtio_cleanup(vdev);

View File

@ -34,6 +34,7 @@ int inet_aton(const char *cp, struct in_addr *ia);
int qemu_socket(int domain, int type, int protocol);
int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
int socket_set_cork(int fd, int v);
int socket_set_nodelay(int fd);
void socket_set_block(int fd);
void socket_set_nonblock(int fd);
int send_all(int fd, const void *buf, int len1);

View File

@ -2365,12 +2365,6 @@ static void tcp_chr_telnet_init(int fd)
send(fd, (char *)buf, 3, 0);
}
static void socket_set_nodelay(int fd)
{
int val = 1;
setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val));
}
static int tcp_chr_add_client(CharDriverState *chr, int fd)
{
TCPCharDriver *s = chr->opaque;

View File

@ -830,7 +830,7 @@ QEMU-based virtual machines.
You can create a Sheepdog disk image with the command:
@example
qemu-img create sheepdog:@var{image} @var{size}
qemu-img create sheepdog:///@var{image} @var{size}
@end example
where @var{image} is the Sheepdog image name and @var{size} is its
size.
@ -838,38 +838,44 @@ size.
To import the existing @var{filename} to Sheepdog, you can use a
convert command.
@example
qemu-img convert @var{filename} sheepdog:@var{image}
qemu-img convert @var{filename} sheepdog:///@var{image}
@end example
You can boot from the Sheepdog disk image with the command:
@example
qemu-system-i386 sheepdog:@var{image}
qemu-system-i386 sheepdog:///@var{image}
@end example
You can also create a snapshot of the Sheepdog image like qcow2.
@example
qemu-img snapshot -c @var{tag} sheepdog:@var{image}
qemu-img snapshot -c @var{tag} sheepdog:///@var{image}
@end example
where @var{tag} is a tag name of the newly created snapshot.
To boot from the Sheepdog snapshot, specify the tag name of the
snapshot.
@example
qemu-system-i386 sheepdog:@var{image}:@var{tag}
qemu-system-i386 sheepdog:///@var{image}#@var{tag}
@end example
You can create a cloned image from the existing snapshot.
@example
qemu-img create -b sheepdog:@var{base}:@var{tag} sheepdog:@var{image}
qemu-img create -b sheepdog:///@var{base}#@var{tag} sheepdog:///@var{image}
@end example
where @var{base} is an image name of the source snapshot and @var{tag}
is its tag name.
You can use a Unix domain socket instead of an inet (TCP) socket:
@example
qemu-system-i386 sheepdog+unix:///@var{image}?socket=@var{path}
@end example
If the Sheepdog daemon doesn't run on the local host, you need to
specify one of the Sheepdog servers to connect to.
@example
qemu-img create sheepdog:@var{hostname}:@var{port}:@var{image} @var{size}
qemu-system-i386 sheepdog:@var{hostname}:@var{port}:@var{image}
qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size}
qemu-system-i386 sheepdog://@var{hostname}:@var{port}/@var{image}
@end example
@node disk_images_iscsi

View File

@ -2108,23 +2108,13 @@ QEMU supports using either local sheepdog devices or remote networked
devices.
Syntax for specifying a sheepdog device
@table @list
``sheepdog:<vdiname>''
``sheepdog:<vdiname>:<snapid>''
``sheepdog:<vdiname>:<tag>''
``sheepdog:<host>:<port>:<vdiname>''
``sheepdog:<host>:<port>:<vdiname>:<snapid>''
``sheepdog:<host>:<port>:<vdiname>:<tag>''
@end table
@example
sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag]
@end example
Example
@example
qemu-system-i386 --drive file=sheepdog:192.0.2.1:30000:MyVirtualMachine
qemu-system-i386 --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine
@end example
See also @url{http://www.osrg.net/sheepdog/}.

View File

@ -384,83 +384,86 @@ int tcp_fconnect(struct socket *so)
* the time it gets to accept(), so... We simply accept
* here and SYN the local-host.
*/
void
tcp_connect(struct socket *inso)
void tcp_connect(struct socket *inso)
{
Slirp *slirp = inso->slirp;
struct socket *so;
struct sockaddr_in addr;
socklen_t addrlen = sizeof(struct sockaddr_in);
struct tcpcb *tp;
int s, opt;
Slirp *slirp = inso->slirp;
struct socket *so;
struct sockaddr_in addr;
socklen_t addrlen = sizeof(struct sockaddr_in);
struct tcpcb *tp;
int s, opt;
DEBUG_CALL("tcp_connect");
DEBUG_ARG("inso = %lx", (long)inso);
DEBUG_CALL("tcp_connect");
DEBUG_ARG("inso = %lx", (long)inso);
/*
* If it's an SS_ACCEPTONCE socket, no need to socreate()
* another socket, just use the accept() socket.
*/
if (inso->so_state & SS_FACCEPTONCE) {
/* FACCEPTONCE already have a tcpcb */
so = inso;
} else {
if ((so = socreate(slirp)) == NULL) {
/* If it failed, get rid of the pending connection */
closesocket(accept(inso->s,(struct sockaddr *)&addr,&addrlen));
return;
}
if (tcp_attach(so) < 0) {
free(so); /* NOT sofree */
return;
}
so->so_laddr = inso->so_laddr;
so->so_lport = inso->so_lport;
}
(void) tcp_mss(sototcpcb(so), 0);
if ((s = accept(inso->s,(struct sockaddr *)&addr,&addrlen)) < 0) {
tcp_close(sototcpcb(so)); /* This will sofree() as well */
return;
}
socket_set_nonblock(s);
opt = 1;
setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int));
opt = 1;
setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int));
opt = 1;
setsockopt(s,IPPROTO_TCP,TCP_NODELAY,(char *)&opt,sizeof(int));
so->so_fport = addr.sin_port;
so->so_faddr = addr.sin_addr;
/* Translate connections from localhost to the real hostname */
if (so->so_faddr.s_addr == 0 ||
(so->so_faddr.s_addr & loopback_mask) ==
(loopback_addr.s_addr & loopback_mask)) {
so->so_faddr = slirp->vhost_addr;
/*
* If it's an SS_ACCEPTONCE socket, no need to socreate()
* another socket, just use the accept() socket.
*/
if (inso->so_state & SS_FACCEPTONCE) {
/* FACCEPTONCE already have a tcpcb */
so = inso;
} else {
so = socreate(slirp);
if (so == NULL) {
/* If it failed, get rid of the pending connection */
closesocket(accept(inso->s, (struct sockaddr *)&addr, &addrlen));
return;
}
if (tcp_attach(so) < 0) {
free(so); /* NOT sofree */
return;
}
so->so_laddr = inso->so_laddr;
so->so_lport = inso->so_lport;
}
/* Close the accept() socket, set right state */
if (inso->so_state & SS_FACCEPTONCE) {
closesocket(so->s); /* If we only accept once, close the accept() socket */
so->so_state = SS_NOFDREF; /* Don't select it yet, even though we have an FD */
/* if it's not FACCEPTONCE, it's already NOFDREF */
}
so->s = s;
so->so_state |= SS_INCOMING;
tcp_mss(sototcpcb(so), 0);
so->so_iptos = tcp_tos(so);
tp = sototcpcb(so);
s = accept(inso->s, (struct sockaddr *)&addr, &addrlen);
if (s < 0) {
tcp_close(sototcpcb(so)); /* This will sofree() as well */
return;
}
socket_set_nonblock(s);
opt = 1;
setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(int));
opt = 1;
setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (char *)&opt, sizeof(int));
socket_set_nodelay(s);
tcp_template(tp);
so->so_fport = addr.sin_port;
so->so_faddr = addr.sin_addr;
/* Translate connections from localhost to the real hostname */
if (so->so_faddr.s_addr == 0 ||
(so->so_faddr.s_addr & loopback_mask) ==
(loopback_addr.s_addr & loopback_mask)) {
so->so_faddr = slirp->vhost_addr;
}
tp->t_state = TCPS_SYN_SENT;
tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
tp->iss = slirp->tcp_iss;
slirp->tcp_iss += TCP_ISSINCR/2;
tcp_sendseqinit(tp);
tcp_output(tp);
/* Close the accept() socket, set right state */
if (inso->so_state & SS_FACCEPTONCE) {
/* If we only accept once, close the accept() socket */
closesocket(so->s);
/* Don't select it yet, even though we have an FD */
/* if it's not FACCEPTONCE, it's already NOFDREF */
so->so_state = SS_NOFDREF;
}
so->s = s;
so->so_state |= SS_INCOMING;
so->so_iptos = tcp_tos(so);
tp = sototcpcb(so);
tcp_template(tp);
tp->t_state = TCPS_SYN_SENT;
tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
tp->iss = slirp->tcp_iss;
slirp->tcp_iss += TCP_ISSINCR/2;
tcp_sendseqinit(tp);
tcp_output(tp);
}
/*

View File

@ -63,6 +63,12 @@ int socket_set_cork(int fd, int v)
#endif
}
/*
 * Enable the TCP_NODELAY option on socket @fd.
 *
 * Returns the result of setsockopt() unchanged, so callers can detect
 * failure (for example when @fd is not a TCP socket).
 */
int socket_set_nodelay(int fd)
{
    int v = 1;

    /*
     * The (char *) cast keeps this call valid where setsockopt() declares
     * optval as "const char *" (Winsock); it converts implicitly where the
     * parameter is "const void *".
     */
    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&v, sizeof(v));
}
int qemu_madvise(void *addr, size_t len, int advice)
{
if (advice == QEMU_MADV_INVALID) {