From 02d583c7232d65920634f7553700eb348f84e472 Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Sun, 24 Feb 2013 20:46:11 +0000 Subject: [PATCH 1/9] ide/macio: Fix macio DMA initialisation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 07a7484e5d713f1eb7c1c37b18a8ab0d56d88875 accidentally introduced a bug in the initialisation of the second macio DMA device which could cause some DMA operations to segfault QEMU. CC: Andreas Färber Signed-off-by: Mark Cave-Ayland Acked-by: Andreas Färber Signed-off-by: Stefan Hajnoczi --- hw/macio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/macio.c b/hw/macio.c index 74bdcd1039..0c6a6b8e7a 100644 --- a/hw/macio.c +++ b/hw/macio.c @@ -188,7 +188,7 @@ static int macio_newworld_initfn(PCIDevice *d) sysbus_dev = SYS_BUS_DEVICE(&ns->ide[1]); sysbus_connect_irq(sysbus_dev, 0, ns->irqs[3]); sysbus_connect_irq(sysbus_dev, 1, ns->irqs[4]); - macio_ide_register_dma(&ns->ide[0], s->dbdma, 0x1a); + macio_ide_register_dma(&ns->ide[1], s->dbdma, 0x1a); ret = qdev_init(DEVICE(&ns->ide[1])); if (ret < 0) { return ret; From 69b302b2044a9a0f6d157d25b39a91ff7124c61f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 22 Feb 2013 14:37:10 +0100 Subject: [PATCH 2/9] virtio-blk: fix unplug + virsh reboot virtio-blk registers a vmstate change handler. Unfortunately this handler is not unregistered on unplug, leading to some random crashes if the system is restarted, e.g. via virsh reboot. Lets unregister the vmstate change handler if the device is removed. Signed-off-by: Christian Borntraeger Signed-off-by: Stefan Hajnoczi --- hw/virtio-blk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 34913ee40e..f5e6ee90b6 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -36,6 +36,7 @@ typedef struct VirtIOBlock VirtIOBlkConf *blk; unsigned short sector_mask; DeviceState *qdev; + VMChangeStateEntry *change; #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE VirtIOBlockDataPlane *dataplane; #endif @@ -681,7 +682,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk) } #endif - qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); + s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); s->qdev = dev; register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, virtio_blk_save, virtio_blk_load, s); @@ -702,6 +703,7 @@ void virtio_blk_exit(VirtIODevice *vdev) virtio_blk_data_plane_destroy(s->dataplane); s->dataplane = NULL; #endif + qemu_del_vm_change_state_handler(s->change); unregister_savevm(s->qdev, "virtio-blk", s); blockdev_mark_auto_del(s->bs); virtio_cleanup(vdev); From 2c20e711de308cdebc91ae4b7a983396b56f1de0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 10:40:34 +0100 Subject: [PATCH 3/9] dataplane: remove EventPoll in favor of AioContext During the review of the dataplane code, the EventPoll API morphed itself (not concidentially) into something very very similar to an AioContext. Thus, it is trivial to convert virtio-blk-dataplane to use AioContext, and a first baby step towards letting dataplane talk directly to the QEMU block layer. The only interesting note is the value-copy of EventNotifiers. At least in my opinion this is part of the EventNotifier API and is even portable to Windows. Of course, in this case you should not close the notifier's underlying file descriptors or handle with event_notifier_cleanup. Signed-off-by: Paolo Bonzini Signed-off-by: Stefan Hajnoczi --- hw/dataplane/Makefile.objs | 2 +- hw/dataplane/event-poll.c | 100 ------------------------------------- hw/dataplane/event-poll.h | 40 --------------- hw/dataplane/virtio-blk.c | 48 ++++++++++-------- 4 files changed, 29 insertions(+), 161 deletions(-) delete mode 100644 hw/dataplane/event-poll.c delete mode 100644 hw/dataplane/event-poll.h diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs index 3e47d0537e..701111ccb9 100644 --- a/hw/dataplane/Makefile.objs +++ b/hw/dataplane/Makefile.objs @@ -1 +1 @@ -obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o event-poll.o ioq.o virtio-blk.o +obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o ioq.o virtio-blk.o diff --git a/hw/dataplane/event-poll.c b/hw/dataplane/event-poll.c deleted file mode 100644 index 2b55c6e255..0000000000 --- a/hw/dataplane/event-poll.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Event loop with file descriptor polling - * - * Copyright 2012 IBM, Corp. - * Copyright 2012 Red Hat, Inc. and/or its affiliates - * - * Authors: - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include -#include "hw/dataplane/event-poll.h" - -/* Add an event notifier and its callback for polling */ -void event_poll_add(EventPoll *poll, EventHandler *handler, - EventNotifier *notifier, EventCallback *callback) -{ - struct epoll_event event = { - .events = EPOLLIN, - .data.ptr = handler, - }; - handler->notifier = notifier; - handler->callback = callback; - if (epoll_ctl(poll->epoll_fd, EPOLL_CTL_ADD, - event_notifier_get_fd(notifier), &event) != 0) { - fprintf(stderr, "failed to add event handler to epoll: %m\n"); - exit(1); - } -} - -/* Event callback for stopping event_poll() */ -static void handle_stop(EventHandler *handler) -{ - /* Do nothing */ -} - -void event_poll_init(EventPoll *poll) -{ - /* Create epoll file descriptor */ - poll->epoll_fd = epoll_create1(EPOLL_CLOEXEC); - if (poll->epoll_fd < 0) { - fprintf(stderr, "epoll_create1 failed: %m\n"); - exit(1); - } - - /* Set up stop notifier */ - if (event_notifier_init(&poll->stop_notifier, 0) < 0) { - fprintf(stderr, "failed to init stop notifier\n"); - exit(1); - } - event_poll_add(poll, &poll->stop_handler, - &poll->stop_notifier, handle_stop); -} - -void event_poll_cleanup(EventPoll *poll) -{ - event_notifier_cleanup(&poll->stop_notifier); - close(poll->epoll_fd); - poll->epoll_fd = -1; -} - -/* Block until the next event and invoke its callback */ -void event_poll(EventPoll *poll) -{ - EventHandler *handler; - struct epoll_event event; - int nevents; - - /* Wait for the next event. Only do one event per call to keep the - * function simple, this could be changed later. */ - do { - nevents = epoll_wait(poll->epoll_fd, &event, 1, -1); - } while (nevents < 0 && errno == EINTR); - if (unlikely(nevents != 1)) { - fprintf(stderr, "epoll_wait failed: %m\n"); - exit(1); /* should never happen */ - } - - /* Find out which event handler has become active */ - handler = event.data.ptr; - - /* Clear the eventfd */ - event_notifier_test_and_clear(handler->notifier); - - /* Handle the event */ - handler->callback(handler); -} - -/* Stop event_poll() - * - * This function can be used from another thread. - */ -void event_poll_notify(EventPoll *poll) -{ - event_notifier_set(&poll->stop_notifier); -} diff --git a/hw/dataplane/event-poll.h b/hw/dataplane/event-poll.h deleted file mode 100644 index 3e8d3ec7d5..0000000000 --- a/hw/dataplane/event-poll.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Event loop with file descriptor polling - * - * Copyright 2012 IBM, Corp. - * Copyright 2012 Red Hat, Inc. and/or its affiliates - * - * Authors: - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef EVENT_POLL_H -#define EVENT_POLL_H - -#include "qemu/event_notifier.h" - -typedef struct EventHandler EventHandler; -typedef void EventCallback(EventHandler *handler); -struct EventHandler { - EventNotifier *notifier; /* eventfd */ - EventCallback *callback; /* callback function */ -}; - -typedef struct { - int epoll_fd; /* epoll(2) file descriptor */ - EventNotifier stop_notifier; /* stop poll notifier */ - EventHandler stop_handler; /* stop poll handler */ -} EventPoll; - -void event_poll_add(EventPoll *poll, EventHandler *handler, - EventNotifier *notifier, EventCallback *callback); -void event_poll_init(EventPoll *poll); -void event_poll_cleanup(EventPoll *poll); -void event_poll(EventPoll *poll); -void event_poll_notify(EventPoll *poll); - -#endif /* EVENT_POLL_H */ diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c index 3f2da22669..aa9b04078b 100644 --- a/hw/dataplane/virtio-blk.c +++ b/hw/dataplane/virtio-blk.c @@ -14,13 +14,13 @@ #include "trace.h" #include "qemu/iov.h" -#include "event-poll.h" #include "qemu/thread.h" #include "vring.h" #include "ioq.h" #include "migration/migration.h" #include "hw/virtio-blk.h" #include "hw/dataplane/virtio-blk.h" +#include "block/aio.h" enum { SEG_MAX = 126, /* maximum number of I/O segments */ @@ -51,9 +51,14 @@ struct VirtIOBlockDataPlane { Vring vring; /* virtqueue vring */ EventNotifier *guest_notifier; /* irq */ - EventPoll event_poll; /* event poller */ - EventHandler io_handler; /* Linux AIO completion handler */ - EventHandler notify_handler; /* virtqueue notify handler */ + /* Note that these EventNotifiers are assigned by value. This is + * fine as long as you do not call event_notifier_cleanup on them + * (because you don't own the file descriptor or handle; you just + * use it). + */ + AioContext *ctx; + EventNotifier io_notifier; /* Linux AIO completion */ + EventNotifier host_notifier; /* doorbell */ IOQueue ioqueue; /* Linux AIO queue (should really be per dataplane thread) */ @@ -256,10 +261,10 @@ static int process_request(IOQueue *ioq, struct iovec iov[], } } -static void handle_notify(EventHandler *handler) +static void handle_notify(EventNotifier *e) { - VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane, - notify_handler); + VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane, + host_notifier); /* There is one array of iovecs into which all new requests are extracted * from the vring. Requests are read from the vring and the translated @@ -286,6 +291,7 @@ static void handle_notify(EventHandler *handler) unsigned int out_num = 0, in_num = 0; unsigned int num_queued; + event_notifier_test_and_clear(&s->host_notifier); for (;;) { /* Disable guest->host notifies to avoid unnecessary vmexits */ vring_disable_notification(s->vdev, &s->vring); @@ -334,11 +340,12 @@ static void handle_notify(EventHandler *handler) } } -static void handle_io(EventHandler *handler) +static void handle_io(EventNotifier *e) { - VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane, - io_handler); + VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane, + io_notifier); + event_notifier_test_and_clear(&s->io_notifier); if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) { notify_guest(s); } @@ -348,7 +355,7 @@ static void handle_io(EventHandler *handler) * requests. */ if (unlikely(vring_more_avail(&s->vring))) { - handle_notify(&s->notify_handler); + handle_notify(&s->host_notifier); } } @@ -357,7 +364,7 @@ static void *data_plane_thread(void *opaque) VirtIOBlockDataPlane *s = opaque; do { - event_poll(&s->event_poll); + aio_poll(s->ctx, true); } while (!s->stopping || s->num_reqs > 0); return NULL; } @@ -445,7 +452,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) return; } - event_poll_init(&s->event_poll); + s->ctx = aio_context_new(); /* Set up guest notifier (irq) */ if (s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1, @@ -462,17 +469,16 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) fprintf(stderr, "virtio-blk failed to set host notifier\n"); exit(1); } - event_poll_add(&s->event_poll, &s->notify_handler, - virtio_queue_get_host_notifier(vq), - handle_notify); + s->host_notifier = *virtio_queue_get_host_notifier(vq); + aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify, NULL); /* Set up ioqueue */ ioq_init(&s->ioqueue, s->fd, REQ_MAX); for (i = 0; i < ARRAY_SIZE(s->requests); i++) { ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb); } - event_poll_add(&s->event_poll, &s->io_handler, - ioq_get_notifier(&s->ioqueue), handle_io); + s->io_notifier = *ioq_get_notifier(&s->ioqueue); + aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io, NULL); s->started = true; trace_virtio_blk_data_plane_start(s); @@ -498,15 +504,17 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) qemu_bh_delete(s->start_bh); s->start_bh = NULL; } else { - event_poll_notify(&s->event_poll); + aio_notify(s->ctx); qemu_thread_join(&s->thread); } + aio_set_event_notifier(s->ctx, &s->io_notifier, NULL, NULL); ioq_cleanup(&s->ioqueue); + aio_set_event_notifier(s->ctx, &s->host_notifier, NULL, NULL); s->vdev->binding->set_host_notifier(s->vdev->binding_opaque, 0, false); - event_poll_cleanup(&s->event_poll); + aio_context_unref(s->ctx); /* Clean up guest notifier (irq) */ s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1, false); From 4ef7b8944cc5eae66159c60066b21466e2dc1ee4 Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Fri, 22 Feb 2013 12:39:49 +0900 Subject: [PATCH 4/9] slirp/tcp_subr.c: fix coding style in tcp_connect Fix coding style in tcp_connect before the next patch. Signed-off-by: MORITA Kazutaka Signed-off-by: Stefan Hajnoczi --- slirp/tcp_subr.c | 142 ++++++++++++++++++++++++----------------------- 1 file changed, 73 insertions(+), 69 deletions(-) diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c index 1542e43619..317dc07a06 100644 --- a/slirp/tcp_subr.c +++ b/slirp/tcp_subr.c @@ -384,83 +384,87 @@ int tcp_fconnect(struct socket *so) * the time it gets to accept(), so... We simply accept * here and SYN the local-host. */ -void -tcp_connect(struct socket *inso) +void tcp_connect(struct socket *inso) { - Slirp *slirp = inso->slirp; - struct socket *so; - struct sockaddr_in addr; - socklen_t addrlen = sizeof(struct sockaddr_in); - struct tcpcb *tp; - int s, opt; + Slirp *slirp = inso->slirp; + struct socket *so; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(struct sockaddr_in); + struct tcpcb *tp; + int s, opt; - DEBUG_CALL("tcp_connect"); - DEBUG_ARG("inso = %lx", (long)inso); + DEBUG_CALL("tcp_connect"); + DEBUG_ARG("inso = %lx", (long)inso); - /* - * If it's an SS_ACCEPTONCE socket, no need to socreate() - * another socket, just use the accept() socket. - */ - if (inso->so_state & SS_FACCEPTONCE) { - /* FACCEPTONCE already have a tcpcb */ - so = inso; - } else { - if ((so = socreate(slirp)) == NULL) { - /* If it failed, get rid of the pending connection */ - closesocket(accept(inso->s,(struct sockaddr *)&addr,&addrlen)); - return; - } - if (tcp_attach(so) < 0) { - free(so); /* NOT sofree */ - return; - } - so->so_laddr = inso->so_laddr; - so->so_lport = inso->so_lport; - } - - (void) tcp_mss(sototcpcb(so), 0); - - if ((s = accept(inso->s,(struct sockaddr *)&addr,&addrlen)) < 0) { - tcp_close(sototcpcb(so)); /* This will sofree() as well */ - return; - } - socket_set_nonblock(s); - opt = 1; - setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int)); - opt = 1; - setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int)); - opt = 1; - setsockopt(s,IPPROTO_TCP,TCP_NODELAY,(char *)&opt,sizeof(int)); - - so->so_fport = addr.sin_port; - so->so_faddr = addr.sin_addr; - /* Translate connections from localhost to the real hostname */ - if (so->so_faddr.s_addr == 0 || - (so->so_faddr.s_addr & loopback_mask) == - (loopback_addr.s_addr & loopback_mask)) { - so->so_faddr = slirp->vhost_addr; + /* + * If it's an SS_ACCEPTONCE socket, no need to socreate() + * another socket, just use the accept() socket. + */ + if (inso->so_state & SS_FACCEPTONCE) { + /* FACCEPTONCE already have a tcpcb */ + so = inso; + } else { + so = socreate(slirp); + if (so == NULL) { + /* If it failed, get rid of the pending connection */ + closesocket(accept(inso->s, (struct sockaddr *)&addr, &addrlen)); + return; } + if (tcp_attach(so) < 0) { + free(so); /* NOT sofree */ + return; + } + so->so_laddr = inso->so_laddr; + so->so_lport = inso->so_lport; + } - /* Close the accept() socket, set right state */ - if (inso->so_state & SS_FACCEPTONCE) { - closesocket(so->s); /* If we only accept once, close the accept() socket */ - so->so_state = SS_NOFDREF; /* Don't select it yet, even though we have an FD */ - /* if it's not FACCEPTONCE, it's already NOFDREF */ - } - so->s = s; - so->so_state |= SS_INCOMING; + tcp_mss(sototcpcb(so), 0); - so->so_iptos = tcp_tos(so); - tp = sototcpcb(so); + s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); + if (s < 0) { + tcp_close(sototcpcb(so)); /* This will sofree() as well */ + return; + } + socket_set_nonblock(s); + opt = 1; + setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(int)); + opt = 1; + setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (char *)&opt, sizeof(int)); + opt = 1; + setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(int)); - tcp_template(tp); + so->so_fport = addr.sin_port; + so->so_faddr = addr.sin_addr; + /* Translate connections from localhost to the real hostname */ + if (so->so_faddr.s_addr == 0 || + (so->so_faddr.s_addr & loopback_mask) == + (loopback_addr.s_addr & loopback_mask)) { + so->so_faddr = slirp->vhost_addr; + } - tp->t_state = TCPS_SYN_SENT; - tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; - tp->iss = slirp->tcp_iss; - slirp->tcp_iss += TCP_ISSINCR/2; - tcp_sendseqinit(tp); - tcp_output(tp); + /* Close the accept() socket, set right state */ + if (inso->so_state & SS_FACCEPTONCE) { + /* If we only accept once, close the accept() socket */ + closesocket(so->s); + + /* Don't select it yet, even though we have an FD */ + /* if it's not FACCEPTONCE, it's already NOFDREF */ + so->so_state = SS_NOFDREF; + } + so->s = s; + so->so_state |= SS_INCOMING; + + so->so_iptos = tcp_tos(so); + tp = sototcpcb(so); + + tcp_template(tp); + + tp->t_state = TCPS_SYN_SENT; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + tp->iss = slirp->tcp_iss; + slirp->tcp_iss += TCP_ISSINCR/2; + tcp_sendseqinit(tp); + tcp_output(tp); } /* From bf1c852aa9cbe21beeb7c37d03e167c33ac196b2 Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Fri, 22 Feb 2013 12:39:50 +0900 Subject: [PATCH 5/9] move socket_set_nodelay to osdep.c Signed-off-by: MORITA Kazutaka Signed-off-by: Stefan Hajnoczi --- block/sheepdog.c | 11 +---------- gdbstub.c | 5 ++--- include/qemu/sockets.h | 1 + qemu-char.c | 6 ------ slirp/tcp_subr.c | 3 +-- util/osdep.c | 6 ++++++ 6 files changed, 11 insertions(+), 21 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index d466b232d7..51e75ad7a1 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -787,15 +787,6 @@ static int aio_flush_request(void *opaque) !QLIST_EMPTY(&s->pending_aio_head); } -static int set_nodelay(int fd) -{ - int ret, opt; - - opt = 1; - ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt)); - return ret; -} - /* * Return a socket discriptor to read/write objects. * @@ -814,7 +805,7 @@ static int get_sheep_fd(BDRVSheepdogState *s) socket_set_nonblock(fd); - ret = set_nodelay(fd); + ret = socket_set_nodelay(fd); if (ret) { error_report("%s", strerror(errno)); closesocket(fd); diff --git a/gdbstub.c b/gdbstub.c index 32dfea9ed0..e414ad9157 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -2841,7 +2841,7 @@ static void gdb_accept(void) GDBState *s; struct sockaddr_in sockaddr; socklen_t len; - int val, fd; + int fd; for(;;) { len = sizeof(sockaddr); @@ -2858,8 +2858,7 @@ static void gdb_accept(void) } /* set short latency */ - val = 1; - setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val)); + socket_set_nodelay(fd); s = g_malloc0(sizeof(GDBState)); s->c_cpu = first_cpu; diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 803ae1798c..6125bf7bdf 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -34,6 +34,7 @@ int inet_aton(const char *cp, struct in_addr *ia); int qemu_socket(int domain, int type, int protocol); int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen); int socket_set_cork(int fd, int v); +int socket_set_nodelay(int fd); void socket_set_block(int fd); void socket_set_nonblock(int fd); int send_all(int fd, const void *buf, int len1); diff --git a/qemu-char.c b/qemu-char.c index 160decc2f0..36295b1bcd 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -2365,12 +2365,6 @@ static void tcp_chr_telnet_init(int fd) send(fd, (char *)buf, 3, 0); } -static void socket_set_nodelay(int fd) -{ - int val = 1; - setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val)); -} - static int tcp_chr_add_client(CharDriverState *chr, int fd) { TCPCharDriver *s = chr->opaque; diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c index 317dc07a06..7b7ad60aea 100644 --- a/slirp/tcp_subr.c +++ b/slirp/tcp_subr.c @@ -430,8 +430,7 @@ void tcp_connect(struct socket *inso) setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(int)); opt = 1; setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (char *)&opt, sizeof(int)); - opt = 1; - setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(int)); + socket_set_nodelay(s); so->so_fport = addr.sin_port; so->so_faddr = addr.sin_addr; diff --git a/util/osdep.c b/util/osdep.c index 5b51a0322e..c4082610df 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -63,6 +63,12 @@ int socket_set_cork(int fd, int v) #endif } +int socket_set_nodelay(int fd) +{ + int v = 1; + return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); +} + int qemu_madvise(void *addr, size_t len, int advice) { if (advice == QEMU_MADV_INVALID) { From 5d6768e3b8908a60f0a3016b7fa24194f6b47c80 Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Fri, 22 Feb 2013 12:39:51 +0900 Subject: [PATCH 6/9] sheepdog: accept URIs The URI syntax is consistent with the NBD and Gluster syntax. The syntax is sheepdog[+tcp]://[host:port]/vdiname[#snapid|#tag] Signed-off-by: MORITA Kazutaka Signed-off-by: Stefan Hajnoczi --- block/sheepdog.c | 139 +++++++++++++++++++++++++++++++++++------------ qemu-doc.texi | 16 +++--- qemu-options.hx | 18 ++---- 3 files changed, 117 insertions(+), 56 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 51e75ad7a1..bfa8a00dda 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -13,6 +13,7 @@ */ #include "qemu-common.h" +#include "qemu/uri.h" #include "qemu/error-report.h" #include "qemu/sockets.h" #include "block/block_int.h" @@ -816,8 +817,52 @@ static int get_sheep_fd(BDRVSheepdogState *s) return fd; } +static int sd_parse_uri(BDRVSheepdogState *s, const char *filename, + char *vdi, uint32_t *snapid, char *tag) +{ + URI *uri; + QueryParams *qp = NULL; + int ret = 0; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + if (uri->path == NULL || !strcmp(uri->path, "/")) { + ret = -EINVAL; + goto out; + } + pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1); + + /* sheepdog[+tcp]://[host:port]/vdiname */ + s->addr = g_strdup(uri->server ?: SD_DEFAULT_ADDR); + if (uri->port) { + s->port = g_strdup_printf("%d", uri->port); + } else { + s->port = g_strdup(SD_DEFAULT_PORT); + } + + /* snapshot tag */ + if (uri->fragment) { + *snapid = strtoul(uri->fragment, NULL, 10); + if (*snapid == 0) { + pstrcpy(tag, SD_MAX_VDI_TAG_LEN, uri->fragment); + } + } else { + *snapid = CURRENT_VDI_ID; /* search current vdi */ + } + +out: + if (qp) { + query_params_free(qp); + } + uri_free(uri); + return ret; +} + /* - * Parse a filename + * Parse a filename (old syntax) * * filename must be one of the following formats: * 1. [vdiname] @@ -836,9 +881,11 @@ static int get_sheep_fd(BDRVSheepdogState *s) static int parse_vdiname(BDRVSheepdogState *s, const char *filename, char *vdi, uint32_t *snapid, char *tag) { - char *p, *q; - int nr_sep; + char *p, *q, *uri; + const char *host_spec, *vdi_spec; + int nr_sep, ret; + strstart(filename, "sheepdog:", (const char **)&filename); p = q = g_strdup(filename); /* count the number of separators */ @@ -851,38 +898,32 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename, } p = q; - /* use the first two tokens as hostname and port number. */ + /* use the first two tokens as host_spec. */ if (nr_sep >= 2) { - s->addr = p; + host_spec = p; p = strchr(p, ':'); - *p++ = '\0'; - - s->port = p; + p++; p = strchr(p, ':'); *p++ = '\0'; } else { - s->addr = NULL; - s->port = 0; + host_spec = ""; } - pstrcpy(vdi, SD_MAX_VDI_LEN, p); + vdi_spec = p; - p = strchr(vdi, ':'); + p = strchr(vdi_spec, ':'); if (p) { - *p++ = '\0'; - *snapid = strtoul(p, NULL, 10); - if (*snapid == 0) { - pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p); - } - } else { - *snapid = CURRENT_VDI_ID; /* search current vdi */ + *p++ = '#'; } - if (s->addr == NULL) { - g_free(q); - } + uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec); - return 0; + ret = sd_parse_uri(s, uri, vdi, snapid, tag); + + g_free(q); + g_free(uri); + + return ret; } static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid, @@ -1097,16 +1138,19 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) uint32_t snapid; char *buf = NULL; - strstart(filename, "sheepdog:", (const char **)&filename); - QLIST_INIT(&s->inflight_aio_head); QLIST_INIT(&s->pending_aio_head); s->fd = -1; memset(vdi, 0, sizeof(vdi)); memset(tag, 0, sizeof(tag)); - if (parse_vdiname(s, filename, vdi, &snapid, tag) < 0) { - ret = -EINVAL; + + if (strstr(filename, "://")) { + ret = sd_parse_uri(s, filename, vdi, &snapid, tag); + } else { + ret = parse_vdiname(s, filename, vdi, &snapid, tag); + } + if (ret < 0) { goto out; } s->fd = get_sheep_fd(s); @@ -1275,17 +1319,17 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; uint32_t snapid; bool prealloc = false; - const char *vdiname; s = g_malloc0(sizeof(BDRVSheepdogState)); - strstart(filename, "sheepdog:", &vdiname); - memset(vdi, 0, sizeof(vdi)); memset(tag, 0, sizeof(tag)); - if (parse_vdiname(s, vdiname, vdi, &snapid, tag) < 0) { - error_report("invalid filename"); - ret = -EINVAL; + if (strstr(filename, "://")) { + ret = sd_parse_uri(s, filename, vdi, &snapid, tag); + } else { + ret = parse_vdiname(s, filename, vdi, &snapid, tag); + } + if (ret < 0) { goto out; } @@ -1392,6 +1436,7 @@ static void sd_close(BlockDriverState *bs) qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); closesocket(s->fd); g_free(s->addr); + g_free(s->port); } static int64_t sd_getlength(BlockDriverState *bs) @@ -2054,7 +2099,7 @@ static QEMUOptionParameter sd_create_options[] = { { NULL } }; -BlockDriver bdrv_sheepdog = { +static BlockDriver bdrv_sheepdog = { .format_name = "sheepdog", .protocol_name = "sheepdog", .instance_size = sizeof(BDRVSheepdogState), @@ -2079,8 +2124,34 @@ BlockDriver bdrv_sheepdog = { .create_options = sd_create_options, }; +static BlockDriver bdrv_sheepdog_tcp = { + .format_name = "sheepdog", + .protocol_name = "sheepdog+tcp", + .instance_size = sizeof(BDRVSheepdogState), + .bdrv_file_open = sd_open, + .bdrv_close = sd_close, + .bdrv_create = sd_create, + .bdrv_getlength = sd_getlength, + .bdrv_truncate = sd_truncate, + + .bdrv_co_readv = sd_co_readv, + .bdrv_co_writev = sd_co_writev, + .bdrv_co_flush_to_disk = sd_co_flush_to_disk, + + .bdrv_snapshot_create = sd_snapshot_create, + .bdrv_snapshot_goto = sd_snapshot_goto, + .bdrv_snapshot_delete = sd_snapshot_delete, + .bdrv_snapshot_list = sd_snapshot_list, + + .bdrv_save_vmstate = sd_save_vmstate, + .bdrv_load_vmstate = sd_load_vmstate, + + .create_options = sd_create_options, +}; + static void bdrv_sheepdog_init(void) { bdrv_register(&bdrv_sheepdog); + bdrv_register(&bdrv_sheepdog_tcp); } block_init(bdrv_sheepdog_init); diff --git a/qemu-doc.texi b/qemu-doc.texi index 747e052fcb..2083e29a97 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -830,7 +830,7 @@ QEMU-based virtual machines. You can create a Sheepdog disk image with the command: @example -qemu-img create sheepdog:@var{image} @var{size} +qemu-img create sheepdog:///@var{image} @var{size} @end example where @var{image} is the Sheepdog image name and @var{size} is its size. @@ -838,29 +838,29 @@ size. To import the existing @var{filename} to Sheepdog, you can use a convert command. @example -qemu-img convert @var{filename} sheepdog:@var{image} +qemu-img convert @var{filename} sheepdog:///@var{image} @end example You can boot from the Sheepdog disk image with the command: @example -qemu-system-i386 sheepdog:@var{image} +qemu-system-i386 sheepdog:///@var{image} @end example You can also create a snapshot of the Sheepdog image like qcow2. @example -qemu-img snapshot -c @var{tag} sheepdog:@var{image} +qemu-img snapshot -c @var{tag} sheepdog:///@var{image} @end example where @var{tag} is a tag name of the newly created snapshot. To boot from the Sheepdog snapshot, specify the tag name of the snapshot. @example -qemu-system-i386 sheepdog:@var{image}:@var{tag} +qemu-system-i386 sheepdog:///@var{image}#@var{tag} @end example You can create a cloned image from the existing snapshot. @example -qemu-img create -b sheepdog:@var{base}:@var{tag} sheepdog:@var{image} +qemu-img create -b sheepdog:///@var{base}#@var{tag} sheepdog:///@var{image} @end example where @var{base} is a image name of the source snapshot and @var{tag} is its tag name. @@ -868,8 +868,8 @@ is its tag name. If the Sheepdog daemon doesn't run on the local host, you need to specify one of the Sheepdog servers to connect to. @example -qemu-img create sheepdog:@var{hostname}:@var{port}:@var{image} @var{size} -qemu-system-i386 sheepdog:@var{hostname}:@var{port}:@var{image} +qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size} +qemu-system-i386 sheepdog://@var{hostname}:@var{port}/@var{image} @end example @node disk_images_iscsi diff --git a/qemu-options.hx b/qemu-options.hx index 797d992804..e8fb78c5eb 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2099,23 +2099,13 @@ QEMU supports using either local sheepdog devices or remote networked devices. Syntax for specifying a sheepdog device -@table @list -``sheepdog:'' - -``sheepdog::'' - -``sheepdog::'' - -``sheepdog:::'' - -``sheepdog::::'' - -``sheepdog::::'' -@end table +@example +sheepdog[+tcp]://[host:port]/vdiname[#snapid|#tag] +@end example Example @example -qemu-system-i386 --drive file=sheepdog:192.0.2.1:30000:MyVirtualMachine +qemu-system-i386 --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine @end example See also @url{http://http://www.osrg.net/sheepdog/}. From 25af257d219ed2708b3bcf7f1fabf93234d27620 Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Fri, 22 Feb 2013 12:39:52 +0900 Subject: [PATCH 7/9] sheepdog: use inet_connect to simplify connect code This uses the form ":" for the representation of the sheepdog server to use inet_connect. Signed-off-by: MORITA Kazutaka Signed-off-by: Stefan Hajnoczi --- block/sheepdog.c | 113 +++++++++++++---------------------------------- 1 file changed, 31 insertions(+), 82 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index bfa8a00dda..b5cbdfee83 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -22,7 +22,7 @@ #define SD_PROTO_VER 0x01 #define SD_DEFAULT_ADDR "localhost" -#define SD_DEFAULT_PORT "7000" +#define SD_DEFAULT_PORT 7000 #define SD_OP_CREATE_AND_WRITE_OBJ 0x01 #define SD_OP_READ_OBJ 0x02 @@ -298,8 +298,7 @@ typedef struct BDRVSheepdogState { bool is_snapshot; uint32_t cache_flags; - char *addr; - char *port; + char *host_spec; int fd; CoMutex lock; @@ -447,56 +446,18 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, return acb; } -static int connect_to_sdog(const char *addr, const char *port) +static int connect_to_sdog(BDRVSheepdogState *s) { - char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV]; - int fd, ret; - struct addrinfo hints, *res, *res0; + int fd; + Error *err = NULL; - if (!addr) { - addr = SD_DEFAULT_ADDR; - port = SD_DEFAULT_PORT; + fd = inet_connect(s->host_spec, &err); + + if (err != NULL) { + qerror_report_err(err); + error_free(err); } - memset(&hints, 0, sizeof(hints)); - hints.ai_socktype = SOCK_STREAM; - - ret = getaddrinfo(addr, port, &hints, &res0); - if (ret) { - error_report("unable to get address info %s, %s", - addr, strerror(errno)); - return -errno; - } - - for (res = res0; res; res = res->ai_next) { - ret = getnameinfo(res->ai_addr, res->ai_addrlen, hbuf, sizeof(hbuf), - sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV); - if (ret) { - continue; - } - - fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol); - if (fd < 0) { - continue; - } - - reconnect: - ret = connect(fd, res->ai_addr, res->ai_addrlen); - if (ret < 0) { - if (errno == EINTR) { - goto reconnect; - } - close(fd); - break; - } - - dprintf("connected to %s:%s\n", addr, port); - goto success; - } - fd = -errno; - error_report("failed connect to %s:%s", addr, port); -success: - freeaddrinfo(res0); return fd; } @@ -798,9 +759,8 @@ static int get_sheep_fd(BDRVSheepdogState *s) { int ret, fd; - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { - error_report("%s", strerror(errno)); return fd; } @@ -836,12 +796,8 @@ static int sd_parse_uri(BDRVSheepdogState *s, const char *filename, pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1); /* sheepdog[+tcp]://[host:port]/vdiname */ - s->addr = g_strdup(uri->server ?: SD_DEFAULT_ADDR); - if (uri->port) { - s->port = g_strdup_printf("%d", uri->port); - } else { - s->port = g_strdup(SD_DEFAULT_PORT); - } + s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR, + uri->port ?: SD_DEFAULT_PORT); /* snapshot tag */ if (uri->fragment) { @@ -935,7 +891,7 @@ static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid, unsigned int wlen, rlen = 0; char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN]; - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { return fd; } @@ -1178,9 +1134,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) s->is_snapshot = true; } - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { - error_report("failed to connect"); ret = fd; goto out; } @@ -1213,9 +1168,8 @@ out: return ret; } -static int do_sd_create(char *filename, int64_t vdi_size, - uint32_t base_vid, uint32_t *vdi_id, int snapshot, - const char *addr, const char *port) +static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size, + uint32_t base_vid, uint32_t *vdi_id, int snapshot) { SheepdogVdiReq hdr; SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; @@ -1223,7 +1177,7 @@ static int do_sd_create(char *filename, int64_t vdi_size, unsigned int wlen, rlen = 0; char buf[SD_MAX_VDI_LEN]; - fd = connect_to_sdog(addr, port); + fd = connect_to_sdog(s); if (fd < 0) { return fd; } @@ -1390,7 +1344,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) bdrv_delete(bs); } - ret = do_sd_create(vdi, vdi_size, base_vid, &vid, 0, s->addr, s->port); + ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0); if (!prealloc || ret) { goto out; } @@ -1411,7 +1365,7 @@ static void sd_close(BlockDriverState *bs) dprintf("%s\n", s->name); - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { return; } @@ -1435,8 +1389,7 @@ static void sd_close(BlockDriverState *bs) qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); closesocket(s->fd); - g_free(s->addr); - g_free(s->port); + g_free(s->host_spec); } static int64_t sd_getlength(BlockDriverState *bs) @@ -1460,7 +1413,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) return -EINVAL; } - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { return fd; } @@ -1536,17 +1489,15 @@ static int sd_create_branch(BDRVSheepdogState *s) buf = g_malloc(SD_INODE_SIZE); - ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1, - s->addr, s->port); + ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1); if (ret) { goto out; } dprintf("%" PRIx32 " is created.\n", vid); - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { - error_report("failed to connect"); ret = fd; goto out; } @@ -1805,7 +1756,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); /* refresh inode. */ - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { ret = fd; goto cleanup; @@ -1818,8 +1769,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) goto cleanup; } - ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, 1, - s->addr, s->port); + ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, + 1); if (ret < 0) { error_report("failed to create inode for snapshot. %s", strerror(errno)); @@ -1874,9 +1825,8 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) goto out; } - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { - error_report("failed to connect"); ret = fd; goto out; } @@ -1938,7 +1888,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) vdi_inuse = g_malloc(max); - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { ret = fd; goto out; @@ -1965,9 +1915,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT); start_nr = hval & (SD_NR_VDIS - 1); - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { - error_report("failed to connect"); ret = fd; goto out; } @@ -2024,7 +1973,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, uint32_t vdi_index; uint64_t offset; - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { return fd; } From 1b8bbb46e7593b92ded74cc2a5461202c2b6c05c Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Fri, 22 Feb 2013 12:39:53 +0900 Subject: [PATCH 8/9] sheepdog: add support for connecting to unix domain socket This patch adds support for a unix domain socket for a connection between qemu and local sheepdog server. You can use the unix domain socket with the following syntax: $ qemu sheepdog+unix:///?socket=[#snapid] Signed-off-by: MORITA Kazutaka Signed-off-by: Stefan Hajnoczi --- block/sheepdog.c | 82 +++++++++++++++++++++++++++++++++++++++++------- qemu-doc.texi | 6 ++++ qemu-options.hx | 2 +- 3 files changed, 77 insertions(+), 13 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index b5cbdfee83..c711c28613 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -299,6 +299,7 @@ typedef struct BDRVSheepdogState { uint32_t cache_flags; char *host_spec; + bool is_unix; int fd; CoMutex lock; @@ -451,7 +452,18 @@ static int connect_to_sdog(BDRVSheepdogState *s) int fd; Error *err = NULL; - fd = inet_connect(s->host_spec, &err); + if (s->is_unix) { + fd = unix_connect(s->host_spec, &err); + } else { + fd = inet_connect(s->host_spec, &err); + + if (err == NULL) { + int ret = socket_set_nodelay(fd); + if (ret < 0) { + error_report("%s", strerror(errno)); + } + } + } if (err != NULL) { qerror_report_err(err); @@ -757,7 +769,7 @@ static int aio_flush_request(void *opaque) */ static int get_sheep_fd(BDRVSheepdogState *s) { - int ret, fd; + int fd; fd = connect_to_sdog(s); if (fd < 0) { @@ -766,13 +778,6 @@ static int get_sheep_fd(BDRVSheepdogState *s) socket_set_nonblock(fd); - ret = socket_set_nodelay(fd); - if (ret) { - error_report("%s", strerror(errno)); - closesocket(fd); - return -errno; - } - qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s); return fd; } @@ -789,15 +794,42 @@ static int sd_parse_uri(BDRVSheepdogState *s, const char *filename, return -EINVAL; } + /* transport */ + if (!strcmp(uri->scheme, "sheepdog")) { + s->is_unix = false; + } else if (!strcmp(uri->scheme, "sheepdog+tcp")) { + s->is_unix = false; + } else if (!strcmp(uri->scheme, "sheepdog+unix")) { + s->is_unix = true; + } else { + ret = -EINVAL; + goto out; + } + if (uri->path == NULL || !strcmp(uri->path, "/")) { ret = -EINVAL; goto out; } pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1); - /* sheepdog[+tcp]://[host:port]/vdiname */ - s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR, - uri->port ?: SD_DEFAULT_PORT); + qp = query_params_parse(uri->query); + if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) { + ret = -EINVAL; + goto out; + } + + if (s->is_unix) { + /* sheepdog+unix:///vdiname?socket=path */ + if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) { + ret = -EINVAL; + goto out; + } + s->host_spec = g_strdup(qp->p[0].value); + } else { + /* sheepdog[+tcp]://[host:port]/vdiname */ + s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR, + uri->port ?: SD_DEFAULT_PORT); + } /* snapshot tag */ if (uri->fragment) { @@ -2098,9 +2130,35 @@ static BlockDriver bdrv_sheepdog_tcp = { .create_options = sd_create_options, }; +static BlockDriver bdrv_sheepdog_unix = { + .format_name = "sheepdog", + .protocol_name = "sheepdog+unix", + .instance_size = sizeof(BDRVSheepdogState), + .bdrv_file_open = sd_open, + .bdrv_close = sd_close, + .bdrv_create = sd_create, + .bdrv_getlength = sd_getlength, + .bdrv_truncate = sd_truncate, + + .bdrv_co_readv = sd_co_readv, + .bdrv_co_writev = sd_co_writev, + .bdrv_co_flush_to_disk = sd_co_flush_to_disk, + + .bdrv_snapshot_create = sd_snapshot_create, + .bdrv_snapshot_goto = sd_snapshot_goto, + .bdrv_snapshot_delete = sd_snapshot_delete, + .bdrv_snapshot_list = sd_snapshot_list, + + .bdrv_save_vmstate = sd_save_vmstate, + .bdrv_load_vmstate = sd_load_vmstate, + + .create_options = sd_create_options, +}; + static void bdrv_sheepdog_init(void) { bdrv_register(&bdrv_sheepdog); bdrv_register(&bdrv_sheepdog_tcp); + bdrv_register(&bdrv_sheepdog_unix); } block_init(bdrv_sheepdog_init); diff --git a/qemu-doc.texi b/qemu-doc.texi index 2083e29a97..af84bef0e9 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -865,6 +865,12 @@ qemu-img create -b sheepdog:///@var{base}#@var{tag} sheepdog:///@var{image} where @var{base} is a image name of the source snapshot and @var{tag} is its tag name. +You can use an unix socket instead of an inet socket: + +@example +qemu-system-i386 sheepdog+unix:///@var{image}?socket=@var{path} +@end example + If the Sheepdog daemon doesn't run on the local host, you need to specify one of the Sheepdog servers to connect to. @example diff --git a/qemu-options.hx b/qemu-options.hx index e8fb78c5eb..f598d7a395 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2100,7 +2100,7 @@ devices. Syntax for specifying a sheepdog device @example -sheepdog[+tcp]://[host:port]/vdiname[#snapid|#tag] +sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] @end example Example From 272d2d8e1241b92ab9be87b2c8fb590fd84987a8 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Tue, 26 Feb 2013 09:55:48 -0500 Subject: [PATCH 9/9] block: for HMP commit() operations on 'all', skip non-COW drives During a commit of 'all' using the HMP non-live commit, the operation is aborted and returns error on the first error enountered. When non-COW drives are in use (e.g. ejected floppy, cdrom, or drives without a backing parent), that means a commit all will return an error of either -ENOMEDIUM or -ENOTSUP. This is not desirable, so for the 'all' commit case, only attempt the commit if both bs->drv and bs->backing_hd are present. More succinctly: 'commit all' now means a commit on all COW drives. This means an individual commit to a specific non-COW drive will still return the appropriate error (-ENOMEDIUM if eject / not present, -ENOTSUP if no backing file). Reported-by: Jan Kiszka Signed-off-by: Jeff Cody Reviewed-by: Paolo Bonzini Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/block.c b/block.c index 4582961965..124a9ebf65 100644 --- a/block.c +++ b/block.c @@ -1640,9 +1640,11 @@ int bdrv_commit_all(void) BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, list) { - int ret = bdrv_commit(bs); - if (ret < 0) { - return ret; + if (bs->drv && bs->backing_hd) { + int ret = bdrv_commit(bs); + if (ret < 0) { + return ret; + } } } return 0;