From ce317461573bac12b10d67699b4ddf1f97cf066c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 25 Sep 2015 13:21:28 +0800 Subject: [PATCH 01/14] virtio: introduce virtqueue_unmap_sg() Factor out sg unmapping logic. This will be reused by the patch that can discard descriptor. Cc: Michael S. Tsirkin Cc: Andrew James Signed-off-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 7504f8b33a..6f2b96cc7b 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -244,14 +244,12 @@ int virtio_queue_empty(VirtQueue *vq) return vring_avail_idx(vq) == vq->last_avail_idx; } -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len, unsigned int idx) +static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) { unsigned int offset; int i; - trace_virtqueue_fill(vq, elem, len, idx); - offset = 0; for (i = 0; i < elem->in_num; i++) { size_t size = MIN(len - offset, elem->in_sg[i].iov_len); @@ -267,6 +265,14 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, cpu_physical_memory_unmap(elem->out_sg[i].iov_base, elem->out_sg[i].iov_len, 0, elem->out_sg[i].iov_len); +} + +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len, unsigned int idx) +{ + trace_virtqueue_fill(vq, elem, len, idx); + + virtqueue_unmap_sg(vq, elem, len); idx = (idx + vring_used_idx(vq)) % vq->vring.num; From 29b9f5efd78ae0f9cc02dd169b6e80d2c404bade Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 25 Sep 2015 13:21:29 +0800 Subject: [PATCH 02/14] virtio: introduce virtqueue_discard() This patch introduces virtqueue_discard() to discard a descriptor and unmap the sgs. This will be used by the patch that will discard descriptor when packet is truncated. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 7 +++++++ include/hw/virtio/virtio.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 6f2b96cc7b..d0bc72e0e4 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -267,6 +267,13 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, 0, elem->out_sg[i].iov_len); } +void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) +{ + vq->last_avail_idx--; + virtqueue_unmap_sg(vq, elem, len); +} + void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 6201ee8ce0..9d09115fab 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -146,6 +146,8 @@ void virtio_del_queue(VirtIODevice *vdev, int n); void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len); void virtqueue_flush(VirtQueue *vq, unsigned int count); +void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len); void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx); From 0cf33fb6b49a19de32859e2cdc6021334f448fb3 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 25 Sep 2015 13:21:30 +0800 Subject: [PATCH 03/14] virtio-net: correctly drop truncated packets When packet is truncated during receiving, we drop the packets but neither discard the descriptor nor add and signal used descriptor. This will lead several issues: - sg mappings are leaked - rx will be stalled if a lots of packets were truncated In order to be consistent with vhost, fix by discarding the descriptor in this case. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index d388c5571d..a877614e3e 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1094,13 +1094,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t * must have consumed the complete packet. * Otherwise, drop it. */ if (!n->mergeable_rx_bufs && offset < size) { -#if 0 - error_report("virtio-net truncated non-mergeable packet: " - "i %zd mergeable %d offset %zd, size %zd, " - "guest hdr len %zd, host hdr len %zd", - i, n->mergeable_rx_bufs, - offset, size, n->guest_hdr_len, n->host_hdr_len); -#endif + virtqueue_discard(q->rx_vq, &elem, total); return size; } From c2dfc5ba3fb3a1b7278c99bfd3bf350202169434 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 10 Sep 2015 16:36:51 +0300 Subject: [PATCH 04/14] oslib: rework anonimous RAM allocation At the moment we first allocate RAM, sometimes more than necessary for alignment reasons. We then free the extra RAM. Rework this to avoid the temporary allocation: reserve the range by mapping it with PROT_NONE, then use just the necessary range with MAP_FIXED. Signed-off-by: Michael S. Tsirkin Reviewed-by: Paolo Bonzini Acked-by: Paolo Bonzini --- util/oslib-posix.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 3ae4987b6b..27972d4add 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -129,9 +129,9 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) { size_t align = QEMU_VMALLOC_ALIGN; size_t total = size + align - getpagesize(); - void *ptr = mmap(0, total, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; + void *ptr1; if (ptr == MAP_FAILED) { return NULL; @@ -140,6 +140,14 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) if (alignment) { *alignment = align; } + + ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (ptr1 == MAP_FAILED) { + munmap(ptr, total); + return NULL; + } + ptr += offset; total -= offset; From 9fac18f03a9040b67ec38e14d3e1ed34db9c7e06 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 10 Sep 2015 16:41:17 +0300 Subject: [PATCH 05/14] oslib: allocate PROT_NONE pages on top of RAM This inserts a read and write protected page between RAM and QEMU memory. This makes it harder to exploit QEMU bugs resulting from buffer overflows in devices using variants of cpu_physical_memory_map, dma_memory_map etc. Signed-off-by: Michael S. Tsirkin Reviewed-by: Paolo Bonzini Acked-by: Paolo Bonzini --- util/oslib-posix.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 27972d4add..a0fcdc2ede 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -128,7 +128,7 @@ void *qemu_memalign(size_t alignment, size_t size) void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) { size_t align = QEMU_VMALLOC_ALIGN; - size_t total = size + align - getpagesize(); + size_t total = size + align; void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; void *ptr1; @@ -154,8 +154,8 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) if (offset > 0) { munmap(ptr - offset, offset); } - if (total > size) { - munmap(ptr + size, total - size); + if (total > size + getpagesize()) { + munmap(ptr + size + getpagesize(), total - size - getpagesize()); } trace_qemu_anon_ram_alloc(size, ptr); @@ -172,7 +172,7 @@ void qemu_anon_ram_free(void *ptr, size_t size) { trace_qemu_anon_ram_free(ptr, size); if (ptr) { - munmap(ptr, size); + munmap(ptr, size + getpagesize()); } } From 8561c9244ddf1122dfe7ccac9b23f506062f1499 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 10 Sep 2015 16:41:17 +0300 Subject: [PATCH 06/14] exec: allocate PROT_NONE pages on top of RAM This inserts a read and write protected page between RAM and QEMU memory, for file-backend RAM. This makes it harder to exploit QEMU bugs resulting from buffer overflows in devices using variants of cpu_physical_memory_map, dma_memory_map etc. Signed-off-by: Michael S. Tsirkin Reviewed-by: Paolo Bonzini Acked-by: Paolo Bonzini --- exec.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/exec.c b/exec.c index 47ada31040..7d90a52252 100644 --- a/exec.c +++ b/exec.c @@ -84,6 +84,9 @@ static MemoryRegion io_mem_unassigned; */ #define RAM_RESIZEABLE (1 << 2) +/* An extra page is mapped on top of this RAM. + */ +#define RAM_EXTRA (1 << 3) #endif struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); @@ -1185,10 +1188,13 @@ static void *file_ram_alloc(RAMBlock *block, char *filename; char *sanitized_name; char *c; + void *ptr; void *area = NULL; int fd; uint64_t hpagesize; + uint64_t total; Error *local_err = NULL; + size_t offset; hpagesize = gethugepagesize(path, &local_err); if (local_err) { @@ -1232,6 +1238,7 @@ static void *file_ram_alloc(RAMBlock *block, g_free(filename); memory = ROUND_UP(memory, hpagesize); + total = memory + hpagesize; /* * ftruncate is not supported by hugetlbfs in older @@ -1243,16 +1250,40 @@ static void *file_ram_alloc(RAMBlock *block, perror("ftruncate"); } - area = mmap(0, memory, PROT_READ | PROT_WRITE, - (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE), + ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + if (ptr == MAP_FAILED) { + error_setg_errno(errp, errno, + "unable to allocate memory range for hugepages"); + close(fd); + goto error; + } + + offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr; + + area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE, + (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) | + MAP_FIXED, fd, 0); if (area == MAP_FAILED) { error_setg_errno(errp, errno, "unable to map backing store for hugepages"); + munmap(ptr, total); close(fd); goto error; } + if (offset > 0) { + munmap(ptr, offset); + } + ptr += offset; + total -= offset; + + if (total > memory + getpagesize()) { + munmap(ptr + memory + getpagesize(), + total - memory - getpagesize()); + } + if (mem_prealloc) { os_mem_prealloc(fd, area, memory); } @@ -1570,6 +1601,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, new_block->used_length = size; new_block->max_length = size; new_block->flags = share ? RAM_SHARED : 0; + new_block->flags |= RAM_EXTRA; new_block->host = file_ram_alloc(new_block, size, mem_path, errp); if (!new_block->host) { @@ -1671,7 +1703,11 @@ static void reclaim_ramblock(RAMBlock *block) xen_invalidate_map_cache_entry(block->host); #ifndef _WIN32 } else if (block->fd >= 0) { - munmap(block->host, block->max_length); + if (block->flags & RAM_EXTRA) { + munmap(block->host, block->max_length + getpagesize()); + } else { + munmap(block->host, block->max_length); + } close(block->fd); #endif } else { From 798595075bf51c7e3125d260a19d860b9aa63e69 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 28 Sep 2015 15:07:21 -0300 Subject: [PATCH 07/14] pc: Add a comment explaining why pc_compat_2_4() doesn't exist pc_compat_2_4() doesn't exist, and we shouldn't create one. Add a comment explaining why the function doesn't exist and why pc_compat_*() functions are deprecated. Signed-off-by: Eduardo Habkost Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 7 +++++++ hw/i386/pc_q35.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 3ffb05f93e..52fdd558e8 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -301,6 +301,13 @@ static void pc_init1(MachineState *machine, } } +/* Looking for a pc_compat_2_4() function? It doesn't exist. + * pc_compat_*() functions that run on machine-init time and + * change global QEMU state are deprecated. Please don't create + * one, and implement any pc-*-2.4 (and newer) compat code in + * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options(). + */ + static void pc_compat_2_3(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 1b7d3b644e..837e430666 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -284,6 +284,13 @@ static void pc_q35_init(MachineState *machine) } } +/* Looking for a pc_compat_2_4() function? It doesn't exist. + * pc_compat_*() functions that run on machine-init time and + * change global QEMU state are deprecated. Please don't create + * one, and implement any pc-*-2.4 (and newer) compat code in + * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options(). + */ + static void pc_compat_2_3(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); From 0d583647a7fc73f621eeb64ed703556c4bbebfcc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 19 May 2015 13:29:51 -0700 Subject: [PATCH 08/14] virtio: Notice when the system doesn't support MSIx at all And do not issue an error_report in that case. Signed-off-by: Richard Henderson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index eda8205d58..6703806f83 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1491,12 +1491,17 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) pci_set_long(cfg_mask->pci_cfg_data, ~0x0); } - if (proxy->nvectors && - msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, - proxy->msix_bar)) { - error_report("unable to init msix vectors to %" PRIu32, - proxy->nvectors); - proxy->nvectors = 0; + if (proxy->nvectors) { + int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, + proxy->msix_bar); + if (err) { + /* Notice when a system that supports MSIx can't initialize it. */ + if (err != -ENOTSUP) { + error_report("unable to init msix vectors to %" PRIu32, + proxy->nvectors); + } + proxy->nvectors = 0; + } } proxy->pci_dev.config_write = virtio_write_config; From ca06d9cc6691e23b6d02e07b44ea549aeac60151 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Sep 2015 14:12:03 +0200 Subject: [PATCH 09/14] vhost-user-test: do not reinvent glib-compat.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit glib-compat.h has the gunk to support both old-style and new-style gthread functions. Use it instead of reinventing it. Signed-off-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Marc-André Lureau Tested-by: Marc-André Lureau --- tests/vhost-user-test.c | 113 ++++++---------------------------------- 1 file changed, 16 insertions(+), 97 deletions(-) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index e301db79b9..0e04f06839 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -8,7 +8,6 @@ * */ -#define QEMU_GLIB_COMPAT_H #include #include "libqtest.h" @@ -30,12 +29,6 @@ #define HAVE_MONOTONIC_TIME #endif -#if GLIB_CHECK_VERSION(2, 32, 0) -#define HAVE_MUTEX_INIT -#define HAVE_COND_INIT -#define HAVE_THREAD_NEW -#endif - #define QEMU_CMD_ACCEL " -machine accel=tcg" #define QEMU_CMD_MEM " -m 512 -object memory-backend-file,id=mem,size=512M,"\ "mem-path=%s,share=on -numa node,memdev=mem" @@ -113,93 +106,21 @@ static VhostUserMsg m __attribute__ ((unused)); int fds_num = 0, fds[VHOST_MEMORY_MAX_NREGIONS]; static VhostUserMemory memory; -static GMutex *data_mutex; -static GCond *data_cond; +static CompatGMutex data_mutex; +static CompatGCond data_cond; -static gint64 _get_time(void) -{ -#ifdef HAVE_MONOTONIC_TIME - return g_get_monotonic_time(); -#else - GTimeVal time; - g_get_current_time(&time); - - return time.tv_sec * G_TIME_SPAN_SECOND + time.tv_usec; -#endif -} - -static GMutex *_mutex_new(void) -{ - GMutex *mutex; - -#ifdef HAVE_MUTEX_INIT - mutex = g_new(GMutex, 1); - g_mutex_init(mutex); -#else - mutex = g_mutex_new(); -#endif - - return mutex; -} - -static void _mutex_free(GMutex *mutex) -{ -#ifdef HAVE_MUTEX_INIT - g_mutex_clear(mutex); - g_free(mutex); -#else - g_mutex_free(mutex); -#endif -} - -static GCond *_cond_new(void) -{ - GCond *cond; - -#ifdef HAVE_COND_INIT - cond = g_new(GCond, 1); - g_cond_init(cond); -#else - cond = g_cond_new(); -#endif - - return cond; -} - -static gboolean _cond_wait_until(GCond *cond, GMutex *mutex, gint64 end_time) +#if !GLIB_CHECK_VERSION(2, 32, 0) +static gboolean g_cond_wait_until(CompatGCond cond, CompatGMutex mutex, + gint64 end_time) { gboolean ret = FALSE; -#ifdef HAVE_COND_INIT - ret = g_cond_wait_until(cond, mutex, end_time); -#else + end_time -= g_get_monotonic_time(); GTimeVal time = { end_time / G_TIME_SPAN_SECOND, end_time % G_TIME_SPAN_SECOND }; ret = g_cond_timed_wait(cond, mutex, &time); -#endif return ret; } - -static void _cond_free(GCond *cond) -{ -#ifdef HAVE_COND_INIT - g_cond_clear(cond); - g_free(cond); -#else - g_cond_free(cond); #endif -} - -static GThread *_thread_new(const gchar *name, GThreadFunc func, gpointer data) -{ - GThread *thread = NULL; - GError *error = NULL; -#ifdef HAVE_THREAD_NEW - thread = g_thread_try_new(name, func, data, &error); -#else - thread = g_thread_create(func, data, TRUE, &error); -#endif - return thread; -} static void read_guest_mem(void) { @@ -208,11 +129,11 @@ static void read_guest_mem(void) int i, j; size_t size; - g_mutex_lock(data_mutex); + g_mutex_lock(&data_mutex); - end_time = _get_time() + 5 * G_TIME_SPAN_SECOND; + end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; while (!fds_num) { - if (!_cond_wait_until(data_cond, data_mutex, end_time)) { + if (!g_cond_wait_until(&data_cond, &data_mutex, end_time)) { /* timeout has passed */ g_assert(fds_num); break; @@ -252,7 +173,7 @@ static void read_guest_mem(void) } g_assert_cmpint(1, ==, 1); - g_mutex_unlock(data_mutex); + g_mutex_unlock(&data_mutex); } static void *thread_function(void *data) @@ -280,7 +201,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) return; } - g_mutex_lock(data_mutex); + g_mutex_lock(&data_mutex); memcpy(p, buf, VHOST_USER_HDR_SIZE); if (msg.size) { @@ -313,7 +234,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) fds_num = qemu_chr_fe_get_msgfds(chr, fds, sizeof(fds) / sizeof(int)); /* signal the test that it can continue */ - g_cond_signal(data_cond); + g_cond_signal(&data_cond); break; case VHOST_USER_SET_VRING_KICK: @@ -330,7 +251,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) default: break; } - g_mutex_unlock(data_mutex); + g_mutex_unlock(&data_mutex); } static const char *init_hugepagefs(void) @@ -395,9 +316,9 @@ int main(int argc, char **argv) qemu_chr_add_handlers(chr, chr_can_read, chr_read, NULL, chr); /* run the main loop thread so the chardev may operate */ - data_mutex = _mutex_new(); - data_cond = _cond_new(); - _thread_new(NULL, thread_function, NULL); + g_mutex_init(&data_mutex); + g_cond_init(&data_cond); + g_thread_new(NULL, thread_function, NULL); qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path); s = qtest_start(qemu_cmd); @@ -414,8 +335,6 @@ int main(int argc, char **argv) /* cleanup */ unlink(socket_path); g_free(socket_path); - _cond_free(data_cond); - _mutex_free(data_mutex); return ret; } From 8a9b6b37dabf00388e8069a2f5c0f659626693b3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 24 Sep 2015 18:22:01 +0200 Subject: [PATCH 10/14] vhost-user: unit test for new messages Data is empty for now, but do make sure master sets the new feature bit flag. Signed-off-by: Michael S. Tsirkin Reviewed-by: Michael S. Tsirkin --- tests/vhost-user-test.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 0e04f06839..87281b9d9f 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -46,6 +46,8 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_USER_F_PROTOCOL_FEATURES 30 + typedef enum VhostUserRequest { VHOST_USER_NONE = 0, VHOST_USER_GET_FEATURES = 1, @@ -62,6 +64,8 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_KICK = 12, VHOST_USER_SET_VRING_CALL = 13, VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, VHOST_USER_MAX } VhostUserRequest; @@ -211,6 +215,20 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) switch (msg.request) { case VHOST_USER_GET_FEATURES: + /* send back features to qemu */ + msg.flags |= VHOST_USER_REPLY_MASK; + msg.size = sizeof(m.u64); + msg.u64 = 0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES; + p = (uint8_t *) &msg; + qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); + break; + + case VHOST_USER_SET_FEATURES: + g_assert_cmpint(msg.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES), + !=, 0ULL); + break; + + case VHOST_USER_GET_PROTOCOL_FEATURES: /* send back features to qemu */ msg.flags |= VHOST_USER_REPLY_MASK; msg.size = sizeof(m.u64); From df0acded19ec4b826aa095cfc19d341bd66fafd3 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 29 Sep 2015 16:53:28 +0200 Subject: [PATCH 11/14] memhp: extend address auto assignment to support gaps setting gap to TRUE will make sparse DIMM address auto allocation, leaving gaps between a new DIMM address and preceeding existing DIMM. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 3 ++- hw/mem/pc-dimm.c | 15 +++++++++------ hw/ppc/spapr.c | 2 +- include/hw/mem/pc-dimm.h | 7 ++++--- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 461c128d23..ef027364ce 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1644,7 +1644,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, goto out; } - pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err); + pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, false, + &local_err); if (local_err) { goto out; } diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index bb04862de8..6cc6ac30e7 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -32,7 +32,8 @@ typedef struct pc_dimms_capacity { } pc_dimms_capacity; void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, - MemoryRegion *mr, uint64_t align, Error **errp) + MemoryRegion *mr, uint64_t align, bool gap, + Error **errp) { int slot; MachineState *machine = MACHINE(qdev_get_machine()); @@ -48,7 +49,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, addr = pc_dimm_get_free_addr(hpms->base, memory_region_size(&hpms->mr), - !addr ? NULL : &addr, align, + !addr ? NULL : &addr, align, gap, memory_region_size(mr), &local_err); if (local_err) { goto out; @@ -287,8 +288,8 @@ static int pc_dimm_built_list(Object *obj, void *opaque) uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_size, - uint64_t *hint, uint64_t align, uint64_t size, - Error **errp) + uint64_t *hint, uint64_t align, bool gap, + uint64_t size, Error **errp) { GSList *list = NULL, *item; uint64_t new_addr, ret = 0; @@ -333,13 +334,15 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, goto out; } - if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) { + if (ranges_overlap(dimm->addr, dimm_size, new_addr, + size + (gap ? 1 : 0))) { if (hint) { DeviceState *d = DEVICE(dimm); error_setg(errp, "address range conflicts with '%s'", d->id); goto out; } - new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size, align); + new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size + (gap ? 1 : 0), + align); } } ret = new_addr; diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a9b5f2a669..d1b0e53668 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2096,7 +2096,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err); + pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, false, &local_err); if (local_err) { goto out; } diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index d83bf30ea9..c1ee7b0408 100644 --- a/include/hw/mem/pc-dimm.h +++ b/include/hw/mem/pc-dimm.h @@ -83,15 +83,16 @@ typedef struct MemoryHotplugState { uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_size, - uint64_t *hint, uint64_t align, uint64_t size, - Error **errp); + uint64_t *hint, uint64_t align, bool gap, + uint64_t size, Error **errp); int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); int qmp_pc_dimm_device_list(Object *obj, void *opaque); uint64_t pc_existing_dimms_capacity(Error **errp); void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, - MemoryRegion *mr, uint64_t align, Error **errp); + MemoryRegion *mr, uint64_t align, bool gap, + Error **errp); void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, MemoryRegion *mr); #endif From aa8580cddf011e8cedcf87f7a0fdea7549fc4704 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 29 Sep 2015 16:53:29 +0200 Subject: [PATCH 12/14] pc: memhp: force gaps between DIMM's GPA mapping DIMMs non contiguously allows to workaround virtio bug reported earlier: http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html in this case guest kernel doesn't allocate buffers that can cross DIMM boundary keeping each buffer local to a DIMM. Suggested-by: Michael S. Tsirkin Signed-off-by: Igor Mammedov Acked-by: Eduardo Habkost Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 6 ++++-- hw/i386/pc_piix.c | 1 + hw/i386/pc_q35.c | 1 + include/hw/i386/pc.h | 1 + 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index ef027364ce..efbd41a1f1 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1629,6 +1629,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MemoryRegion *mr = ddc->get_memory_region(dimm); @@ -1644,8 +1645,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, goto out; } - pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, false, - &local_err); + pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, + pcmc->inter_dimm_gap, &local_err); if (local_err) { goto out; } @@ -1946,6 +1947,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) PCMachineClass *pcmc = PC_MACHINE_CLASS(oc); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + pcmc->inter_dimm_gap = true; pcmc->get_hotplug_handler = mc->get_hotplug_handler; mc->get_hotplug_handler = pc_get_hotpug_handler; mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 52fdd558e8..4514cd1462 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -489,6 +489,7 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m) m->alias = NULL; m->is_default = 0; pcmc->broken_reserved_end = true; + pcmc->inter_dimm_gap = false; SET_MACHINE_COMPAT(m, PC_COMPAT_2_4); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 837e430666..1f100b1a69 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -392,6 +392,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m) pc_q35_2_5_machine_options(m); m->alias = NULL; pcmc->broken_reserved_end = true; + pcmc->inter_dimm_gap = false; SET_MACHINE_COMPAT(m, PC_COMPAT_2_4); } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index ab5413f561..c13e91ddde 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -60,6 +60,7 @@ struct PCMachineClass { /*< public >*/ bool broken_reserved_end; + bool inter_dimm_gap; HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); }; From 1b7e1e3b463a6e5c117498b192cb07603c04b668 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 30 Sep 2015 18:01:21 +0300 Subject: [PATCH 13/14] vhost-user-test: use tmpfs by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most people don't run make check by default, so they skip vhost-user unit tests. Solve this by using tmpfs instead, unless hugetlbfs is specified (using an environment variable). Signed-off-by: Michael S. Tsirkin Reviewed-by: Marc-André Lureau --- tests/vhost-user-test.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 87281b9d9f..5e63cbc112 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -272,17 +272,11 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) g_mutex_unlock(&data_mutex); } -static const char *init_hugepagefs(void) +static const char *init_hugepagefs(const char *path) { - const char *path; struct statfs fs; int ret; - path = getenv("QTEST_HUGETLBFS_PATH"); - if (!path) { - path = "/hugetlbfs"; - } - if (access(path, R_OK | W_OK | X_OK)) { g_test_message("access on path (%s): %s\n", path, strerror(errno)); return NULL; @@ -309,19 +303,31 @@ int main(int argc, char **argv) { QTestState *s = NULL; CharDriverState *chr = NULL; - const char *hugefs = 0; + const char *hugefs; char *socket_path = 0; char *qemu_cmd = 0; char *chr_path = 0; int ret; + char template[] = "/tmp/vhost-test-XXXXXX"; + const char *tmpfs; + const char *root; g_test_init(&argc, &argv, NULL); module_call_init(MODULE_INIT_QOM); - hugefs = init_hugepagefs(); - if (!hugefs) { - return 0; + tmpfs = mkdtemp(template); + if (!tmpfs) { + g_test_message("mkdtemp on path (%s): %s\n", template, strerror(errno)); + } + g_assert(tmpfs); + + hugefs = getenv("QTEST_HUGETLBFS_PATH"); + if (hugefs) { + root = init_hugepagefs(hugefs); + g_assert(root); + } else { + root = tmpfs; } socket_path = g_strdup_printf("/tmp/vhost-%d.sock", getpid()); @@ -338,7 +344,7 @@ int main(int argc, char **argv) g_cond_init(&data_cond); g_thread_new(NULL, thread_function, NULL); - qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path); + qemu_cmd = g_strdup_printf(QEMU_CMD, root, socket_path); s = qtest_start(qemu_cmd); g_free(qemu_cmd); @@ -354,5 +360,12 @@ int main(int argc, char **argv) unlink(socket_path); g_free(socket_path); + ret = rmdir(tmpfs); + if (ret != 0) { + g_test_message("unable to rmdir: path (%s): %s\n", + tmpfs, strerror(errno)); + } + g_assert_cmpint(ret, ==, 0); + return ret; } From 6fdac09370530be0cc6fe9e8d425c0670ba994b1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Oct 2015 15:50:52 +0300 Subject: [PATCH 14/14] vhost-user-test: fix predictable filename on tmpfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vhost-user-test uses getpid to create a unique filename. This name is predictable, and a security problem. Instead, use a tmp directory created by mkdtemp, which is a suggested best practice. Signed-off-by: Michael S. Tsirkin Reviewed-by: Marc-André Lureau --- tests/vhost-user-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 5e63cbc112..56df5cc552 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -330,7 +330,7 @@ int main(int argc, char **argv) root = tmpfs; } - socket_path = g_strdup_printf("/tmp/vhost-%d.sock", getpid()); + socket_path = g_strdup_printf("%s/vhost.sock", tmpfs); /* create char dev and add read handlers */ qemu_add_opts(&qemu_chardev_opts);