From c13be5a1bfcd6cafdf20072c75eaf3e5af35f2e6 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi
Date: Thu, 6 Aug 2020 14:18:00 +0100
Subject: [PATCH 1/3] async: rename event_notifier_dummy_cb/poll()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The event_notifier_*() prefix can be confused with the EventNotifier
APIs that are also called event_notifier_*(). Rename the functions to
aio_context_notifier_*() to make it clear that they relate to the
AioContext::notifier field.

Signed-off-by: Stefan Hajnoczi
Reviewed-by: Philippe Mathieu-Daudé
Reviewed-by: Paolo Bonzini
Message-id: 20200806131802.569478-2-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi
---
 util/async.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/util/async.c b/util/async.c
index 1319eee3bc..d9f987e133 100644
--- a/util/async.c
+++ b/util/async.c
@@ -445,12 +445,12 @@ static void aio_timerlist_notify(void *opaque, QEMUClockType type)
     aio_notify(opaque);
 }
 
-static void event_notifier_dummy_cb(EventNotifier *e)
+static void aio_context_notifier_dummy_cb(EventNotifier *e)
 {
 }
 
 /* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
-static bool event_notifier_poll(void *opaque)
+static bool aio_context_notifier_poll(void *opaque)
 {
     EventNotifier *e = opaque;
     AioContext *ctx = container_of(e, AioContext, notifier);
@@ -508,8 +508,8 @@ AioContext *aio_context_new(Error **errp)
 
     aio_set_event_notifier(ctx, &ctx->notifier,
                            false,
-                           event_notifier_dummy_cb,
-                           event_notifier_poll);
+                           aio_context_notifier_dummy_cb,
+                           aio_context_notifier_poll);
 #ifdef CONFIG_LINUX_AIO
     ctx->linux_aio = NULL;
 #endif

From 601829f88e0eff8f54fa08b7a69b0774a9c259c8 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi
Date: Thu, 6 Aug 2020 14:18:01 +0100
Subject: [PATCH 2/3] async: always set ctx->notified in aio_notify()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

aio_notify() does not set ctx->notified when called with ctx->notify_me
disabled. Therefore ctx->notify_me needs to be enabled during polling.

This is suboptimal since expensive event_notifier_set(&ctx->notifier)
and event_notifier_test_and_clear(&ctx->notifier) calls are required
when ctx->notify_me is enabled.

Change aio_notify() so that ctx->notified is always set, regardless of
ctx->notify_me. This will make polling cheaper since ctx->notify_me can
remain disabled. Move the event_notifier_test_and_clear() to the fd
handler function (which is now no longer an empty function so "dummy"
has been dropped from its name).

The next patch takes advantage of this by optimizing polling in
util/aio-posix.c.

Signed-off-by: Stefan Hajnoczi
Reviewed-by: Paolo Bonzini
Reviewed-by: Philippe Mathieu-Daudé
Message-id: 20200806131802.569478-3-stefanha@redhat.com

[Paolo Bonzini pointed out that the smp_wmb() in aio_notify_accept()
should be smp_mb(), while the comment should say smp_wmb() instead of
smp_mb(). Fixed.
--Stefan]

Signed-off-by: Stefan Hajnoczi
---
 util/async.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/util/async.c b/util/async.c
index d9f987e133..4266745dee 100644
--- a/util/async.c
+++ b/util/async.c
@@ -419,25 +419,32 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
 
 void aio_notify(AioContext *ctx)
 {
-    /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
+    /*
+     * Write e.g. bh->flags before writing ctx->notified.  Pairs with smp_mb in
+     * aio_notify_accept.
+     */
+    smp_wmb();
+    atomic_set(&ctx->notified, true);
+
+    /*
+     * Write ctx->notified before reading ctx->notify_me.  Pairs
      * with smp_mb in aio_ctx_prepare or aio_poll.
      */
     smp_mb();
     if (atomic_read(&ctx->notify_me)) {
         event_notifier_set(&ctx->notifier);
-        atomic_mb_set(&ctx->notified, true);
     }
 }
 
 void aio_notify_accept(AioContext *ctx)
 {
-    if (atomic_xchg(&ctx->notified, false)
-#ifdef WIN32
-        || true
-#endif
-    ) {
-        event_notifier_test_and_clear(&ctx->notifier);
-    }
+    atomic_set(&ctx->notified, false);
+
+    /*
+     * Write ctx->notified before reading e.g. bh->flags.  Pairs with smp_wmb
+     * in aio_notify.
+     */
+    smp_mb();
 }
 
 static void aio_timerlist_notify(void *opaque, QEMUClockType type)
@@ -445,8 +452,11 @@ static void aio_timerlist_notify(void *opaque, QEMUClockType type)
     aio_notify(opaque);
 }
 
-static void aio_context_notifier_dummy_cb(EventNotifier *e)
+static void aio_context_notifier_cb(EventNotifier *e)
 {
+    AioContext *ctx = container_of(e, AioContext, notifier);
+
+    event_notifier_test_and_clear(&ctx->notifier);
 }
 
 /* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
@@ -508,7 +518,7 @@ AioContext *aio_context_new(Error **errp)
 
     aio_set_event_notifier(ctx, &ctx->notifier,
                            false,
-                           aio_context_notifier_dummy_cb,
+                           aio_context_notifier_cb,
                            aio_context_notifier_poll);
 #ifdef CONFIG_LINUX_AIO
     ctx->linux_aio = NULL;

From 44277bf914471962c9e88e09c859aae65ae109c4 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi
Date: Thu, 6 Aug 2020 14:18:02 +0100
Subject: [PATCH 3/3] aio-posix: keep aio_notify_me disabled during polling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Polling only monitors the ctx->notified field and does not need the
ctx->notifier EventNotifier to be signalled. Keep ctx->notify_me
disabled while polling to avoid unnecessary EventNotifier syscalls.

This optimization improves virtio-blk 4KB random read performance by
18%. The following results are with an IOThread and the null-co block
driver:

Test         IOPS   Error
Before  244518.62  ± 1.20%
After   290706.11  ± 0.44%

Signed-off-by: Stefan Hajnoczi
Reviewed-by: Paolo Bonzini
Reviewed-by: Philippe Mathieu-Daudé
Message-id: 20200806131802.569478-4-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi
---
 util/aio-posix.c | 55 +++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index 1b2a3af65b..f7f13ebfc2 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -464,9 +464,6 @@ static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now)
  *
  * Polls for a given time.
  *
- * Note that ctx->notify_me must be non-zero so this function can detect
- * aio_notify().
- *
  * Note that the caller must have incremented ctx->list_lock.
  *
  * Returns: true if progress was made, false otherwise
@@ -476,7 +473,6 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
     bool progress;
     int64_t start_time, elapsed_time;
 
-    assert(ctx->notify_me);
     assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
 
     trace_run_poll_handlers_begin(ctx, max_ns, *timeout);
@@ -520,8 +516,6 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
  * @timeout: timeout for blocking wait, computed by the caller and updated if
  *           polling succeeds.
  *
- * ctx->notify_me must be non-zero so this function can detect aio_notify().
- *
  * Note that the caller must have incremented ctx->list_lock.
  *
  * Returns: true if progress was made, false otherwise
@@ -556,6 +550,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
     int ret = 0;
     bool progress;
+    bool use_notify_me;
     int64_t timeout;
     int64_t start = 0;
 
@@ -566,23 +561,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
      */
     assert(in_aio_context_home_thread(ctx));
 
-    /* aio_notify can avoid the expensive event_notifier_set if
-     * everything (file descriptors, bottom halves, timers) will
-     * be re-evaluated before the next blocking poll().  This is
-     * already true when aio_poll is called with blocking == false;
-     * if blocking == true, it is only true after poll() returns,
-     * so disable the optimization now.
-     */
-    if (blocking) {
-        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
-        /*
-         * Write ctx->notify_me before computing the timeout
-         * (reading bottom half flags, etc.).  Pairs with
-         * smp_mb in aio_notify().
-         */
-        smp_mb();
-    }
-
     qemu_lockcnt_inc(&ctx->list_lock);
 
     if (ctx->poll_max_ns) {
@@ -593,6 +571,29 @@ bool aio_poll(AioContext *ctx, bool blocking)
     progress = try_poll_mode(ctx, &timeout);
     assert(!(timeout && progress));
 
+    /*
+     * aio_notify can avoid the expensive event_notifier_set if
+     * everything (file descriptors, bottom halves, timers) will
+     * be re-evaluated before the next blocking poll().  This is
+     * already true when aio_poll is called with blocking == false;
+     * if blocking == true, it is only true after poll() returns,
+     * so disable the optimization now.
+     */
+    use_notify_me = timeout != 0;
+    if (use_notify_me) {
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before reading ctx->notified.  Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
+
+        /* Don't block if aio_notify() was called */
+        if (atomic_read(&ctx->notified)) {
+            timeout = 0;
+        }
+    }
+
     /* If polling is allowed, non-blocking aio_poll does not need the
      * system call---a single round of run_poll_handlers_once suffices.
      */
@@ -600,12 +601,14 @@ bool aio_poll(AioContext *ctx, bool blocking)
         ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
     }
 
-    if (blocking) {
+    if (use_notify_me) {
         /* Finish the poll before clearing the flag.  */
-        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
-        aio_notify_accept(ctx);
+        atomic_store_release(&ctx->notify_me,
+                             atomic_read(&ctx->notify_me) - 2);
     }
 
+    aio_notify_accept(ctx);
+
     /* Adjust polling time */
     if (ctx->poll_max_ns) {
         int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
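
The three patches above converge on a small protocol between notifiers and
the polling thread: aio_notify() always sets ctx->notified and only signals
the EventNotifier when ctx->notify_me advertises a waiter that may block,
while the polling fast path merely reads ctx->notified. The sketch below is
a minimal standalone model of that protocol, not QEMU code: it substitutes
C11 atomics for QEMU's smp_mb()/atomic_*() helpers, and the Ctx struct and
the ctx_notify()/ctx_notify_accept()/ctx_poll_once()/ctx_prepare_to_block()
names are invented for illustration.

/*
 * notify_sketch.c - standalone model of the notify_me/notified protocol.
 * Hypothetical names; C11 atomics stand in for QEMU's barriers and helpers.
 *
 * Build: cc -std=c11 notify_sketch.c -o notify_sketch
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct {
    atomic_uint notify_me;  /* non-zero while a waiter may block             */
    atomic_bool notified;   /* "a notification arrived", cleared by waiter   */
    atomic_bool event_fd;   /* stands in for the EventNotifier "syscall"     */
} Ctx;

/* Models aio_notify(): always record the notification, but only touch the
 * event notifier stand-in when a waiter advertised that it may block. */
static void ctx_notify(Ctx *ctx)
{
    atomic_store(&ctx->notified, true);         /* smp_wmb() + atomic_set()  */
    atomic_thread_fence(memory_order_seq_cst);  /* smp_mb()                  */
    if (atomic_load(&ctx->notify_me)) {
        atomic_store(&ctx->event_fd, true);     /* event_notifier_set()      */
    }
}

/* Models aio_notify_accept(): consume the notification before re-checking
 * bottom halves, timers, and file descriptors. */
static void ctx_notify_accept(Ctx *ctx)
{
    atomic_store(&ctx->notified, false);
    atomic_thread_fence(memory_order_seq_cst);  /* smp_mb()                  */
}

/* Models the polling fast path of patch 3: notify_me stays disabled, so
 * ctx_notify() never touches event_fd; polling only reads notified. */
static bool ctx_poll_once(Ctx *ctx)
{
    return atomic_load(&ctx->notified);
}

/* Models the blocking slow path: advertise notify_me, then re-check
 * notified so a racing ctx_notify() cannot be slept through. */
static bool ctx_prepare_to_block(Ctx *ctx)
{
    atomic_fetch_add(&ctx->notify_me, 2);
    atomic_thread_fence(memory_order_seq_cst);  /* smp_mb()                  */
    if (atomic_load(&ctx->notified)) {
        atomic_fetch_sub(&ctx->notify_me, 2);
        return false;   /* work is pending, don't block */
    }
    return true;        /* safe to block on event_fd    */
}

int main(void)
{
    Ctx ctx = { 0 };

    ctx_notify(&ctx);   /* e.g. a bottom half was scheduled */
    printf("poll sees work: %d\n", ctx_poll_once(&ctx));          /* 1 */
    printf("event_fd touched: %d\n", atomic_load(&ctx.event_fd)); /* 0 */

    ctx_notify_accept(&ctx);
    printf("may block now: %d\n", ctx_prepare_to_block(&ctx));    /* 1 */
    return 0;
}

The effect of patch 3 is visible in main(): ctx_notify() never touches the
event notifier stand-in while notify_me is zero, so a loop calling
ctx_poll_once() pays no syscall-like cost; only ctx_prepare_to_block()
re-enables the notification path, and it re-checks notified so that a
notification racing with the transition to blocking is not lost.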