diff --git a/util/aio-posix.c b/util/aio-posix.c
index 1b2a3af65b..f7f13ebfc2 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -464,9 +464,6 @@ static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now)
  *
  * Polls for a given time.
  *
- * Note that ctx->notify_me must be non-zero so this function can detect
- * aio_notify().
- *
  * Note that the caller must have incremented ctx->list_lock.
  *
  * Returns: true if progress was made, false otherwise
@@ -476,7 +473,6 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
     bool progress;
     int64_t start_time, elapsed_time;
 
-    assert(ctx->notify_me);
     assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
 
     trace_run_poll_handlers_begin(ctx, max_ns, *timeout);
@@ -520,8 +516,6 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
  * @timeout: timeout for blocking wait, computed by the caller and updated if
  *    polling succeeds.
  *
- * ctx->notify_me must be non-zero so this function can detect aio_notify().
- *
  * Note that the caller must have incremented ctx->list_lock.
  *
  * Returns: true if progress was made, false otherwise
@@ -556,6 +550,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
     int ret = 0;
     bool progress;
+    bool use_notify_me;
     int64_t timeout;
     int64_t start = 0;
 
@@ -566,23 +561,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
      */
     assert(in_aio_context_home_thread(ctx));
 
-    /* aio_notify can avoid the expensive event_notifier_set if
-     * everything (file descriptors, bottom halves, timers) will
-     * be re-evaluated before the next blocking poll().  This is
-     * already true when aio_poll is called with blocking == false;
-     * if blocking == true, it is only true after poll() returns,
-     * so disable the optimization now.
-     */
-    if (blocking) {
-        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
-        /*
-         * Write ctx->notify_me before computing the timeout
-         * (reading bottom half flags, etc.).  Pairs with
-         * smp_mb in aio_notify().
-         */
-        smp_mb();
-    }
-
     qemu_lockcnt_inc(&ctx->list_lock);
 
     if (ctx->poll_max_ns) {
@@ -593,6 +571,29 @@ bool aio_poll(AioContext *ctx, bool blocking)
     progress = try_poll_mode(ctx, &timeout);
     assert(!(timeout && progress));
 
+    /*
+     * aio_notify can avoid the expensive event_notifier_set if
+     * everything (file descriptors, bottom halves, timers) will
+     * be re-evaluated before the next blocking poll().  This is
+     * already true when aio_poll is called with blocking == false;
+     * if blocking == true, it is only true after poll() returns,
+     * so disable the optimization now.
+     */
+    use_notify_me = timeout != 0;
+    if (use_notify_me) {
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before reading ctx->notified.  Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
+
+        /* Don't block if aio_notify() was called */
+        if (atomic_read(&ctx->notified)) {
+            timeout = 0;
+        }
+    }
+
     /* If polling is allowed, non-blocking aio_poll does not need the
      * system call---a single round of run_poll_handlers_once suffices.
      */
@@ -600,12 +601,14 @@ bool aio_poll(AioContext *ctx, bool blocking)
         ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
     }
 
-    if (blocking) {
+    if (use_notify_me) {
         /* Finish the poll before clearing the flag.  */
-        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
-        aio_notify_accept(ctx);
+        atomic_store_release(&ctx->notify_me,
+                             atomic_read(&ctx->notify_me) - 2);
     }
 
+    aio_notify_accept(ctx);
+
     /* Adjust polling time */
     if (ctx->poll_max_ns) {
         int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
diff --git a/util/async.c b/util/async.c
index 1319eee3bc..4266745dee 100644
--- a/util/async.c
+++ b/util/async.c
@@ -419,25 +419,32 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
 
 void aio_notify(AioContext *ctx)
 {
-    /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
+    /*
+     * Write e.g. bh->flags before writing ctx->notified.  Pairs with smp_mb in
+     * aio_notify_accept.
+     */
+    smp_wmb();
+    atomic_set(&ctx->notified, true);
+
+    /*
+     * Write ctx->notified before reading ctx->notify_me.  Pairs
      * with smp_mb in aio_ctx_prepare or aio_poll.
      */
     smp_mb();
     if (atomic_read(&ctx->notify_me)) {
         event_notifier_set(&ctx->notifier);
-        atomic_mb_set(&ctx->notified, true);
     }
 }
 
 void aio_notify_accept(AioContext *ctx)
 {
-    if (atomic_xchg(&ctx->notified, false)
-#ifdef WIN32
-        || true
-#endif
-    ) {
-        event_notifier_test_and_clear(&ctx->notifier);
-    }
+    atomic_set(&ctx->notified, false);
+
+    /*
+     * Write ctx->notified before reading e.g. bh->flags.  Pairs with smp_wmb
+     * in aio_notify.
+     */
+    smp_mb();
 }
 
 static void aio_timerlist_notify(void *opaque, QEMUClockType type)
@@ -445,12 +452,15 @@ static void aio_timerlist_notify(void *opaque, QEMUClockType type)
     aio_notify(opaque);
 }
 
-static void event_notifier_dummy_cb(EventNotifier *e)
+static void aio_context_notifier_cb(EventNotifier *e)
 {
+    AioContext *ctx = container_of(e, AioContext, notifier);
+
+    event_notifier_test_and_clear(&ctx->notifier);
 }
 
 /* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
-static bool event_notifier_poll(void *opaque)
+static bool aio_context_notifier_poll(void *opaque)
 {
     EventNotifier *e = opaque;
     AioContext *ctx = container_of(e, AioContext, notifier);
@@ -508,8 +518,8 @@ AioContext *aio_context_new(Error **errp)
 
     aio_set_event_notifier(ctx, &ctx->notifier,
                            false,
-                           event_notifier_dummy_cb,
-                           event_notifier_poll);
+                           aio_context_notifier_cb,
+                           aio_context_notifier_poll);
 #ifdef CONFIG_LINUX_AIO
     ctx->linux_aio = NULL;
 #endif