diff --git a/block.c b/block.c
index e176c6f3bc..ea4956d6c7 100644
--- a/block.c
+++ b/block.c
@@ -2669,7 +2669,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
             goto out;
         }
 
-        total_sectors = len >> BDRV_SECTOR_BITS;
+        total_sectors = (len + BDRV_SECTOR_SIZE - 1) >> BDRV_SECTOR_BITS;
         max_nb_sectors = MAX(0, total_sectors - sector_num);
         if (max_nb_sectors > 0) {
             ret = drv->bdrv_co_readv(bs, sector_num,
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 7d144205c3..5e8a7794f4 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -192,7 +192,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
     /* The snapshot list position has not yet been updated, so these clusters
      * must indeed be completely free */
     ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, offset,
-                                        s->snapshots_size);
+                                        snapshots_size);
     if (ret < 0) {
         return ret;
     }
diff --git a/blockdev.c b/blockdev.c
index 80605a2bac..8aa66a949c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -443,7 +443,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
     if (qemu_opt_get_bool(opts, "cache.direct", false)) {
         bdrv_flags |= BDRV_O_NOCACHE;
     }
-    if (qemu_opt_get_bool(opts, "cache.no-flush", true)) {
+    if (qemu_opt_get_bool(opts, "cache.no-flush", false)) {
         bdrv_flags |= BDRV_O_NO_FLUSH;
     }
 
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index e2f55cc946..49a23c33f7 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -460,9 +460,9 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
 
 static void virtio_blk_reset(VirtIODevice *vdev)
 {
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     VirtIOBlock *s = VIRTIO_BLK(vdev);
 
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     if (s->dataplane) {
         virtio_blk_data_plane_stop(s->dataplane);
     }
@@ -473,6 +473,7 @@ static void virtio_blk_reset(VirtIODevice *vdev)
      * are per-device request lists.
      */
     bdrv_drain_all();
+    bdrv_set_enable_write_cache(s->bs, s->original_wce);
 }
 
 /* coalesce internal state, copy to pci i/o region 0
@@ -564,7 +565,25 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
     }
 
     features = vdev->guest_features;
-    bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE)));
+
+    /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send
+     * cache flushes.  Thus, the "auto writethrough" behavior is never
+     * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature.
+     * Leaving it enabled would break the following sequence:
+     *
+     *     Guest started with "-drive cache=writethrough"
+     *     Guest sets status to 0
+     *     Guest sets DRIVER bit in status field
+     *     Guest reads host features (WCE=0, CONFIG_WCE=1)
+     *     Guest writes guest features (WCE=0, CONFIG_WCE=1)
+     *     Guest writes 1 to the WCE configuration field (writeback mode)
+     *     Guest sets DRIVER_OK bit in status field
+     *
+     * s->bs would erroneously be placed in writethrough mode.
+     */
+    if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) {
+        bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE)));
+    }
 }
 
 static void virtio_blk_save(QEMUFile *f, void *opaque)
@@ -674,6 +693,7 @@ static int virtio_blk_device_init(VirtIODevice *vdev)
     }
 
     blkconf_serial(&blk->conf, &blk->serial);
+    s->original_wce = bdrv_enable_write_cache(blk->conf.bs);
     if (blkconf_geometry(&blk->conf, NULL, 65535, 255, 255) < 0) {
         return -1;
     }
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index b87cf490b1..41885da1a0 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -123,6 +123,7 @@ typedef struct VirtIOBlock {
     BlockConf *conf;
     VirtIOBlkConf blk;
     unsigned short sector_mask;
+    bool original_wce;
     VMChangeStateEntry *change;
 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     Notifier migration_state_notifier;
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index e4934dd61b..b58903bef5 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -115,6 +115,10 @@ static inline int64_t qemu_clock_get_us(QEMUClockType type)
  * Determines whether a clock's default timer list
  * has timers attached
  *
+ * Note that this function should not be used when other threads also access
+ * the timer list.  The return value may be outdated by the time it is acted
+ * upon.
+ *
  * Returns: true if the clock's default timer list
  * has timers attached
  */
@@ -271,6 +275,10 @@ void timerlist_free(QEMUTimerList *timer_list);
  *
  * Determine whether a timer list has active timers
  *
+ * Note that this function should not be used when other threads also access
+ * the timer list.  The return value may be outdated by the time it is acted
+ * upon.
+ *
  * Returns: true if the timer list has timers.
  */
 bool timerlist_has_timers(QEMUTimerList *timer_list);
@@ -512,6 +520,9 @@ void timer_free(QEMUTimer *ts);
  * @ts: the timer
  *
  * Delete a timer from the active list.
+ *
+ * This function is thread-safe but the timer and its timer list must not be
+ * freed while this function is running.
  */
 void timer_del(QEMUTimer *ts);
 
@@ -521,6 +532,9 @@ void timer_del(QEMUTimer *ts);
  * @expire_time: the expiry time in nanoseconds
  *
  * Modify a timer to expire at @expire_time
+ *
+ * This function is thread-safe but the timer and its timer list must not be
+ * freed while this function is running.
  */
 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);
 
@@ -531,6 +545,9 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);
  *
  * Modify a timer to expiry at @expire_time, taking into
  * account the scale associated with the timer.
+ *
+ * This function is thread-safe but the timer and its timer list must not be
+ * freed while this function is running.
  */
 void timer_mod(QEMUTimer *ts, int64_t expire_timer);
 
diff --git a/libcacard/Makefile b/libcacard/Makefile
index 47827a0eb8..4d15da49b8 100644
--- a/libcacard/Makefile
+++ b/libcacard/Makefile
@@ -4,7 +4,8 @@ TOOLS += vscclient$(EXESUF)
 
 # objects linked into a shared library, built with libtool with -fPIC if required
 libcacard-obj-y = $(stub-obj-y) $(libcacard-y)
-libcacard-obj-y += util/osdep.o util/cutils.o util/qemu-timer-common.o util/error.o
+libcacard-obj-y += util/osdep.o util/cutils.o util/qemu-timer-common.o
+libcacard-obj-y += util/error.o util/qemu-error.o
 libcacard-obj-$(CONFIG_WIN32) += util/oslib-win32.o util/qemu-thread-win32.o
 libcacard-obj-$(CONFIG_POSIX) += util/oslib-posix.o util/qemu-thread-posix.o
 libcacard-obj-y += $(filter trace/%, $(util-obj-y))
diff --git a/qemu-timer.c b/qemu-timer.c
index 95ff47fef3..6b62e88669 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -66,6 +66,7 @@ QEMUClock qemu_clocks[QEMU_CLOCK_MAX];
 
 struct QEMUTimerList {
     QEMUClock *clock;
+    QemuMutex active_timers_lock;
     QEMUTimer *active_timers;
     QLIST_ENTRY(QEMUTimerList) list;
     QEMUTimerListNotifyCB *notify_cb;
@@ -101,6 +102,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
     timer_list->clock = clock;
     timer_list->notify_cb = cb;
     timer_list->notify_opaque = opaque;
+    qemu_mutex_init(&timer_list->active_timers_lock);
     QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
     return timer_list;
 }
@@ -111,6 +113,7 @@ void timerlist_free(QEMUTimerList *timer_list)
     if (timer_list->clock) {
         QLIST_REMOVE(timer_list, list);
     }
+    qemu_mutex_destroy(&timer_list->active_timers_lock);
     g_free(timer_list);
 }
 
@@ -163,9 +166,17 @@ bool qemu_clock_has_timers(QEMUClockType type)
 
 bool timerlist_expired(QEMUTimerList *timer_list)
 {
-    return (timer_list->active_timers &&
-            timer_list->active_timers->expire_time <
-            qemu_clock_get_ns(timer_list->clock->type));
+    int64_t expire_time;
+
+    qemu_mutex_lock(&timer_list->active_timers_lock);
+    if (!timer_list->active_timers) {
+        qemu_mutex_unlock(&timer_list->active_timers_lock);
+        return false;
+    }
+    expire_time = timer_list->active_timers->expire_time;
+    qemu_mutex_unlock(&timer_list->active_timers_lock);
+
+    return expire_time < qemu_clock_get_ns(timer_list->clock->type);
 }
 
 bool qemu_clock_expired(QEMUClockType type)
@@ -182,13 +193,25 @@ bool qemu_clock_expired(QEMUClockType type)
 int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
 {
     int64_t delta;
+    int64_t expire_time;
 
-    if (!timer_list->clock->enabled || !timer_list->active_timers) {
+    if (!timer_list->clock->enabled) {
         return -1;
     }
 
-    delta = timer_list->active_timers->expire_time -
-        qemu_clock_get_ns(timer_list->clock->type);
+    /* The active timers list may be modified before the caller uses our return
+     * value but ->notify_cb() is called when the deadline changes.  Therefore
+     * the caller should notice the change and there is no race condition.
+     */
+    qemu_mutex_lock(&timer_list->active_timers_lock);
+    if (!timer_list->active_timers) {
+        qemu_mutex_unlock(&timer_list->active_timers_lock);
+        return -1;
+    }
+    expire_time = timer_list->active_timers->expire_time;
+    qemu_mutex_unlock(&timer_list->active_timers_lock);
+
+    delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
 
     if (delta <= 0) {
         return 0;
@@ -289,6 +312,7 @@ void timer_init(QEMUTimer *ts,
     ts->cb = cb;
     ts->opaque = opaque;
     ts->scale = scale;
+    ts->expire_time = -1;
 }
 
 void timer_free(QEMUTimer *ts)
@@ -296,14 +320,12 @@ void timer_free(QEMUTimer *ts)
     g_free(ts);
 }
 
-/* stop a timer, but do not dealloc it */
-void timer_del(QEMUTimer *ts)
+static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
 {
     QEMUTimer **pt, *t;
 
-    /* NOTE: this code must be signal safe because
-       timer_expired() can be called from a signal. */
-    pt = &ts->timer_list->active_timers;
+    ts->expire_time = -1;
+    pt = &timer_list->active_timers;
     for(;;) {
         t = *pt;
         if (!t)
@@ -316,18 +338,28 @@ void timer_del(QEMUTimer *ts)
     }
 }
 
+/* stop a timer, but do not dealloc it */
+void timer_del(QEMUTimer *ts)
+{
+    QEMUTimerList *timer_list = ts->timer_list;
+
+    qemu_mutex_lock(&timer_list->active_timers_lock);
+    timer_del_locked(timer_list, ts);
+    qemu_mutex_unlock(&timer_list->active_timers_lock);
+}
+
 /* modify the current timer so that it will be fired when current_time
    >= expire_time. The corresponding callback will be called. */
 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
 {
+    QEMUTimerList *timer_list = ts->timer_list;
     QEMUTimer **pt, *t;
 
-    timer_del(ts);
+    qemu_mutex_lock(&timer_list->active_timers_lock);
+    timer_del_locked(timer_list, ts);
 
     /* add the timer in the sorted list */
-    /* NOTE: this code must be signal safe because
-       timer_expired() can be called from a signal. */
-    pt = &ts->timer_list->active_timers;
+    pt = &timer_list->active_timers;
     for(;;) {
         t = *pt;
         if (!timer_expired_ns(t, expire_time)) {
@@ -335,15 +367,16 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
         }
         pt = &t->next;
     }
-    ts->expire_time = expire_time;
+    ts->expire_time = MAX(expire_time, 0);
     ts->next = *pt;
     *pt = ts;
+    qemu_mutex_unlock(&timer_list->active_timers_lock);
 
     /* Rearm if necessary  */
-    if (pt == &ts->timer_list->active_timers) {
+    if (pt == &timer_list->active_timers) {
         /* Interrupt execution to force deadline recalculation.  */
-        qemu_clock_warp(ts->timer_list->clock->type);
-        timerlist_notify(ts->timer_list);
+        qemu_clock_warp(timer_list->clock->type);
+        timerlist_notify(timer_list);
     }
 }
 
@@ -354,13 +387,7 @@ void timer_mod(QEMUTimer *ts, int64_t expire_time)
 
 bool timer_pending(QEMUTimer *ts)
 {
-    QEMUTimer *t;
-    for (t = ts->timer_list->active_timers; t != NULL; t = t->next) {
-        if (t == ts) {
-            return true;
-        }
-    }
-    return false;
+    return ts->expire_time >= 0;
 }
 
 bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
@@ -373,23 +400,32 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
     QEMUTimer *ts;
     int64_t current_time;
     bool progress = false;
-   
+    QEMUTimerCB *cb;
+    void *opaque;
+
     if (!timer_list->clock->enabled) {
         return progress;
     }
 
     current_time = qemu_clock_get_ns(timer_list->clock->type);
     for(;;) {
+        qemu_mutex_lock(&timer_list->active_timers_lock);
         ts = timer_list->active_timers;
         if (!timer_expired_ns(ts, current_time)) {
+            qemu_mutex_unlock(&timer_list->active_timers_lock);
             break;
         }
+
         /* remove timer from the list before calling the callback */
         timer_list->active_timers = ts->next;
         ts->next = NULL;
+        ts->expire_time = -1;
+        cb = ts->cb;
+        opaque = ts->opaque;
+        qemu_mutex_unlock(&timer_list->active_timers_lock);
 
         /* run the callback (the timer list can be modified) */
-        ts->cb(ts->opaque);
+        cb(opaque);
         progress = true;
     }
     return progress;
diff --git a/tests/test-coroutine.c b/tests/test-coroutine.c
index 39be046ec7..15a885e882 100644
--- a/tests/test-coroutine.c
+++ b/tests/test-coroutine.c
@@ -182,17 +182,17 @@ static void perf_nesting(void)
     unsigned int i, maxcycles, maxnesting;
     double duration;
 
-    maxcycles = 100000000;
+    maxcycles = 10000;
     maxnesting = 1000;
     Coroutine *root;
-    NestData nd = {
-        .n_enter  = 0,
-        .n_return = 0,
-        .max      = maxnesting,
-    };
 
     g_test_timer_start();
     for (i = 0; i < maxcycles; i++) {
+        NestData nd = {
+            .n_enter  = 0,
+            .n_return = 0,
+            .max      = maxnesting,
+        };
         root = qemu_coroutine_create(nest);
         qemu_coroutine_enter(root, &nd);
     }
@@ -202,6 +202,38 @@ static void perf_nesting(void)
         maxcycles, maxnesting, duration);
 }
 
+/*
+ * Yield benchmark
+ */
+
+static void coroutine_fn yield_loop(void *opaque)
+{
+    unsigned int *counter = opaque;
+
+    while ((*counter) > 0) {
+        (*counter)--;
+        qemu_coroutine_yield();
+    }
+}
+
+static void perf_yield(void)
+{
+    unsigned int i, maxcycles;
+    double duration;
+
+    maxcycles = 100000000;
+    i = maxcycles;
+    Coroutine *coroutine = qemu_coroutine_create(yield_loop);
+
+    g_test_timer_start();
+    while (i > 0) {
+        qemu_coroutine_enter(coroutine, &i);
+    }
+    duration = g_test_timer_elapsed();
+
+    g_test_message("Yield %u iterations: %f s\n",
+        maxcycles, duration);
+}
 
 int main(int argc, char **argv)
 {
@@ -214,6 +246,7 @@ int main(int argc, char **argv)
     if (g_test_perf()) {
         g_test_add_func("/perf/lifecycle", perf_lifecycle);
         g_test_add_func("/perf/nesting", perf_nesting);
+        g_test_add_func("/perf/yield", perf_yield);
     }
     return g_test_run();
 }
diff --git a/util/osdep.c b/util/osdep.c
index 685c8ae889..62072b4be3 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -207,6 +207,13 @@ int qemu_open(const char *name, int flags, ...)
     }
 #endif
 
+#ifdef O_DIRECT
+    if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) {
+        error_report("file system may not support O_DIRECT");
+        errno = EINVAL; /* in case it was clobbered */
+    }
+#endif /* O_DIRECT */
+
     return ret;
 }