Pull request

-----BEGIN PGP SIGNATURE----- iQEzBAABCAAdFiEEhpWov9P5fNqsNXdanKSrs4Grc8gFAl7zJJUACgkQnKSrs4Gr c8ix3Qf/ZpEKTCWJcZZuJPEI4CSgHZTsmDilkhnI/SoSBIK+6do+oBtCWrNdfP/m BpAZspaGsKUu5kJe6HGl4Rvmjd/sTg+9+F6UnQVrWccttwmJgr+y0r9uTMEgxgdm 2xeTzkzfwfxRLn4wb8k1kX/weQUcsbJUe2F9Nvm3HzeKGkaxWlYsRwqXAluC7gjx ZK0yHBz9JXKAreAfBRmNduLDElyzc6yYikY2gsJEOYTA7/h/ksmuNWYqNPRzWYGQ wRjAPyRMg+q+pZhoir5+6qgKLt6vNk5uQOjPaiLYhSMi7fiTIXrrVrO0dSx1Pkun 2vlb2WOF7nbj5T1veJQE29/onKPhzA== =IYfR -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging Pull request # gpg: Signature made Wed 24 Jun 2020 11:01:57 BST # gpg: using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8 # gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full] # gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>" [full] # Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35 775A 9CA4 ABB3 81AB 73C8 * remotes/stefanha/tags/block-pull-request: block/nvme: support nested aio_poll() block/nvme: keep BDRVNVMeState pointer in NVMeQueuePair block/nvme: clarify that free_req_queue is protected by q->lock block/nvme: switch to a NVMeRequest freelist block/nvme: don't access CQE after moving cq.head block/nvme: drop tautologous assertion block/nvme: poll queues without q->lock check-block: enable iotests with SafeStack configure: add flags to support SafeStack coroutine: add check for SafeStack in sigaltstack coroutine: support SafeStack in ucontext backend minikconf: explicitly set encoding to UTF-8 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2020-06-26 13:48:53 +01:00 · 2020-06-26 13:48:53 +01:00 · 87fb952da8
parent 10f7ffabf9 7838c67f22
commit 87fb952da8
8 changed files with 284 additions and 66 deletions
--- a/block/nvme.c
+++ b/block/nvme.c
@ -33,6 +33,14 @@
 #define NVME_QUEUE_SIZE 128
 #define NVME_BAR_SIZE 8192

+/*
+ * We have to leave one slot empty as that is the full queue case where
+ * head == tail + 1.
+ */
+#define NVME_NUM_REQS (NVME_QUEUE_SIZE - 1)
+
+typedef struct BDRVNVMeState BDRVNVMeState;
+
 typedef struct {
    int32_t  head, tail;
    uint8_t  *queue;
@ -47,24 +55,30 @@ typedef struct {
    int cid;
    void *prp_list_page;
    uint64_t prp_list_iova;
-    bool busy;
+    int free_req_next; /* q->reqs[] index of next free req */
 } NVMeRequest;

 typedef struct {
-    CoQueue     free_req_queue;
    QemuMutex   lock;

+    /* Read from I/O code path, initialized under BQL */
+    BDRVNVMeState   *s;
+    int             index;
+
    /* Fields protected by BQL */
-    int         index;
    uint8_t     *prp_list_pages;

    /* Fields protected by @lock */
+    CoQueue     free_req_queue;
    NVMeQueue   sq, cq;
    int         cq_phase;
-    NVMeRequest reqs[NVME_QUEUE_SIZE];
-    bool        busy;
+    int         free_req_head;
+    NVMeRequest reqs[NVME_NUM_REQS];
    int         need_kick;
    int         inflight;
+
+    /* Thread-safe, no lock necessary */
+    QEMUBH      *completion_bh;
 } NVMeQueuePair;

 /* Memory mapped registers */
@ -89,7 +103,7 @@ typedef volatile struct {

 QEMU_BUILD_BUG_ON(offsetof(NVMeRegs, doorbells) != 0x1000);

-typedef struct {
+struct BDRVNVMeState {
    AioContext *aio_context;
    QEMUVFIOState *vfio;
    NVMeRegs *regs;
@ -123,11 +137,13 @@ typedef struct {

    /* PCI address (required for nvme_refresh_filename()) */
    char *device;
-} BDRVNVMeState;
+};

 #define NVME_BLOCK_OPT_DEVICE "device"
 #define NVME_BLOCK_OPT_NAMESPACE "namespace"

+static void nvme_process_completion_bh(void *opaque);
+
 static QemuOptsList runtime_opts = {
    .name = "nvme",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@ -167,8 +183,11 @@ static void nvme_init_queue(BlockDriverState *bs, NVMeQueue *q,
    }
 }

-static void nvme_free_queue_pair(BlockDriverState *bs, NVMeQueuePair *q)
+static void nvme_free_queue_pair(NVMeQueuePair *q)
 {
+    if (q->completion_bh) {
+        qemu_bh_delete(q->completion_bh);
+    }
    qemu_vfree(q->prp_list_pages);
    qemu_vfree(q->sq.queue);
    qemu_vfree(q->cq.queue);
@ -198,21 +217,28 @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs,
    uint64_t prp_list_iova;

    qemu_mutex_init(&q->lock);
+    q->s = s;
    q->index = idx;
    qemu_co_queue_init(&q->free_req_queue);
-    q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_QUEUE_SIZE);
+    q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_NUM_REQS);
+    q->completion_bh = aio_bh_new(bdrv_get_aio_context(bs),
+                                  nvme_process_completion_bh, q);
    r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
-                          s->page_size * NVME_QUEUE_SIZE,
+                          s->page_size * NVME_NUM_REQS,
                          false, &prp_list_iova);
    if (r) {
        goto fail;
    }
-    for (i = 0; i < NVME_QUEUE_SIZE; i++) {
+    q->free_req_head = -1;
+    for (i = 0; i < NVME_NUM_REQS; i++) {
        NVMeRequest *req = &q->reqs[i];
        req->cid = i + 1;
+        req->free_req_next = q->free_req_head;
+        q->free_req_head = i;
        req->prp_list_page = q->prp_list_pages + i * s->page_size;
        req->prp_list_iova = prp_list_iova + i * s->page_size;
    }
+
    nvme_init_queue(bs, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
@ -229,13 +255,15 @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs,

    return q;
 fail:
-    nvme_free_queue_pair(bs, q);
+    nvme_free_queue_pair(q);
    return NULL;
 }

 /* With q->lock */
-static void nvme_kick(BDRVNVMeState *s, NVMeQueuePair *q)
+static void nvme_kick(NVMeQueuePair *q)
 {
+    BDRVNVMeState *s = q->s;
+
    if (s->plugged || !q->need_kick) {
        return;
    }
@ -254,13 +282,11 @@ static void nvme_kick(BDRVNVMeState *s, NVMeQueuePair *q)
 */
 static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)
 {
-    int i;
-    NVMeRequest *req = NULL;
+    NVMeRequest *req;

    qemu_mutex_lock(&q->lock);
-    while (q->inflight + q->need_kick > NVME_QUEUE_SIZE - 2) {
-        /* We have to leave one slot empty as that is the full queue case (head
-         * == tail + 1). */
+
+    while (q->free_req_head == -1) {
        if (qemu_in_coroutine()) {
            trace_nvme_free_req_queue_wait(q);
            qemu_co_queue_wait(&q->free_req_queue, &q->lock);
@ -269,20 +295,40 @@ static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)
            return NULL;
        }
    }
-    for (i = 0; i < NVME_QUEUE_SIZE; i++) {
-        if (!q->reqs[i].busy) {
-            q->reqs[i].busy = true;
-            req = &q->reqs[i];
-            break;
-        }
-    }
-    /* We have checked inflight and need_kick while holding q->lock, so one
-     * free req must be available. */
-    assert(req);
+
+    req = &q->reqs[q->free_req_head];
+    q->free_req_head = req->free_req_next;
+    req->free_req_next = -1;
+
    qemu_mutex_unlock(&q->lock);
    return req;
 }

+/* With q->lock */
+static void nvme_put_free_req_locked(NVMeQueuePair *q, NVMeRequest *req)
+{
+    req->free_req_next = q->free_req_head;
+    q->free_req_head = req - q->reqs;
+}
+
+/* With q->lock */
+static void nvme_wake_free_req_locked(NVMeQueuePair *q)
+{
+    if (!qemu_co_queue_empty(&q->free_req_queue)) {
+        replay_bh_schedule_oneshot_event(q->s->aio_context,
+                nvme_free_req_queue_cb, q);
+    }
+}
+
+/* Insert a request in the freelist and wake waiters */
+static void nvme_put_free_req_and_wake(NVMeQueuePair *q, NVMeRequest *req)
+{
+    qemu_mutex_lock(&q->lock);
+    nvme_put_free_req_locked(q, req);
+    nvme_wake_free_req_locked(q);
+    qemu_mutex_unlock(&q->lock);
+}
+
 static inline int nvme_translate_error(const NvmeCqe *c)
 {
    uint16_t status = (le16_to_cpu(c->status) >> 1) & 0xFF;
@ -306,26 +352,40 @@ static inline int nvme_translate_error(const NvmeCqe *c)
 }

 /* With q->lock */
-static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
+static bool nvme_process_completion(NVMeQueuePair *q)
 {
+    BDRVNVMeState *s = q->s;
    bool progress = false;
    NVMeRequest *preq;
    NVMeRequest req;
    NvmeCqe *c;

    trace_nvme_process_completion(s, q->index, q->inflight);
-    if (q->busy || s->plugged) {
-        trace_nvme_process_completion_queue_busy(s, q->index);
+    if (s->plugged) {
+        trace_nvme_process_completion_queue_plugged(s, q->index);
        return false;
    }
-    q->busy = true;
+
+    /*
+     * Support re-entrancy when a request cb() function invokes aio_poll().
+     * Pending completions must be visible to aio_poll() so that a cb()
+     * function can wait for the completion of another request.
+     *
+     * The aio_poll() loop will execute our BH and we'll resume completion
+     * processing there.
+     */
+    qemu_bh_schedule(q->completion_bh);
+
    assert(q->inflight >= 0);
    while (q->inflight) {
+        int ret;
        int16_t cid;
+
        c = (NvmeCqe *)&q->cq.queue[q->cq.head * NVME_CQ_ENTRY_BYTES];
        if ((le16_to_cpu(c->status) & 0x1) == q->cq_phase) {
            break;
        }
+        ret = nvme_translate_error(c);
        q->cq.head = (q->cq.head + 1) % NVME_QUEUE_SIZE;
        if (!q->cq.head) {
            q->cq_phase = !q->cq_phase;
@ -336,33 +396,47 @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
                    cid);
            continue;
        }
-        assert(cid <= NVME_QUEUE_SIZE);
        trace_nvme_complete_command(s, q->index, cid);
        preq = &q->reqs[cid - 1];
        req = *preq;
        assert(req.cid == cid);
        assert(req.cb);
-        preq->busy = false;
+        nvme_put_free_req_locked(q, preq);
        preq->cb = preq->opaque = NULL;
-        qemu_mutex_unlock(&q->lock);
-        req.cb(req.opaque, nvme_translate_error(c));
-        qemu_mutex_lock(&q->lock);
        q->inflight--;
+        qemu_mutex_unlock(&q->lock);
+        req.cb(req.opaque, ret);
+        qemu_mutex_lock(&q->lock);
        progress = true;
    }
    if (progress) {
        /* Notify the device so it can post more completions. */
        smp_mb_release();
        *q->cq.doorbell = cpu_to_le32(q->cq.head);
-        if (!qemu_co_queue_empty(&q->free_req_queue)) {
-            replay_bh_schedule_oneshot_event(s->aio_context,
-                                             nvme_free_req_queue_cb, q);
-        }
+        nvme_wake_free_req_locked(q);
    }
-    q->busy = false;
+
+    qemu_bh_cancel(q->completion_bh);
+
    return progress;
 }

+static void nvme_process_completion_bh(void *opaque)
+{
+    NVMeQueuePair *q = opaque;
+
+    /*
+     * We're being invoked because a nvme_process_completion() cb() function
+     * called aio_poll(). The callback may be waiting for further completions
+     * so notify the device that it has space to fill in more completions now.
+     */
+    smp_mb_release();
+    *q->cq.doorbell = cpu_to_le32(q->cq.head);
+    nvme_wake_free_req_locked(q);
+
+    nvme_process_completion(q);
+}
+
 static void nvme_trace_command(const NvmeCmd *cmd)
 {
    int i;
@ -374,8 +448,7 @@ static void nvme_trace_command(const NvmeCmd *cmd)
    }
 }

-static void nvme_submit_command(BDRVNVMeState *s, NVMeQueuePair *q,
-                                NVMeRequest *req,
+static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
                                NvmeCmd *cmd, BlockCompletionFunc cb,
                                void *opaque)
 {
@ -384,15 +457,15 @@ static void nvme_submit_command(BDRVNVMeState *s, NVMeQueuePair *q,
    req->opaque = opaque;
    cmd->cid = cpu_to_le32(req->cid);

-    trace_nvme_submit_command(s, q->index, req->cid);
+    trace_nvme_submit_command(q->s, q->index, req->cid);
    nvme_trace_command(cmd);
    qemu_mutex_lock(&q->lock);
    memcpy((uint8_t *)q->sq.queue +
           q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd));
    q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE;
    q->need_kick++;
-    nvme_kick(s, q);
-    nvme_process_completion(s, q);
+    nvme_kick(q);
+    nvme_process_completion(q);
    qemu_mutex_unlock(&q->lock);
 }

@ -407,13 +480,12 @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
                         NvmeCmd *cmd)
 {
    NVMeRequest *req;
-    BDRVNVMeState *s = bs->opaque;
    int ret = -EINPROGRESS;
    req = nvme_get_free_req(q);
    if (!req) {
        return -EBUSY;
    }
-    nvme_submit_command(s, q, req, cmd, nvme_cmd_sync_cb, &ret);
+    nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret);

    BDRV_POLL_WHILE(bs, ret == -EINPROGRESS);
    return ret;
@ -512,8 +584,20 @@ static bool nvme_poll_queues(BDRVNVMeState *s)

    for (i = 0; i < s->nr_queues; i++) {
        NVMeQueuePair *q = s->queues[i];
+        const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
+        NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];
+
+        /*
+         * Do an early check for completions. q->lock isn't needed because
+         * nvme_process_completion() only runs in the event loop thread and
+         * cannot race with itself.
+         */
+        if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) {
+            continue;
+        }
+
        qemu_mutex_lock(&q->lock);
-        while (nvme_process_completion(s, q)) {
+        while (nvme_process_completion(q)) {
            /* Keep polling */
            progress = true;
        }
@ -551,7 +635,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
    };
    if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
        error_setg(errp, "Failed to create io queue [%d]", n);
-        nvme_free_queue_pair(bs, q);
+        nvme_free_queue_pair(q);
        return false;
    }
    cmd = (NvmeCmd) {
@ -562,7 +646,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
    };
    if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
        error_setg(errp, "Failed to create io queue [%d]", n);
-        nvme_free_queue_pair(bs, q);
+        nvme_free_queue_pair(q);
        return false;
    }
    s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1);
@ -757,7 +841,7 @@ static void nvme_close(BlockDriverState *bs)
    BDRVNVMeState *s = bs->opaque;

    for (i = 0; i < s->nr_queues; ++i) {
-        nvme_free_queue_pair(bs, s->queues[i]);
+        nvme_free_queue_pair(s->queues[i]);
    }
    g_free(s->queues);
    aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
@ -987,10 +1071,10 @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs,
    r = nvme_cmd_map_qiov(bs, &cmd, req, qiov);
    qemu_co_mutex_unlock(&s->dma_map_lock);
    if (r) {
-        req->busy = false;
+        nvme_put_free_req_and_wake(ioq, req);
        return r;
    }
-    nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
+    nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data);

    data.co = qemu_coroutine_self();
    while (data.ret == -EINPROGRESS) {
@ -1090,7 +1174,7 @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs)
    assert(s->nr_queues > 1);
    req = nvme_get_free_req(ioq);
    assert(req);
-    nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
+    nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data);

    data.co = qemu_coroutine_self();
    if (data.ret == -EINPROGRESS) {
@ -1143,7 +1227,7 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
    req = nvme_get_free_req(ioq);
    assert(req);

-    nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
+    nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data);

    data.co = qemu_coroutine_self();
    while (data.ret == -EINPROGRESS) {
@ -1204,13 +1288,13 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
    qemu_co_mutex_unlock(&s->dma_map_lock);

    if (ret) {
-        req->busy = false;
+        nvme_put_free_req_and_wake(ioq, req);
        goto out;
    }

    trace_nvme_dsm(s, offset, bytes);

-    nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
+    nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data);

    data.co = qemu_coroutine_self();
    while (data.ret == -EINPROGRESS) {
@ -1262,6 +1346,13 @@ static void nvme_detach_aio_context(BlockDriverState *bs)
 {
    BDRVNVMeState *s = bs->opaque;

+    for (int i = 0; i < s->nr_queues; i++) {
+        NVMeQueuePair *q = s->queues[i];
+
+        qemu_bh_delete(q->completion_bh);
+        q->completion_bh = NULL;
+    }
+
    aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
                           false, NULL, NULL);
 }
@ -1274,6 +1365,13 @@ static void nvme_attach_aio_context(BlockDriverState *bs,
    s->aio_context = new_context;
    aio_set_event_notifier(new_context, &s->irq_notifier,
                           false, nvme_handle_event, nvme_poll_cb);
+
+    for (int i = 0; i < s->nr_queues; i++) {
+        NVMeQueuePair *q = s->queues[i];
+
+        q->completion_bh =
+            aio_bh_new(new_context, nvme_process_completion_bh, q);
+    }
 }

 static void nvme_aio_plug(BlockDriverState *bs)
@ -1292,8 +1390,8 @@ static void nvme_aio_unplug(BlockDriverState *bs)
    for (i = 1; i < s->nr_queues; i++) {
        NVMeQueuePair *q = s->queues[i];
        qemu_mutex_lock(&q->lock);
-        nvme_kick(s, q);
-        nvme_process_completion(s, q);
+        nvme_kick(q);
+        nvme_process_completion(q);
        qemu_mutex_unlock(&q->lock);
    }
 }
--- a/block/trace-events
+++ b/block/trace-events
@ -158,7 +158,7 @@ nvme_kick(void *s, int queue) "s %p queue %d"
 nvme_dma_flush_queue_wait(void *s) "s %p"
 nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
 nvme_process_completion(void *s, int index, int inflight) "s %p queue %d inflight %d"
-nvme_process_completion_queue_busy(void *s, int index) "s %p queue %d"
+nvme_process_completion_queue_plugged(void *s, int index) "s %p queue %d"
 nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d"
 nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d"
 nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
--- a/73
+++ b/73
@ -307,6 +307,7 @@ audio_win_int=""
 libs_qga=""
 debug_info="yes"
 stack_protector=""
+safe_stack=""
 use_containers="yes"
 gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb")

@ -1287,6 +1288,10 @@ for opt do
  ;;
  --disable-stack-protector) stack_protector="no"
  ;;
+  --enable-safe-stack) safe_stack="yes"
+  ;;
+  --disable-safe-stack) safe_stack="no"
+  ;;
  --disable-curses) curses="no"
  ;;
  --enable-curses) curses="yes"
@ -1829,6 +1834,8 @@ disabled with --disable-FEATURE, default is enabled if available:
  debug-tcg       TCG debugging (default is disabled)
  debug-info      debugging information
  sparse          sparse checker
+  safe-stack      SafeStack Stack Smash Protection. Depends on
+                  clang/llvm >= 3.7 and requires coroutine backend ucontext.

  gnutls          GNUTLS cryptography support
  nettle          nettle cryptography support
@ -5573,6 +5580,67 @@ if test "$debug_stack_usage" = "yes"; then
  fi
 fi

+##################################################
+# SafeStack
+
+
+if test "$safe_stack" = "yes"; then
+cat > $TMPC << EOF
+int main(int argc, char *argv[])
+{
+#if ! __has_feature(safe_stack)
+#error SafeStack Disabled
+#endif
+    return 0;
+}
+EOF
+  flag="-fsanitize=safe-stack"
+  # Check that safe-stack is supported and enabled.
+  if compile_prog "-Werror $flag" "$flag"; then
+    # Flag needed both at compilation and at linking
+    QEMU_CFLAGS="$QEMU_CFLAGS $flag"
+    QEMU_LDFLAGS="$QEMU_LDFLAGS $flag"
+  else
+    error_exit "SafeStack not supported by your compiler"
+  fi
+  if test "$coroutine" != "ucontext"; then
+    error_exit "SafeStack is only supported by the coroutine backend ucontext"
+  fi
+else
+cat > $TMPC << EOF
+int main(int argc, char *argv[])
+{
+#if defined(__has_feature)
+#if __has_feature(safe_stack)
+#error SafeStack Enabled
+#endif
+#endif
+    return 0;
+}
+EOF
+if test "$safe_stack" = "no"; then
+  # Make sure that safe-stack is disabled
+  if ! compile_prog "-Werror" ""; then
+    # SafeStack was already enabled, try to explicitly remove the feature
+    flag="-fno-sanitize=safe-stack"
+    if ! compile_prog "-Werror $flag" "$flag"; then
+      error_exit "Configure cannot disable SafeStack"
+    fi
+    QEMU_CFLAGS="$QEMU_CFLAGS $flag"
+    QEMU_LDFLAGS="$QEMU_LDFLAGS $flag"
+  fi
+else # "$safe_stack" = ""
+  # Set safe_stack to yes or no based on pre-existing flags
+  if compile_prog "-Werror" ""; then
+    safe_stack="no"
+  else
+    safe_stack="yes"
+    if test "$coroutine" != "ucontext"; then
+      error_exit "SafeStack is only supported by the coroutine backend ucontext"
+    fi
+  fi
+fi
+fi

 ##########################################
 # check if we have open_by_handle_at
@ -6765,6 +6833,7 @@ echo "sparse enabled    $sparse"
 echo "strip binaries    $strip_opt"
 echo "profiler          $profiler"
 echo "static build      $static"
+echo "safe stack        $safe_stack"
 if test "$darwin" = "yes" ; then
    echo "Cocoa support     $cocoa"
 fi
@ -8370,6 +8439,10 @@ if test "$ccache_cpp2" = "yes"; then
  echo "export CCACHE_CPP2=y" >> $config_host_mak
 fi

+if test "$safe_stack" = "yes"; then
+  echo "CONFIG_SAFESTACK=y" >> $config_host_mak
+fi
+
 # If we're using a separate build tree, set it up now.
 # DIRS are directories which we simply mkdir in the build tree;
 # LINKS are things to symlink back into the source tree
--- a/include/qemu/coroutine_int.h
+++ b/include/qemu/coroutine_int.h
@ -28,6 +28,11 @@
 #include "qemu/queue.h"
 #include "qemu/coroutine.h"

+#ifdef CONFIG_SAFESTACK
+/* Pointer to the unsafe stack, defined by the compiler */
+extern __thread void *__safestack_unsafe_stack_ptr;
+#endif
+
 #define COROUTINE_STACK_SIZE (1 << 20)

 typedef enum {
--- a/scripts/minikconf.py
+++ b/scripts/minikconf.py
@ -402,7 +402,7 @@ def do_include(self, include):
        if incl_abs_fname in self.data.previously_included:
            return
        try:
-            fp = open(incl_abs_fname, 'r')
+            fp = open(incl_abs_fname, 'rt', encoding='utf-8')
        except IOError as e:
            raise KconfigParserError(self,
                                '%s: %s' % (e.strerror, include))
@ -696,7 +696,7 @@ def scan_token(self):
            parser.do_assignment(name, value == 'y')
            external_vars.add(name[7:])
        else:
-            fp = open(arg, 'r')
+            fp = open(arg, 'rt', encoding='utf-8')
            parser.parse_file(fp)
            fp.close()

@ -705,7 +705,7 @@ def scan_token(self):
        if key not in external_vars and config[key]:
            print ('CONFIG_%s=y' % key)

-    deps = open(argv[2], 'w')
+    deps = open(argv[2], 'wt', encoding='utf-8')
    for fname in data.previously_included:
        print ('%s: %s' % (argv[1], fname), file=deps)
    deps.close()
--- a/tests/check-block.sh
+++ b/tests/check-block.sh
@ -21,7 +21,17 @@ if grep -q "CONFIG_GPROF=y" config-host.mak 2>/dev/null ; then
    exit 0
 fi

-if grep -q "CFLAGS.*-fsanitize" config-host.mak 2>/dev/null ; then
+# Disable tests with any sanitizer except for SafeStack
+CFLAGS=$( grep "CFLAGS.*-fsanitize" config-host.mak 2>/dev/null )
+SANITIZE_FLAGS=""
+# Remove all occurrences of -fsanitize=safe-stack
+for i in ${CFLAGS}; do
+        if [ "${i}" != "-fsanitize=safe-stack" ]; then
+                SANITIZE_FLAGS="${SANITIZE_FLAGS} ${i}"
+        fi
+done
+if echo ${SANITIZE_FLAGS} | grep -q "\-fsanitize" 2>/dev/null; then
+    # Have a sanitize flag that is not allowed, stop
    echo "Sanitizers are enabled ==> Not running the qemu-iotests."
    exit 0
 fi
--- a/util/coroutine-sigaltstack.c
+++ b/util/coroutine-sigaltstack.c
@ -30,6 +30,10 @@
 #include "qemu-common.h"
 #include "qemu/coroutine_int.h"

+#ifdef CONFIG_SAFESTACK
+#error "SafeStack is not compatible with code run in alternate signal stacks"
+#endif
+
 typedef struct {
    Coroutine base;
    void *stack;
--- a/util/coroutine-ucontext.c
+++ b/util/coroutine-ucontext.c
@ -45,6 +45,11 @@ typedef struct {
    Coroutine base;
    void *stack;
    size_t stack_size;
+#ifdef CONFIG_SAFESTACK
+    /* Need an unsafe stack for each coroutine */
+    void *unsafe_stack;
+    size_t unsafe_stack_size;
+#endif
    sigjmp_buf env;

    void *tsan_co_fiber;
@ -179,6 +184,10 @@ Coroutine *qemu_coroutine_new(void)
    co = g_malloc0(sizeof(*co));
    co->stack_size = COROUTINE_STACK_SIZE;
    co->stack = qemu_alloc_stack(&co->stack_size);
+#ifdef CONFIG_SAFESTACK
+    co->unsafe_stack_size = COROUTINE_STACK_SIZE;
+    co->unsafe_stack = qemu_alloc_stack(&co->unsafe_stack_size);
+#endif
    co->base.entry_arg = &old_env; /* stash away our jmp_buf */

    uc.uc_link = &old_uc;
@ -203,6 +212,22 @@ Coroutine *qemu_coroutine_new(void)
            COROUTINE_YIELD,
            &fake_stack_save,
            co->stack, co->stack_size, co->tsan_co_fiber);
+
+#ifdef CONFIG_SAFESTACK
+        /*
+         * Before we swap the context, set the new unsafe stack.
+         * The unsafe stack grows just like the normal stack, so start from
+         * the last usable location of the memory area.
+         * NOTE: we don't have to re-set the usp afterwards because we are
+         * coming back to this context through a siglongjmp.
+         * The compiler already wrapped the corresponding sigsetjmp call with
+         * code that saves the usp on the (safe) stack before the call, and
+         * restores it right after (which is where we return with siglongjmp).
+         */
+        void *usp = co->unsafe_stack + co->unsafe_stack_size;
+        __safestack_unsafe_stack_ptr = usp;
+#endif
+
        swapcontext(&old_uc, &uc);
    }

@ -235,6 +260,9 @@ void qemu_coroutine_delete(Coroutine *co_)
 #endif

    qemu_free_stack(co->stack, co->stack_size);
+#ifdef CONFIG_SAFESTACK
+    qemu_free_stack(co->unsafe_stack, co->unsafe_stack_size);
+#endif
    g_free(co);
 }