From 1cc933453bf2baae1feb7c8e757bdfd0ef639002 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 30 Jul 2015 14:16:11 +0100
Subject: [PATCH 01/19] virtio-scsi: use virtqueue_map_sg() when loading
 requests

The VirtQueueElement struct is serialized during migration but the
in_sg[]/out_sg[] iovec arrays are not usable on the destination host
because the pointers are meaningless.

Use virtqueue_map_sg() to refresh in_sg[]/out_sg[] to valid pointers
based on in_addr[]/out_addr[] hwaddrs.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <1438262173-11546-2-git-send-email-stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/scsi/virtio-scsi.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 811c3da8bd..a8bb1c66f9 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -217,6 +217,11 @@ static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq)
     assert(req->elem.in_num <= ARRAY_SIZE(req->elem.in_sg));
     assert(req->elem.out_num <= ARRAY_SIZE(req->elem.out_sg));
 
+    virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
+                     req->elem.in_num, 1);
+    virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
+                     req->elem.out_num, 0);
+
     if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size,
                               sizeof(VirtIOSCSICmdResp) + vs->sense_size) < 0) {
         error_report("invalid SCSI request migration data");

From c85a7a0057ca454607a40cde991d495e0deec34d Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 30 Jul 2015 14:16:12 +0100
Subject: [PATCH 02/19] scsi-disk: fix cmd.mode field typo

The cmd.xfer field is the data length.  The cmd.mode field is the data
transfer direction.

scsi_handle_rw_error() was using the wrong error policy for read
requests.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <1438262173-11546-3-git-send-email-stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/scsi/scsi-disk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 64f0694734..73fed3f233 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -399,7 +399,7 @@ static void scsi_read_data(SCSIRequest *req)
  */
 static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
 {
-    bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
+    bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk,
                                                    is_read, error);

From 4bb7b0daf8ea34bcc582642d35a2e4902f7841db Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 30 Jul 2015 14:16:13 +0100
Subject: [PATCH 03/19] tests: virtio-scsi: clear unit attention after reset

The unit attention after reset (power on) prevents normal commands from
running.  The unaligned WRITE SAME test never executed its command!

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <1438262173-11546-4-git-send-email-stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tests/virtio-scsi-test.c | 90 ++++++++++++++++++++++++----------------
 1 file changed, 54 insertions(+), 36 deletions(-)

diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c
index 11ccdd632e..6e91ca9c2b 100644
--- a/tests/virtio-scsi-test.c
+++ b/tests/virtio-scsi-test.c
@@ -13,6 +13,7 @@
 #include "libqtest.h"
 #include "qemu/osdep.h"
 #include <stdio.h>
+#include "block/scsi.h"
 #include "libqos/virtio.h"
 #include "libqos/virtio-pci.h"
 #include "libqos/pci-pc.h"
@@ -71,40 +72,6 @@ static void qvirtio_scsi_stop(void)
     qtest_end();
 }
 
-static QVirtIOSCSI *qvirtio_scsi_pci_init(int slot)
-{
-    QVirtIOSCSI *vs;
-    QVirtioPCIDevice *dev;
-    void *addr;
-    int i;
-
-    vs = g_new0(QVirtIOSCSI, 1);
-    vs->alloc = pc_alloc_init();
-    vs->bus = qpci_init_pc();
-
-    dev = qvirtio_pci_device_find(vs->bus, QVIRTIO_SCSI_DEVICE_ID);
-    vs->dev = (QVirtioDevice *)dev;
-    g_assert(dev != NULL);
-    g_assert_cmphex(vs->dev->device_type, ==, QVIRTIO_SCSI_DEVICE_ID);
-
-    qvirtio_pci_device_enable(dev);
-    qvirtio_reset(&qvirtio_pci, vs->dev);
-    qvirtio_set_acknowledge(&qvirtio_pci, vs->dev);
-    qvirtio_set_driver(&qvirtio_pci, vs->dev);
-
-    addr = dev->addr + QVIRTIO_PCI_DEVICE_SPECIFIC_NO_MSIX;
-    vs->num_queues = qvirtio_config_readl(&qvirtio_pci, vs->dev,
-                                          (uint64_t)(uintptr_t)addr);
-
-    g_assert_cmpint(vs->num_queues, <, MAX_NUM_QUEUES);
-
-    for (i = 0; i < vs->num_queues + 2; i++) {
-        vs->vq[i] = qvirtqueue_setup(&qvirtio_pci, vs->dev, vs->alloc, i);
-    }
-
-    return vs;
-}
-
 static void qvirtio_scsi_pci_free(QVirtIOSCSI *vs)
 {
     int i;
@@ -134,7 +101,8 @@ static uint64_t qvirtio_scsi_alloc(QVirtIOSCSI *vs, size_t alloc_size,
 static uint8_t virtio_scsi_do_command(QVirtIOSCSI *vs, const uint8_t *cdb,
                                       const uint8_t *data_in,
                                       size_t data_in_len,
-                                      uint8_t *data_out, size_t data_out_len)
+                                      uint8_t *data_out, size_t data_out_len,
+                                      QVirtIOSCSICmdResp *resp_out)
 {
     QVirtQueue *vq;
     QVirtIOSCSICmdReq req = { { 0 } };
@@ -174,6 +142,10 @@ static uint8_t virtio_scsi_do_command(QVirtIOSCSI *vs, const uint8_t *cdb,
 
     response = readb(resp_addr + offsetof(QVirtIOSCSICmdResp, response));
 
+    if (resp_out) {
+        memread(resp_addr, resp_out, sizeof(*resp_out));
+    }
+
     guest_free(vs->alloc, req_addr);
     guest_free(vs->alloc, resp_addr);
     guest_free(vs->alloc, data_in_addr);
@@ -181,6 +153,52 @@ static uint8_t virtio_scsi_do_command(QVirtIOSCSI *vs, const uint8_t *cdb,
     return response;
 }
 
+static QVirtIOSCSI *qvirtio_scsi_pci_init(int slot)
+{
+    const uint8_t test_unit_ready_cdb[CDB_SIZE] = {};
+    QVirtIOSCSI *vs;
+    QVirtioPCIDevice *dev;
+    QVirtIOSCSICmdResp resp;
+    void *addr;
+    int i;
+
+    vs = g_new0(QVirtIOSCSI, 1);
+    vs->alloc = pc_alloc_init();
+    vs->bus = qpci_init_pc();
+
+    dev = qvirtio_pci_device_find(vs->bus, QVIRTIO_SCSI_DEVICE_ID);
+    vs->dev = (QVirtioDevice *)dev;
+    g_assert(dev != NULL);
+    g_assert_cmphex(vs->dev->device_type, ==, QVIRTIO_SCSI_DEVICE_ID);
+
+    qvirtio_pci_device_enable(dev);
+    qvirtio_reset(&qvirtio_pci, vs->dev);
+    qvirtio_set_acknowledge(&qvirtio_pci, vs->dev);
+    qvirtio_set_driver(&qvirtio_pci, vs->dev);
+
+    addr = dev->addr + QVIRTIO_PCI_DEVICE_SPECIFIC_NO_MSIX;
+    vs->num_queues = qvirtio_config_readl(&qvirtio_pci, vs->dev,
+                                          (uint64_t)(uintptr_t)addr);
+
+    g_assert_cmpint(vs->num_queues, <, MAX_NUM_QUEUES);
+
+    for (i = 0; i < vs->num_queues + 2; i++) {
+        vs->vq[i] = qvirtqueue_setup(&qvirtio_pci, vs->dev, vs->alloc, i);
+    }
+
+    /* Clear the POWER ON OCCURRED unit attention */
+    g_assert_cmpint(virtio_scsi_do_command(vs, test_unit_ready_cdb,
+                                           NULL, 0, NULL, 0, &resp),
+                    ==, 0);
+    g_assert_cmpint(resp.status, ==, CHECK_CONDITION);
+    g_assert_cmpint(resp.sense[0], ==, 0x70); /* Fixed format sense buffer */
+    g_assert_cmpint(resp.sense[2], ==, UNIT_ATTENTION);
+    g_assert_cmpint(resp.sense[12], ==, 0x29); /* POWER ON */
+    g_assert_cmpint(resp.sense[13], ==, 0x00);
+
+    return vs;
+}
+
 /* Tests only initialization so far. TODO: Replace with functional tests */
 static void pci_nop(void)
 {
@@ -231,7 +249,7 @@ static void test_unaligned_write_same(void)
     vs = qvirtio_scsi_pci_init(PCI_SLOT);
 
     g_assert_cmphex(0, ==,
-        virtio_scsi_do_command(vs, write_same_cdb, NULL, 0, buf, 512));
+        virtio_scsi_do_command(vs, write_same_cdb, NULL, 0, buf, 512, NULL));
 
     qvirtio_scsi_pci_free(vs);
     qvirtio_scsi_stop();

From a56537a12757a8cdee24ad8c83e5af7a9833ea70 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Wed, 29 Jul 2015 16:45:11 +0800
Subject: [PATCH 04/19] scsi-disk: Fix assertion failure on WRITE SAME

The last portion of an unaligned WRITE SAME command could fail the
assertion in bdrv_aligned_pwritev:

    assert(!qiov || bytes == qiov->size);

Because we updated data->iov.iov_len right above this if block, but
data->qiov still has the old size.

Reinitialize the qiov to make them equal and keep block layer happy.

Cc: qemu-stable@nongnu.org
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1438159512-3871-2-git-send-email-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/scsi/scsi-disk.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 73fed3f233..087541d3b7 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -1683,6 +1683,10 @@ static void scsi_write_same_complete(void *opaque, int ret)
     if (data->iov.iov_len) {
         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                          data->iov.iov_len, BLOCK_ACCT_WRITE);
+        /* blk_aio_write doesn't like the qiov size being different from
+         * nb_sectors, make sure they match.
+         */
+        qemu_iovec_init_external(&data->qiov, &data->iov, 1);
         r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector,
                                       &data->qiov, data->iov.iov_len / 512,
                                       scsi_write_same_complete, data);

From 975b66555cb56af453c6852e7e821e2451700527 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Wed, 29 Jul 2015 16:45:12 +0800
Subject: [PATCH 05/19] virtio-scsi-test: Add test case for tail unaligned
 WRITE SAME

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1438159512-3871-3-git-send-email-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tests/virtio-scsi-test.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c
index 6e91ca9c2b..66d8491e9d 100644
--- a/tests/virtio-scsi-test.c
+++ b/tests/virtio-scsi-test.c
@@ -239,9 +239,12 @@ static void hotplug(void)
 static void test_unaligned_write_same(void)
 {
     QVirtIOSCSI *vs;
-    uint8_t buf[512] = { 0 };
-    const uint8_t write_same_cdb[CDB_SIZE] = { 0x41, 0x00, 0x00, 0x00, 0x00,
+    uint8_t buf1[512] = { 0 };
+    uint8_t buf2[512] = { 1 };
+    const uint8_t write_same_cdb_1[CDB_SIZE] = { 0x41, 0x00, 0x00, 0x00, 0x00,
                                                0x01, 0x00, 0x00, 0x02, 0x00 };
+    const uint8_t write_same_cdb_2[CDB_SIZE] = { 0x41, 0x00, 0x00, 0x00, 0x00,
+                                               0x01, 0x00, 0x33, 0x00, 0x00 };
 
     qvirtio_scsi_start("-drive file=blkdebug::null-co://,if=none,id=dr1"
                        ",format=raw,file.align=4k "
@@ -249,7 +252,10 @@ static void test_unaligned_write_same(void)
     vs = qvirtio_scsi_pci_init(PCI_SLOT);
 
     g_assert_cmphex(0, ==,
-        virtio_scsi_do_command(vs, write_same_cdb, NULL, 0, buf, 512, NULL));
+        virtio_scsi_do_command(vs, write_same_cdb_1, NULL, 0, buf1, 512, NULL));
+
+    g_assert_cmphex(0, ==,
+        virtio_scsi_do_command(vs, write_same_cdb_2, NULL, 0, buf2, 512, NULL));
 
     qvirtio_scsi_pci_free(vs);
     qvirtio_scsi_stop();

From af103c9310b7ab56a2552965d9d1274b0024f27b Mon Sep 17 00:00:00 2001
From: Igor Mammedov <imammedo@redhat.com>
Date: Thu, 30 Jul 2015 15:29:59 +0200
Subject: [PATCH 06/19] vhost/scsi: call vhost_dev_cleanup() at unrealize()
 time

vhost-scsi calls vhost_dev_init() at realize() time
but forgets to call it's counterpart vhost_dev_cleanup()
at unrealize() time.

Calling it should fix leaking of memory table and
mem_sections table in vhost device. And also unregister
vhost's memory listerner to prevent access from
memory core to freed memory.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1438262999-287627-1-git-send-email-imammedo@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/scsi/vhost-scsi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index a69918bef8..0dd57ff6b4 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -277,6 +277,7 @@ static void vhost_scsi_unrealize(DeviceState *dev, Error **errp)
     /* This will stop vhost backend. */
     vhost_scsi_set_status(vdev, 0);
 
+    vhost_dev_cleanup(&s->dev);
     g_free(s->dev.vqs);
 
     virtio_scsi_common_unrealize(dev, errp);

From 02d57ea115b7669f588371c86484a2e8ebc369be Mon Sep 17 00:00:00 2001
From: Sergey Fedorov <serge.fdrv@gmail.com>
Date: Tue, 30 Jun 2015 12:35:09 +0300
Subject: [PATCH 07/19] cpu-exec: Do not invalidate original TB in
 cpu_exec_nocache()

Instead of invalidating an original TB in cpu_exec_nocache()
prematurely, just save a link to it in the temporary generated TB. If
cpu_io_recompile() is raised subsequently from the temporary TB,
invalidate the original one as well. That allows reusing the original TB
each time cpu_exec_nocache() is called to handle expired instruction
counter in icount mode.

Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
Message-Id: <1435656909-29116-1-git-send-email-serge.fdrv@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cpu-exec.c              | 8 ++------
 include/exec/exec-all.h | 2 ++
 translate-all.c         | 8 ++++++++
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index 75694f3bb3..407fa4715a 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -231,19 +231,15 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
                              TranslationBlock *orig_tb)
 {
     TranslationBlock *tb;
-    target_ulong pc = orig_tb->pc;
-    target_ulong cs_base = orig_tb->cs_base;
-    uint64_t flags = orig_tb->flags;
 
     /* Should never happen.
        We only end up here when an existing TB is too long.  */
     if (max_cycles > CF_COUNT_MASK)
         max_cycles = CF_COUNT_MASK;
 
-    /* tb_gen_code can flush our orig_tb, invalidate it now */
-    tb_phys_invalidate(orig_tb, -1);
-    tb = tb_gen_code(cpu, pc, cs_base, flags,
+    tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
                      max_cycles | CF_NOCACHE);
+    tb->orig_tb = tcg_ctx.tb_ctx.tb_invalidated_flag ? NULL : orig_tb;
     cpu->current_tb = tb;
     /* execute the generated code */
     trace_exec_tb_nocache(tb, tb->pc);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index a6fce04f65..84272253b3 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -155,6 +155,8 @@ struct TranslationBlock {
     void *tc_ptr;    /* pointer to the translated code */
     /* next matching tb for physical address. */
     struct TranslationBlock *phys_hash_next;
+    /* original tb when cflags has CF_NOCACHE */
+    struct TranslationBlock *orig_tb;
     /* first and second physical page containing code. The lower bit
        of the pointer tells the index in page_next[] */
     struct TranslationBlock *page_next[2];
diff --git a/translate-all.c b/translate-all.c
index 60a3d8b2bd..755cdaba9c 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1533,6 +1533,14 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
     cs_base = tb->cs_base;
     flags = tb->flags;
     tb_phys_invalidate(tb, -1);
+    if (tb->cflags & CF_NOCACHE) {
+        if (tb->orig_tb) {
+            /* Invalidate original TB if this TB was generated in
+             * cpu_exec_nocache() */
+            tb_phys_invalidate(tb->orig_tb, -1);
+        }
+        tb_free(tb);
+    }
     /* FIXME: In theory this could raise an exception.  In practice
        we have already translated the block once so it's probably ok.  */
     tb_gen_code(cpu, pc, cs_base, flags, cflags);

From b4a4b8d0e0767c85946fd8fc404643bf5766351a Mon Sep 17 00:00:00 2001
From: Peter Crosthwaite <crosthwaitepeter@gmail.com>
Date: Sun, 5 Jul 2015 14:08:53 -0700
Subject: [PATCH 08/19] cpu_defs: Simplify CPUTLB padding logic

There was a complicated subtractive arithmetic for determining the
padding on the CPUTLBEntry structure. Simplify this with a union.

Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Message-Id: <1436130533-18565-1-git-send-email-crosthwaite.peter@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/exec/cpu-defs.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 98b9cff310..5093be26ac 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -105,17 +105,18 @@ typedef struct CPUTLBEntry {
        bit 3                      : indicates that the entry is invalid
        bit 2..0                   : zero
     */
-    target_ulong addr_read;
-    target_ulong addr_write;
-    target_ulong addr_code;
-    /* Addend to virtual address to get host address.  IO accesses
-       use the corresponding iotlb value.  */
-    uintptr_t addend;
-    /* padding to get a power of two size */
-    uint8_t dummy[(1 << CPU_TLB_ENTRY_BITS) -
-                  (sizeof(target_ulong) * 3 +
-                   ((-sizeof(target_ulong) * 3) & (sizeof(uintptr_t) - 1)) +
-                   sizeof(uintptr_t))];
+    union {
+        struct {
+            target_ulong addr_read;
+            target_ulong addr_write;
+            target_ulong addr_code;
+            /* Addend to virtual address to get host address.  IO accesses
+               use the corresponding iotlb value.  */
+            uintptr_t addend;
+        };
+        /* padding to get a power of two size */
+        uint8_t dummy[1 << CPU_TLB_ENTRY_BITS];
+    };
 } CPUTLBEntry;
 
 QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));

From 414b15c909c88e4cf5f10e80d033b3aa90bcc9e1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 24 Jun 2015 14:16:26 +0200
Subject: [PATCH 09/19] exec: drop cpu_can_do_io, just read cpu->can_do_io

After commit 626cf8f (icount: set can_do_io outside TB execution,
2014-12-08), can_do_io is set to 1 if not executing code.  It is
no longer necessary to make this assumption in cpu_can_do_io.

It is also possible to remove the use_icount test, simply by
never setting cpu->can_do_io to 0 unless use_icount is true.

With these changes cpu_can_do_io boils down to a read of
cpu->can_do_io.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cpu-exec.c              |  2 +-
 cpus.c                  |  2 +-
 include/exec/exec-all.h | 21 ---------------------
 include/qom/cpu.h       |  4 +++-
 qom/cpu.c               |  2 +-
 softmmu_template.h      |  4 ++--
 translate-all.c         |  3 ++-
 7 files changed, 10 insertions(+), 28 deletions(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index 407fa4715a..713540fc8f 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -196,7 +196,7 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
     }
 #endif /* DEBUG_DISAS */
 
-    cpu->can_do_io = 0;
+    cpu->can_do_io = !use_icount;
     next_tb = tcg_qemu_tb_exec(env, tb_ptr);
     cpu->can_do_io = 1;
     trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK),
diff --git a/cpus.c b/cpus.c
index a822ce3d80..c1e74d9824 100644
--- a/cpus.c
+++ b/cpus.c
@@ -145,7 +145,7 @@ int64_t cpu_get_icount_raw(void)
 
     icount = timers_state.qemu_icount;
     if (cpu) {
-        if (!cpu_can_do_io(cpu)) {
+        if (!cpu->can_do_io) {
             fprintf(stderr, "Bad icount read\n");
             exit(1);
         }
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 84272253b3..29775c012c 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -346,27 +346,6 @@ extern int singlestep;
 /* cpu-exec.c */
 extern volatile sig_atomic_t exit_request;
 
-/**
- * cpu_can_do_io:
- * @cpu: The CPU for which to check IO.
- *
- * Deterministic execution requires that IO only be performed on the last
- * instruction of a TB so that interrupts take effect immediately.
- *
- * Returns: %true if memory-mapped IO is safe, %false otherwise.
- */
-static inline bool cpu_can_do_io(CPUState *cpu)
-{
-    if (!use_icount) {
-        return true;
-    }
-    /* If not executing code then assume we are ok.  */
-    if (cpu->current_tb == NULL) {
-        return true;
-    }
-    return cpu->can_do_io != 0;
-}
-
 #if !defined(CONFIG_USER_ONLY)
 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new);
 #endif
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 20aabc9cb3..39712ab7cb 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -231,7 +231,9 @@ struct kvm_run;
  * @icount_decr: Number of cycles left, with interrupt flag in high bit.
  * This allows a single read-compare-cbranch-write sequence to test
  * for both decrementer underflow and exceptions.
- * @can_do_io: Nonzero if memory-mapped IO is safe.
+ * @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution
+ * requires that IO only be performed on the last instruction of a TB
+ * so that interrupts take effect immediately.
  * @env_ptr: Pointer to subclass-specific CPUArchState field.
  * @current_tb: Currently executing TB.
  * @gdb_regs: Additional GDB registers.
diff --git a/qom/cpu.c b/qom/cpu.c
index eb9cfeca18..62f4b5de44 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -247,7 +247,7 @@ static void cpu_common_reset(CPUState *cpu)
     cpu->mem_io_vaddr = 0;
     cpu->icount_extra = 0;
     cpu->icount_decr.u32 = 0;
-    cpu->can_do_io = 0;
+    cpu->can_do_io = 1;
     cpu->exception_index = -1;
     memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
 }
diff --git a/softmmu_template.h b/softmmu_template.h
index d42d89d541..50dec1c510 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -154,7 +154,7 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
 
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
     cpu->mem_io_pc = retaddr;
-    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu_can_do_io(cpu)) {
+    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
         cpu_io_recompile(cpu, retaddr);
     }
 
@@ -374,7 +374,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
     MemoryRegion *mr = iotlb_to_region(cpu, physaddr);
 
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
-    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu_can_do_io(cpu)) {
+    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
         cpu_io_recompile(cpu, retaddr);
     }
 
diff --git a/translate-all.c b/translate-all.c
index 755cdaba9c..9c46ffa0e3 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -222,6 +222,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     gen_intermediate_code_pc(env, tb);
 
     if (tb->cflags & CF_USE_ICOUNT) {
+        assert(use_icount);
         /* Reset the cycle counter to the start of the block.  */
         cpu->icount_decr.u16.low += tb->icount;
         /* Clear the IO flag.  */
@@ -1470,7 +1471,7 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
 
     if (use_icount) {
         cpu->icount_decr.u16.high = 0xffff;
-        if (!cpu_can_do_io(cpu)
+        if (!cpu->can_do_io
             && (mask & ~old_mask) != 0) {
             cpu_abort(cpu, "Raised interrupt while not in I/O function");
         }

From c097a60b100c10d79d8bd5c91ce804e865d7e70b Mon Sep 17 00:00:00 2001
From: Wen Congyang <wency@cn.fujitsu.com>
Date: Mon, 27 Jul 2015 10:24:18 +0800
Subject: [PATCH 10/19] rcu: Allow calling rcu_(un)register_thread() during
 synchronize_rcu()

If rcu_(un)register_thread() is called together with synchronize_rcu(),
it will wait for the synchronize_rcu() to finish. But when synchronize_rcu()
waits for some events, we can modify the list registry.
We also use the lock rcu_gp_lock to assume that synchronize_rcu() isn't
executed in more than one thread at the same time. Add a new mutex lock
rcu_sync_lock to assume it and rename rcu_gp_lock to rcu_registry_lock.
Release rcu_registry_lock when synchronize_rcu() waits for some events.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Message-Id: <55B59652.4090503@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 util/rcu.c | 48 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/util/rcu.c b/util/rcu.c
index cdcad678b4..8ba304dc44 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -47,7 +47,8 @@
 unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
 
 QemuEvent rcu_gp_event;
-static QemuMutex rcu_gp_lock;
+static QemuMutex rcu_registry_lock;
+static QemuMutex rcu_sync_lock;
 
 /*
  * Check whether a quiescent state was crossed between the beginning of
@@ -66,7 +67,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr)
  */
 __thread struct rcu_reader_data rcu_reader;
 
-/* Protected by rcu_gp_lock.  */
+/* Protected by rcu_registry_lock.  */
 typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
 static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);
 
@@ -114,10 +115,26 @@ static void wait_for_readers(void)
             break;
         }
 
-        /* Wait for one thread to report a quiescent state and
-         * try again.
+        /* Wait for one thread to report a quiescent state and try again.
+         * Release rcu_registry_lock, so rcu_(un)register_thread() doesn't
+         * wait too much time.
+         *
+         * rcu_register_thread() may add nodes to &registry; it will not
+         * wake up synchronize_rcu, but that is okay because at least another
+         * thread must exit its RCU read-side critical section before
+         * synchronize_rcu is done.  The next iteration of the loop will
+         * move the new thread's rcu_reader from &registry to &qsreaders,
+         * because rcu_gp_ongoing() will return false.
+         *
+         * rcu_unregister_thread() may remove nodes from &qsreaders instead
+         * of &registry if it runs during qemu_event_wait.  That's okay;
+         * the node then will not be added back to &registry by QLIST_SWAP
+         * below.  The invariant is that the node is part of one list when
+         * rcu_registry_lock is released.
          */
+        qemu_mutex_unlock(&rcu_registry_lock);
         qemu_event_wait(&rcu_gp_event);
+        qemu_mutex_lock(&rcu_registry_lock);
     }
 
     /* put back the reader list in the registry */
@@ -126,7 +143,8 @@ static void wait_for_readers(void)
 
 void synchronize_rcu(void)
 {
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_sync_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
 
     if (!QLIST_EMPTY(&registry)) {
         /* In either case, the atomic_mb_set below blocks stores that free
@@ -149,7 +167,8 @@ void synchronize_rcu(void)
         wait_for_readers();
     }
 
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
+    qemu_mutex_unlock(&rcu_sync_lock);
 }
 
 
@@ -273,23 +292,24 @@ void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
 void rcu_register_thread(void)
 {
     assert(rcu_reader.ctr == 0);
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
     QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
 }
 
 void rcu_unregister_thread(void)
 {
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
     QLIST_REMOVE(&rcu_reader, node);
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
 }
 
 static void rcu_init_complete(void)
 {
     QemuThread thread;
 
-    qemu_mutex_init(&rcu_gp_lock);
+    qemu_mutex_init(&rcu_registry_lock);
+    qemu_mutex_init(&rcu_sync_lock);
     qemu_event_init(&rcu_gp_event, true);
 
     qemu_event_init(&rcu_call_ready_event, false);
@@ -306,12 +326,14 @@ static void rcu_init_complete(void)
 #ifdef CONFIG_POSIX
 static void rcu_init_lock(void)
 {
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_sync_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
 }
 
 static void rcu_init_unlock(void)
 {
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
+    qemu_mutex_unlock(&rcu_sync_lock);
 }
 #endif
 

From 9284f31994919c001b54af57fc7d1bbb0d19b6fd Mon Sep 17 00:00:00 2001
From: Chen Hanxiao <chenhanxiao@cn.fujitsu.com>
Date: Fri, 24 Jul 2015 11:12:03 +0800
Subject: [PATCH 11/19] exec: use macro ROUND_UP for alignment

Use ROUND_UP instead.

Signed-off-by: Chen Hanxiao <chenhanxiao@cn.fujitsu.com>
Message-Id: <1437707523-4910-1-git-send-email-chenhanxiao@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/exec.c b/exec.c
index 0a4a0c5af6..54cd70ac1e 100644
--- a/exec.c
+++ b/exec.c
@@ -1210,7 +1210,7 @@ static void *file_ram_alloc(RAMBlock *block,
     unlink(filename);
     g_free(filename);
 
-    memory = (memory+hpagesize-1) & ~(hpagesize-1);
+    memory = ROUND_UP(memory, hpagesize);
 
     /*
      * ftruncate is not supported by hugetlbfs in older

From fe2d1a81d96e8ffe74974bd4b3ebc608f1f6a25d Mon Sep 17 00:00:00 2001
From: Lu Lina <lina.lulina@huawei.com>
Date: Mon, 27 Jul 2015 14:25:59 +0800
Subject: [PATCH 12/19] vhost-scsi: Clarify vhost_virtqueue_mask argument

vhost_virtqueue_mask takes an "absolute" virtqueue index, while the
code looks like it's passing an index that is relative to
s->dev.vq_index.  In reality, s->dev.vq_index is always zero, so
this patch does not make any difference, but the code is clearer.

Signed-off-by: Lu Lina <lina.lulina@huawei.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Message-Id: <1437978359-17960-1-git-send-email-arei.gonglei@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/scsi/vhost-scsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 0dd57ff6b4..7eacca9dc5 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -118,7 +118,7 @@ static int vhost_scsi_start(VHostSCSI *s)
      * enabling/disabling irqfd.
      */
     for (i = 0; i < s->dev.nvqs; i++) {
-        vhost_virtqueue_mask(&s->dev, vdev, i, false);
+        vhost_virtqueue_mask(&s->dev, vdev, s->dev.vq_index + i, false);
     }
 
     return ret;

From 06832648e1dbd268d7b719b9a49df476af689c6d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 27 Jul 2015 13:52:55 +0200
Subject: [PATCH 13/19] qemu-nbd: remove unnecessary qemu_notify_event()

This was needed when qemu-nbd was using qemu_set_fd_handler2.  It is
not needed anymore now that nbd_update_server_fd_handler is called
whenever nbd_can_accept() can change from false to true.
nbd_update_server_fd_handler will call qemu_set_fd_handler(),
which will call qemu_notify_event().

Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qemu-nbd.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index 5106b802e6..d9644b2431 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -362,7 +362,6 @@ static void nbd_client_closed(NBDClient *client)
         state = TERMINATE;
     }
     nbd_update_server_fd_handler(server_fd);
-    qemu_notify_event();
     nbd_client_put(client);
 }
 

From 416471916542f30c49f2ed2187d634e9ad26d57d Mon Sep 17 00:00:00 2001
From: "Daniel P. Berrange" <berrange@redhat.com>
Date: Fri, 31 Jul 2015 13:23:23 +0100
Subject: [PATCH 14/19] configure: only add CONFIG_RDMA to config-host.h once

For unknown reasons (probably a git rebase merge mistake)

  commit 2da776db4846eadcb808598a5d3484d149773c05
  Author: Michael R. Hines <mrhines@us.ibm.com>
  Date:   Mon Jul 22 10:01:54 2013 -0400

    rdma: core logic

Adds CONFIG_RDMA to config-host.h twice, as can be seen
in the generated file:

 $ grep CONFIG_RDMA config-host.h
 #define CONFIG_RDMA 1
 #define CONFIG_RDMA 1

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1438345403-32467-1-git-send-email-berrange@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 configure | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/configure b/configure
index 704b34c5aa..aace4897df 100755
--- a/configure
+++ b/configure
@@ -5601,10 +5601,6 @@ if [ "$pixman" = "internal" ]; then
   echo "config-host.h: subdir-pixman" >> $config_host_mak
 fi
 
-if test "$rdma" = "yes" ; then
-echo "CONFIG_RDMA=y" >> $config_host_mak
-fi
-
 if [ "$dtc_internal" = "yes" ]; then
   echo "config-host.h: subdir-dtc" >> $config_host_mak
 fi

From d223c10453f1a7909349fd3e52a6047295d0e94f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 22 Jul 2015 16:38:17 +0200
Subject: [PATCH 15/19] scsi: create restart bottom half in the right
 AioContext

This matches commit 4407c1c (virtio-blk: Schedule BH in the right context,
2014-06-17), which did the same thing for virtio-blk.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/scsi/scsi-bus.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index f0ae4625ff..ffac8f4bb6 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -136,7 +136,8 @@ static void scsi_dma_restart_cb(void *opaque, int running, RunState state)
         return;
     }
     if (!s->bh) {
-        s->bh = qemu_bh_new(scsi_dma_restart_bh, s);
+        AioContext *ctx = blk_get_aio_context(s->conf.blk);
+        s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s);
         qemu_bh_schedule(s->bh);
     }
 }

From 5fd2b563a7624959ae7f000202785a30279021f8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 19 Jul 2015 19:15:26 +0200
Subject: [PATCH 16/19] scsi-disk: identify AIO callbacks more clearly

Functions that are not callbacks should assert that aiocb is NULL and
have a non-opaque argument (usually a pointer to SCSIDiskReq).

AIO callbacks should assert that aiocb is not NULL and take care of
calling block_acct done.  They also have an opaque argument.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/scsi/scsi-disk.c | 91 ++++++++++++++++++++++++++++++---------------
 1 file changed, 61 insertions(+), 30 deletions(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 087541d3b7..bada9a7f62 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -217,6 +217,8 @@ static void scsi_write_do_fua(SCSIDiskReq *r)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
+    assert(r->req.aiocb == NULL);
+
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -235,15 +237,10 @@ done:
     scsi_req_unref(&r->req);
 }
 
-static void scsi_dma_complete_noio(void *opaque, int ret)
+static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+    assert(r->req.aiocb == NULL);
 
-    if (r->req.aiocb != NULL) {
-        r->req.aiocb = NULL;
-        block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
-    }
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -271,9 +268,13 @@ done:
 static void scsi_dma_complete(void *opaque, int ret)
 {
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
     assert(r->req.aiocb != NULL);
-    scsi_dma_complete_noio(opaque, ret);
+    r->req.aiocb = NULL;
+
+    block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
+    scsi_dma_complete_noio(r, ret);
 }
 
 static void scsi_read_complete(void * opaque, int ret)
@@ -308,16 +309,13 @@ done:
 }
 
 /* Actually issue a read to the block device.  */
-static void scsi_do_read(void *opaque, int ret)
+static void scsi_do_read(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskReq *r = opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;
 
-    if (r->req.aiocb != NULL) {
-        r->req.aiocb = NULL;
-        block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
-    }
+    assert (r->req.aiocb == NULL);
+
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -349,6 +347,18 @@ done:
     scsi_req_unref(&r->req);
 }
 
+static void scsi_do_read_cb(void *opaque, int ret)
+{
+    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+    assert (r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
+    scsi_do_read(opaque, ret);
+}
+
 /* Read more data from scsi device into buffer.  */
 static void scsi_read_data(SCSIRequest *req)
 {
@@ -384,7 +394,7 @@ static void scsi_read_data(SCSIRequest *req)
     if (first && scsi_is_cmd_fua(&r->req.cmd)) {
         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
                          BLOCK_ACCT_FLUSH);
-        r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read, r);
+        r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
     } else {
         scsi_do_read(r, 0);
     }
@@ -430,16 +440,12 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
     return action != BLOCK_ERROR_ACTION_IGNORE;
 }
 
-static void scsi_write_complete(void * opaque, int ret)
+static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;
 
-    if (r->req.aiocb != NULL) {
-        r->req.aiocb = NULL;
-        block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
-    }
+    assert (r->req.aiocb == NULL);
+
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -467,6 +473,18 @@ done:
     scsi_req_unref(&r->req);
 }
 
+static void scsi_write_complete(void * opaque, int ret)
+{
+    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+    assert (r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
+    scsi_write_complete_noio(r, ret);
+}
+
 static void scsi_write_data(SCSIRequest *req)
 {
     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
@@ -480,18 +498,18 @@ static void scsi_write_data(SCSIRequest *req)
     scsi_req_ref(&r->req);
     if (r->req.cmd.mode != SCSI_XFER_TO_DEV) {
         DPRINTF("Data transfer direction invalid\n");
-        scsi_write_complete(r, -EINVAL);
+        scsi_write_complete_noio(r, -EINVAL);
         return;
     }
 
     if (!r->req.sg && !r->qiov.size) {
         /* Called for the first time.  Ask the driver to send us more data.  */
         r->started = true;
-        scsi_write_complete(r, 0);
+        scsi_write_complete_noio(r, 0);
         return;
     }
     if (s->tray_open) {
-        scsi_write_complete(r, -ENOMEDIUM);
+        scsi_write_complete_noio(r, -ENOMEDIUM);
         return;
     }
 
@@ -500,7 +518,7 @@ static void scsi_write_data(SCSIRequest *req)
         if (r->req.sg) {
             scsi_dma_complete_noio(r, 0);
         } else {
-            scsi_write_complete(r, 0);
+            scsi_write_complete_noio(r, 0);
         }
         return;
     }
@@ -1557,15 +1575,17 @@ typedef struct UnmapCBData {
     int count;
 } UnmapCBData;
 
-static void scsi_unmap_complete(void *opaque, int ret)
+static void scsi_unmap_complete(void *opaque, int ret);
+
+static void scsi_unmap_complete_noio(UnmapCBData *data, int ret)
 {
-    UnmapCBData *data = opaque;
     SCSIDiskReq *r = data->r;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint64_t sector_num;
     uint32_t nb_sectors;
 
-    r->req.aiocb = NULL;
+    assert(r->req.aiocb == NULL);
+
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -1601,6 +1621,17 @@ done:
     g_free(data);
 }
 
+static void scsi_unmap_complete(void *opaque, int ret)
+{
+    UnmapCBData *data = opaque;
+    SCSIDiskReq *r = data->r;
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    scsi_unmap_complete_noio(data, ret);
+}
+
 static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
@@ -1638,7 +1669,7 @@ static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
 
     /* The matching unref is in scsi_unmap_complete, before data is freed.  */
     scsi_req_ref(&r->req);
-    scsi_unmap_complete(data, 0);
+    scsi_unmap_complete_noio(data, 0);
     return;
 
 invalid_param_len:

From fa0d653b06cec7b3188a733dc7394e030948018e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 19 Jul 2015 19:15:26 +0200
Subject: [PATCH 17/19] scsi-generic: identify AIO callbacks more clearly

Functions that are not callbacks should assert that aiocb is NULL and
have a SCSIGenericReq argument.

AIO callbacks should assert that aiocb is not NULL.  They also have an
opaque argument.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/scsi/scsi-generic.c | 68 ++++++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 26 deletions(-)

diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index e53470f85e..1b6350be41 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -88,12 +88,12 @@ static void scsi_free_request(SCSIRequest *req)
 }
 
 /* Helper function for command completion.  */
-static void scsi_command_complete(void *opaque, int ret)
+static void scsi_command_complete_noio(SCSIGenericReq *r, int ret)
 {
     int status;
-    SCSIGenericReq *r = (SCSIGenericReq *)opaque;
 
-    r->req.aiocb = NULL;
+    assert(r->req.aiocb == NULL);
+
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -142,6 +142,15 @@ done:
     scsi_req_unref(&r->req);
 }
 
+static void scsi_command_complete(void *opaque, int ret)
+{
+    SCSIGenericReq *r = (SCSIGenericReq *)opaque;
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+    scsi_command_complete_noio(r, ret);
+}
+
 static int execute_command(BlockBackend *blk,
                            SCSIGenericReq *r, int direction,
                            BlockCompletionFunc *complete)
@@ -172,33 +181,37 @@ static void scsi_read_complete(void * opaque, int ret)
     SCSIDevice *s = r->req.dev;
     int len;
 
+    assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
+
     if (ret || r->req.io_canceled) {
-        scsi_command_complete(r, ret);
+        scsi_command_complete_noio(r, ret);
         return;
     }
+
     len = r->io_header.dxfer_len - r->io_header.resid;
     DPRINTF("Data ready tag=0x%x len=%d\n", r->req.tag, len);
 
     r->len = -1;
     if (len == 0) {
-        scsi_command_complete(r, 0);
-    } else {
-        /* Snoop READ CAPACITY output to set the blocksize.  */
-        if (r->req.cmd.buf[0] == READ_CAPACITY_10 &&
-            (ldl_be_p(&r->buf[0]) != 0xffffffffU || s->max_lba == 0)) {
-            s->blocksize = ldl_be_p(&r->buf[4]);
-            s->max_lba = ldl_be_p(&r->buf[0]) & 0xffffffffULL;
-        } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 &&
-                   (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
-            s->blocksize = ldl_be_p(&r->buf[8]);
-            s->max_lba = ldq_be_p(&r->buf[0]);
-        }
-        blk_set_guest_block_size(s->conf.blk, s->blocksize);
-
-        scsi_req_data(&r->req, len);
-        scsi_req_unref(&r->req);
+        scsi_command_complete_noio(r, 0);
+        return;
     }
+
+    /* Snoop READ CAPACITY output to set the blocksize.  */
+    if (r->req.cmd.buf[0] == READ_CAPACITY_10 &&
+        (ldl_be_p(&r->buf[0]) != 0xffffffffU || s->max_lba == 0)) {
+        s->blocksize = ldl_be_p(&r->buf[4]);
+        s->max_lba = ldl_be_p(&r->buf[0]) & 0xffffffffULL;
+    } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 &&
+               (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
+        s->blocksize = ldl_be_p(&r->buf[8]);
+        s->max_lba = ldq_be_p(&r->buf[0]);
+    }
+    blk_set_guest_block_size(s->conf.blk, s->blocksize);
+
+    scsi_req_data(&r->req, len);
+    scsi_req_unref(&r->req);
 }
 
 /* Read more data from scsi device into buffer.  */
@@ -213,14 +226,14 @@ static void scsi_read_data(SCSIRequest *req)
     /* The request is used as the AIO opaque value, so add a ref.  */
     scsi_req_ref(&r->req);
     if (r->len == -1) {
-        scsi_command_complete(r, 0);
+        scsi_command_complete_noio(r, 0);
         return;
     }
 
     ret = execute_command(s->conf.blk, r, SG_DXFER_FROM_DEV,
                           scsi_read_complete);
     if (ret < 0) {
-        scsi_command_complete(r, ret);
+        scsi_command_complete_noio(r, ret);
     }
 }
 
@@ -230,9 +243,12 @@ static void scsi_write_complete(void * opaque, int ret)
     SCSIDevice *s = r->req.dev;
 
     DPRINTF("scsi_write_complete() ret = %d\n", ret);
+
+    assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
+
     if (ret || r->req.io_canceled) {
-        scsi_command_complete(r, ret);
+        scsi_command_complete_noio(r, ret);
         return;
     }
 
@@ -242,7 +258,7 @@ static void scsi_write_complete(void * opaque, int ret)
         DPRINTF("block size %d\n", s->blocksize);
     }
 
-    scsi_command_complete(r, ret);
+    scsi_command_complete_noio(r, ret);
 }
 
 /* Write data to a scsi device.  Returns nonzero on failure.
@@ -264,7 +280,7 @@ static void scsi_write_data(SCSIRequest *req)
     scsi_req_ref(&r->req);
     ret = execute_command(s->conf.blk, r, SG_DXFER_TO_DEV, scsi_write_complete);
     if (ret < 0) {
-        scsi_command_complete(r, ret);
+        scsi_command_complete_noio(r, ret);
     }
 }
 
@@ -306,7 +322,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *cmd)
         ret = execute_command(s->conf.blk, r, SG_DXFER_NONE,
                               scsi_command_complete);
         if (ret < 0) {
-            scsi_command_complete(r, ret);
+            scsi_command_complete_noio(r, ret);
             return 0;
         }
         return 0;

From 491ffc1f7ce857b88e9d310ce901ce033e45b75d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 24 Jun 2015 13:55:51 +0200
Subject: [PATCH 18/19] hw: fix mask for ColdFire UART command register

The "miscellaneous commands" part of the register is 3 bits wide.
Spotted by Coverity and confirmed in the datasheet, downloadable from
http://cache.freescale.com/files/32bit/doc/ref_manual/MCF5307BUM.pdf
(figure 14-6).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/char/mcf_uart.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/char/mcf_uart.c b/hw/char/mcf_uart.c
index 98fd44e66a..cda22eea5d 100644
--- a/hw/char/mcf_uart.c
+++ b/hw/char/mcf_uart.c
@@ -126,7 +126,7 @@ static void mcf_uart_do_tx(mcf_uart_state *s)
 static void mcf_do_command(mcf_uart_state *s, uint8_t cmd)
 {
     /* Misc command.  */
-    switch ((cmd >> 4) & 3) {
+    switch ((cmd >> 4) & 7) {
     case 0: /* No-op.  */
         break;
     case 1: /* Reset mode register pointer.  */

From 9504c5445cb709415aea509954a922983925c2d3 Mon Sep 17 00:00:00 2001
From: Peter Crosthwaite <crosthwaitepeter@gmail.com>
Date: Sun, 5 Jul 2015 13:50:32 -0700
Subject: [PATCH 19/19] disas: Defeature print_target_address

It does not work in multi-arch as it requires the CPU specific
TARGET_VIRT_ADDR_SPACE_BITS global define. Just use the generic
version that does no masking. Targets should be responsible for
passing in a sane virtual address.

Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Message-Id: <1436129432-16617-1-git-send-email-crosthwaite.peter@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 disas.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/disas.c b/disas.c
index 69a6066914..0ae70c22f7 100644
--- a/disas.c
+++ b/disas.c
@@ -72,14 +72,6 @@ generic_print_address (bfd_vma addr, struct disassemble_info *info)
     (*info->fprintf_func) (info->stream, "0x%" PRIx64, addr);
 }
 
-/* Print address in hex, truncated to the width of a target virtual address. */
-static void
-generic_print_target_address(bfd_vma addr, struct disassemble_info *info)
-{
-    uint64_t mask = ~0ULL >> (64 - TARGET_VIRT_ADDR_SPACE_BITS);
-    generic_print_address(addr & mask, info);
-}
-
 /* Print address in hex, truncated to the width of a host virtual address. */
 static void
 generic_print_host_address(bfd_vma addr, struct disassemble_info *info)
@@ -201,7 +193,7 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
     s.info.read_memory_func = target_read_memory;
     s.info.buffer_vma = code;
     s.info.buffer_length = size;
-    s.info.print_address_func = generic_print_target_address;
+    s.info.print_address_func = generic_print_address;
 
 #ifdef TARGET_WORDS_BIGENDIAN
     s.info.endian = BFD_ENDIAN_BIG;
@@ -424,7 +416,7 @@ void monitor_disas(Monitor *mon, CPUState *cpu,
     s.cpu = cpu;
     monitor_disas_is_physical = is_physical;
     s.info.read_memory_func = monitor_read_memory;
-    s.info.print_address_func = generic_print_target_address;
+    s.info.print_address_func = generic_print_address;
 
     s.info.buffer_vma = pc;