From c589b24972794a92fe3b5d6b9f4f09ef29e95460 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Date: Tue, 25 Oct 2011 19:24:24 +1100
Subject: [PATCH 01/55] iSCSI block driver

This provides built-in support for iSCSI to QEMU.

This has the advantage that the iSCSI devices need not be made visible to the host, which is useful if you have very many virtual machines and very many iscsi devices.
It also has the benefit that non-root users of QEMU can access iSCSI devices across the network without requiring root privilege on the host.

This driver interfaces with the multiplatform posix library for iscsi initiator/client access to iscsi devices hosted at
    git://github.com/sahlberg/libiscsi.git

The patch adds the driver to interface with the iscsi library.
It also updated the configure script to
* by default, probe is libiscsi is available and if so, build
  qemu against libiscsi.
* --enable-libiscsi
  Force a build against libiscsi. If libiscsi is not available
  the build will fail.
* --disable-libiscsi
  Do not link against libiscsi, even if it is available.

When linked with libiscsi, qemu gains support to access iscsi resources such as disks and cdrom directly, without having to make the devices visible to the host.

You can specify devices using a iscsi url of the form :
iscsi://[<username>[:<password>@]]<host>[:<port]/<target-iqn-name>/<lun>
When using authentication, the password can optionally be set with
LIBISCSI_CHAP_PASSWORD="password" to avoid it showing up in the process list

Signed-off-by: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 Makefile.objs |   1 +
 block/iscsi.c | 591 ++++++++++++++++++++++++++++++++++++++++++++++++++
 configure     |  31 +++
 trace-events  |   6 +
 4 files changed, 629 insertions(+)
 create mode 100644 block/iscsi.c

diff --git a/Makefile.objs b/Makefile.objs
index 01587c8f8f..d18417ca00 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -36,6 +36,7 @@ block-nested-y += qed-check.o
 block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
+block-nested-$(CONFIG_LIBISCSI) += iscsi.o
 block-nested-$(CONFIG_CURL) += curl.o
 block-nested-$(CONFIG_RBD) += rbd.o
 
diff --git a/block/iscsi.c b/block/iscsi.c
new file mode 100644
index 0000000000..938c568071
--- /dev/null
+++ b/block/iscsi.c
@@ -0,0 +1,591 @@
+/*
+ * QEMU Block driver for iSCSI images
+ *
+ * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+
+#include <poll.h>
+#include "qemu-common.h"
+#include "qemu-error.h"
+#include "block_int.h"
+#include "trace.h"
+
+#include <iscsi/iscsi.h>
+#include <iscsi/scsi-lowlevel.h>
+
+
+typedef struct IscsiLun {
+    struct iscsi_context *iscsi;
+    int lun;
+    int block_size;
+    unsigned long num_blocks;
+} IscsiLun;
+
+typedef struct IscsiAIOCB {
+    BlockDriverAIOCB common;
+    QEMUIOVector *qiov;
+    QEMUBH *bh;
+    IscsiLun *iscsilun;
+    struct scsi_task *task;
+    uint8_t *buf;
+    int status;
+    int canceled;
+    size_t read_size;
+    size_t read_offset;
+} IscsiAIOCB;
+
+struct IscsiTask {
+    IscsiLun *iscsilun;
+    BlockDriverState *bs;
+    int status;
+    int complete;
+};
+
+static void
+iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
+                    void *private_data)
+{
+}
+
+static void
+iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
+    IscsiLun *iscsilun = acb->iscsilun;
+
+    acb->common.cb(acb->common.opaque, -ECANCELED);
+    acb->canceled = 1;
+
+    /* send a task mgmt call to the target to cancel the task on the target */
+    iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
+                                     iscsi_abort_task_cb, NULL);
+
+    /* then also cancel the task locally in libiscsi */
+    iscsi_scsi_task_cancel(iscsilun->iscsi, acb->task);
+}
+
+static AIOPool iscsi_aio_pool = {
+    .aiocb_size         = sizeof(IscsiAIOCB),
+    .cancel             = iscsi_aio_cancel,
+};
+
+
+static void iscsi_process_read(void *arg);
+static void iscsi_process_write(void *arg);
+
+static int iscsi_process_flush(void *arg)
+{
+    IscsiLun *iscsilun = arg;
+
+    return iscsi_queue_length(iscsilun->iscsi) > 0;
+}
+
+static void
+iscsi_set_events(IscsiLun *iscsilun)
+{
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+
+    qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), iscsi_process_read,
+                           (iscsi_which_events(iscsi) & POLLOUT)
+                           ? iscsi_process_write : NULL,
+                           iscsi_process_flush, NULL, iscsilun);
+}
+
+static void
+iscsi_process_read(void *arg)
+{
+    IscsiLun *iscsilun = arg;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+
+    iscsi_service(iscsi, POLLIN);
+    iscsi_set_events(iscsilun);
+}
+
+static void
+iscsi_process_write(void *arg)
+{
+    IscsiLun *iscsilun = arg;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+
+    iscsi_service(iscsi, POLLOUT);
+    iscsi_set_events(iscsilun);
+}
+
+
+static int
+iscsi_schedule_bh(QEMUBHFunc *cb, IscsiAIOCB *acb)
+{
+    acb->bh = qemu_bh_new(cb, acb);
+    if (!acb->bh) {
+        error_report("oom: could not create iscsi bh");
+        return -EIO;
+    }
+
+    qemu_bh_schedule(acb->bh);
+    return 0;
+}
+
+static void
+iscsi_readv_writev_bh_cb(void *p)
+{
+    IscsiAIOCB *acb = p;
+
+    qemu_bh_delete(acb->bh);
+
+    if (acb->canceled == 0) {
+        acb->common.cb(acb->common.opaque, acb->status);
+    }
+
+    qemu_aio_release(acb);
+}
+
+
+static void
+iscsi_aio_write10_cb(struct iscsi_context *iscsi, int status,
+                     void *command_data, void *opaque)
+{
+    IscsiAIOCB *acb = opaque;
+
+    trace_iscsi_aio_write10_cb(iscsi, status, acb, acb->canceled);
+
+    g_free(acb->buf);
+
+    if (acb->canceled != 0) {
+        qemu_aio_release(acb);
+        scsi_free_scsi_task(acb->task);
+        acb->task = NULL;
+        return;
+    }
+
+    acb->status = 0;
+    if (status < 0) {
+        error_report("Failed to write10 data to iSCSI lun. %s",
+                     iscsi_get_error(iscsi));
+        acb->status = -EIO;
+    }
+
+    iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
+    scsi_free_scsi_task(acb->task);
+    acb->task = NULL;
+}
+
+static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
+{
+    return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
+}
+
+static BlockDriverAIOCB *
+iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                 QEMUIOVector *qiov, int nb_sectors,
+                 BlockDriverCompletionFunc *cb,
+                 void *opaque)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+    IscsiAIOCB *acb;
+    size_t size;
+    int fua = 0;
+
+    /* set FUA on writes when cache mode is write through */
+    if (!(bs->open_flags & BDRV_O_CACHE_WB)) {
+        fua = 1;
+    }
+
+    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
+    trace_iscsi_aio_writev(iscsi, sector_num, nb_sectors, opaque, acb);
+
+    acb->iscsilun = iscsilun;
+    acb->qiov     = qiov;
+
+    acb->canceled   = 0;
+
+    /* XXX we should pass the iovec to write10 to avoid the extra copy */
+    /* this will allow us to get rid of 'buf' completely */
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    acb->buf = g_malloc(size);
+    qemu_iovec_to_buffer(acb->qiov, acb->buf);
+    acb->task = iscsi_write10_task(iscsi, iscsilun->lun, acb->buf, size,
+                              sector_qemu2lun(sector_num, iscsilun),
+                              fua, 0, iscsilun->block_size,
+                              iscsi_aio_write10_cb, acb);
+    if (acb->task == NULL) {
+        error_report("iSCSI: Failed to send write10 command. %s",
+                     iscsi_get_error(iscsi));
+        g_free(acb->buf);
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
+    iscsi_set_events(iscsilun);
+
+    return &acb->common;
+}
+
+static void
+iscsi_aio_read10_cb(struct iscsi_context *iscsi, int status,
+                    void *command_data, void *opaque)
+{
+    IscsiAIOCB *acb = opaque;
+
+    trace_iscsi_aio_read10_cb(iscsi, status, acb, acb->canceled);
+
+    if (acb->canceled != 0) {
+        qemu_aio_release(acb);
+        scsi_free_scsi_task(acb->task);
+        acb->task = NULL;
+        return;
+    }
+
+    acb->status = 0;
+    if (status != 0) {
+        error_report("Failed to read10 data from iSCSI lun. %s",
+                     iscsi_get_error(iscsi));
+        acb->status = -EIO;
+    }
+
+    iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
+    scsi_free_scsi_task(acb->task);
+    acb->task = NULL;
+}
+
+static BlockDriverAIOCB *
+iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
+                QEMUIOVector *qiov, int nb_sectors,
+                BlockDriverCompletionFunc *cb,
+                void *opaque)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+    IscsiAIOCB *acb;
+    size_t qemu_read_size, lun_read_size;
+    int i;
+
+    qemu_read_size = BDRV_SECTOR_SIZE * (size_t)nb_sectors;
+
+    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
+    trace_iscsi_aio_readv(iscsi, sector_num, nb_sectors, opaque, acb);
+
+    acb->iscsilun = iscsilun;
+    acb->qiov     = qiov;
+
+    acb->canceled    = 0;
+    acb->read_size   = qemu_read_size;
+    acb->buf         = NULL;
+
+    /* If LUN blocksize is bigger than BDRV_BLOCK_SIZE a read from QEMU
+     * may be misaligned to the LUN, so we may need to read some extra
+     * data.
+     */
+    acb->read_offset = 0;
+    if (iscsilun->block_size > BDRV_SECTOR_SIZE) {
+        uint64_t bdrv_offset = BDRV_SECTOR_SIZE * sector_num;
+
+        acb->read_offset  = bdrv_offset % iscsilun->block_size;
+    }
+
+    lun_read_size  = (qemu_read_size + iscsilun->block_size
+                     + acb->read_offset - 1)
+                     / iscsilun->block_size * iscsilun->block_size;
+    acb->task = iscsi_read10_task(iscsi, iscsilun->lun,
+                             sector_qemu2lun(sector_num, iscsilun),
+                             lun_read_size, iscsilun->block_size,
+                             iscsi_aio_read10_cb, acb);
+    if (acb->task == NULL) {
+        error_report("iSCSI: Failed to send read10 command. %s",
+                     iscsi_get_error(iscsi));
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
+    for (i = 0; i < acb->qiov->niov; i++) {
+        scsi_task_add_data_in_buffer(acb->task,
+                acb->qiov->iov[i].iov_len,
+                acb->qiov->iov[i].iov_base);
+    }
+
+    iscsi_set_events(iscsilun);
+
+    return &acb->common;
+}
+
+
+static void
+iscsi_synccache10_cb(struct iscsi_context *iscsi, int status,
+                     void *command_data, void *opaque)
+{
+    IscsiAIOCB *acb = opaque;
+
+    if (acb->canceled != 0) {
+        qemu_aio_release(acb);
+        scsi_free_scsi_task(acb->task);
+        acb->task = NULL;
+        return;
+    }
+
+    acb->status = 0;
+    if (status < 0) {
+        error_report("Failed to sync10 data on iSCSI lun. %s",
+                     iscsi_get_error(iscsi));
+        acb->status = -EIO;
+    }
+
+    iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
+    scsi_free_scsi_task(acb->task);
+    acb->task = NULL;
+}
+
+static BlockDriverAIOCB *
+iscsi_aio_flush(BlockDriverState *bs,
+                BlockDriverCompletionFunc *cb, void *opaque)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+    IscsiAIOCB *acb;
+
+    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
+
+    acb->iscsilun = iscsilun;
+    acb->canceled   = 0;
+
+    acb->task = iscsi_synchronizecache10_task(iscsi, iscsilun->lun,
+                                         0, 0, 0, 0,
+                                         iscsi_synccache10_cb,
+                                         acb);
+    if (acb->task == NULL) {
+        error_report("iSCSI: Failed to send synchronizecache10 command. %s",
+                     iscsi_get_error(iscsi));
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
+    iscsi_set_events(iscsilun);
+
+    return &acb->common;
+}
+
+static int64_t
+iscsi_getlength(BlockDriverState *bs)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    int64_t len;
+
+    len  = iscsilun->num_blocks;
+    len *= iscsilun->block_size;
+
+    return len;
+}
+
+static void
+iscsi_readcapacity10_cb(struct iscsi_context *iscsi, int status,
+                        void *command_data, void *opaque)
+{
+    struct IscsiTask *itask = opaque;
+    struct scsi_readcapacity10 *rc10;
+    struct scsi_task *task = command_data;
+
+    if (status != 0) {
+        error_report("iSCSI: Failed to read capacity of iSCSI lun. %s",
+                     iscsi_get_error(iscsi));
+        itask->status   = 1;
+        itask->complete = 1;
+        scsi_free_scsi_task(task);
+        return;
+    }
+
+    rc10 = scsi_datain_unmarshall(task);
+    if (rc10 == NULL) {
+        error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
+        itask->status   = 1;
+        itask->complete = 1;
+        scsi_free_scsi_task(task);
+        return;
+    }
+
+    itask->iscsilun->block_size = rc10->block_size;
+    itask->iscsilun->num_blocks = rc10->lba;
+    itask->bs->total_sectors = (uint64_t)rc10->lba *
+                               rc10->block_size / BDRV_SECTOR_SIZE ;
+
+    itask->status   = 0;
+    itask->complete = 1;
+    scsi_free_scsi_task(task);
+}
+
+
+static void
+iscsi_connect_cb(struct iscsi_context *iscsi, int status, void *command_data,
+                 void *opaque)
+{
+    struct IscsiTask *itask = opaque;
+    struct scsi_task *task;
+
+    if (status != 0) {
+        itask->status   = 1;
+        itask->complete = 1;
+        return;
+    }
+
+    task = iscsi_readcapacity10_task(iscsi, itask->iscsilun->lun, 0, 0,
+                                   iscsi_readcapacity10_cb, opaque);
+    if (task == NULL) {
+        error_report("iSCSI: failed to send readcapacity command.");
+        itask->status   = 1;
+        itask->complete = 1;
+        return;
+    }
+}
+
+/*
+ * We support iscsi url's on the form
+ * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
+ */
+static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = NULL;
+    struct iscsi_url *iscsi_url = NULL;
+    struct IscsiTask task;
+    int ret;
+
+    if ((BDRV_SECTOR_SIZE % 512) != 0) {
+        error_report("iSCSI: Invalid BDRV_SECTOR_SIZE. "
+                     "BDRV_SECTOR_SIZE(%lld) is not a multiple "
+                     "of 512", BDRV_SECTOR_SIZE);
+        return -EINVAL;
+    }
+
+    memset(iscsilun, 0, sizeof(IscsiLun));
+
+    /* Should really append the KVM name after the ':' here */
+    iscsi = iscsi_create_context("iqn.2008-11.org.linux-kvm:");
+    if (iscsi == NULL) {
+        error_report("iSCSI: Failed to create iSCSI context.");
+        ret = -ENOMEM;
+        goto failed;
+    }
+
+    iscsi_url = iscsi_parse_full_url(iscsi, filename);
+    if (iscsi_url == NULL) {
+        error_report("Failed to parse URL : %s %s", filename,
+                     iscsi_get_error(iscsi));
+        ret = -EINVAL;
+        goto failed;
+    }
+
+    if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
+        error_report("iSCSI: Failed to set target name.");
+        ret = -EINVAL;
+        goto failed;
+    }
+
+    if (iscsi_url->user != NULL) {
+        ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
+                                              iscsi_url->passwd);
+        if (ret != 0) {
+            error_report("Failed to set initiator username and password");
+            ret = -EINVAL;
+            goto failed;
+        }
+    }
+    if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
+        error_report("iSCSI: Failed to set session type to normal.");
+        ret = -EINVAL;
+        goto failed;
+    }
+
+    iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
+
+    task.iscsilun = iscsilun;
+    task.status = 0;
+    task.complete = 0;
+    task.bs = bs;
+
+    iscsilun->iscsi = iscsi;
+    iscsilun->lun   = iscsi_url->lun;
+
+    if (iscsi_full_connect_async(iscsi, iscsi_url->portal, iscsi_url->lun,
+                                 iscsi_connect_cb, &task)
+        != 0) {
+        error_report("iSCSI: Failed to start async connect.");
+        ret = -EINVAL;
+        goto failed;
+    }
+
+    while (!task.complete) {
+        iscsi_set_events(iscsilun);
+        qemu_aio_wait();
+    }
+    if (task.status != 0) {
+        error_report("iSCSI: Failed to connect to LUN : %s",
+                     iscsi_get_error(iscsi));
+        ret = -EINVAL;
+        goto failed;
+    }
+
+    if (iscsi_url != NULL) {
+        iscsi_destroy_url(iscsi_url);
+    }
+    return 0;
+
+failed:
+    if (iscsi_url != NULL) {
+        iscsi_destroy_url(iscsi_url);
+    }
+    if (iscsi != NULL) {
+        iscsi_destroy_context(iscsi);
+    }
+    memset(iscsilun, 0, sizeof(IscsiLun));
+    return ret;
+}
+
+static void iscsi_close(BlockDriverState *bs)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+
+    qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL, NULL);
+    iscsi_destroy_context(iscsi);
+    memset(iscsilun, 0, sizeof(IscsiLun));
+}
+
+static BlockDriver bdrv_iscsi = {
+    .format_name     = "iscsi",
+    .protocol_name   = "iscsi",
+
+    .instance_size   = sizeof(IscsiLun),
+    .bdrv_file_open  = iscsi_open,
+    .bdrv_close      = iscsi_close,
+
+    .bdrv_getlength  = iscsi_getlength,
+
+    .bdrv_aio_readv  = iscsi_aio_readv,
+    .bdrv_aio_writev = iscsi_aio_writev,
+    .bdrv_aio_flush  = iscsi_aio_flush,
+};
+
+static void iscsi_block_init(void)
+{
+    bdrv_register(&bdrv_iscsi);
+}
+
+block_init(iscsi_block_init);
diff --git a/configure b/configure
index 4f87e0a43b..3009bbcf72 100755
--- a/configure
+++ b/configure
@@ -182,6 +182,7 @@ usb_redir=""
 opengl=""
 zlib="yes"
 guest_agent="yes"
+libiscsi=""
 
 # parse CC options first
 for opt do
@@ -657,6 +658,10 @@ for opt do
   ;;
   --enable-spice) spice="yes"
   ;;
+  --disable-libiscsi) libiscsi="no"
+  ;;
+  --enable-libiscsi) libiscsi="yes"
+  ;;
   --enable-profiler) profiler="yes"
   ;;
   --enable-cocoa)
@@ -1046,6 +1051,8 @@ echo "                           Default:trace-<pid>"
 echo "  --disable-spice          disable spice"
 echo "  --enable-spice           enable spice"
 echo "  --enable-rbd             enable building the rados block device (rbd)"
+echo "  --disable-libiscsi       disable iscsi support"
+echo "  --enable-libiscsi        enable iscsi support"
 echo "  --disable-smartcard      disable smartcard support"
 echo "  --enable-smartcard       enable smartcard support"
 echo "  --disable-smartcard-nss  disable smartcard nss support"
@@ -2334,6 +2341,25 @@ if compile_prog "" "" ; then
   bswap_h=yes
 fi
 
+##########################################
+# Do we have libiscsi
+if test "$libiscsi" != "no" ; then
+  cat > $TMPC << EOF
+#include <iscsi/iscsi.h>
+int main(void) { iscsi_create_context(""); return 0; }
+EOF
+  if compile_prog "-Werror" "-liscsi" ; then
+    libiscsi="yes"
+    LIBS="$LIBS -liscsi"
+  else
+    if test "$libiscsi" = "yes" ; then
+      feature_not_found "libiscsi"
+    fi
+    libiscsi="no"
+  fi
+fi
+
+
 ##########################################
 # Do we need librt
 cat > $TMPC <<EOF
@@ -2744,6 +2770,7 @@ echo "xfsctl support    $xfs"
 echo "nss used          $smartcard_nss"
 echo "usb net redir     $usb_redir"
 echo "OpenGL support    $opengl"
+echo "libiscsi support  $libiscsi"
 echo "build guest agent $guest_agent"
 
 if test "$sdl_too_old" = "yes"; then
@@ -3042,6 +3069,10 @@ if test "$opengl" = "yes" ; then
   echo "CONFIG_OPENGL=y" >> $config_host_mak
 fi
 
+if test "$libiscsi" = "yes" ; then
+  echo "CONFIG_LIBISCSI=y" >> $config_host_mak
+fi
+
 # XXX: suppress that
 if [ "$bsd" = "yes" ] ; then
   echo "CONFIG_BSD=y" >> $config_host_mak
diff --git a/trace-events b/trace-events
index 820b1d6ee6..a8880550b2 100644
--- a/trace-events
+++ b/trace-events
@@ -505,6 +505,12 @@ escc_sunkbd_event_out(int ch) "Translated keycode %2.2x"
 escc_kbd_command(int val) "Command %d"
 escc_sunmouse_event(int dx, int dy, int buttons_state) "dx=%d dy=%d buttons=%01x"
 
+# block/iscsi.c
+iscsi_aio_write10_cb(void *iscsi, int status, void *acb, int canceled) "iscsi %p status %d acb %p canceled %d"
+iscsi_aio_writev(void *iscsi, int64_t sector_num, int nb_sectors, void *opaque, void *acb) "iscsi %p sector_num %"PRId64" nb_sectors %d opaque %p acb %p"
+iscsi_aio_read10_cb(void *iscsi, int status, void *acb, int canceled) "iscsi %p status %d acb %p canceled %d"
+iscsi_aio_readv(void *iscsi, int64_t sector_num, int nb_sectors, void *opaque, void *acb) "iscsi %p sector_num %"PRId64" nb_sectors %d opaque %p acb %p"
+
 # hw/esp.c
 esp_raise_irq(void) "Raise IRQ"
 esp_lower_irq(void) "Lower IRQ"

From 0f5314a2c8eeb1b1e775b480e252053157069cf7 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Date: Wed, 26 Oct 2011 23:51:37 +1100
Subject: [PATCH 02/55] Documentation: Add iSCSI section

Add new section for device URL syntax for special files and describe the iSCSI
URL with examples

Signed-off-by: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 qemu-options.hx | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index 5d2a7765e8..424bae9cf0 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -148,6 +148,9 @@ Define a new drive. Valid options are:
 This option defines which disk image (@pxref{disk_images}) to use with
 this drive. If the filename contains comma, you must double it
 (for instance, "file=my,,file" to use file "my,file").
+
+Special files such as iSCSI devices can be specified using protocol
+specific URLs. See the section for "Device URL Syntax" for more information.
 @item if=@var{interface}
 This option defines on which type on interface the drive is connected.
 Available types are: ide, scsi, sd, mtd, floppy, pflash, virtio.
@@ -1718,6 +1721,45 @@ ETEXI
 
 DEFHEADING()
 
+STEXI
+DEFHEADING(Device URL Syntax:)
+
+In addition to using normal file images for the emulated storage devices,
+QEMU can also use networked resources such as iSCSI devices. These are
+specified using a special URL syntax.
+
+@table @option
+@item iSCSI
+iSCSI support allows QEMU to access iSCSI resources directly and use as
+images for the guest storage. Both disk and cdrom images are supported.
+
+Syntax for specifying iSCSI LUNs is
+``iscsi://<target-ip>[:<port>]/<target-iqn>/<lun>''
+
+Example (without authentication):
+@example
+qemu -cdrom iscsi://192.0.2.1/iqn.2001-04.com.example/2 \
+--drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1
+@end example
+
+Example (CHAP username/password via URL):
+@example
+qemu --drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1
+@end example
+
+Example (CHAP username/password via environment variables):
+@example
+LIBISCSI_CHAP_USERNAME="user" \
+LIBISCSI_CHAP_PASSWORD="password" \
+qemu --drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1
+@end example
+
+iSCSI support is an optional feature of QEMU and only available when
+compiled and linked against libiscsi.
+
+@end table
+ETEXI
+
 DEFHEADING(Bluetooth(R) options:)
 
 DEF("bt", HAS_ARG, QEMU_OPTION_bt, \

From c794b4e0fd9ef8d72b068614dcdb2418c105d5cc Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Wed, 26 Oct 2011 15:51:18 -0400
Subject: [PATCH 03/55] Teach block/vdi about "discarded" (no longer allocated)
 blocks

An entry in the VDI block map will hold an offset to the actual block if
the block is allocated, or one of two specially-interpreted values if
not allocated. Using VirtualBox terminology, value VDI_IMAGE_BLOCK_FREE
(0xffffffff) represents a never-allocated block (semantically arbitrary
content).  VDI_IMAGE_BLOCK_ZERO (0xfffffffe) represents a "discarded"
block (semantically zero-filled).  block/vdi knows only about
VDI_IMAGE_BLOCK_FREE.  Teach it about VDI_IMAGE_BLOCK_ZERO.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/vdi.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/block/vdi.c b/block/vdi.c
index 883046d5a2..523a6409c5 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -114,8 +114,13 @@ void uuid_unparse(const uuid_t uu, char *out);
  */
 #define VDI_TEXT "<<< QEMU VM Virtual Disk Image >>>\n"
 
-/* Unallocated blocks use this index (no need to convert endianness). */
-#define VDI_UNALLOCATED UINT32_MAX
+/* A never-allocated block; semantically arbitrary content. */
+#define VDI_UNALLOCATED 0xffffffffU
+
+/* A discarded (no longer allocated) block; semantically zero-filled. */
+#define VDI_DISCARDED   0xfffffffeU
+
+#define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)
 
 #if !defined(CONFIG_UUID)
 void uuid_generate(uuid_t out)
@@ -307,10 +312,10 @@ static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res)
     /* Check block map and value of blocks_allocated. */
     for (block = 0; block < s->header.blocks_in_image; block++) {
         uint32_t bmap_entry = le32_to_cpu(s->bmap[block]);
-        if (bmap_entry != VDI_UNALLOCATED) {
+        if (VDI_IS_ALLOCATED(bmap_entry)) {
             if (bmap_entry < s->header.blocks_in_image) {
                 blocks_allocated++;
-                if (bmap[bmap_entry] == VDI_UNALLOCATED) {
+                if (!VDI_IS_ALLOCATED(bmap[bmap_entry])) {
                     bmap[bmap_entry] = bmap_entry;
                 } else {
                     fprintf(stderr, "ERROR: block index %" PRIu32
@@ -472,7 +477,7 @@ static int vdi_is_allocated(BlockDriverState *bs, int64_t sector_num,
         n_sectors = nb_sectors;
     }
     *pnum = n_sectors;
-    return bmap_entry != VDI_UNALLOCATED;
+    return VDI_IS_ALLOCATED(bmap_entry);
 }
 
 static void vdi_aio_cancel(BlockDriverAIOCB *blockacb)
@@ -603,7 +608,7 @@ static void vdi_aio_read_cb(void *opaque, int ret)
     /* prepare next AIO request */
     acb->n_sectors = n_sectors;
     bmap_entry = le32_to_cpu(s->bmap[block_index]);
-    if (bmap_entry == VDI_UNALLOCATED) {
+    if (!VDI_IS_ALLOCATED(bmap_entry)) {
         /* Block not allocated, return zeros, no need to wait. */
         memset(acb->buf, 0, n_sectors * SECTOR_SIZE);
         ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
@@ -685,7 +690,7 @@ static void vdi_aio_write_cb(void *opaque, int ret)
         if (acb->header_modified) {
             VdiHeader *header = acb->block_buffer;
             logout("now writing modified header\n");
-            assert(acb->bmap_first != VDI_UNALLOCATED);
+            assert(VDI_IS_ALLOCATED(acb->bmap_first));
             *header = s->header;
             vdi_header_to_le(header);
             acb->header_modified = 0;
@@ -699,7 +704,7 @@ static void vdi_aio_write_cb(void *opaque, int ret)
                 goto done;
             }
             return;
-        } else if (acb->bmap_first != VDI_UNALLOCATED) {
+        } else if (VDI_IS_ALLOCATED(acb->bmap_first)) {
             /* One or more new blocks were allocated. */
             uint64_t offset;
             uint32_t bmap_first;
@@ -749,7 +754,7 @@ static void vdi_aio_write_cb(void *opaque, int ret)
     /* prepare next AIO request */
     acb->n_sectors = n_sectors;
     bmap_entry = le32_to_cpu(s->bmap[block_index]);
-    if (bmap_entry == VDI_UNALLOCATED) {
+    if (!VDI_IS_ALLOCATED(bmap_entry)) {
         /* Allocate new block and write to it. */
         uint64_t offset;
         uint8_t *block;

From 3789985f406ecb99f7d3e6521bb4310228f0577c Mon Sep 17 00:00:00 2001
From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
Date: Thu, 27 Oct 2011 14:58:57 +0800
Subject: [PATCH 04/55] qcow2: fix some errors and typo in qcow2.txt

Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 docs/specs/qcow2.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/specs/qcow2.txt b/docs/specs/qcow2.txt
index 8fc3cb2f1a..e792953c8f 100644
--- a/docs/specs/qcow2.txt
+++ b/docs/specs/qcow2.txt
@@ -108,8 +108,8 @@ as follows:
 
     refcount_block_entries = (cluster_size / sizeof(uint16_t))
 
-    refcount_block_index = (offset / cluster_size) % refcount_table_entries
-    refcount_table_index = (offset / cluster_size) / refcount_table_entries
+    refcount_block_index = (offset / cluster_size) % refcount_block_entries
+    refcount_table_index = (offset / cluster_size) / refcount_block_entries
 
     refcount_block = load_cluster(refcount_table[refcount_table_index]);
     return refcount_block[refcount_block_index];
@@ -211,7 +211,7 @@ switch the active L1 table, so that a different set of host clusters are
 exposed to the guest.
 
 When creating a snapshot, the L1 table should be copied and the refcount of all
-L2 tables and clusters reachable form this L1 table must be increased, so that
+L2 tables and clusters reachable from this L1 table must be increased, so that
 a write causes a COW and isn't visible in other snapshots.
 
 When loading a snapshot, bit 63 of all entries in the new active L1 table and

From 3574c608195c10f4ac48bb6a27d1c5e2c5f9ac3a Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 26 Oct 2011 11:02:11 +0200
Subject: [PATCH 05/55] block: Remove dead code

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/block.c b/block.c
index 70aab63e4b..f86984fd62 100644
--- a/block.c
+++ b/block.c
@@ -2028,11 +2028,7 @@ const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
 void bdrv_get_backing_filename(BlockDriverState *bs,
                                char *filename, int filename_size)
 {
-    if (!bs->backing_file) {
-        pstrcpy(filename, filename_size, "");
-    } else {
-        pstrcpy(filename, filename_size, bs->backing_file);
-    }
+    pstrcpy(filename, filename_size, bs->backing_file);
 }
 
 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,

From 2b5728164fcf5211bbae8d3c2fc6df62dd6b2295 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 26 Oct 2011 11:03:01 +0200
Subject: [PATCH 06/55] block: Fix bdrv_open use after free

tmp_filename was used outside the block it was defined in, i.e. after it went
out of scope. Move its declaration to the top level.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block.c b/block.c
index f86984fd62..d5ec0beaf0 100644
--- a/block.c
+++ b/block.c
@@ -571,6 +571,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
               BlockDriver *drv)
 {
     int ret;
+    char tmp_filename[PATH_MAX];
 
     if (flags & BDRV_O_SNAPSHOT) {
         BlockDriverState *bs1;
@@ -578,7 +579,6 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
         int is_protocol = 0;
         BlockDriver *bdrv_qcow2;
         QEMUOptionParameter *options;
-        char tmp_filename[PATH_MAX];
         char backing_filename[PATH_MAX];
 
         /* if snapshot, we create a temporary backing file and open it

From 64ebe71aa0e498d24e8c02b133192142fce3a0d0 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 26 Oct 2011 11:21:50 +0200
Subject: [PATCH 07/55] qcow: Fix bdrv_write_compressed error handling

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/qcow.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index ab36b2995c..35e21eb6b3 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -736,8 +736,6 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
         return -EINVAL;
 
     out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-    if (!out_buf)
-        return -1;
 
     /* best compression, small window, no zlib header */
     memset(&strm, 0, sizeof(strm));
@@ -745,8 +743,8 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
                        Z_DEFLATED, -12,
                        9, Z_DEFAULT_STRATEGY);
     if (ret != 0) {
-        g_free(out_buf);
-        return -1;
+        ret = -EINVAL;
+        goto fail;
     }
 
     strm.avail_in = s->cluster_size;
@@ -756,9 +754,9 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
 
     ret = deflate(&strm, Z_FINISH);
     if (ret != Z_STREAM_END && ret != Z_OK) {
-        g_free(out_buf);
         deflateEnd(&strm);
-        return -1;
+        ret = -EINVAL;
+        goto fail;
     }
     out_len = strm.next_out - out_buf;
 
@@ -766,19 +764,29 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
 
     if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
         /* could not compress: write normal cluster */
-        bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+        if (ret < 0) {
+            goto fail;
+        }
     } else {
         cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
                                             out_len, 0, 0);
+        if (cluster_offset == 0) {
+            ret = -EIO;
+            goto fail;
+        }
+
         cluster_offset &= s->cluster_offset_mask;
-        if (bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len) != out_len) {
-            g_free(out_buf);
-            return -1;
+        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+        if (ret < 0) {
+            goto fail;
         }
     }
 
+    ret = 0;
+fail:
     g_free(out_buf);
-    return 0;
+    return ret;
 }
 
 static coroutine_fn int qcow_co_flush(BlockDriverState *bs)

From fb60105d4942a26f571b1be92a8b9e7528d0c4d8 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 26 Oct 2011 11:52:47 +0200
Subject: [PATCH 08/55] ide: Fix off-by-one error in array index check

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/ide/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 280a117fe2..29305d35f4 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2039,7 +2039,7 @@ static int ide_drive_pio_post_load(void *opaque, int version_id)
 {
     IDEState *s = opaque;
 
-    if (s->end_transfer_fn_idx > ARRAY_SIZE(transfer_end_table)) {
+    if (s->end_transfer_fn_idx >= ARRAY_SIZE(transfer_end_table)) {
         return -EINVAL;
     }
     s->end_transfer_func = transfer_end_table[s->end_transfer_fn_idx];

From bac8d7b45dd855759204117e5a05452ede01bbab Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 26 Oct 2011 12:22:40 +0200
Subject: [PATCH 09/55] vmdk: Fix use of uninitialised value

In error cases, cid is never set.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/vmdk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 6be592ffd6..6cdbfb791e 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -208,7 +208,7 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
 {
     char desc[DESC_SIZE];
-    uint32_t cid;
+    uint32_t cid = 0;
     const char *p_name, *cid_str;
     size_t cid_str_size;
     BDRVVmdkState *s = bs->opaque;

From 99f1835d9bc744f98370254600530e66f32e6d81 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 26 Oct 2011 12:25:25 +0200
Subject: [PATCH 10/55] vmdk: Improve error handling

Return the right error values in some more places.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/vmdk.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 6cdbfb791e..fa0e8bd168 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -212,8 +212,10 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
     const char *p_name, *cid_str;
     size_t cid_str_size;
     BDRVVmdkState *s = bs->opaque;
+    int ret;
 
-    if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
+    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    if (ret < 0) {
         return 0;
     }
 
@@ -239,10 +241,12 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
     char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
     char *p_name, *tmp_str;
     BDRVVmdkState *s = bs->opaque;
+    int ret;
 
     memset(desc, 0, sizeof(desc));
-    if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
-        return -EIO;
+    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    if (ret < 0) {
+        return ret;
     }
 
     tmp_str = strstr(desc, "parentCID");
@@ -254,9 +258,11 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
         pstrcat(desc, sizeof(desc), tmp_desc);
     }
 
-    if (bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE) < 0) {
-        return -EIO;
+    ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
+    if (ret < 0) {
+        return ret;
     }
+
     return 0;
 }
 
@@ -1109,7 +1115,10 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
         /* update CID on the first write every time the virtual disk is
          * opened */
         if (!s->cid_updated) {
-            vmdk_write_cid(bs, time(NULL));
+            ret = vmdk_write_cid(bs, time(NULL));
+            if (ret < 0) {
+                return ret;
+            }
             s->cid_updated = true;
         }
     }

From 93897b9fd43548e9c15cf8bece2d9e5174b01fc7 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 26 Oct 2011 12:25:52 +0200
Subject: [PATCH 11/55] vmdk: Fix possible segfaults

Data we read from the disk isn't necessarily null terminated and may not
contain the string we're looking for. The code needs to be a bit more careful
here.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/vmdk.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index fa0e8bd168..8caaf0b522 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -227,6 +227,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
         cid_str_size = sizeof("CID");
     }
 
+    desc[DESC_SIZE - 1] = '\0';
     p_name = strstr(desc, cid_str);
     if (p_name != NULL) {
         p_name += cid_str_size;
@@ -243,13 +244,17 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
     BDRVVmdkState *s = bs->opaque;
     int ret;
 
-    memset(desc, 0, sizeof(desc));
     ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
     if (ret < 0) {
         return ret;
     }
 
+    desc[DESC_SIZE - 1] = '\0';
     tmp_str = strstr(desc, "parentCID");
+    if (tmp_str == NULL) {
+        return -EINVAL;
+    }
+
     pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
     p_name = strstr(desc, "CID");
     if (p_name != NULL) {

From 08ae330e1754bf03df1388065ea508dc089ab3fd Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Date: Thu, 27 Oct 2011 20:33:21 +1100
Subject: [PATCH 12/55] Documentation: Describe NBD URL syntax

This patch adds a short description of how to specify a NBD device
to QEMU.
Syntax for both TCP and Unix Domain Sockets are provided as well
as examples.

Signed-off-by: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 qemu-options.hx | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index 424bae9cf0..c55080c041 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1757,6 +1757,27 @@ qemu --drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1
 iSCSI support is an optional feature of QEMU and only available when
 compiled and linked against libiscsi.
 
+@item NBD
+QEMU supports NBD (Network Block Devices) both using TCP protocol as well
+as Unix Domain Sockets.
+
+Syntax for specifying a NBD device using TCP
+``nbd:<server-ip>:<port>[:exportname=<export>]''
+
+Syntax for specifying a NBD device using Unix Domain Sockets
+``nbd:unix:<domain-socket>[:exportname=<export>]''
+
+
+Example for TCP
+@example
+qemu --drive file=nbd:192.0.2.1:30000
+@end example
+
+Example for Unix Domain Sockets
+@example
+qemu --drive file=nbd:unix:/tmp/nbd-socket
+@end example
+
 @end table
 ETEXI
 

From c95de7e2c40da4235ceda6d134ae069dae80157e Mon Sep 17 00:00:00 2001
From: Dong Xu Wang <wdongxu@linux.vnet.ibm.com>
Date: Thu, 27 Oct 2011 17:22:28 +0800
Subject: [PATCH 13/55] block: fix qcow2_co_flush deadlock

If qcow2_cache_flush failed, s->lock will not be unlock.

Signed-off-by: Dong Xu Wang <wdongxu@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/qcow2.c b/block/qcow2.c
index a181932b67..ef057d31e0 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1113,11 +1113,13 @@ static int qcow2_co_flush(BlockDriverState *bs)
     qemu_co_mutex_lock(&s->lock);
     ret = qcow2_cache_flush(bs, s->l2_table_cache);
     if (ret < 0) {
+        qemu_co_mutex_unlock(&s->lock);
         return ret;
     }
 
     ret = qcow2_cache_flush(bs, s->refcount_block_cache);
     if (ret < 0) {
+        qemu_co_mutex_unlock(&s->lock);
         return ret;
     }
     qemu_co_mutex_unlock(&s->lock);

From b46578555c4bce64e3daba4591334aba2d12c156 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Thu, 27 Oct 2011 10:54:26 +0100
Subject: [PATCH 14/55] qemu-io: delete bs instead of leaking it

Using bdrv_close() is not enough to free a BlockDriverState.  Since we
explicitly create it with bdrv_new(), use bdrv_delete() to close and
delete it.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 qemu-io.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/qemu-io.c b/qemu-io.c
index c45a4138b2..5af887e057 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -1582,7 +1582,7 @@ static const cmdinfo_t map_cmd = {
 
 static int close_f(int argc, char **argv)
 {
-    bdrv_close(bs);
+    bdrv_delete(bs);
     bs = NULL;
     return 0;
 }
@@ -1611,6 +1611,7 @@ static int openfile(char *name, int flags, int growable)
 
         if (bdrv_open(bs, name, flags, NULL) < 0) {
             fprintf(stderr, "%s: can't open device %s\n", progname, name);
+            bdrv_delete(bs);
             bs = NULL;
             return 1;
         }
@@ -1834,7 +1835,7 @@ int main(int argc, char **argv)
     qemu_aio_flush();
 
     if (bs) {
-        bdrv_close(bs);
+        bdrv_delete(bs);
     }
     return 0;
 }

From e7c637967e6aad195b5f30cfd995913c9e0b4666 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Thu, 27 Oct 2011 10:54:27 +0100
Subject: [PATCH 15/55] block: set bs->read_only before .bdrv_open()

Several block drivers set bs->read_only in .bdrv_open() but
block.c:bdrv_open_common() clobbers its value.  Additionally, QED uses
bdrv_is_read_only() in .bdrv_open() to decide whether to perform
consistency checks.

The correct ordering is to initialize bs->read_only from the open flags
before calling .bdrv_open().  This way block drivers can override it if
necessary and can use bdrv_is_read_only() in .bdrv_open().

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index d5ec0beaf0..cb37086c8f 100644
--- a/block.c
+++ b/block.c
@@ -500,6 +500,8 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
         open_flags |= BDRV_O_RDWR;
     }
 
+    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
+
     /* Open the image, either directly or using a protocol */
     if (drv->bdrv_file_open) {
         ret = drv->bdrv_file_open(bs, filename, open_flags);
@@ -514,8 +516,6 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
         goto free_and_fail;
     }
 
-    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
-
     ret = refresh_total_sectors(bs, bs->total_sectors);
     if (ret < 0) {
         goto free_and_fail;

From 03f541bd6eacdc6c2893f72b975257c89cab2b74 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Thu, 27 Oct 2011 10:54:28 +0100
Subject: [PATCH 16/55] block: reinitialize across bdrv_close()/bdrv_open()

Several BlockDriverState fields are not being reinitialized across
bdrv_close()/bdrv_open().  Make sure they are reset to their default
values.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index cb37086c8f..96f3c3fe4d 100644
--- a/block.c
+++ b/block.c
@@ -472,10 +472,13 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
     bs->total_sectors = 0;
     bs->encrypted = 0;
     bs->valid_key = 0;
+    bs->sg = 0;
     bs->open_flags = flags;
+    bs->growable = 0;
     bs->buffer_alignment = 512;
 
     pstrcpy(bs->filename, sizeof(bs->filename), filename);
+    bs->backing_file[0] = '\0';
 
     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
         return -ENOTSUP;
@@ -484,8 +487,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
     bs->drv = drv;
     bs->opaque = g_malloc0(drv->instance_size);
 
-    if (flags & BDRV_O_CACHE_WB)
-        bs->enable_write_cache = 1;
+    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
 
     /*
      * Clear flags that are internal to the block layer before opening the

From d99902287c0c57219b3b33c713f14d55124b0002 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Date: Fri, 28 Oct 2011 20:13:39 +1100
Subject: [PATCH 17/55] Documentation: Add syntax for using sheepdog devices

Signed-off-by: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 qemu-options.hx | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index c55080c041..8be6f4b2fa 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1778,6 +1778,33 @@ Example for Unix Domain Sockets
 qemu --drive file=nbd:unix:/tmp/nbd-socket
 @end example
 
+@item Sheepdog
+Sheepdog is a distributed storage system for QEMU.
+QEMU supports using either local sheepdog devices or remote networked
+devices.
+
+Syntax for specifying a sheepdog device
+@table @list
+``sheepdog:<vdiname>''
+
+``sheepdog:<vdiname>:<snapid>''
+
+``sheepdog:<vdiname>:<tag>''
+
+``sheepdog:<host>:<port>:<vdiname>''
+
+``sheepdog:<host>:<port>:<vdiname>:<snapid>''
+
+``sheepdog:<host>:<port>:<vdiname>:<tag>''
+@end table
+
+Example
+@example
+qemu --drive file=sheepdog:192.0.2.1:30000:MyVirtualMachine
+@end example
+
+See also @url{http://http://www.osrg.net/sheepdog/}.
+
 @end table
 ETEXI
 

From 7e218df5181f1c26d52ef525b30a416c60a2160b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 19 Sep 2011 17:19:21 +0200
Subject: [PATCH 18/55] scsi: pass correct sense code for ENOMEDIUM

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 69095780ac..9c62569a17 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -231,6 +231,9 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type)
         bdrv_iostatus_set_err(s->bs, error);
     } else {
         switch (error) {
+        case ENOMEDIUM:
+            scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
+            break;
         case ENOMEM:
             scsi_check_condition(r, SENSE_CODE(TARGET_FAILURE));
             break;

From 67cc61e43077eeffc7c95a1bb25d05a12d051c67 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 13 Sep 2011 14:41:56 +0200
Subject: [PATCH 19/55] atapi/scsi: unify definitions for MMC

The definitions in ide/internal.h are duplicates, since ATAPI commands
actually come from SCSI.  Use the ones in scsi-defs.h and move the
missing ones there.  Two exceptions:

- MODE_PAGE_WRITE_PARMS conflicts with the "flexible disk geometry"
page in scsi-disk.c.  It is unused, so pick the latter.

- GPCMD_* is left in ide/internal.h, at least for now.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/ide/atapi.c    | 52 +++++++++++++++++-----------------
 hw/ide/core.c     |  4 +--
 hw/ide/internal.h | 71 +----------------------------------------------
 hw/ide/macio.c    |  2 +-
 hw/scsi-bus.c     |  2 +-
 hw/scsi-defs.h    | 66 +++++++++++++++++++++++++++++++++++++++++++
 hw/scsi-disk.c    |  8 +++---
 7 files changed, 101 insertions(+), 104 deletions(-)

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 3f909c3a99..be3b728ded 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -154,10 +154,10 @@ void ide_atapi_io_error(IDEState *s, int ret)
 {
     /* XXX: handle more errors */
     if (ret == -ENOMEDIUM) {
-        ide_atapi_cmd_error(s, SENSE_NOT_READY,
+        ide_atapi_cmd_error(s, NOT_READY,
                             ASC_MEDIUM_NOT_PRESENT);
     } else {
-        ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+        ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
                             ASC_LOGICAL_BLOCK_OOR);
     }
 }
@@ -282,7 +282,7 @@ static void ide_atapi_cmd_check_status(IDEState *s)
 #ifdef DEBUG_IDE_ATAPI
     printf("atapi_cmd_check_status\n");
 #endif
-    s->error = MC_ERR | (SENSE_UNIT_ATTENTION << 4);
+    s->error = MC_ERR | (UNIT_ATTENTION << 4);
     s->status = ERR_STAT;
     s->nsector = 0;
     ide_set_irq(s->bus);
@@ -354,7 +354,7 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
                                        ide_atapi_cmd_read_dma_cb, s);
     if (!s->bus->dma->aiocb) {
         /* Note: media not present is the most likely case */
-        ide_atapi_cmd_error(s, SENSE_NOT_READY,
+        ide_atapi_cmd_error(s, NOT_READY,
                             ASC_MEDIUM_NOT_PRESENT);
         goto eot;
     }
@@ -595,7 +595,7 @@ static void cmd_get_event_status_notification(IDEState *s,
     /* It is fine by the MMC spec to not support async mode operations */
     if (!(gesn_cdb->polled & 0x01)) { /* asynchronous mode */
         /* Only polling is supported, asynchronous mode is not. */
-        ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+        ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
                             ASC_INV_FIELD_IN_CMD_PACKET);
         return;
     }
@@ -643,8 +643,8 @@ static void cmd_request_sense(IDEState *s, uint8_t *buf)
     buf[7] = 10;
     buf[12] = s->asc;
 
-    if (s->sense_key == SENSE_UNIT_ATTENTION) {
-        s->sense_key = SENSE_NONE;
+    if (s->sense_key == UNIT_ATTENTION) {
+        s->sense_key = NO_SENSE;
     }
 
     ide_atapi_cmd_reply(s, 18, max_len);
@@ -676,7 +676,7 @@ static void cmd_get_configuration(IDEState *s, uint8_t *buf)
 
     /* only feature 0 is supported */
     if (buf[2] != 0 || buf[3] != 0) {
-        ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+        ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
                             ASC_INV_FIELD_IN_CMD_PACKET);
         return;
     }
@@ -733,7 +733,7 @@ static void cmd_mode_sense(IDEState *s, uint8_t *buf)
     switch(action) {
     case 0: /* current values */
         switch(code) {
-        case GPMODE_R_W_ERROR_PAGE: /* error recovery */
+        case MODE_PAGE_R_W_ERROR: /* error recovery */
             cpu_to_ube16(&buf[0], 16 + 6);
             buf[2] = 0x70;
             buf[3] = 0;
@@ -752,7 +752,7 @@ static void cmd_mode_sense(IDEState *s, uint8_t *buf)
             buf[15] = 0x00;
             ide_atapi_cmd_reply(s, 16, max_len);
             break;
-        case GPMODE_AUDIO_CTL_PAGE:
+        case MODE_PAGE_AUDIO_CTL:
             cpu_to_ube16(&buf[0], 24 + 6);
             buf[2] = 0x70;
             buf[3] = 0;
@@ -769,7 +769,7 @@ static void cmd_mode_sense(IDEState *s, uint8_t *buf)
 
             ide_atapi_cmd_reply(s, 24, max_len);
             break;
-        case GPMODE_CAPABILITIES_PAGE:
+        case MODE_PAGE_CAPABILITIES:
             cpu_to_ube16(&buf[0], 28 + 6);
             buf[2] = 0x70;
             buf[3] = 0;
@@ -813,14 +813,14 @@ static void cmd_mode_sense(IDEState *s, uint8_t *buf)
         goto error_cmd;
     default:
     case 3: /* saved values */
-        ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+        ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
                             ASC_SAVING_PARAMETERS_NOT_SUPPORTED);
         break;
     }
     return;
 
 error_cmd:
-    ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST, ASC_INV_FIELD_IN_CMD_PACKET);
+    ide_atapi_cmd_error(s, ILLEGAL_REQUEST, ASC_INV_FIELD_IN_CMD_PACKET);
 }
 
 static void cmd_test_unit_ready(IDEState *s, uint8_t *buf)
@@ -883,7 +883,7 @@ static void cmd_read_cd(IDEState *s, uint8_t* buf)
         ide_atapi_cmd_read(s, lba, nb_sectors, 2352);
         break;
     default:
-        ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+        ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
                             ASC_INV_FIELD_IN_CMD_PACKET);
         break;
     }
@@ -896,7 +896,7 @@ static void cmd_seek(IDEState *s, uint8_t* buf)
 
     lba = ube32_to_cpu(buf + 2);
     if (lba >= total_sectors) {
-        ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST, ASC_LOGICAL_BLOCK_OOR);
+        ide_atapi_cmd_error(s, ILLEGAL_REQUEST, ASC_LOGICAL_BLOCK_OOR);
         return;
     }
 
@@ -912,7 +912,7 @@ static void cmd_start_stop_unit(IDEState *s, uint8_t* buf)
     if (loej) {
         if (!start && !s->tray_open && s->tray_locked) {
             sense = bdrv_is_inserted(s->bs)
-                ? SENSE_NOT_READY : SENSE_ILLEGAL_REQUEST;
+                ? NOT_READY : ILLEGAL_REQUEST;
             ide_atapi_cmd_error(s, sense, ASC_MEDIA_REMOVAL_PREVENTED);
             return;
         }
@@ -971,7 +971,7 @@ static void cmd_read_toc_pma_atip(IDEState *s, uint8_t* buf)
         break;
     default:
     error_cmd:
-        ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+        ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
                             ASC_INV_FIELD_IN_CMD_PACKET);
     }
 }
@@ -997,11 +997,11 @@ static void cmd_read_dvd_structure(IDEState *s, uint8_t* buf)
 
     if (format < 0xff) {
         if (media_is_cd(s)) {
-            ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+            ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
                                 ASC_INCOMPATIBLE_FORMAT);
             return;
         } else if (!media_present(s)) {
-            ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+            ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
                                 ASC_INV_FIELD_IN_CMD_PACKET);
             return;
         }
@@ -1017,7 +1017,7 @@ static void cmd_read_dvd_structure(IDEState *s, uint8_t* buf)
                 ret = ide_dvd_read_structure(s, format, buf, buf);
 
                 if (ret < 0) {
-                    ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST, -ret);
+                    ide_atapi_cmd_error(s, ILLEGAL_REQUEST, -ret);
                 } else {
                     ide_atapi_cmd_reply(s, ret, max_len);
                 }
@@ -1034,7 +1034,7 @@ static void cmd_read_dvd_structure(IDEState *s, uint8_t* buf)
         case 0x90: /* TODO: List of recognized format layers */
         case 0xc0: /* TODO: Write protection status */
         default:
-            ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+            ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
                                 ASC_INV_FIELD_IN_CMD_PACKET);
             break;
     }
@@ -1106,7 +1106,7 @@ void ide_atapi_cmd(IDEState *s)
      * condition response unless a higher priority status, defined by the drive
      * here, is pending.
      */
-    if (s->sense_key == SENSE_UNIT_ATTENTION &&
+    if (s->sense_key == UNIT_ATTENTION &&
         !(atapi_cmd_table[s->io_buffer[0]].flags & ALLOW_UA)) {
         ide_atapi_cmd_check_status(s);
         return;
@@ -1119,10 +1119,10 @@ void ide_atapi_cmd(IDEState *s)
      * states rely on this behavior.
      */
     if (!s->tray_open && bdrv_is_inserted(s->bs) && s->cdrom_changed) {
-        ide_atapi_cmd_error(s, SENSE_NOT_READY, ASC_MEDIUM_NOT_PRESENT);
+        ide_atapi_cmd_error(s, NOT_READY, ASC_MEDIUM_NOT_PRESENT);
 
         s->cdrom_changed = 0;
-        s->sense_key = SENSE_UNIT_ATTENTION;
+        s->sense_key = UNIT_ATTENTION;
         s->asc = ASC_MEDIUM_MAY_HAVE_CHANGED;
         return;
     }
@@ -1131,7 +1131,7 @@ void ide_atapi_cmd(IDEState *s)
     if ((atapi_cmd_table[s->io_buffer[0]].flags & CHECK_READY) &&
         (!media_present(s) || !bdrv_is_inserted(s->bs)))
     {
-        ide_atapi_cmd_error(s, SENSE_NOT_READY, ASC_MEDIUM_NOT_PRESENT);
+        ide_atapi_cmd_error(s, NOT_READY, ASC_MEDIUM_NOT_PRESENT);
         return;
     }
 
@@ -1141,5 +1141,5 @@ void ide_atapi_cmd(IDEState *s)
         return;
     }
 
-    ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST, ASC_ILLEGAL_OPCODE);
+    ide_atapi_cmd_error(s, ILLEGAL_REQUEST, ASC_ILLEGAL_OPCODE);
 }
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 29305d35f4..9a2fd30607 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -799,7 +799,7 @@ static void ide_cd_change_cb(void *opaque, bool load)
      * First indicate to the guest that a CD has been removed.  That's
      * done on the next command the guest sends us.
      *
-     * Then we set SENSE_UNIT_ATTENTION, by which the guest will
+     * Then we set UNIT_ATTENTION, by which the guest will
      * detect a new CD in the drive.  See ide_atapi_cmd() for details.
      */
     s->cdrom_changed = 1;
@@ -2027,7 +2027,7 @@ static int ide_drive_post_load(void *opaque, int version_id)
     IDEState *s = opaque;
 
     if (version_id < 3) {
-        if (s->sense_key == SENSE_UNIT_ATTENTION &&
+        if (s->sense_key == UNIT_ATTENTION &&
             s->asc == ASC_MEDIUM_MAY_HAVE_CHANGED) {
             s->cdrom_changed = 1;
         }
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index c39dc058f4..00b28dfdbc 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -11,6 +11,7 @@
 #include "iorange.h"
 #include "dma.h"
 #include "sysemu.h"
+#include "hw/scsi-defs.h"
 
 /* debug IDE devices */
 //#define DEBUG_IDE
@@ -280,71 +281,6 @@ typedef struct IDEDMAOps IDEDMAOps;
 #define GPCMD_GET_MEDIA_STATUS		    0xda
 #define GPCMD_MODE_SENSE_6		    0x1a
 
-/* Mode page codes for mode sense/set */
-#define GPMODE_R_W_ERROR_PAGE		0x01
-#define GPMODE_WRITE_PARMS_PAGE		0x05
-#define GPMODE_AUDIO_CTL_PAGE		0x0e
-#define GPMODE_POWER_PAGE		0x1a
-#define GPMODE_FAULT_FAIL_PAGE		0x1c
-#define GPMODE_TO_PROTECT_PAGE		0x1d
-#define GPMODE_CAPABILITIES_PAGE	0x2a
-#define GPMODE_ALL_PAGES		0x3f
-/* Not in Mt. Fuji, but in ATAPI 2.6 -- depricated now in favor
- * of MODE_SENSE_POWER_PAGE */
-#define GPMODE_CDROM_PAGE		0x0d
-
-/*
- * Based on values from <linux/cdrom.h> but extending CD_MINS
- * to the maximum common size allowed by the Orange's Book ATIP
- *
- * 90 and 99 min CDs are also available but using them as the
- * upper limit reduces the effectiveness of the heuristic to
- * detect DVDs burned to less than 25% of their maximum capacity
- */
-
-/* Some generally useful CD-ROM information */
-#define CD_MINS                       80 /* max. minutes per CD */
-#define CD_SECS                       60 /* seconds per minute */
-#define CD_FRAMES                     75 /* frames per second */
-#define CD_FRAMESIZE                2048 /* bytes per frame, "cooked" mode */
-#define CD_MAX_BYTES       (CD_MINS * CD_SECS * CD_FRAMES * CD_FRAMESIZE)
-#define CD_MAX_SECTORS     (CD_MAX_BYTES / 512)
-
-/*
- * The MMC values are not IDE specific and might need to be moved
- * to a common header if they are also needed for the SCSI emulation
- */
-
-/* Profile list from MMC-6 revision 1 table 91 */
-#define MMC_PROFILE_NONE                0x0000
-#define MMC_PROFILE_CD_ROM              0x0008
-#define MMC_PROFILE_CD_R                0x0009
-#define MMC_PROFILE_CD_RW               0x000A
-#define MMC_PROFILE_DVD_ROM             0x0010
-#define MMC_PROFILE_DVD_R_SR            0x0011
-#define MMC_PROFILE_DVD_RAM             0x0012
-#define MMC_PROFILE_DVD_RW_RO           0x0013
-#define MMC_PROFILE_DVD_RW_SR           0x0014
-#define MMC_PROFILE_DVD_R_DL_SR         0x0015
-#define MMC_PROFILE_DVD_R_DL_JR         0x0016
-#define MMC_PROFILE_DVD_RW_DL           0x0017
-#define MMC_PROFILE_DVD_DDR             0x0018
-#define MMC_PROFILE_DVD_PLUS_RW         0x001A
-#define MMC_PROFILE_DVD_PLUS_R          0x001B
-#define MMC_PROFILE_DVD_PLUS_RW_DL      0x002A
-#define MMC_PROFILE_DVD_PLUS_R_DL       0x002B
-#define MMC_PROFILE_BD_ROM              0x0040
-#define MMC_PROFILE_BD_R_SRM            0x0041
-#define MMC_PROFILE_BD_R_RRM            0x0042
-#define MMC_PROFILE_BD_RE               0x0043
-#define MMC_PROFILE_HDDVD_ROM           0x0050
-#define MMC_PROFILE_HDDVD_R             0x0051
-#define MMC_PROFILE_HDDVD_RAM           0x0052
-#define MMC_PROFILE_HDDVD_RW            0x0053
-#define MMC_PROFILE_HDDVD_R_DL          0x0058
-#define MMC_PROFILE_HDDVD_RW_DL         0x005A
-#define MMC_PROFILE_INVALID             0xFFFF
-
 #define ATAPI_INT_REASON_CD             0x01 /* 0 = data transfer */
 #define ATAPI_INT_REASON_IO             0x02 /* 1 = transfer to the host */
 #define ATAPI_INT_REASON_REL            0x04
@@ -366,11 +302,6 @@ typedef struct IDEDMAOps IDEDMAOps;
 #define CFA_INVALID_ADDRESS     0x21
 #define CFA_ADDRESS_OVERFLOW    0x2f
 
-#define SENSE_NONE            0
-#define SENSE_NOT_READY       2
-#define SENSE_ILLEGAL_REQUEST 5
-#define SENSE_UNIT_ATTENTION  6
-
 #define SMART_READ_DATA       0xd0
 #define SMART_READ_THRESH     0xd1
 #define SMART_ATTR_AUTOSAVE   0xd2
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 37b8239b4d..70b33422d2 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -87,7 +87,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
     if (!m->aiocb) {
         qemu_sglist_destroy(&s->sg);
         /* Note: media not present is the most likely case */
-        ide_atapi_cmd_error(s, SENSE_NOT_READY,
+        ide_atapi_cmd_error(s, NOT_READY,
                             ASC_MEDIUM_NOT_PRESENT);
         goto done;
     }
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index aca65a16df..fc2f823177 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -800,7 +800,7 @@ const struct SCSISense sense_code_SAVING_PARAMS_NOT_SUPPORTED = {
 };
 
 /* Illegal request, Incompatible medium installed */
-const struct SCSISense sense_code_INCOMPATIBLE_MEDIUM = {
+const struct SCSISense sense_code_INCOMPATIBLE_FORMAT = {
     .key = ILLEGAL_REQUEST, .asc = 0x30, .ascq = 0x00
 };
 
diff --git a/hw/scsi-defs.h b/hw/scsi-defs.h
index bfe93922d4..6bb4df7d3b 100644
--- a/hw/scsi-defs.h
+++ b/hw/scsi-defs.h
@@ -188,3 +188,69 @@
 #define TYPE_INACTIVE       0x20
 #define TYPE_NO_LUN         0x7f
 
+/* Mode page codes for mode sense/set */
+#define MODE_PAGE_R_W_ERROR                   0x01
+#define MODE_PAGE_HD_GEOMETRY                 0x04
+#define MODE_PAGE_FLEXIBLE_DISK_GEOMETRY      0x05
+#define MODE_PAGE_CACHING                     0x08
+#define MODE_PAGE_AUDIO_CTL                   0x0e
+#define MODE_PAGE_POWER                       0x1a
+#define MODE_PAGE_FAULT_FAIL                  0x1c
+#define MODE_PAGE_TO_PROTECT                  0x1d
+#define MODE_PAGE_CAPABILITIES                0x2a
+#define MODE_PAGE_ALLS                        0x3f
+/* Not in Mt. Fuji, but in ATAPI 2.6 -- depricated now in favor
+ * of MODE_PAGE_SENSE_POWER */
+#define MODE_PAGE_CDROM                       0x0d
+
+/*
+ * Based on values from <linux/cdrom.h> but extending CD_MINS
+ * to the maximum common size allowed by the Orange's Book ATIP
+ *
+ * 90 and 99 min CDs are also available but using them as the
+ * upper limit reduces the effectiveness of the heuristic to
+ * detect DVDs burned to less than 25% of their maximum capacity
+ */
+
+/* Some generally useful CD-ROM information */
+#define CD_MINS                       80 /* max. minutes per CD */
+#define CD_SECS                       60 /* seconds per minute */
+#define CD_FRAMES                     75 /* frames per second */
+#define CD_FRAMESIZE                2048 /* bytes per frame, "cooked" mode */
+#define CD_MAX_BYTES       (CD_MINS * CD_SECS * CD_FRAMES * CD_FRAMESIZE)
+#define CD_MAX_SECTORS     (CD_MAX_BYTES / 512)
+
+/*
+ * The MMC values are not IDE specific and might need to be moved
+ * to a common header if they are also needed for the SCSI emulation
+ */
+
+/* Profile list from MMC-6 revision 1 table 91 */
+#define MMC_PROFILE_NONE                0x0000
+#define MMC_PROFILE_CD_ROM              0x0008
+#define MMC_PROFILE_CD_R                0x0009
+#define MMC_PROFILE_CD_RW               0x000A
+#define MMC_PROFILE_DVD_ROM             0x0010
+#define MMC_PROFILE_DVD_R_SR            0x0011
+#define MMC_PROFILE_DVD_RAM             0x0012
+#define MMC_PROFILE_DVD_RW_RO           0x0013
+#define MMC_PROFILE_DVD_RW_SR           0x0014
+#define MMC_PROFILE_DVD_R_DL_SR         0x0015
+#define MMC_PROFILE_DVD_R_DL_JR         0x0016
+#define MMC_PROFILE_DVD_RW_DL           0x0017
+#define MMC_PROFILE_DVD_DDR             0x0018
+#define MMC_PROFILE_DVD_PLUS_RW         0x001A
+#define MMC_PROFILE_DVD_PLUS_R          0x001B
+#define MMC_PROFILE_DVD_PLUS_RW_DL      0x002A
+#define MMC_PROFILE_DVD_PLUS_R_DL       0x002B
+#define MMC_PROFILE_BD_ROM              0x0040
+#define MMC_PROFILE_BD_R_SRM            0x0041
+#define MMC_PROFILE_BD_R_RRM            0x0042
+#define MMC_PROFILE_BD_RE               0x0043
+#define MMC_PROFILE_HDDVD_ROM           0x0050
+#define MMC_PROFILE_HDDVD_R             0x0051
+#define MMC_PROFILE_HDDVD_RAM           0x0052
+#define MMC_PROFILE_HDDVD_RW            0x0053
+#define MMC_PROFILE_HDDVD_R_DL          0x0058
+#define MMC_PROFILE_HDDVD_RW_DL         0x005A
+#define MMC_PROFILE_INVALID             0xFFFF
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 9c62569a17..a6ef06005e 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -576,7 +576,7 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
      * The buffer was already menset to zero by the caller of this function.
      */
     switch (page) {
-    case 4: /* Rigid disk device geometry page. */
+    case MODE_PAGE_HD_GEOMETRY:
         if (s->qdev.type == TYPE_ROM) {
             return -1;
         }
@@ -611,7 +611,7 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         p[21] = 5400 & 0xff;
         break;
 
-    case 5: /* Flexible disk device geometry page. */
+    case MODE_PAGE_FLEXIBLE_DISK_GEOMETRY:
         if (s->qdev.type == TYPE_ROM) {
             return -1;
         }
@@ -653,7 +653,7 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         p[29] = 5400 & 0xff;
         break;
 
-    case 8: /* Caching page.  */
+    case MODE_PAGE_CACHING:
         p[0] = 8;
         p[1] = 0x12;
         if (page_control == 1) { /* Changeable Values */
@@ -664,7 +664,7 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         }
         break;
 
-    case 0x2a: /* CD Capabilities and Mechanical Status page. */
+    case MODE_PAGE_CAPABILITIES:
         if (s->qdev.type != TYPE_ROM) {
             return -1;
         }

From f0f992e6509354e0e9d3bd46681bacb1ff53f62b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 13 Sep 2011 16:00:52 +0200
Subject: [PATCH 20/55] atapi: move GESN definitions to scsi-defs.h

As a complement to the previous patch, move definitions for GET EVENT
STATUS NOTIFICATION from the two functions to scsi-defs.h.

The NCR_* constants are just bit values corresponding to the ENC_*
values, with no offsets even, so keep just one copy.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/ide/atapi.c | 43 ++++++-------------------------------------
 hw/scsi-defs.h | 21 +++++++++++++++++++++
 2 files changed, 27 insertions(+), 37 deletions(-)

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index be3b728ded..347c38d5a5 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -505,19 +505,6 @@ static int ide_dvd_read_structure(IDEState *s, int format,
 static unsigned int event_status_media(IDEState *s,
                                        uint8_t *buf)
 {
-    enum media_event_code {
-        MEC_NO_CHANGE = 0,       /* Status unchanged */
-        MEC_EJECT_REQUESTED,     /* received a request from user to eject */
-        MEC_NEW_MEDIA,           /* new media inserted and ready for access */
-        MEC_MEDIA_REMOVAL,       /* only for media changers */
-        MEC_MEDIA_CHANGED,       /* only for media changers */
-        MEC_BG_FORMAT_COMPLETED, /* MRW or DVD+RW b/g format completed */
-        MEC_BG_FORMAT_RESTARTED, /* MRW or DVD+RW b/g format restarted */
-    };
-    enum media_status {
-        MS_TRAY_OPEN = 1,
-        MS_MEDIA_PRESENT = 2,
-    };
     uint8_t event_code, media_status;
 
     media_status = 0;
@@ -564,27 +551,6 @@ static void cmd_get_event_status_notification(IDEState *s,
         uint8_t notification_class;
         uint8_t supported_events;
     } QEMU_PACKED *gesn_event_header;
-
-    enum notification_class_request_type {
-        NCR_RESERVED1 = 1 << 0,
-        NCR_OPERATIONAL_CHANGE = 1 << 1,
-        NCR_POWER_MANAGEMENT = 1 << 2,
-        NCR_EXTERNAL_REQUEST = 1 << 3,
-        NCR_MEDIA = 1 << 4,
-        NCR_MULTI_HOST = 1 << 5,
-        NCR_DEVICE_BUSY = 1 << 6,
-        NCR_RESERVED2 = 1 << 7,
-    };
-    enum event_notification_class_field {
-        ENC_NO_EVENTS = 0,
-        ENC_OPERATIONAL_CHANGE,
-        ENC_POWER_MANAGEMENT,
-        ENC_EXTERNAL_REQUEST,
-        ENC_MEDIA,
-        ENC_MULTIPLE_HOSTS,
-        ENC_DEVICE_BUSY,
-        ENC_RESERVED,
-    };
     unsigned int max_len, used_len;
 
     gesn_cdb = (void *)packet;
@@ -606,8 +572,11 @@ static void cmd_get_event_status_notification(IDEState *s,
      * These are the supported events.
      *
      * We currently only support requests of the 'media' type.
+     * Notification class requests and supported event classes are bitmasks,
+     * but they are build from the same values as the "notification class"
+     * field.
      */
-    gesn_event_header->supported_events = NCR_MEDIA;
+    gesn_event_header->supported_events = 1 << GESN_MEDIA;
 
     /*
      * We use |= below to set the class field; other bits in this byte
@@ -621,8 +590,8 @@ static void cmd_get_event_status_notification(IDEState *s,
      * notification_class_request_type enum above specifies the
      * priority: upper elements are higher prio than lower ones.
      */
-    if (gesn_cdb->class & NCR_MEDIA) {
-        gesn_event_header->notification_class |= ENC_MEDIA;
+    if (gesn_cdb->class & (1 << GESN_MEDIA)) {
+        gesn_event_header->notification_class |= GESN_MEDIA;
         used_len = event_status_media(s, buf);
     } else {
         gesn_event_header->notification_class = 0x80; /* No event available */
diff --git a/hw/scsi-defs.h b/hw/scsi-defs.h
index 6bb4df7d3b..5e6c9b7acb 100644
--- a/hw/scsi-defs.h
+++ b/hw/scsi-defs.h
@@ -203,6 +203,27 @@
  * of MODE_PAGE_SENSE_POWER */
 #define MODE_PAGE_CDROM                       0x0d
 
+/* Event notification classes for GET EVENT STATUS NOTIFICATION */
+#define GESN_NO_EVENTS                0
+#define GESN_OPERATIONAL_CHANGE       1
+#define GESN_POWER_MANAGEMENT         2
+#define GESN_EXTERNAL_REQUEST         3
+#define GESN_MEDIA                    4
+#define GESN_MULTIPLE_HOSTS           5
+#define GESN_DEVICE_BUSY              6
+
+/* Event codes for MEDIA event status notification */
+#define MEC_NO_CHANGE                 0
+#define MEC_EJECT_REQUESTED           1
+#define MEC_NEW_MEDIA                 2
+#define MEC_MEDIA_REMOVAL             3 /* only for media changers */
+#define MEC_MEDIA_CHANGED             4 /* only for media changers */
+#define MEC_BG_FORMAT_COMPLETED       5 /* MRW or DVD+RW b/g format completed */
+#define MEC_BG_FORMAT_RESTARTED       6 /* MRW or DVD+RW b/g format restarted */
+
+#define MS_TRAY_OPEN                  1
+#define MS_MEDIA_PRESENT              2
+
 /*
  * Based on values from <linux/cdrom.h> but extending CD_MINS
  * to the maximum common size allowed by the Orange's Book ATIP

From af0e1ea2d32eafd837c51326368ce2d8f36e6871 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 17 Oct 2011 16:34:59 +0200
Subject: [PATCH 21/55] atapi: cleanup/fix mode sense results

The first two bytes (after the 8-byte ATAPI header) are the mode page
number and the number of bytes after the length field itself.  Make
this clear in the code.

The AUDIO_CTL page was filled with wrong values.  It is not anymore in
MMC, but at least keep the values sane.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/ide/atapi.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 347c38d5a5..e898da2dd6 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -711,8 +711,8 @@ static void cmd_mode_sense(IDEState *s, uint8_t *buf)
             buf[6] = 0;
             buf[7] = 0;
 
-            buf[8] = 0x01;
-            buf[9] = 0x06;
+            buf[8] = MODE_PAGE_R_W_ERROR;
+            buf[9] = 16 - 10;
             buf[10] = 0x00;
             buf[11] = 0x05;
             buf[12] = 0x00;
@@ -730,6 +730,8 @@ static void cmd_mode_sense(IDEState *s, uint8_t *buf)
             buf[6] = 0;
             buf[7] = 0;
 
+            buf[8] = MODE_PAGE_AUDIO_CTL;
+            buf[9] = 24 - 10;
             /* Fill with CDROM audio volume */
             buf[17] = 0;
             buf[19] = 0;
@@ -747,8 +749,8 @@ static void cmd_mode_sense(IDEState *s, uint8_t *buf)
             buf[6] = 0;
             buf[7] = 0;
 
-            buf[8] = 0x2a;
-            buf[9] = 0x12;
+            buf[8] = MODE_PAGE_CAPABILITIES;
+            buf[9] = 28 - 10;
             buf[10] = 0x00;
             buf[11] = 0x00;
 

From 3653d8c40e04d78eb6b5b069522daf7cc72d4f06 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 13 Sep 2011 16:19:53 +0200
Subject: [PATCH 22/55] scsi: notify the device when unit attention is reported

Reporting media change events via unit attention sense codes requires
a small state machine: first report "NO MEDIUM", then report "MEDIUM MAY
HAVE CHANGED".  Unfortunately there is no good hooking point for the
device to notice that its pending unit attention condition has been
reported.  This patch reworks the generic machinery to add one.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-bus.c | 32 +++++++++++++++++++++++++++-----
 hw/scsi.h     |  2 ++
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index fc2f823177..c190509505 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -295,6 +295,13 @@ static int32_t scsi_target_send_command(SCSIRequest *req, uint8_t *buf)
         r->len = scsi_device_get_sense(r->req.dev, r->buf,
                                        MIN(req->cmd.xfer, sizeof r->buf),
                                        (req->cmd.buf[1] & 1) == 0);
+        if (r->req.dev->sense_is_ua) {
+            if (r->req.dev->info->unit_attention_reported) {
+                r->req.dev->info->unit_attention_reported(req->dev);
+            }
+            r->req.dev->sense_len = 0;
+            r->req.dev->sense_is_ua = false;
+        }
         break;
     default:
         scsi_req_build_sense(req, SENSE_CODE(LUN_NOT_SUPPORTED));
@@ -383,7 +390,13 @@ SCSIRequest *scsi_req_new(SCSIDevice *d, uint32_t tag, uint32_t lun,
             (buf[0] != INQUIRY &&
              buf[0] != REPORT_LUNS &&
              buf[0] != GET_CONFIGURATION &&
-             buf[0] != GET_EVENT_STATUS_NOTIFICATION)) {
+             buf[0] != GET_EVENT_STATUS_NOTIFICATION &&
+
+             /*
+              * If we already have a pending unit attention condition,
+              * report this one before triggering another one.
+              */
+             !(buf[0] == REQUEST_SENSE && d->sense_is_ua))) {
             req = scsi_req_alloc(&reqops_unit_attention, d, tag, lun,
                                  hba_private);
         } else if (lun != d->lun ||
@@ -479,10 +492,15 @@ int scsi_req_get_sense(SCSIRequest *req, uint8_t *buf, int len)
      *
      * We assume UA_INTLCK_CTRL to be 00b for HBAs that support autosense, and
      * 10b for HBAs that do not support it (do not call scsi_req_get_sense).
-     * In the latter case, scsi_req_complete clears unit attention conditions
-     * after moving them to the device's sense buffer.
+     * Here we handle unit attention clearing for UA_INTLCK_CTRL == 00b.
      */
-    scsi_clear_unit_attention(req);
+    if (req->dev->sense_is_ua) {
+        if (req->dev->info->unit_attention_reported) {
+            req->dev->info->unit_attention_reported(req->dev);
+        }
+        req->dev->sense_len = 0;
+        req->dev->sense_is_ua = false;
+    }
     return ret;
 }
 
@@ -1082,8 +1100,12 @@ void scsi_req_complete(SCSIRequest *req, int status)
 
     if (req->sense_len) {
         memcpy(req->dev->sense, req->sense, req->sense_len);
+        req->dev->sense_len = req->sense_len;
+        req->dev->sense_is_ua = (req->ops == &reqops_unit_attention);
+    } else {
+        req->dev->sense_len = 0;
+        req->dev->sense_is_ua = false;
     }
-    req->dev->sense_len = req->sense_len;
 
     /*
      * Unit attention state is now stored in the device's sense buffer
diff --git a/hw/scsi.h b/hw/scsi.h
index e8dcabfa28..6d40b8e4f9 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -62,6 +62,7 @@ struct SCSIDevice
     BlockConf conf;
     SCSIDeviceInfo *info;
     SCSISense unit_attention;
+    bool sense_is_ua;
     uint8_t sense[SCSI_SENSE_BUF_SIZE];
     uint32_t sense_len;
     QTAILQ_HEAD(, SCSIRequest) requests;
@@ -92,6 +93,7 @@ struct SCSIDeviceInfo {
     void (*destroy)(SCSIDevice *s);
     SCSIRequest *(*alloc_req)(SCSIDevice *s, uint32_t tag, uint32_t lun,
                               void *hba_private);
+    void (*unit_attention_reported)(SCSIDevice *s);
     SCSIReqOps reqops;
 };
 

From 8a9c16f69e3ecbcc6e6bc068012986b87412c876 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 13 Sep 2011 16:26:06 +0200
Subject: [PATCH 23/55] scsi-disk: report media changed via unit attention
 sense codes

Building on the previous patch, this one adds a media change callback
to scsi-disk.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-bus.c  |  5 +++++
 hw/scsi-disk.c | 29 ++++++++++++++++++++++++++++-
 hw/scsi.h      |  2 ++
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index c190509505..867b1a8ebe 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -847,6 +847,11 @@ const struct SCSISense sense_code_RESET = {
     .key = UNIT_ATTENTION, .asc = 0x29, .ascq = 0x00
 };
 
+/* Unit attention, No medium */
+const struct SCSISense sense_code_UNIT_ATTENTION_NO_MEDIUM = {
+    .key = UNIT_ATTENTION, .asc = 0x3a, .ascq = 0x00
+};
+
 /* Unit attention, Medium may have changed */
 const struct SCSISense sense_code_MEDIUM_CHANGED = {
     .key = UNIT_ATTENTION, .asc = 0x28, .ascq = 0x00
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index a6ef06005e..880cb22eb9 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -71,6 +71,7 @@ struct SCSIDiskState
     int cluster_size;
     uint32_t removable;
     uint64_t max_lba;
+    bool media_changed;
     QEMUBH *bh;
     char *version;
     char *serial;
@@ -1198,7 +1199,21 @@ static void scsi_destroy(SCSIDevice *dev)
 
 static void scsi_cd_change_media_cb(void *opaque, bool load)
 {
-    ((SCSIDiskState *)opaque)->tray_open = !load;
+    SCSIDiskState *s = opaque;
+
+    /*
+     * When a CD gets changed, we have to report an ejected state and
+     * then a loaded state to guests so that they detect tray
+     * open/close and media change events.  Guests that do not use
+     * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close
+     * states rely on this behavior.
+     *
+     * media_changed governs the state machine used for unit attention
+     * report.  media_event is used by GET EVENT STATUS NOTIFICATION.
+     */
+    s->media_changed = load;
+    s->tray_open = !load;
+    s->qdev.unit_attention = SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM);
 }
 
 static bool scsi_cd_is_tray_open(void *opaque)
@@ -1217,6 +1232,15 @@ static const BlockDevOps scsi_cd_block_ops = {
     .is_medium_locked = scsi_cd_is_medium_locked,
 };
 
+static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
+{
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
+    if (s->media_changed) {
+        s->media_changed = false;
+        s->qdev.unit_attention = SENSE_CODE(MEDIUM_CHANGED);
+    }
+}
+
 static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
@@ -1329,6 +1353,7 @@ static SCSIDeviceInfo scsi_disk_info[] = {
         .init         = scsi_hd_initfn,
         .destroy      = scsi_destroy,
         .alloc_req    = scsi_new_request,
+        .unit_attention_reported = scsi_disk_unit_attention_reported,
         .qdev.props   = (Property[]) {
             DEFINE_SCSI_DISK_PROPERTIES(),
             DEFINE_PROP_BIT("removable", SCSIDiskState, removable, 0, false),
@@ -1343,6 +1368,7 @@ static SCSIDeviceInfo scsi_disk_info[] = {
         .init         = scsi_cd_initfn,
         .destroy      = scsi_destroy,
         .alloc_req    = scsi_new_request,
+        .unit_attention_reported = scsi_disk_unit_attention_reported,
         .qdev.props   = (Property[]) {
             DEFINE_SCSI_DISK_PROPERTIES(),
             DEFINE_PROP_END_OF_LIST(),
@@ -1356,6 +1382,7 @@ static SCSIDeviceInfo scsi_disk_info[] = {
         .init         = scsi_disk_initfn,
         .destroy      = scsi_destroy,
         .alloc_req    = scsi_new_request,
+        .unit_attention_reported = scsi_disk_unit_attention_reported,
         .qdev.props   = (Property[]) {
             DEFINE_SCSI_DISK_PROPERTIES(),
             DEFINE_PROP_BIT("removable", SCSIDiskState, removable, 0, false),
diff --git a/hw/scsi.h b/hw/scsi.h
index 6d40b8e4f9..7004aaaf5d 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -161,6 +161,8 @@ extern const struct SCSISense sense_code_IO_ERROR;
 extern const struct SCSISense sense_code_I_T_NEXUS_LOSS;
 /* Command aborted, Logical Unit failure */
 extern const struct SCSISense sense_code_LUN_FAILURE;
+/* LUN not ready, Medium not present */
+extern const struct SCSISense sense_code_UNIT_ATTENTION_NO_MEDIUM;
 /* Unit attention, Power on, reset or bus device reset occurred */
 extern const struct SCSISense sense_code_RESET;
 /* Unit attention, Medium may have changed*/

From f01b59319c70cb71e6e1003948e49e1aa32e3dae Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 17 Oct 2011 16:39:27 +0200
Subject: [PATCH 24/55] scsi-disk: fix coding style issues (braces)

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 71 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 46 insertions(+), 25 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 880cb22eb9..3a08848ba5 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -350,9 +350,9 @@ static void scsi_dma_restart_cb(void *opaque, int running, RunState state)
 {
     SCSIDiskState *s = opaque;
 
-    if (!running)
+    if (!running) {
         return;
-
+    }
     if (!s->bh) {
         s->bh = qemu_bh_new(scsi_dma_restart_bh, s);
         qemu_bh_schedule(s->bh);
@@ -403,8 +403,9 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
                     "buffer size %zd\n", req->cmd.xfer);
             pages = buflen++;
             outbuf[buflen++] = 0x00; // list of supported pages (this page)
-            if (s->serial)
+            if (s->serial) {
                 outbuf[buflen++] = 0x80; // unit serial number
+            }
             outbuf[buflen++] = 0x83; // device identification
             if (s->qdev.type == TYPE_DISK) {
                 outbuf[buflen++] = 0xb0; // block limits
@@ -423,10 +424,12 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             }
 
             l = strlen(s->serial);
-            if (l > req->cmd.xfer)
+            if (l > req->cmd.xfer) {
                 l = req->cmd.xfer;
-            if (l > 20)
+            }
+            if (l > 20) {
                 l = 20;
+            }
 
             DPRINTF("Inquiry EVPD[Serial number] "
                     "buffer size %zd\n", req->cmd.xfer);
@@ -441,8 +444,9 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             int max_len = 255 - 8;
             int id_len = strlen(bdrv_get_device_name(s->bs));
 
-            if (id_len > max_len)
+            if (id_len > max_len) {
                 id_len = max_len;
+            }
             DPRINTF("Inquiry EVPD[Device identification] "
                     "buffer size %zd\n", req->cmd.xfer);
 
@@ -525,9 +529,9 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
     }
 
     buflen = req->cmd.xfer;
-    if (buflen > SCSI_MAX_INQUIRY_LEN)
+    if (buflen > SCSI_MAX_INQUIRY_LEN) {
         buflen = SCSI_MAX_INQUIRY_LEN;
-
+    }
     memset(outbuf, 0, buflen);
 
     outbuf[0] = s->qdev.type & 0x1f;
@@ -749,8 +753,9 @@ static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
             outbuf[7] = 8; /* Block descriptor length  */
         }
         nb_sectors /= s->cluster_size;
-        if (nb_sectors > 0xffffff)
+        if (nb_sectors > 0xffffff) {
             nb_sectors = 0;
+        }
         p[0] = 0; /* media density code */
         p[1] = (nb_sectors >> 16) & 0xff;
         p[2] = (nb_sectors >> 8) & 0xff;
@@ -791,8 +796,9 @@ static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
         outbuf[0] = ((buflen - 2) >> 8) & 0xff;
         outbuf[1] = (buflen - 2) & 0xff;
     }
-    if (buflen > r->req.cmd.xfer)
+    if (buflen > r->req.cmd.xfer) {
         buflen = r->req.cmd.xfer;
+    }
     return buflen;
 }
 
@@ -826,8 +832,9 @@ static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
     default:
         return -1;
     }
-    if (toclen > req->cmd.xfer)
+    if (toclen > req->cmd.xfer) {
         toclen = req->cmd.xfer;
+    }
     return toclen;
 }
 
@@ -879,40 +886,48 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
     outbuf = r->iov.iov_base;
     switch (req->cmd.buf[0]) {
     case TEST_UNIT_READY:
-        if (s->tray_open || !bdrv_is_inserted(s->bs))
+        if (s->tray_open || !bdrv_is_inserted(s->bs)) {
             goto not_ready;
+        }
         break;
     case INQUIRY:
         buflen = scsi_disk_emulate_inquiry(req, outbuf);
-        if (buflen < 0)
+        if (buflen < 0) {
             goto illegal_request;
+        }
         break;
     case MODE_SENSE:
     case MODE_SENSE_10:
         buflen = scsi_disk_emulate_mode_sense(r, outbuf);
-        if (buflen < 0)
+        if (buflen < 0) {
             goto illegal_request;
+        }
         break;
     case READ_TOC:
         buflen = scsi_disk_emulate_read_toc(req, outbuf);
-        if (buflen < 0)
+        if (buflen < 0) {
             goto illegal_request;
+        }
         break;
     case RESERVE:
-        if (req->cmd.buf[1] & 1)
+        if (req->cmd.buf[1] & 1) {
             goto illegal_request;
+        }
         break;
     case RESERVE_10:
-        if (req->cmd.buf[1] & 3)
+        if (req->cmd.buf[1] & 3) {
             goto illegal_request;
+        }
         break;
     case RELEASE:
-        if (req->cmd.buf[1] & 1)
+        if (req->cmd.buf[1] & 1) {
             goto illegal_request;
+        }
         break;
     case RELEASE_10:
-        if (req->cmd.buf[1] & 3)
+        if (req->cmd.buf[1] & 3) {
             goto illegal_request;
+        }
         break;
     case START_STOP:
         if (scsi_disk_emulate_start_stop(r) < 0) {
@@ -927,16 +942,18 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         /* The normal LEN field for this command is zero.  */
         memset(outbuf, 0, 8);
         bdrv_get_geometry(s->bs, &nb_sectors);
-        if (!nb_sectors)
+        if (!nb_sectors) {
             goto not_ready;
+        }
         nb_sectors /= s->cluster_size;
         /* Returned value is the address of the last sector.  */
         nb_sectors--;
         /* Remember the new size for read/write sanity checking. */
         s->max_lba = nb_sectors;
         /* Clip to 2TB, instead of returning capacity modulo 2TB. */
-        if (nb_sectors > UINT32_MAX)
+        if (nb_sectors > UINT32_MAX) {
             nb_sectors = UINT32_MAX;
+        }
         outbuf[0] = (nb_sectors >> 24) & 0xff;
         outbuf[1] = (nb_sectors >> 16) & 0xff;
         outbuf[2] = (nb_sectors >> 8) & 0xff;
@@ -960,8 +977,9 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
             DPRINTF("SAI READ CAPACITY(16)\n");
             memset(outbuf, 0, req->cmd.xfer);
             bdrv_get_geometry(s->bs, &nb_sectors);
-            if (!nb_sectors)
+            if (!nb_sectors) {
                 goto not_ready;
+            }
             nb_sectors /= s->cluster_size;
             /* Returned value is the address of the last sector.  */
             nb_sectors--;
@@ -1078,8 +1096,9 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
     case READ_16:
         len = r->req.cmd.xfer / s->qdev.blocksize;
         DPRINTF("Read (sector %" PRId64 ", count %d)\n", r->req.cmd.lba, len);
-        if (r->req.cmd.lba > s->max_lba)
+        if (r->req.cmd.lba > s->max_lba) {
             goto illegal_lba;
+        }
         r->sector = r->req.cmd.lba * s->cluster_size;
         r->sector_count = len * s->cluster_size;
         break;
@@ -1094,8 +1113,9 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
         DPRINTF("Write %s(sector %" PRId64 ", count %d)\n",
                 (command & 0xe) == 0xe ? "And Verify " : "",
                 r->req.cmd.lba, len);
-        if (r->req.cmd.lba > s->max_lba)
+        if (r->req.cmd.lba > s->max_lba) {
             goto illegal_lba;
+        }
         r->sector = r->req.cmd.lba * s->cluster_size;
         r->sector_count = len * s->cluster_size;
         break;
@@ -1168,8 +1188,9 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
         return -len;
     } else {
-        if (!r->sector_count)
+        if (!r->sector_count) {
             r->sector_count = -1;
+        }
         return len;
     }
 }

From b6c251ab1739cd67817db003e9878f92c3feff2b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 13 Sep 2011 14:33:47 +0200
Subject: [PATCH 25/55] scsi-disk: add stubs for more MMC commands

This patch adds a few stub implementations for MMC commands to
scsi-disk, to be filled in later in the series.  It also adds to
scsi-defs.h constants for commands implemented by ide/atapi.c,
when missing.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-defs.h |  3 +++
 hw/scsi-disk.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/hw/scsi-defs.h b/hw/scsi-defs.h
index 5e6c9b7acb..d0a467aab7 100644
--- a/hw/scsi-defs.h
+++ b/hw/scsi-defs.h
@@ -113,6 +113,7 @@
 #define READ_12               0xa8
 #define WRITE_12              0xaa
 #define SERVICE_ACTION_IN_12  0xab
+#define READ_DVD_STRUCTURE    0xad
 #define WRITE_VERIFY_12       0xae
 #define VERIFY_12             0xaf
 #define SEARCH_HIGH_12        0xb0
@@ -122,6 +123,8 @@
 #define SEND_VOLUME_TAG       0xb6
 #define READ_DEFECT_DATA_12   0xb7
 #define SET_CD_SPEED          0xbb
+#define MECHANISM_STATUS      0xbd
+#define READ_CD               0xbe
 
 /*
  * SERVICE ACTION IN subcodes
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 3a08848ba5..bdb98ef5e2 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -567,6 +567,43 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
     return buflen;
 }
 
+static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
+                                   uint8_t *outbuf)
+{
+    scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
+    return -1;
+}
+
+static int scsi_get_event_status_notification(SCSIDiskState *s,
+                                              SCSIDiskReq *r, uint8_t *outbuf)
+{
+    scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
+    return -1;
+}
+
+static int scsi_get_configuration(SCSIDiskState *s, SCSIDiskReq *r,
+                                  uint8_t *outbuf)
+{
+    if (s->qdev.type != TYPE_ROM) {
+        return -1;
+    }
+    memset(outbuf, 0, 8);
+    /* ??? This should probably return much more information.  For now
+       just return the basic header indicating the CD-ROM profile.  */
+    outbuf[7] = 8; /* CD-ROM */
+    return 8;
+}
+
+static int scsi_emulate_mechanism_status(SCSIDiskState *s, uint8_t *outbuf)
+{
+    if (s->qdev.type != TYPE_ROM) {
+        return -1;
+    }
+    memset(outbuf, 0, 8);
+    outbuf[5] = 1; /* CD-ROM */
+    return 8;
+}
+
 static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
                            int page_control)
 {
@@ -964,12 +1001,29 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         outbuf[7] = 0;
         buflen = 8;
         break;
+    case MECHANISM_STATUS:
+        buflen = scsi_emulate_mechanism_status(s, outbuf);
+        if (buflen < 0) {
+            goto illegal_request;
+        }
+        break;
     case GET_CONFIGURATION:
-        memset(outbuf, 0, 8);
-        /* ??? This should probably return much more information.  For now
-           just return the basic header indicating the CD-ROM profile.  */
-        outbuf[7] = 8; // CD-ROM
-        buflen = 8;
+        buflen = scsi_get_configuration(s, r, outbuf);
+        if (buflen < 0) {
+            goto illegal_request;
+        }
+        break;
+    case GET_EVENT_STATUS_NOTIFICATION:
+        buflen = scsi_get_event_status_notification(s, r, outbuf);
+        if (buflen < 0) {
+            goto illegal_request;
+        }
+        break;
+    case READ_DVD_STRUCTURE:
+        buflen = scsi_read_dvd_structure(s, r, outbuf);
+        if (buflen < 0) {
+            goto illegal_request;
+        }
         break;
     case SERVICE_ACTION_IN_16:
         /* Service Action In subcommands. */
@@ -1073,7 +1127,10 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
     case ALLOW_MEDIUM_REMOVAL:
     case READ_CAPACITY_10:
     case READ_TOC:
+    case READ_DVD_STRUCTURE:
     case GET_CONFIGURATION:
+    case GET_EVENT_STATUS_NOTIFICATION:
+    case MECHANISM_STATUS:
     case SERVICE_ACTION_IN_16:
     case VERIFY_10:
         rc = scsi_disk_emulate_command(r);

From a8f4bbe2900f0ebdead032fb8da137777bddd925 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 13 Sep 2011 14:50:15 +0200
Subject: [PATCH 26/55] scsi-disk: store valid mode pages in a table

A small refactoring of the MODE SENSE implementation in scsi-disk.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index bdb98ef5e2..8f3ada6bb0 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -607,10 +607,23 @@ static int scsi_emulate_mechanism_status(SCSIDiskState *s, uint8_t *outbuf)
 static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
                            int page_control)
 {
+    static const int mode_sense_valid[0x3f] = {
+        [MODE_PAGE_HD_GEOMETRY]            = (1 << TYPE_DISK),
+        [MODE_PAGE_FLEXIBLE_DISK_GEOMETRY] = (1 << TYPE_DISK),
+        [MODE_PAGE_CACHING]                = (1 << TYPE_DISK) | (1 << TYPE_ROM),
+        [MODE_PAGE_CAPABILITIES]           = (1 << TYPE_ROM),
+    };
+
     BlockDriverState *bdrv = s->bs;
     int cylinders, heads, secs;
     uint8_t *p = *p_outbuf;
 
+    if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) {
+        return -1;
+    }
+
+    p[0] = page;
+
     /*
      * If Changeable Values are requested, a mask denoting those mode parameters
      * that are changeable shall be returned. As we currently don't support
@@ -619,10 +632,6 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
      */
     switch (page) {
     case MODE_PAGE_HD_GEOMETRY:
-        if (s->qdev.type == TYPE_ROM) {
-            return -1;
-        }
-        p[0] = 4;
         p[1] = 0x16;
         if (page_control == 1) { /* Changeable Values */
             break;
@@ -654,10 +663,6 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         break;
 
     case MODE_PAGE_FLEXIBLE_DISK_GEOMETRY:
-        if (s->qdev.type == TYPE_ROM) {
-            return -1;
-        }
-        p[0] = 5;
         p[1] = 0x1e;
         if (page_control == 1) { /* Changeable Values */
             break;
@@ -707,10 +712,6 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         break;
 
     case MODE_PAGE_CAPABILITIES:
-        if (s->qdev.type != TYPE_ROM) {
-            return -1;
-        }
-        p[0] = 0x2a;
         p[1] = 0x14;
         if (page_control == 1) { /* Changeable Values */
             break;

From a07c7dcd6f33b668747148ac28c0e147f958aa18 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 13 Sep 2011 15:08:22 +0200
Subject: [PATCH 27/55] atapi/scsi-disk: make mode page values coherent between
 the two

This patch adds to scsi-disk the missing mode page 0x01 for both disk
and CD-ROM drives, and mode page 0x0e for CD drives only.

A few offsets were wrong in atapi.c.  Also change the 2Ah mode page to
expose DVD media read capabilities in the IDE cdrom.  This lets you run
dvd+rw-mediainfo on the virtual DVD drives.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/ide/atapi.c | 14 +++++++-------
 hw/scsi-disk.c | 33 ++++++++++++++++++++++++---------
 2 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index e898da2dd6..90b6729692 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -751,7 +751,7 @@ static void cmd_mode_sense(IDEState *s, uint8_t *buf)
 
             buf[8] = MODE_PAGE_CAPABILITIES;
             buf[9] = 28 - 10;
-            buf[10] = 0x00;
+            buf[10] = 0x3b; /* read CDR/CDRW/DVDROM/DVDR/DVDRAM */
             buf[11] = 0x00;
 
             /* Claim PLAY_AUDIO capability (0x01) since some Linux
@@ -760,14 +760,14 @@ static void cmd_mode_sense(IDEState *s, uint8_t *buf)
             buf[13] = 3 << 5;
             buf[14] = (1 << 0) | (1 << 3) | (1 << 5);
             if (s->tray_locked) {
-                buf[6] |= 1 << 1;
+                buf[14] |= 1 << 1;
             }
-            buf[15] = 0x00;
-            cpu_to_ube16(&buf[16], 706);
-            buf[18] = 0;
+            buf[15] = 0x00; /* No volume & mute control, no changer */
+            cpu_to_ube16(&buf[16], 704); /* 4x read speed */
+            buf[18] = 0; /* Two volume levels */
             buf[19] = 2;
-            cpu_to_ube16(&buf[20], 512);
-            cpu_to_ube16(&buf[22], 706);
+            cpu_to_ube16(&buf[20], 512); /* 512k buffer */
+            cpu_to_ube16(&buf[22], 704); /* 4x read speed current */
             buf[24] = 0;
             buf[25] = 0;
             buf[26] = 0;
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 8f3ada6bb0..116e562bb8 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -611,6 +611,8 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         [MODE_PAGE_HD_GEOMETRY]            = (1 << TYPE_DISK),
         [MODE_PAGE_FLEXIBLE_DISK_GEOMETRY] = (1 << TYPE_DISK),
         [MODE_PAGE_CACHING]                = (1 << TYPE_DISK) | (1 << TYPE_ROM),
+        [MODE_PAGE_R_W_ERROR]              = (1 << TYPE_DISK) | (1 << TYPE_ROM),
+        [MODE_PAGE_AUDIO_CTL]              = (1 << TYPE_ROM),
         [MODE_PAGE_CAPABILITIES]           = (1 << TYPE_ROM),
     };
 
@@ -711,13 +713,26 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         }
         break;
 
+    case MODE_PAGE_R_W_ERROR:
+        p[1] = 10;
+        p[2] = 0x80; /* Automatic Write Reallocation Enabled */
+        if (s->qdev.type == TYPE_ROM) {
+            p[3] = 0x20; /* Read Retry Count */
+        }
+        break;
+
+    case MODE_PAGE_AUDIO_CTL:
+        p[1] = 14;
+        break;
+
     case MODE_PAGE_CAPABILITIES:
         p[1] = 0x14;
         if (page_control == 1) { /* Changeable Values */
             break;
         }
-        p[2] = 3; // CD-R & CD-RW read
-        p[3] = 0; // Writing not supported
+
+        p[2] = 0x3b; /* CD-R & CD-RW read */
+        p[3] = 0; /* Writing not supported */
         p[4] = 0x7f; /* Audio, composite, digital out,
                         mode 2 form 1&2, multi session */
         p[5] = 0xff; /* CD DA, DA accurate, RW supported,
@@ -727,17 +742,17 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         /* Locking supported, jumper present, eject, tray */
         p[7] = 0; /* no volume & mute control, no
                      changer */
-        p[8] = (50 * 176) >> 8; // 50x read speed
+        p[8] = (50 * 176) >> 8; /* 50x read speed */
         p[9] = (50 * 176) & 0xff;
-        p[10] = 0 >> 8; // No volume
-        p[11] = 0 & 0xff;
-        p[12] = 2048 >> 8; // 2M buffer
+        p[10] = 2 >> 8; /* Two volume levels */
+        p[11] = 2 & 0xff;
+        p[12] = 2048 >> 8; /* 2M buffer */
         p[13] = 2048 & 0xff;
-        p[14] = (16 * 176) >> 8; // 16x read speed current
+        p[14] = (16 * 176) >> 8; /* 16x read speed current */
         p[15] = (16 * 176) & 0xff;
-        p[18] = (16 * 176) >> 8; // 16x write speed
+        p[18] = (16 * 176) >> 8; /* 16x write speed */
         p[19] = (16 * 176) & 0xff;
-        p[20] = (16 * 176) >> 8; // 16x write speed current
+        p[20] = (16 * 176) >> 8; /* 16x write speed current */
         p[21] = (16 * 176) & 0xff;
         break;
 

From 430ee2f26f01f146f3757467b3f0b802ad309ff8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 13 Sep 2011 15:22:31 +0200
Subject: [PATCH 28/55] scsi-disk: support DVD profile in GET CONFIGURATION

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 50 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 116e562bb8..38b196bd13 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -567,6 +567,19 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
     return buflen;
 }
 
+static inline bool media_is_dvd(SCSIDiskState *s)
+{
+    uint64_t nb_sectors;
+    if (s->qdev.type != TYPE_ROM) {
+        return false;
+    }
+    if (!bdrv_is_inserted(s->bs)) {
+        return false;
+    }
+    bdrv_get_geometry(s->bs, &nb_sectors);
+    return nb_sectors > CD_MAX_SECTORS;
+}
+
 static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
                                    uint8_t *outbuf)
 {
@@ -581,17 +594,38 @@ static int scsi_get_event_status_notification(SCSIDiskState *s,
     return -1;
 }
 
-static int scsi_get_configuration(SCSIDiskState *s, SCSIDiskReq *r,
-                                  uint8_t *outbuf)
+static int scsi_get_configuration(SCSIDiskState *s, uint8_t *outbuf)
 {
+    int current;
+
     if (s->qdev.type != TYPE_ROM) {
         return -1;
     }
-    memset(outbuf, 0, 8);
-    /* ??? This should probably return much more information.  For now
-       just return the basic header indicating the CD-ROM profile.  */
-    outbuf[7] = 8; /* CD-ROM */
-    return 8;
+    current = media_is_dvd(s) ? MMC_PROFILE_DVD_ROM : MMC_PROFILE_CD_ROM;
+    memset(outbuf, 0, 40);
+    stl_be_p(&outbuf[0], 36); /* Bytes after the data length field */
+    stw_be_p(&outbuf[6], current);
+    /* outbuf[8] - outbuf[19]: Feature 0 - Profile list */
+    outbuf[10] = 0x03; /* persistent, current */
+    outbuf[11] = 8; /* two profiles */
+    stw_be_p(&outbuf[12], MMC_PROFILE_DVD_ROM);
+    outbuf[14] = (current == MMC_PROFILE_DVD_ROM);
+    stw_be_p(&outbuf[16], MMC_PROFILE_CD_ROM);
+    outbuf[18] = (current == MMC_PROFILE_CD_ROM);
+    /* outbuf[20] - outbuf[31]: Feature 1 - Core feature */
+    stw_be_p(&outbuf[20], 1);
+    outbuf[22] = 0x08 | 0x03; /* version 2, persistent, current */
+    outbuf[23] = 8;
+    stl_be_p(&outbuf[24], 1); /* SCSI */
+    outbuf[28] = 1; /* DBE = 1, mandatory */
+    /* outbuf[32] - outbuf[39]: Feature 3 - Removable media feature */
+    stw_be_p(&outbuf[32], 3);
+    outbuf[34] = 0x08 | 0x03; /* version 2, persistent, current */
+    outbuf[35] = 4;
+    outbuf[36] = 0x39; /* tray, load=1, eject=1, unlocked at powerup, lock=1 */
+    /* TODO: Random readable, CD read, DVD read, drive serial number,
+       power management */
+    return 40;
 }
 
 static int scsi_emulate_mechanism_status(SCSIDiskState *s, uint8_t *outbuf)
@@ -1024,7 +1058,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         }
         break;
     case GET_CONFIGURATION:
-        buflen = scsi_get_configuration(s, r, outbuf);
+        buflen = scsi_get_configuration(s, outbuf);
         if (buflen < 0) {
             goto illegal_request;
         }

From ceb792ef299a1bb0144c56fab45631d17c35e7f5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 13 Sep 2011 15:57:15 +0200
Subject: [PATCH 29/55] scsi-disk: support READ DVD STRUCTURE

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 101 insertions(+), 1 deletion(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 38b196bd13..8f8a94de74 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -580,10 +580,110 @@ static inline bool media_is_dvd(SCSIDiskState *s)
     return nb_sectors > CD_MAX_SECTORS;
 }
 
+static inline bool media_is_cd(SCSIDiskState *s)
+{
+    uint64_t nb_sectors;
+    if (s->qdev.type != TYPE_ROM) {
+        return false;
+    }
+    if (!bdrv_is_inserted(s->bs)) {
+        return false;
+    }
+    bdrv_get_geometry(s->bs, &nb_sectors);
+    return nb_sectors <= CD_MAX_SECTORS;
+}
+
 static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
                                    uint8_t *outbuf)
 {
-    scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
+    static const int rds_caps_size[5] = {
+        [0] = 2048 + 4,
+        [1] = 4 + 4,
+        [3] = 188 + 4,
+        [4] = 2048 + 4,
+    };
+
+    uint8_t media = r->req.cmd.buf[1];
+    uint8_t layer = r->req.cmd.buf[6];
+    uint8_t format = r->req.cmd.buf[7];
+    int size = -1;
+
+    if (s->qdev.type != TYPE_ROM) {
+        return -1;
+    }
+    if (media != 0) {
+        scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
+        return -1;
+    }
+
+    if (format != 0xff) {
+        if (s->tray_open || !bdrv_is_inserted(s->bs)) {
+            scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
+            return -1;
+        }
+        if (media_is_cd(s)) {
+            scsi_check_condition(r, SENSE_CODE(INCOMPATIBLE_FORMAT));
+            return -1;
+        }
+        if (format >= ARRAY_SIZE(rds_caps_size)) {
+            return -1;
+        }
+        size = rds_caps_size[format];
+        memset(outbuf, 0, size);
+    }
+
+    switch (format) {
+    case 0x00: {
+        /* Physical format information */
+        uint64_t nb_sectors;
+        if (layer != 0) {
+            goto fail;
+        }
+        bdrv_get_geometry(s->bs, &nb_sectors);
+
+        outbuf[4] = 1;   /* DVD-ROM, part version 1 */
+        outbuf[5] = 0xf; /* 120mm disc, minimum rate unspecified */
+        outbuf[6] = 1;   /* one layer, read-only (per MMC-2 spec) */
+        outbuf[7] = 0;   /* default densities */
+
+        stl_be_p(&outbuf[12], (nb_sectors >> 2) - 1); /* end sector */
+        stl_be_p(&outbuf[16], (nb_sectors >> 2) - 1); /* l0 end sector */
+        break;
+    }
+
+    case 0x01: /* DVD copyright information, all zeros */
+        break;
+
+    case 0x03: /* BCA information - invalid field for no BCA info */
+        return -1;
+
+    case 0x04: /* DVD disc manufacturing information, all zeros */
+        break;
+
+    case 0xff: { /* List capabilities */
+        int i;
+        size = 4;
+        for (i = 0; i < ARRAY_SIZE(rds_caps_size); i++) {
+            if (!rds_caps_size[i]) {
+                continue;
+            }
+            outbuf[size] = i;
+            outbuf[size + 1] = 0x40; /* Not writable, readable */
+            stw_be_p(&outbuf[size + 2], rds_caps_size[i]);
+            size += 4;
+        }
+        break;
+     }
+
+    default:
+        return -1;
+    }
+
+    /* Size of buffer, not including 2 byte size field */
+    stw_be_p(outbuf, size - 2);
+    return size;
+
+fail:
     return -1;
 }
 

From 3c2f7c12c2e19707cb4e28dd57180f7be3dd4950 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 13 Sep 2011 16:26:51 +0200
Subject: [PATCH 30/55] scsi-disk: report media changed via GET EVENT STATUS
 NOTIFICATION

This adds support for media change notification via the GET EVENT STATUS
NOTIFICATION command, used by Linux versions 2.6.38 and newer.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 8f8a94de74..dd2b6050e5 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -72,6 +72,7 @@ struct SCSIDiskState
     uint32_t removable;
     uint64_t max_lba;
     bool media_changed;
+    bool media_event;
     QEMUBH *bh;
     char *version;
     char *serial;
@@ -687,11 +688,58 @@ fail:
     return -1;
 }
 
-static int scsi_get_event_status_notification(SCSIDiskState *s,
-                                              SCSIDiskReq *r, uint8_t *outbuf)
+static int scsi_event_status_media(SCSIDiskState *s, uint8_t *outbuf)
 {
-    scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
-    return -1;
+    uint8_t event_code, media_status;
+
+    media_status = 0;
+    if (s->tray_open) {
+        media_status = MS_TRAY_OPEN;
+    } else if (bdrv_is_inserted(s->bs)) {
+        media_status = MS_MEDIA_PRESENT;
+    }
+
+    /* Event notification descriptor */
+    event_code = MEC_NO_CHANGE;
+    if (media_status != MS_TRAY_OPEN && s->media_event) {
+        event_code = MEC_NEW_MEDIA;
+        s->media_event = false;
+    }
+
+    outbuf[0] = event_code;
+    outbuf[1] = media_status;
+
+    /* These fields are reserved, just clear them. */
+    outbuf[2] = 0;
+    outbuf[3] = 0;
+    return 4;
+}
+
+static int scsi_get_event_status_notification(SCSIDiskState *s, SCSIDiskReq *r,
+                                              uint8_t *outbuf)
+{
+    int size;
+    uint8_t *buf = r->req.cmd.buf;
+    uint8_t notification_class_request = buf[4];
+    if (s->qdev.type != TYPE_ROM) {
+        return -1;
+    }
+    if ((buf[1] & 1) == 0) {
+        /* asynchronous */
+        return -1;
+    }
+
+    size = 4;
+    outbuf[0] = outbuf[1] = 0;
+    outbuf[3] = 1 << GESN_MEDIA; /* supported events */
+    if (notification_class_request & (1 << GESN_MEDIA)) {
+        outbuf[2] = GESN_MEDIA;
+        size += scsi_event_status_media(s, &outbuf[size]);
+    } else {
+        outbuf[2] = 0x80;
+    }
+    stw_be_p(outbuf, size - 4);
+    return size;
 }
 
 static int scsi_get_configuration(SCSIDiskState *s, uint8_t *outbuf)
@@ -1442,6 +1490,7 @@ static void scsi_cd_change_media_cb(void *opaque, bool load)
     s->media_changed = load;
     s->tray_open = !load;
     s->qdev.unit_attention = SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM);
+    s->media_event = true;
 }
 
 static bool scsi_cd_is_tray_open(void *opaque)

From afd4030c16d290e460cc93f8f9e353516b5451a2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 13 Aug 2011 15:44:45 +0200
Subject: [PATCH 31/55] scsi: move tcq/ndev to SCSIBusOps (now SCSIBusInfo)

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/esp.c         |  7 +++++--
 hw/lsi53c895a.c  |  9 ++++++---
 hw/scsi-bus.c    | 27 ++++++++++++---------------
 hw/scsi-disk.c   |  2 +-
 hw/scsi.h        | 11 +++++------
 hw/spapr_vscsi.c |  8 +++++---
 hw/usb-msd.c     |  7 +++++--
 7 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/hw/esp.c b/hw/esp.c
index 697c2c5b80..d3fb1c69a6 100644
--- a/hw/esp.c
+++ b/hw/esp.c
@@ -720,7 +720,10 @@ void esp_init(target_phys_addr_t espaddr, int it_shift,
     *dma_enable = qdev_get_gpio_in(dev, 1);
 }
 
-static const struct SCSIBusOps esp_scsi_ops = {
+static const struct SCSIBusInfo esp_scsi_info = {
+    .tcq = false,
+    .ndev = ESP_MAX_DEVS,
+
     .transfer_data = esp_transfer_data,
     .complete = esp_command_complete,
     .cancel = esp_request_cancelled
@@ -740,7 +743,7 @@ static int esp_init1(SysBusDevice *dev)
 
     qdev_init_gpio_in(&dev->qdev, esp_gpio_demux, 2);
 
-    scsi_bus_new(&s->bus, &dev->qdev, 0, ESP_MAX_DEVS, &esp_scsi_ops);
+    scsi_bus_new(&s->bus, &dev->qdev, &esp_scsi_info);
     return scsi_bus_legacy_handle_cmdline(&s->bus);
 }
 
diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c
index e077ec07cc..4eeb49632c 100644
--- a/hw/lsi53c895a.c
+++ b/hw/lsi53c895a.c
@@ -1686,7 +1686,7 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val)
                 DeviceState *dev;
                 int id;
 
-                for (id = 0; id < s->bus.ndev; id++) {
+                for (id = 0; id < LSI_MAX_DEVS; id++) {
                     if (s->bus.devs[id]) {
                         dev = &s->bus.devs[id]->qdev;
                         dev->info->reset(dev);
@@ -2091,7 +2091,10 @@ static int lsi_scsi_uninit(PCIDevice *d)
     return 0;
 }
 
-static const struct SCSIBusOps lsi_scsi_ops = {
+static const struct SCSIBusInfo lsi_scsi_info = {
+    .tcq = true,
+    .ndev = LSI_MAX_DEVS,
+
     .transfer_data = lsi_transfer_data,
     .complete = lsi_command_complete,
     .cancel = lsi_request_cancelled
@@ -2118,7 +2121,7 @@ static int lsi_scsi_init(PCIDevice *dev)
     pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->ram_io);
     QTAILQ_INIT(&s->queue);
 
-    scsi_bus_new(&s->bus, &dev->qdev, 1, LSI_MAX_DEVS, &lsi_scsi_ops);
+    scsi_bus_new(&s->bus, &dev->qdev, &lsi_scsi_info);
     if (!dev->qdev.hotplugged) {
         return scsi_bus_legacy_handle_cmdline(&s->bus);
     }
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 867b1a8ebe..d9d4e18d66 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -24,14 +24,11 @@ static struct BusInfo scsi_bus_info = {
 static int next_scsi_bus;
 
 /* Create a scsi bus, and attach devices to it.  */
-void scsi_bus_new(SCSIBus *bus, DeviceState *host, int tcq, int ndev,
-                  const SCSIBusOps *ops)
+void scsi_bus_new(SCSIBus *bus, DeviceState *host, const SCSIBusInfo *info)
 {
     qbus_create_inplace(&bus->qbus, &scsi_bus_info, host, NULL);
     bus->busnr = next_scsi_bus++;
-    bus->tcq = tcq;
-    bus->ndev = ndev;
-    bus->ops = ops;
+    bus->info = info;
     bus->qbus.allow_hotplug = 1;
 }
 
@@ -43,12 +40,12 @@ static int scsi_qdev_init(DeviceState *qdev, DeviceInfo *base)
     int rc = -1;
 
     if (dev->id == -1) {
-        for (dev->id = 0; dev->id < bus->ndev; dev->id++) {
+        for (dev->id = 0; dev->id < bus->info->ndev; dev->id++) {
             if (bus->devs[dev->id] == NULL)
                 break;
         }
     }
-    if (dev->id >= bus->ndev) {
+    if (dev->id >= bus->info->ndev) {
         error_report("bad scsi device id: %d", dev->id);
         goto err;
     }
@@ -120,7 +117,7 @@ int scsi_bus_legacy_handle_cmdline(SCSIBus *bus)
     int res = 0, unit;
 
     loc_push_none(&loc);
-    for (unit = 0; unit < bus->ndev; unit++) {
+    for (unit = 0; unit < bus->info->ndev; unit++) {
         dinfo = drive_get(IF_SCSI, bus->busnr, unit);
         if (dinfo == NULL) {
             continue;
@@ -265,7 +262,7 @@ static bool scsi_target_emulate_inquiry(SCSITargetReq *r)
         r->buf[2] = 5; /* Version */
         r->buf[3] = 2 | 0x10; /* HiSup, response data format */
         r->buf[4] = r->len - 5; /* Additional Length = (Len - 1) - 4 */
-        r->buf[7] = 0x10 | (r->req.bus->tcq ? 0x02 : 0); /* Sync, TCQ.  */
+        r->buf[7] = 0x10 | (r->req.bus->info->tcq ? 0x02 : 0); /* Sync, TCQ.  */
         memcpy(&r->buf[8], "QEMU    ", 8);
         memcpy(&r->buf[16], "QEMU TARGET     ", 16);
         strncpy((char *) &r->buf[32], QEMU_VERSION, 4);
@@ -1062,7 +1059,7 @@ void scsi_req_continue(SCSIRequest *req)
 void scsi_req_data(SCSIRequest *req, int len)
 {
     trace_scsi_req_data(req->dev->id, req->lun, req->tag, len);
-    req->bus->ops->transfer_data(req, len);
+    req->bus->info->transfer_data(req, len);
 }
 
 void scsi_req_print(SCSIRequest *req)
@@ -1121,7 +1118,7 @@ void scsi_req_complete(SCSIRequest *req, int status)
 
     scsi_req_ref(req);
     scsi_req_dequeue(req);
-    req->bus->ops->complete(req, req->status);
+    req->bus->info->complete(req, req->status);
     scsi_req_unref(req);
 }
 
@@ -1132,8 +1129,8 @@ void scsi_req_cancel(SCSIRequest *req)
     }
     scsi_req_ref(req);
     scsi_req_dequeue(req);
-    if (req->bus->ops->cancel) {
-        req->bus->ops->cancel(req);
+    if (req->bus->info->cancel) {
+        req->bus->info->cancel(req);
     }
     scsi_req_unref(req);
 }
@@ -1164,13 +1161,13 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev)
     char path[100];
     int i;
 
-    for (i = 0; i < bus->ndev; i++) {
+    for (i = 0; i < bus->info->ndev; i++) {
         if (bus->devs[i] == d) {
             break;
         }
     }
 
-    assert(i != bus->ndev);
+    assert(i != bus->info->ndev);
 
     snprintf(path, sizeof(path), "%s@%x", qdev_fw_name(dev), i);
 
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index dd2b6050e5..50fc3d6eaa 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -564,7 +564,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
     }
 
     /* Sync data transfer and TCQ.  */
-    outbuf[7] = 0x10 | (req->bus->tcq ? 0x02 : 0);
+    outbuf[7] = 0x10 | (req->bus->info->tcq ? 0x02 : 0);
     return buflen;
 }
 
diff --git a/hw/scsi.h b/hw/scsi.h
index 7004aaaf5d..b76c4ee0a4 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -9,7 +9,7 @@
 #define SCSI_CMD_BUF_SIZE     16
 
 typedef struct SCSIBus SCSIBus;
-typedef struct SCSIBusOps SCSIBusOps;
+typedef struct SCSIBusInfo SCSIBusInfo;
 typedef struct SCSICommand SCSICommand;
 typedef struct SCSIDevice SCSIDevice;
 typedef struct SCSIDeviceInfo SCSIDeviceInfo;
@@ -97,7 +97,8 @@ struct SCSIDeviceInfo {
     SCSIReqOps reqops;
 };
 
-struct SCSIBusOps {
+struct SCSIBusInfo {
+    int tcq, ndev;
     void (*transfer_data)(SCSIRequest *req, uint32_t arg);
     void (*complete)(SCSIRequest *req, uint32_t arg);
     void (*cancel)(SCSIRequest *req);
@@ -108,14 +109,12 @@ struct SCSIBus {
     int busnr;
 
     SCSISense unit_attention;
-    int tcq, ndev;
-    const SCSIBusOps *ops;
+    const SCSIBusInfo *info;
 
     SCSIDevice *devs[MAX_SCSI_DEVS];
 };
 
-void scsi_bus_new(SCSIBus *bus, DeviceState *host, int tcq, int ndev,
-                  const SCSIBusOps *ops);
+void scsi_bus_new(SCSIBus *bus, DeviceState *host, const SCSIBusInfo *info);
 void scsi_qdev_register(SCSIDeviceInfo *info);
 
 static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d)
diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
index e8426d7c5e..33ae9b4963 100644
--- a/hw/spapr_vscsi.c
+++ b/hw/spapr_vscsi.c
@@ -862,7 +862,10 @@ static int vscsi_do_crq(struct VIOsPAPRDevice *dev, uint8_t *crq_data)
     return 0;
 }
 
-static const struct SCSIBusOps vscsi_scsi_ops = {
+static const struct SCSIBusInfo vscsi_scsi_info = {
+    .tcq = true,
+    .ndev = VSCSI_REQ_LIMIT,
+
     .transfer_data = vscsi_transfer_data,
     .complete = vscsi_command_complete,
     .cancel = vscsi_request_cancelled
@@ -883,8 +886,7 @@ static int spapr_vscsi_init(VIOsPAPRDevice *dev)
 
     dev->crq.SendFunc = vscsi_do_crq;
 
-    scsi_bus_new(&s->bus, &dev->qdev, 1, VSCSI_REQ_LIMIT,
-                 &vscsi_scsi_ops);
+    scsi_bus_new(&s->bus, &dev->qdev, &vscsi_scsi_info);
     if (!dev->qdev.hotplugged) {
         scsi_bus_legacy_handle_cmdline(&s->bus);
     }
diff --git a/hw/usb-msd.c b/hw/usb-msd.c
index 08d2d2ac77..2c047fab39 100644
--- a/hw/usb-msd.c
+++ b/hw/usb-msd.c
@@ -495,7 +495,10 @@ static void usb_msd_password_cb(void *opaque, int err)
         qdev_unplug(&s->dev.qdev);
 }
 
-static const struct SCSIBusOps usb_msd_scsi_ops = {
+static const struct SCSIBusInfo usb_msd_scsi_info = {
+    .tcq = false,
+    .ndev = 1,
+
     .transfer_data = usb_msd_transfer_data,
     .complete = usb_msd_command_complete,
     .cancel = usb_msd_request_cancelled
@@ -536,7 +539,7 @@ static int usb_msd_initfn(USBDevice *dev)
     }
 
     usb_desc_init(dev);
-    scsi_bus_new(&s->bus, &s->dev.qdev, 0, 1, &usb_msd_scsi_ops);
+    scsi_bus_new(&s->bus, &s->dev.qdev, &usb_msd_scsi_info);
     s->scsi_dev = scsi_bus_legacy_add_drive(&s->bus, bs, 0, !!s->removable);
     if (!s->scsi_dev) {
         return -1;

From d8bb00d6d72eba317f78501434fc37db4968fa31 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 14 Sep 2011 09:28:06 +0200
Subject: [PATCH 32/55] qdev: switch children device list to QTAILQ

SCSI buses will need to read the children list first-to-last.  This
requires using a QTAILQ, because hell breaks loose if you just try
inserting at the tail (thus reversing the order of all existing
visits from last-to-first to first-to-tail).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/acpi_piix4.c      |  4 ++--
 hw/i2c.c             |  2 +-
 hw/intel-hda.c       |  6 +++---
 hw/qdev.c            | 24 ++++++++++++------------
 hw/qdev.h            |  4 ++--
 hw/s390-virtio-bus.c |  4 ++--
 hw/spapr_vio.c       |  6 +++---
 hw/ssi.c             |  6 +++---
 8 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index 29f0f76c35..d9075e6611 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -276,7 +276,7 @@ static void piix4_update_hotplug(PIIX4PMState *s)
 
     s->pci0_hotplug_enable = ~0;
 
-    QLIST_FOREACH_SAFE(qdev, &bus->children, sibling, next) {
+    QTAILQ_FOREACH_SAFE(qdev, &bus->children, sibling, next) {
         PCIDeviceInfo *info = container_of(qdev->info, PCIDeviceInfo, qdev);
         PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, qdev);
         int slot = PCI_SLOT(pdev->devfn);
@@ -486,7 +486,7 @@ static void pciej_write(void *opaque, uint32_t addr, uint32_t val)
     PCIDeviceInfo *info;
     int slot = ffs(val) - 1;
 
-    QLIST_FOREACH_SAFE(qdev, &bus->children, sibling, next) {
+    QTAILQ_FOREACH_SAFE(qdev, &bus->children, sibling, next) {
         dev = DO_UPCAST(PCIDevice, qdev, qdev);
         info = container_of(qdev->info, PCIDeviceInfo, qdev);
         if (PCI_SLOT(dev->devfn) == slot && !info->no_hotplug) {
diff --git a/hw/i2c.c b/hw/i2c.c
index 49b9ecb8b6..9bcf3e1d31 100644
--- a/hw/i2c.c
+++ b/hw/i2c.c
@@ -84,7 +84,7 @@ int i2c_start_transfer(i2c_bus *bus, uint8_t address, int recv)
     DeviceState *qdev;
     i2c_slave *slave = NULL;
 
-    QLIST_FOREACH(qdev, &bus->qbus.children, sibling) {
+    QTAILQ_FOREACH(qdev, &bus->qbus.children, sibling) {
         i2c_slave *candidate = I2C_SLAVE_FROM_QDEV(qdev);
         if (candidate->address == address) {
             slave = candidate;
diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index f97775c235..675b6591e9 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -86,7 +86,7 @@ HDACodecDevice *hda_codec_find(HDACodecBus *bus, uint32_t cad)
     DeviceState *qdev;
     HDACodecDevice *cdev;
 
-    QLIST_FOREACH(qdev, &bus->qbus.children, sibling) {
+    QTAILQ_FOREACH(qdev, &bus->qbus.children, sibling) {
         cdev = DO_UPCAST(HDACodecDevice, qdev, qdev);
         if (cdev->cad == cad) {
             return cdev;
@@ -490,7 +490,7 @@ static void intel_hda_notify_codecs(IntelHDAState *d, uint32_t stream, bool runn
     DeviceState *qdev;
     HDACodecDevice *cdev;
 
-    QLIST_FOREACH(qdev, &d->codecs.qbus.children, sibling) {
+    QTAILQ_FOREACH(qdev, &d->codecs.qbus.children, sibling) {
         cdev = DO_UPCAST(HDACodecDevice, qdev, qdev);
         if (cdev->info->stream) {
             cdev->info->stream(cdev, stream, running, output);
@@ -1114,7 +1114,7 @@ static void intel_hda_reset(DeviceState *dev)
     d->wall_base_ns = qemu_get_clock_ns(vm_clock);
 
     /* reset codecs */
-    QLIST_FOREACH(qdev, &d->codecs.qbus.children, sibling) {
+    QTAILQ_FOREACH(qdev, &d->codecs.qbus.children, sibling) {
         cdev = DO_UPCAST(HDACodecDevice, qdev, qdev);
         if (qdev->info->reset) {
             qdev->info->reset(qdev);
diff --git a/hw/qdev.c b/hw/qdev.c
index a223d41cd3..50976dd0c1 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -91,7 +91,7 @@ static DeviceState *qdev_create_from_info(BusState *bus, DeviceInfo *info)
     qdev_prop_set_defaults(dev, dev->info->props);
     qdev_prop_set_defaults(dev, dev->parent_bus->info->props);
     qdev_prop_set_globals(dev);
-    QLIST_INSERT_HEAD(&bus->children, dev, sibling);
+    QTAILQ_INSERT_HEAD(&bus->children, dev, sibling);
     if (qdev_hotplug) {
         assert(bus->allow_hotplug);
         dev->hotplugged = 1;
@@ -408,7 +408,7 @@ void qdev_free(DeviceState *dev)
         if (dev->opts)
             qemu_opts_del(dev->opts);
     }
-    QLIST_REMOVE(dev, sibling);
+    QTAILQ_REMOVE(&dev->parent_bus->children, dev, sibling);
     for (prop = dev->info->props; prop && prop->name; prop++) {
         if (prop->info->free) {
             prop->info->free(dev, prop);
@@ -510,7 +510,7 @@ int qbus_walk_children(BusState *bus, qdev_walkerfn *devfn,
         }
     }
 
-    QLIST_FOREACH(dev, &bus->children, sibling) {
+    QTAILQ_FOREACH(dev, &bus->children, sibling) {
         err = qdev_walk_children(dev, devfn, busfn, opaque);
         if (err < 0) {
             return err;
@@ -560,7 +560,7 @@ static BusState *qbus_find_recursive(BusState *bus, const char *name,
         return bus;
     }
 
-    QLIST_FOREACH(dev, &bus->children, sibling) {
+    QTAILQ_FOREACH(dev, &bus->children, sibling) {
         QLIST_FOREACH(child, &dev->child_bus, sibling) {
             ret = qbus_find_recursive(child, name, info);
             if (ret) {
@@ -576,7 +576,7 @@ DeviceState *qdev_find_recursive(BusState *bus, const char *id)
     DeviceState *dev, *ret;
     BusState *child;
 
-    QLIST_FOREACH(dev, &bus->children, sibling) {
+    QTAILQ_FOREACH(dev, &bus->children, sibling) {
         if (dev->id && strcmp(dev->id, id) == 0)
             return dev;
         QLIST_FOREACH(child, &dev->child_bus, sibling) {
@@ -609,7 +609,7 @@ static void qbus_list_dev(BusState *bus)
     const char *sep = " ";
 
     error_printf("devices at \"%s\":", bus->name);
-    QLIST_FOREACH(dev, &bus->children, sibling) {
+    QTAILQ_FOREACH(dev, &bus->children, sibling) {
         error_printf("%s\"%s\"", sep, dev->info->name);
         if (dev->id)
             error_printf("/\"%s\"", dev->id);
@@ -640,17 +640,17 @@ static DeviceState *qbus_find_dev(BusState *bus, char *elem)
      *   (2) driver name
      *   (3) driver alias, if present
      */
-    QLIST_FOREACH(dev, &bus->children, sibling) {
+    QTAILQ_FOREACH(dev, &bus->children, sibling) {
         if (dev->id  &&  strcmp(dev->id, elem) == 0) {
             return dev;
         }
     }
-    QLIST_FOREACH(dev, &bus->children, sibling) {
+    QTAILQ_FOREACH(dev, &bus->children, sibling) {
         if (strcmp(dev->info->name, elem) == 0) {
             return dev;
         }
     }
-    QLIST_FOREACH(dev, &bus->children, sibling) {
+    QTAILQ_FOREACH(dev, &bus->children, sibling) {
         if (dev->info->alias && strcmp(dev->info->alias, elem) == 0) {
             return dev;
         }
@@ -774,7 +774,7 @@ void qbus_create_inplace(BusState *bus, BusInfo *info,
         bus->name = buf;
     }
 
-    QLIST_INIT(&bus->children);
+    QTAILQ_INIT(&bus->children);
     if (parent) {
         QLIST_INSERT_HEAD(&parent->child_bus, bus, sibling);
         parent->num_child_bus++;
@@ -809,7 +809,7 @@ void qbus_free(BusState *bus)
 {
     DeviceState *dev;
 
-    while ((dev = QLIST_FIRST(&bus->children)) != NULL) {
+    while ((dev = QTAILQ_FIRST(&bus->children)) != NULL) {
         qdev_free(dev);
     }
     if (bus->parent) {
@@ -878,7 +878,7 @@ static void qbus_print(Monitor *mon, BusState *bus, int indent)
     qdev_printf("bus: %s\n", bus->name);
     indent += 2;
     qdev_printf("type %s\n", bus->info->name);
-    QLIST_FOREACH(dev, &bus->children, sibling) {
+    QTAILQ_FOREACH(dev, &bus->children, sibling) {
         qdev_print(mon, dev, indent);
     }
 }
diff --git a/hw/qdev.h b/hw/qdev.h
index aa7ae36187..a7cc48b4dd 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -42,7 +42,7 @@ struct DeviceState {
     qemu_irq *gpio_in;
     QLIST_HEAD(, BusState) child_bus;
     int num_child_bus;
-    QLIST_ENTRY(DeviceState) sibling;
+    QTAILQ_ENTRY(DeviceState) sibling;
     int instance_id_alias;
     int alias_required_for_version;
 };
@@ -73,7 +73,7 @@ struct BusState {
     const char *name;
     int allow_hotplug;
     int qdev_allocated;
-    QLIST_HEAD(, DeviceState) children;
+    QTAILQ_HEAD(, DeviceState) children;
     QLIST_ENTRY(BusState) sibling;
 };
 
diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index e2f3e32aca..0ce6406b6d 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -274,7 +274,7 @@ VirtIOS390Device *s390_virtio_bus_find_vring(VirtIOS390Bus *bus,
     DeviceState *dev;
     int i;
 
-    QLIST_FOREACH(dev, &bus->bus.children, sibling) {
+    QTAILQ_FOREACH(dev, &bus->bus.children, sibling) {
         _dev = (VirtIOS390Device *)dev;
         for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
             if (!virtio_queue_get_addr(_dev->vdev, i))
@@ -297,7 +297,7 @@ VirtIOS390Device *s390_virtio_bus_find_mem(VirtIOS390Bus *bus, ram_addr_t mem)
     VirtIOS390Device *_dev;
     DeviceState *dev;
 
-    QLIST_FOREACH(dev, &bus->bus.children, sibling) {
+    QTAILQ_FOREACH(dev, &bus->bus.children, sibling) {
         _dev = (VirtIOS390Device *)dev;
         if (_dev->dev_offs == mem) {
             return _dev;
diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
index 35818e18f1..977603f81e 100644
--- a/hw/spapr_vio.c
+++ b/hw/spapr_vio.c
@@ -63,7 +63,7 @@ VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg)
     DeviceState *qdev;
     VIOsPAPRDevice *dev = NULL;
 
-    QLIST_FOREACH(qdev, &bus->bus.children, sibling) {
+    QTAILQ_FOREACH(qdev, &bus->bus.children, sibling) {
         dev = (VIOsPAPRDevice *)qdev;
         if (dev->reg == reg) {
             break;
@@ -588,7 +588,7 @@ static void rtas_quiesce(sPAPREnvironment *spapr, uint32_t token,
         return;
     }
 
-    QLIST_FOREACH(qdev, &bus->bus.children, sibling) {
+    QTAILQ_FOREACH(qdev, &bus->bus.children, sibling) {
         dev = (VIOsPAPRDevice *)qdev;
         spapr_vio_quiesce_one(dev);
     }
@@ -726,7 +726,7 @@ int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt)
     DeviceState *qdev;
     int ret = 0;
 
-    QLIST_FOREACH(qdev, &bus->bus.children, sibling) {
+    QTAILQ_FOREACH(qdev, &bus->bus.children, sibling) {
         VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev;
 
         ret = vio_make_devnode(dev, fdt);
diff --git a/hw/ssi.c b/hw/ssi.c
index 3f4c5f9f06..9842fe7472 100644
--- a/hw/ssi.c
+++ b/hw/ssi.c
@@ -25,8 +25,8 @@ static int ssi_slave_init(DeviceState *dev, DeviceInfo *base_info)
     SSIBus *bus;
 
     bus = FROM_QBUS(SSIBus, qdev_get_parent_bus(dev));
-    if (QLIST_FIRST(&bus->qbus.children) != dev
-        || QLIST_NEXT(dev, sibling) != NULL) {
+    if (QTAILQ_FIRST(&bus->qbus.children) != dev
+        || QTAILQ_NEXT(dev, sibling) != NULL) {
         hw_error("Too many devices on SSI bus");
     }
 
@@ -61,7 +61,7 @@ uint32_t ssi_transfer(SSIBus *bus, uint32_t val)
 {
     DeviceState *dev;
     SSISlave *slave;
-    dev = QLIST_FIRST(&bus->qbus.children);
+    dev = QTAILQ_FIRST(&bus->qbus.children);
     if (!dev) {
         return 0;
     }

From f48a7a6e35bb6d50573cfb42f13878c593fb6c0c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 28 Jul 2011 18:02:13 +0200
Subject: [PATCH 33/55] scsi: remove devs array from SCSIBus

Change the devs array into a linked list, and add a scsi_device_find
function to navigate the children list instead.  This lets the SCSI
bus use more complex addressing, and HBAs can talk to the correct device
when there are multiple LUNs per target.

scsi_device_find may return another LUN on the same target if none is
found that matches exactly.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/esp.c         |  8 ++++---
 hw/lsi53c895a.c  | 22 ++++++-------------
 hw/qdev.h        |  2 +-
 hw/scsi-bus.c    | 55 +++++++++++++++++++++++++++---------------------
 hw/scsi.h        |  3 +--
 hw/spapr_vscsi.c | 14 ++++++------
 6 files changed, 51 insertions(+), 53 deletions(-)

diff --git a/hw/esp.c b/hw/esp.c
index d3fb1c69a6..aad290f52c 100644
--- a/hw/esp.c
+++ b/hw/esp.c
@@ -217,7 +217,8 @@ static uint32_t get_cmd(ESPState *s, uint8_t *buf)
         s->async_len = 0;
     }
 
-    if (target >= ESP_MAX_DEVS || !s->bus.devs[target]) {
+    s->current_dev = scsi_device_find(&s->bus, target, 0);
+    if (!s->current_dev) {
         // No such drive
         s->rregs[ESP_RSTAT] = 0;
         s->rregs[ESP_RINTR] = INTR_DC;
@@ -225,7 +226,6 @@ static uint32_t get_cmd(ESPState *s, uint8_t *buf)
         esp_raise_irq(s);
         return 0;
     }
-    s->current_dev = s->bus.devs[target];
     return dmalen;
 }
 
@@ -233,10 +233,12 @@ static void do_busid_cmd(ESPState *s, uint8_t *buf, uint8_t busid)
 {
     int32_t datalen;
     int lun;
+    SCSIDevice *current_lun;
 
     trace_esp_do_busid_cmd(busid);
     lun = busid & 7;
-    s->current_req = scsi_req_new(s->current_dev, 0, lun, buf, NULL);
+    current_lun = scsi_device_find(&s->bus, s->current_dev->id, lun);
+    s->current_req = scsi_req_new(current_lun, 0, lun, buf, NULL);
     datalen = scsi_req_enqueue(s->current_req);
     s->ti_size = datalen;
     if (datalen != 0) {
diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c
index 4eeb49632c..c15f167c84 100644
--- a/hw/lsi53c895a.c
+++ b/hw/lsi53c895a.c
@@ -531,7 +531,7 @@ static void lsi_bad_selection(LSIState *s, uint32_t id)
 /* Initiate a SCSI layer data transfer.  */
 static void lsi_do_dma(LSIState *s, int out)
 {
-    uint32_t count, id;
+    uint32_t count;
     target_phys_addr_t addr;
     SCSIDevice *dev;
 
@@ -542,12 +542,8 @@ static void lsi_do_dma(LSIState *s, int out)
         return;
     }
 
-    id = (s->current->tag >> 8) & 0xf;
-    dev = s->bus.devs[id];
-    if (!dev) {
-        lsi_bad_selection(s, id);
-        return;
-    }
+    dev = s->current->req->dev;
+    assert(dev);
 
     count = s->dbc;
     if (count > s->current->dma_len)
@@ -771,7 +767,7 @@ static void lsi_do_command(LSIState *s)
     s->command_complete = 0;
 
     id = (s->select_tag >> 8) & 0xf;
-    dev = s->bus.devs[id];
+    dev = scsi_device_find(&s->bus, id, s->current_lun);
     if (!dev) {
         lsi_bad_selection(s, id);
         return;
@@ -1202,7 +1198,7 @@ again:
                 }
                 s->sstat0 |= LSI_SSTAT0_WOA;
                 s->scntl1 &= ~LSI_SCNTL1_IARB;
-                if (id >= LSI_MAX_DEVS || !s->bus.devs[id]) {
+                if (!scsi_device_find(&s->bus, id, 0)) {
                     lsi_bad_selection(s, id);
                     break;
                 }
@@ -1684,13 +1680,9 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val)
         if (val & LSI_SCNTL1_RST) {
             if (!(s->sstat0 & LSI_SSTAT0_RST)) {
                 DeviceState *dev;
-                int id;
 
-                for (id = 0; id < LSI_MAX_DEVS; id++) {
-                    if (s->bus.devs[id]) {
-                        dev = &s->bus.devs[id]->qdev;
-                        dev->info->reset(dev);
-                    }
+                QTAILQ_FOREACH(dev, &s->bus.qbus.children, sibling) {
+                    dev->info->reset(dev);
                 }
                 s->sstat0 |= LSI_SSTAT0_RST;
                 lsi_script_scsi_interrupt(s, LSI_SIST0_RST, 0);
diff --git a/hw/qdev.h b/hw/qdev.h
index a7cc48b4dd..36a4198c89 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -73,7 +73,7 @@ struct BusState {
     const char *name;
     int allow_hotplug;
     int qdev_allocated;
-    QTAILQ_HEAD(, DeviceState) children;
+    QTAILQ_HEAD(ChildrenHead, DeviceState) children;
     QLIST_ENTRY(BusState) sibling;
 };
 
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index d9d4e18d66..7104e98d59 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -37,12 +37,16 @@ static int scsi_qdev_init(DeviceState *qdev, DeviceInfo *base)
     SCSIDevice *dev = DO_UPCAST(SCSIDevice, qdev, qdev);
     SCSIDeviceInfo *info = DO_UPCAST(SCSIDeviceInfo, qdev, base);
     SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, dev->qdev.parent_bus);
+    SCSIDevice *olddev;
     int rc = -1;
 
     if (dev->id == -1) {
-        for (dev->id = 0; dev->id < bus->info->ndev; dev->id++) {
-            if (bus->devs[dev->id] == NULL)
+        int id;
+        for (id = 0; id < bus->info->ndev; id++) {
+            if (!scsi_device_find(bus, id, 0)) {
+                dev->id = id;
                 break;
+            }
         }
     }
     if (dev->id >= bus->info->ndev) {
@@ -50,17 +54,14 @@ static int scsi_qdev_init(DeviceState *qdev, DeviceInfo *base)
         goto err;
     }
 
-    if (bus->devs[dev->id]) {
-        qdev_free(&bus->devs[dev->id]->qdev);
+    olddev = scsi_device_find(bus, dev->id, dev->lun);
+    if (olddev && dev->lun == olddev->lun) {
+        qdev_free(&olddev->qdev);
     }
-    bus->devs[dev->id] = dev;
 
     dev->info = info;
     QTAILQ_INIT(&dev->requests);
     rc = dev->info->init(dev);
-    if (rc != 0) {
-        bus->devs[dev->id] = NULL;
-    }
 
 err:
     return rc;
@@ -69,13 +70,10 @@ err:
 static int scsi_qdev_exit(DeviceState *qdev)
 {
     SCSIDevice *dev = DO_UPCAST(SCSIDevice, qdev, qdev);
-    SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, dev->qdev.parent_bus);
 
-    assert(bus->devs[dev->id] != NULL);
-    if (bus->devs[dev->id]->info->destroy) {
-        bus->devs[dev->id]->info->destroy(bus->devs[dev->id]);
+    if (dev->info->destroy) {
+        dev->info->destroy(dev);
     }
-    bus->devs[dev->id] = NULL;
     return 0;
 }
 
@@ -1157,19 +1155,28 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
 static char *scsibus_get_fw_dev_path(DeviceState *dev)
 {
     SCSIDevice *d = DO_UPCAST(SCSIDevice, qdev, dev);
-    SCSIBus *bus = scsi_bus_from_device(d);
     char path[100];
-    int i;
 
-    for (i = 0; i < bus->info->ndev; i++) {
-        if (bus->devs[i] == d) {
-            break;
-        }
-    }
-
-    assert(i != bus->info->ndev);
-
-    snprintf(path, sizeof(path), "%s@%x", qdev_fw_name(dev), i);
+    snprintf(path, sizeof(path), "%s@%d:%d:%d", qdev_fw_name(dev),
+             0, d->id, d->lun);
 
     return strdup(path);
 }
+
+SCSIDevice *scsi_device_find(SCSIBus *bus, int id, int lun)
+{
+    DeviceState *qdev;
+    SCSIDevice *target_dev = NULL;
+
+    QTAILQ_FOREACH_REVERSE(qdev, &bus->qbus.children, ChildrenHead, sibling) {
+        SCSIDevice *dev = DO_UPCAST(SCSIDevice, qdev, qdev);
+
+        if (dev->id == id) {
+            if (dev->lun == lun) {
+                return dev;
+            }
+            target_dev = dev;
+        }
+    }
+    return target_dev;
+}
diff --git a/hw/scsi.h b/hw/scsi.h
index b76c4ee0a4..add3d2db67 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -110,8 +110,6 @@ struct SCSIBus {
 
     SCSISense unit_attention;
     const SCSIBusInfo *info;
-
-    SCSIDevice *devs[MAX_SCSI_DEVS];
 };
 
 void scsi_bus_new(SCSIBus *bus, DeviceState *host, const SCSIBusInfo *info);
@@ -195,5 +193,6 @@ void scsi_req_abort(SCSIRequest *req, int status);
 void scsi_req_cancel(SCSIRequest *req);
 void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense);
 int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed);
+SCSIDevice *scsi_device_find(SCSIBus *bus, int target, int lun);
 
 #endif
diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
index 33ae9b4963..ea3bb085db 100644
--- a/hw/spapr_vscsi.c
+++ b/hw/spapr_vscsi.c
@@ -129,11 +129,12 @@ static void vscsi_put_req(vscsi_req *req)
     req->active = 0;
 }
 
-static void vscsi_decode_id_lun(uint64_t srp_lun, int *id, int *lun)
+static SCSIDevice *vscsi_device_find(SCSIBus *bus, uint64_t srp_lun, int *lun)
 {
     /* XXX Figure that one out properly ! This is crackpot */
-    *id = (srp_lun >> 56) & 0x7f;
+    int id = (srp_lun >> 56) & 0x7f;
     *lun = (srp_lun >> 48) & 0xff;
+    return scsi_device_find(bus, id, *lun);
 }
 
 static int vscsi_send_iu(VSCSIState *s, vscsi_req *req,
@@ -582,14 +583,11 @@ static int vscsi_queue_cmd(VSCSIState *s, vscsi_req *req)
 {
     union srp_iu *srp = &req->iu.srp;
     SCSIDevice *sdev;
-    int n, id, lun;
+    int n, lun;
 
-    vscsi_decode_id_lun(be64_to_cpu(srp->cmd.lun), &id, &lun);
-
-    /* Qemu vs. linux issue with LUNs to be sorted out ... */
-    sdev = (id < 8 && lun < 16) ? s->bus.devs[id] : NULL;
+    sdev = vscsi_device_find(&s->bus, be64_to_cpu(srp->cmd.lun), &lun);
     if (!sdev) {
-        dprintf("VSCSI: Command for id %d with no drive\n", id);
+        dprintf("VSCSI: Command for lun %08" PRIx64 " with no drive\n", be64_to_cpu(srp->cmd.lun));
         if (srp->cmd.cdb[0] == INQUIRY) {
             vscsi_inquiry_no_target(s, req);
         } else {

From ba74307c5a3c2ff87d4596a10828faf7560d4552 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 14 Sep 2011 20:39:36 +0200
Subject: [PATCH 34/55] scsi: implement REPORT LUNS for arbitrary LUNs

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-bus.c | 51 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 7104e98d59..a2d57a72fe 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -170,7 +170,7 @@ typedef struct SCSITargetReq SCSITargetReq;
 struct SCSITargetReq {
     SCSIRequest req;
     int len;
-    uint8_t buf[64];
+    uint8_t buf[2056];
 };
 
 static void store_lun(uint8_t *outbuf, int lun)
@@ -185,23 +185,52 @@ static void store_lun(uint8_t *outbuf, int lun)
 
 static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
 {
-    int len;
+    DeviceState *qdev;
+    int i, len, n;
+    int id;
+    bool found_lun0;
+
     if (r->req.cmd.xfer < 16) {
         return false;
     }
     if (r->req.cmd.buf[2] > 2) {
         return false;
     }
-    len = MIN(sizeof r->buf, r->req.cmd.xfer);
-    memset(r->buf, 0, len);
-    if (r->req.dev->lun != 0) {
-        r->buf[3] = 16;
-        r->len = 24;
-        store_lun(&r->buf[16], r->req.dev->lun);
-    } else {
-        r->buf[3] = 8;
-        r->len = 16;
+    id = r->req.dev->id;
+    found_lun0 = false;
+    n = 0;
+    QTAILQ_FOREACH(qdev, &r->req.bus->qbus.children, sibling) {
+        SCSIDevice *dev = DO_UPCAST(SCSIDevice, qdev, qdev);
+
+        if (dev->id == id) {
+            if (dev->lun == 0) {
+                found_lun0 = true;
+            }
+            n += 8;
+        }
     }
+    if (!found_lun0) {
+        n += 8;
+    }
+    len = MIN(n + 8, r->req.cmd.xfer & ~7);
+    if (len > sizeof(r->buf)) {
+        /* TODO: > 256 LUNs? */
+        return false;
+    }
+
+    memset(r->buf, 0, len);
+    stl_be_p(&r->buf, n);
+    i = found_lun0 ? 8 : 16;
+    QTAILQ_FOREACH(qdev, &r->req.bus->qbus.children, sibling) {
+        SCSIDevice *dev = DO_UPCAST(SCSIDevice, qdev, qdev);
+
+        if (dev->id == id) {
+            store_lun(&r->buf[i], dev->lun);
+            i += 8;
+        }
+    }
+    assert(i == n + 8);
+    r->len = len;
     return true;
 }
 

From 7e0380b9bbf7c40361e06e6e0d8675a55bd0dea0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 13 Aug 2011 18:55:17 +0200
Subject: [PATCH 35/55] scsi: allow arbitrary LUNs

This only requires changes in two places: in SCSIBus, we need to look
for a free LUN if somebody creates a device with a pre-existing scsi-id
but the default LUN (-1, meaning "search for a free spot"); in vSCSI,
we need to actually parse the LUN according to the SCSI spec.

For vSCSI, max_target/max_lun are set according to the logical unit
addressing format in SAM.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/esp.c         |  3 ++-
 hw/lsi53c895a.c  |  3 ++-
 hw/scsi-bus.c    | 48 ++++++++++++++++++++++++++++++++----------------
 hw/scsi.h        |  3 ++-
 hw/spapr_vscsi.c | 39 +++++++++++++++++++++++++++++++++++----
 hw/usb-msd.c     |  3 ++-
 6 files changed, 75 insertions(+), 24 deletions(-)

diff --git a/hw/esp.c b/hw/esp.c
index aad290f52c..5742bb300c 100644
--- a/hw/esp.c
+++ b/hw/esp.c
@@ -724,7 +724,8 @@ void esp_init(target_phys_addr_t espaddr, int it_shift,
 
 static const struct SCSIBusInfo esp_scsi_info = {
     .tcq = false,
-    .ndev = ESP_MAX_DEVS,
+    .max_target = ESP_MAX_DEVS,
+    .max_lun = 7,
 
     .transfer_data = esp_transfer_data,
     .complete = esp_command_complete,
diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c
index c15f167c84..d26e442c9d 100644
--- a/hw/lsi53c895a.c
+++ b/hw/lsi53c895a.c
@@ -2085,7 +2085,8 @@ static int lsi_scsi_uninit(PCIDevice *d)
 
 static const struct SCSIBusInfo lsi_scsi_info = {
     .tcq = true,
-    .ndev = LSI_MAX_DEVS,
+    .max_target = LSI_MAX_DEVS,
+    .max_lun = 0,  /* LUN support is buggy */
 
     .transfer_data = lsi_transfer_data,
     .complete = lsi_command_complete,
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index a2d57a72fe..cec06dbd5b 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -17,7 +17,7 @@ static struct BusInfo scsi_bus_info = {
     .get_fw_dev_path = scsibus_get_fw_dev_path,
     .props = (Property[]) {
         DEFINE_PROP_UINT32("scsi-id", SCSIDevice, id, -1),
-        DEFINE_PROP_UINT32("lun", SCSIDevice, lun, 0),
+        DEFINE_PROP_UINT32("lun", SCSIDevice, lun, -1),
         DEFINE_PROP_END_OF_LIST(),
     },
 };
@@ -37,26 +37,42 @@ static int scsi_qdev_init(DeviceState *qdev, DeviceInfo *base)
     SCSIDevice *dev = DO_UPCAST(SCSIDevice, qdev, qdev);
     SCSIDeviceInfo *info = DO_UPCAST(SCSIDeviceInfo, qdev, base);
     SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, dev->qdev.parent_bus);
-    SCSIDevice *olddev;
+    SCSIDevice *d;
     int rc = -1;
 
-    if (dev->id == -1) {
-        int id;
-        for (id = 0; id < bus->info->ndev; id++) {
-            if (!scsi_device_find(bus, id, 0)) {
-                dev->id = id;
-                break;
-            }
-        }
-    }
-    if (dev->id >= bus->info->ndev) {
+    if (dev->id != -1 && dev->id > bus->info->max_target) {
         error_report("bad scsi device id: %d", dev->id);
         goto err;
     }
 
-    olddev = scsi_device_find(bus, dev->id, dev->lun);
-    if (olddev && dev->lun == olddev->lun) {
-        qdev_free(&olddev->qdev);
+    if (dev->id == -1) {
+        int id = -1;
+        if (dev->lun == -1) {
+            dev->lun = 0;
+        }
+        do {
+            d = scsi_device_find(bus, ++id, dev->lun);
+        } while (d && d->lun == dev->lun && id <= bus->info->max_target);
+        if (id > bus->info->max_target) {
+            error_report("no free target");
+            goto err;
+        }
+        dev->id = id;
+    } else if (dev->lun == -1) {
+        int lun = -1;
+        do {
+            d = scsi_device_find(bus, dev->id, ++lun);
+        } while (d && d->lun == lun && lun < bus->info->max_lun);
+        if (lun > bus->info->max_lun) {
+            error_report("no free lun");
+            goto err;
+        }
+        dev->lun = lun;
+    } else {
+        d = scsi_device_find(bus, dev->id, dev->lun);
+        if (dev->lun == d->lun && dev != d) {
+            qdev_free(&d->qdev);
+        }
     }
 
     dev->info = info;
@@ -115,7 +131,7 @@ int scsi_bus_legacy_handle_cmdline(SCSIBus *bus)
     int res = 0, unit;
 
     loc_push_none(&loc);
-    for (unit = 0; unit < bus->info->ndev; unit++) {
+    for (unit = 0; unit < bus->info->max_target; unit++) {
         dinfo = drive_get(IF_SCSI, bus->busnr, unit);
         if (dinfo == NULL) {
             continue;
diff --git a/hw/scsi.h b/hw/scsi.h
index add3d2db67..401d8d321a 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -98,7 +98,8 @@ struct SCSIDeviceInfo {
 };
 
 struct SCSIBusInfo {
-    int tcq, ndev;
+    int tcq;
+    int max_target, max_lun;
     void (*transfer_data)(SCSIRequest *req, uint32_t arg);
     void (*complete)(SCSIRequest *req, uint32_t arg);
     void (*cancel)(SCSIRequest *req);
diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
index ea3bb085db..e6c3581abf 100644
--- a/hw/spapr_vscsi.c
+++ b/hw/spapr_vscsi.c
@@ -131,9 +131,39 @@ static void vscsi_put_req(vscsi_req *req)
 
 static SCSIDevice *vscsi_device_find(SCSIBus *bus, uint64_t srp_lun, int *lun)
 {
-    /* XXX Figure that one out properly ! This is crackpot */
-    int id = (srp_lun >> 56) & 0x7f;
-    *lun = (srp_lun >> 48) & 0xff;
+    int channel = 0, id = 0;
+
+retry:
+    switch (srp_lun >> 62) {
+    case 0:
+        if ((srp_lun >> 56) != 0) {
+            channel = (srp_lun >> 56) & 0x3f;
+            id = (srp_lun >> 48) & 0xff;
+            srp_lun <<= 16;
+            goto retry;
+        }
+        *lun = (srp_lun >> 48) & 0xff;
+        break;
+
+    case 1:
+        *lun = (srp_lun >> 48) & 0x3fff;
+        break;
+    case 2:
+        channel = (srp_lun >> 53) & 0x7;
+        id = (srp_lun >> 56) & 0x3f;
+        *lun = (srp_lun >> 48) & 0x1f;
+        break;
+    case 3:
+        *lun = -1;
+        return NULL;
+    default:
+        abort();
+    }
+
+    if (channel) {
+        *lun = -1;
+        return NULL;
+    }
     return scsi_device_find(bus, id, *lun);
 }
 
@@ -862,7 +892,8 @@ static int vscsi_do_crq(struct VIOsPAPRDevice *dev, uint8_t *crq_data)
 
 static const struct SCSIBusInfo vscsi_scsi_info = {
     .tcq = true,
-    .ndev = VSCSI_REQ_LIMIT,
+    .max_target = 63, /* logical unit addressing format */
+    .max_lun = 31,
 
     .transfer_data = vscsi_transfer_data,
     .complete = vscsi_command_complete,
diff --git a/hw/usb-msd.c b/hw/usb-msd.c
index 2c047fab39..1a0815a136 100644
--- a/hw/usb-msd.c
+++ b/hw/usb-msd.c
@@ -497,7 +497,8 @@ static void usb_msd_password_cb(void *opaque, int err)
 
 static const struct SCSIBusInfo usb_msd_scsi_info = {
     .tcq = false,
-    .ndev = 1,
+    .max_target = 0,
+    .max_lun = 0,
 
     .transfer_data = usb_msd_transfer_data,
     .complete = usb_msd_command_complete,

From 0d3545e76c856be4cce26cf1c96981b26cafb6b1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 27 Jul 2011 23:24:50 +0200
Subject: [PATCH 36/55] scsi: add channel to addressing

This also requires little more than adding the new argument to
scsi_device_find, and the qdev property.  All devices by default
end up on channel 0.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/esp.c         |  4 ++--
 hw/lsi53c895a.c  |  4 ++--
 hw/scsi-bus.c    | 24 +++++++++++++++---------
 hw/scsi.h        |  5 +++--
 hw/spapr_vscsi.c |  9 +++------
 5 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/hw/esp.c b/hw/esp.c
index 5742bb300c..b698a43fe6 100644
--- a/hw/esp.c
+++ b/hw/esp.c
@@ -217,7 +217,7 @@ static uint32_t get_cmd(ESPState *s, uint8_t *buf)
         s->async_len = 0;
     }
 
-    s->current_dev = scsi_device_find(&s->bus, target, 0);
+    s->current_dev = scsi_device_find(&s->bus, 0, target, 0);
     if (!s->current_dev) {
         // No such drive
         s->rregs[ESP_RSTAT] = 0;
@@ -237,7 +237,7 @@ static void do_busid_cmd(ESPState *s, uint8_t *buf, uint8_t busid)
 
     trace_esp_do_busid_cmd(busid);
     lun = busid & 7;
-    current_lun = scsi_device_find(&s->bus, s->current_dev->id, lun);
+    current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, lun);
     s->current_req = scsi_req_new(current_lun, 0, lun, buf, NULL);
     datalen = scsi_req_enqueue(s->current_req);
     s->ti_size = datalen;
diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c
index d26e442c9d..2984cea633 100644
--- a/hw/lsi53c895a.c
+++ b/hw/lsi53c895a.c
@@ -767,7 +767,7 @@ static void lsi_do_command(LSIState *s)
     s->command_complete = 0;
 
     id = (s->select_tag >> 8) & 0xf;
-    dev = scsi_device_find(&s->bus, id, s->current_lun);
+    dev = scsi_device_find(&s->bus, 0, id, s->current_lun);
     if (!dev) {
         lsi_bad_selection(s, id);
         return;
@@ -1198,7 +1198,7 @@ again:
                 }
                 s->sstat0 |= LSI_SSTAT0_WOA;
                 s->scntl1 &= ~LSI_SCNTL1_IARB;
-                if (!scsi_device_find(&s->bus, id, 0)) {
+                if (!scsi_device_find(&s->bus, 0, id, 0)) {
                     lsi_bad_selection(s, id);
                     break;
                 }
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index cec06dbd5b..bdd6e947f2 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -16,6 +16,7 @@ static struct BusInfo scsi_bus_info = {
     .size  = sizeof(SCSIBus),
     .get_fw_dev_path = scsibus_get_fw_dev_path,
     .props = (Property[]) {
+        DEFINE_PROP_UINT32("channel", SCSIDevice, channel, 0),
         DEFINE_PROP_UINT32("scsi-id", SCSIDevice, id, -1),
         DEFINE_PROP_UINT32("lun", SCSIDevice, lun, -1),
         DEFINE_PROP_END_OF_LIST(),
@@ -40,6 +41,10 @@ static int scsi_qdev_init(DeviceState *qdev, DeviceInfo *base)
     SCSIDevice *d;
     int rc = -1;
 
+    if (dev->channel > bus->info->max_channel) {
+        error_report("bad scsi channel id: %d", dev->channel);
+        goto err;
+    }
     if (dev->id != -1 && dev->id > bus->info->max_target) {
         error_report("bad scsi device id: %d", dev->id);
         goto err;
@@ -51,7 +56,7 @@ static int scsi_qdev_init(DeviceState *qdev, DeviceInfo *base)
             dev->lun = 0;
         }
         do {
-            d = scsi_device_find(bus, ++id, dev->lun);
+            d = scsi_device_find(bus, dev->channel, ++id, dev->lun);
         } while (d && d->lun == dev->lun && id <= bus->info->max_target);
         if (id > bus->info->max_target) {
             error_report("no free target");
@@ -61,7 +66,7 @@ static int scsi_qdev_init(DeviceState *qdev, DeviceInfo *base)
     } else if (dev->lun == -1) {
         int lun = -1;
         do {
-            d = scsi_device_find(bus, dev->id, ++lun);
+            d = scsi_device_find(bus, dev->channel, dev->id, ++lun);
         } while (d && d->lun == lun && lun < bus->info->max_lun);
         if (lun > bus->info->max_lun) {
             error_report("no free lun");
@@ -69,7 +74,7 @@ static int scsi_qdev_init(DeviceState *qdev, DeviceInfo *base)
         }
         dev->lun = lun;
     } else {
-        d = scsi_device_find(bus, dev->id, dev->lun);
+        d = scsi_device_find(bus, dev->channel, dev->id, dev->lun);
         if (dev->lun == d->lun && dev != d) {
             qdev_free(&d->qdev);
         }
@@ -203,7 +208,7 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
 {
     DeviceState *qdev;
     int i, len, n;
-    int id;
+    int channel, id;
     bool found_lun0;
 
     if (r->req.cmd.xfer < 16) {
@@ -212,13 +217,14 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
     if (r->req.cmd.buf[2] > 2) {
         return false;
     }
+    channel = r->req.dev->channel;
     id = r->req.dev->id;
     found_lun0 = false;
     n = 0;
     QTAILQ_FOREACH(qdev, &r->req.bus->qbus.children, sibling) {
         SCSIDevice *dev = DO_UPCAST(SCSIDevice, qdev, qdev);
 
-        if (dev->id == id) {
+        if (dev->channel == channel && dev->id == id) {
             if (dev->lun == 0) {
                 found_lun0 = true;
             }
@@ -240,7 +246,7 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
     QTAILQ_FOREACH(qdev, &r->req.bus->qbus.children, sibling) {
         SCSIDevice *dev = DO_UPCAST(SCSIDevice, qdev, qdev);
 
-        if (dev->id == id) {
+        if (dev->channel == channel && dev->id == id) {
             store_lun(&r->buf[i], dev->lun);
             i += 8;
         }
@@ -1203,12 +1209,12 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev)
     char path[100];
 
     snprintf(path, sizeof(path), "%s@%d:%d:%d", qdev_fw_name(dev),
-             0, d->id, d->lun);
+             d->channel, d->id, d->lun);
 
     return strdup(path);
 }
 
-SCSIDevice *scsi_device_find(SCSIBus *bus, int id, int lun)
+SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int id, int lun)
 {
     DeviceState *qdev;
     SCSIDevice *target_dev = NULL;
@@ -1216,7 +1222,7 @@ SCSIDevice *scsi_device_find(SCSIBus *bus, int id, int lun)
     QTAILQ_FOREACH_REVERSE(qdev, &bus->qbus.children, ChildrenHead, sibling) {
         SCSIDevice *dev = DO_UPCAST(SCSIDevice, qdev, qdev);
 
-        if (dev->id == id) {
+        if (dev->channel == channel && dev->id == id) {
             if (dev->lun == lun) {
                 return dev;
             }
diff --git a/hw/scsi.h b/hw/scsi.h
index 401d8d321a..c8649cfa9a 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -66,6 +66,7 @@ struct SCSIDevice
     uint8_t sense[SCSI_SENSE_BUF_SIZE];
     uint32_t sense_len;
     QTAILQ_HEAD(, SCSIRequest) requests;
+    uint32_t channel;
     uint32_t lun;
     int blocksize;
     int type;
@@ -99,7 +100,7 @@ struct SCSIDeviceInfo {
 
 struct SCSIBusInfo {
     int tcq;
-    int max_target, max_lun;
+    int max_channel, max_target, max_lun;
     void (*transfer_data)(SCSIRequest *req, uint32_t arg);
     void (*complete)(SCSIRequest *req, uint32_t arg);
     void (*cancel)(SCSIRequest *req);
@@ -194,6 +195,6 @@ void scsi_req_abort(SCSIRequest *req, int status);
 void scsi_req_cancel(SCSIRequest *req);
 void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense);
 int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed);
-SCSIDevice *scsi_device_find(SCSIBus *bus, int target, int lun);
+SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int target, int lun);
 
 #endif
diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
index e6c3581abf..00e2d2d5d3 100644
--- a/hw/spapr_vscsi.c
+++ b/hw/spapr_vscsi.c
@@ -160,11 +160,7 @@ retry:
         abort();
     }
 
-    if (channel) {
-        *lun = -1;
-        return NULL;
-    }
-    return scsi_device_find(bus, id, *lun);
+    return scsi_device_find(bus, channel, id, *lun);
 }
 
 static int vscsi_send_iu(VSCSIState *s, vscsi_req *req,
@@ -892,7 +888,8 @@ static int vscsi_do_crq(struct VIOsPAPRDevice *dev, uint8_t *crq_data)
 
 static const struct SCSIBusInfo vscsi_scsi_info = {
     .tcq = true,
-    .max_target = 63, /* logical unit addressing format */
+    .max_channel = 7, /* logical unit addressing format */
+    .max_target = 63,
     .max_lun = 31,
 
     .transfer_data = vscsi_transfer_data,

From 7cec78b6f0e9a3b33732afd14a811849d219ffbc Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 3 Oct 2011 13:57:06 +0200
Subject: [PATCH 37/55] scsi-disk: fail READ CAPACITY if LBA != 0 but PMI == 0

Tested by the Windows Logo Kit SCSI Compliance test. From SBC-3, paragraph
5.25: "The LOGICAL BLOCK ADDRESS field shall be set to zero if the PMI
bit is set to zero. If the PMI bit is set to zero and the LOGICAL BLOCK
ADDRESS field is not set to zero, then the device server shall terminate
the command with CHECK CONDITION status with the sense key set to ILLEGAL
REQUEST and the additional sense code set to INVALID FIELD IN CDB".

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 50fc3d6eaa..6b139acda1 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -1180,6 +1180,9 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         if (!nb_sectors) {
             goto not_ready;
         }
+        if ((req->cmd.buf[8] & 1) == 0 && req->cmd.lba) {
+            goto illegal_request;
+        }
         nb_sectors /= s->cluster_size;
         /* Returned value is the address of the last sector.  */
         nb_sectors--;
@@ -1232,6 +1235,9 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
             if (!nb_sectors) {
                 goto not_ready;
             }
+            if ((req->cmd.buf[14] & 1) == 0 && req->cmd.lba) {
+                goto illegal_request;
+            }
             nb_sectors /= s->cluster_size;
             /* Returned value is the address of the last sector.  */
             nb_sectors--;

From 628e95b61650f00607e8ddcf952b2e93f6ac3c89 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 4 Oct 2011 15:36:05 +0200
Subject: [PATCH 38/55] scsi-disk: fix retrying a flush

Flush does not go anymore through scsi_disk_emulate_command.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 6b139acda1..e800ab358a 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -81,7 +81,7 @@ struct SCSIDiskState
 };
 
 static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type);
-static int scsi_disk_emulate_command(SCSIDiskReq *r);
+static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf);
 
 static void scsi_free_request(SCSIRequest *req)
 {
@@ -325,7 +325,6 @@ static void scsi_dma_restart_bh(void *opaque)
         r = DO_UPCAST(SCSIDiskReq, req, req);
         if (r->status & SCSI_REQ_STATUS_RETRY) {
             int status = r->status;
-            int ret;
 
             r->status &=
                 ~(SCSI_REQ_STATUS_RETRY | SCSI_REQ_STATUS_RETRY_TYPE_MASK);
@@ -338,10 +337,8 @@ static void scsi_dma_restart_bh(void *opaque)
                 scsi_write_data(&r->req);
                 break;
             case SCSI_REQ_STATUS_RETRY_FLUSH:
-                ret = scsi_disk_emulate_command(r);
-                if (ret == 0) {
-                    scsi_req_complete(&r->req, GOOD);
-                }
+                scsi_send_command(&r->req, r->req.cmd.buf);
+                break;
             }
         }
     }

From 8869e103979e499babe73488abe1f9c935746a60 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 12 Oct 2011 12:49:35 +0200
Subject: [PATCH 39/55] scsi-generic: drop SCSIGenericState

It is not needed, because s->bs is already stored in SCSIDevice, and
can be reached from the conf.bs member.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-generic.c | 90 +++++++++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 50 deletions(-)

diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index 8f6b70df2b..4af0f24356 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -46,8 +46,6 @@ do { fprintf(stderr, "scsi-generic: " fmt , ## __VA_ARGS__); } while (0)
 #define MAX_UINT ((unsigned int)-1)
 #endif
 
-typedef struct SCSIGenericState SCSIGenericState;
-
 typedef struct SCSIGenericReq {
     SCSIRequest req;
     uint8_t *buf;
@@ -56,12 +54,6 @@ typedef struct SCSIGenericReq {
     sg_io_hdr_t io_header;
 } SCSIGenericReq;
 
-struct SCSIGenericState
-{
-    SCSIDevice qdev;
-    BlockDriverState *bs;
-};
-
 static void scsi_free_request(SCSIRequest *req)
 {
     SCSIGenericReq *r = DO_UPCAST(SCSIGenericReq, req, req);
@@ -174,7 +166,7 @@ static void scsi_read_complete(void * opaque, int ret)
 static void scsi_read_data(SCSIRequest *req)
 {
     SCSIGenericReq *r = DO_UPCAST(SCSIGenericReq, req, req);
-    SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, r->req.dev);
+    SCSIDevice *s = r->req.dev;
     int ret;
 
     DPRINTF("scsi_read_data 0x%x\n", req->tag);
@@ -183,17 +175,16 @@ static void scsi_read_data(SCSIRequest *req)
         return;
     }
 
-    ret = execute_command(s->bs, r, SG_DXFER_FROM_DEV, scsi_read_complete);
+    ret = execute_command(s->conf.bs, r, SG_DXFER_FROM_DEV, scsi_read_complete);
     if (ret < 0) {
         scsi_command_complete(r, ret);
-        return;
     }
 }
 
 static void scsi_write_complete(void * opaque, int ret)
 {
     SCSIGenericReq *r = (SCSIGenericReq *)opaque;
-    SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, r->req.dev);
+    SCSIDevice *s = r->req.dev;
 
     DPRINTF("scsi_write_complete() ret = %d\n", ret);
     r->req.aiocb = NULL;
@@ -204,9 +195,9 @@ static void scsi_write_complete(void * opaque, int ret)
     }
 
     if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 &&
-        s->qdev.type == TYPE_TAPE) {
-        s->qdev.blocksize = (r->buf[9] << 16) | (r->buf[10] << 8) | r->buf[11];
-        DPRINTF("block size %d\n", s->qdev.blocksize);
+        s->type == TYPE_TAPE) {
+        s->blocksize = (r->buf[9] << 16) | (r->buf[10] << 8) | r->buf[11];
+        DPRINTF("block size %d\n", s->blocksize);
     }
 
     scsi_command_complete(r, ret);
@@ -216,8 +207,8 @@ static void scsi_write_complete(void * opaque, int ret)
    The transfer may complete asynchronously.  */
 static void scsi_write_data(SCSIRequest *req)
 {
-    SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, req->dev);
     SCSIGenericReq *r = DO_UPCAST(SCSIGenericReq, req, req);
+    SCSIDevice *s = r->req.dev;
     int ret;
 
     DPRINTF("scsi_write_data 0x%x\n", req->tag);
@@ -227,7 +218,7 @@ static void scsi_write_data(SCSIRequest *req)
         return;
     }
 
-    ret = execute_command(s->bs, r, SG_DXFER_TO_DEV, scsi_write_complete);
+    ret = execute_command(s->conf.bs, r, SG_DXFER_TO_DEV, scsi_write_complete);
     if (ret < 0) {
         scsi_command_complete(r, ret);
     }
@@ -261,8 +252,8 @@ static void scsi_req_fixup(SCSIRequest *req)
 
 static int32_t scsi_send_command(SCSIRequest *req, uint8_t *cmd)
 {
-    SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, req->dev);
     SCSIGenericReq *r = DO_UPCAST(SCSIGenericReq, req, req);
+    SCSIDevice *s = r->req.dev;
     int ret;
 
     scsi_req_fixup(&r->req);
@@ -285,7 +276,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *cmd)
             g_free(r->buf);
         r->buflen = 0;
         r->buf = NULL;
-        ret = execute_command(s->bs, r, SG_DXFER_NONE, scsi_command_complete);
+        ret = execute_command(s->conf.bs, r, SG_DXFER_NONE, scsi_command_complete);
         if (ret < 0) {
             scsi_command_complete(r, ret);
             return 0;
@@ -373,77 +364,76 @@ static int get_stream_blocksize(BlockDriverState *bdrv)
 
 static void scsi_generic_reset(DeviceState *dev)
 {
-    SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev.qdev, dev);
+    SCSIDevice *s = DO_UPCAST(SCSIDevice, qdev, dev);
 
-    scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
+    scsi_device_purge_requests(s, SENSE_CODE(RESET));
 }
 
-static void scsi_destroy(SCSIDevice *d)
+static void scsi_destroy(SCSIDevice *s)
 {
-    SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, d);
-
-    scsi_device_purge_requests(&s->qdev, SENSE_CODE(NO_SENSE));
-    blockdev_mark_auto_del(s->qdev.conf.bs);
+    scsi_device_purge_requests(s, SENSE_CODE(NO_SENSE));
+    blockdev_mark_auto_del(s->conf.bs);
 }
 
-static int scsi_generic_initfn(SCSIDevice *dev)
+static int scsi_generic_initfn(SCSIDevice *s)
 {
-    SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, dev);
     int sg_version;
     struct sg_scsi_id scsiid;
 
-    if (!s->qdev.conf.bs) {
+    if (!s->conf.bs) {
         error_report("scsi-generic: drive property not set");
         return -1;
     }
-    s->bs = s->qdev.conf.bs;
 
     /* check we are really using a /dev/sg* file */
-    if (!bdrv_is_sg(s->bs)) {
+    if (!bdrv_is_sg(s->conf.bs)) {
         error_report("scsi-generic: not /dev/sg*");
         return -1;
     }
 
-    if (bdrv_get_on_error(s->bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
+    if (bdrv_get_on_error(s->conf.bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
         error_report("Device doesn't support drive option werror");
         return -1;
     }
-    if (bdrv_get_on_error(s->bs, 1) != BLOCK_ERR_REPORT) {
+    if (bdrv_get_on_error(s->conf.bs, 1) != BLOCK_ERR_REPORT) {
         error_report("Device doesn't support drive option rerror");
         return -1;
     }
 
     /* check we are using a driver managing SG_IO (version 3 and after */
-    if (bdrv_ioctl(s->bs, SG_GET_VERSION_NUM, &sg_version) < 0 ||
+    if (bdrv_ioctl(s->conf.bs, SG_GET_VERSION_NUM, &sg_version) < 0 ||
         sg_version < 30000) {
         error_report("scsi-generic: scsi generic interface too old");
         return -1;
     }
 
     /* get LUN of the /dev/sg? */
-    if (bdrv_ioctl(s->bs, SG_GET_SCSI_ID, &scsiid)) {
+    if (bdrv_ioctl(s->conf.bs, SG_GET_SCSI_ID, &scsiid)) {
         error_report("scsi-generic: SG_GET_SCSI_ID ioctl failed");
         return -1;
     }
 
     /* define device state */
-    s->qdev.type = scsiid.scsi_type;
-    DPRINTF("device type %d\n", s->qdev.type);
-    if (s->qdev.type == TYPE_TAPE) {
-        s->qdev.blocksize = get_stream_blocksize(s->bs);
-        if (s->qdev.blocksize == -1)
-            s->qdev.blocksize = 0;
+    s->type = scsiid.scsi_type;
+    DPRINTF("device type %d\n", s->type);
+    if (s->type == TYPE_TAPE) {
+        s->blocksize = get_stream_blocksize(s->conf.bs);
+        if (s->blocksize == -1) {
+            s->blocksize = 0;
+        }
     } else {
-        s->qdev.blocksize = get_blocksize(s->bs);
+        s->blocksize = get_blocksize(s->conf.bs);
         /* removable media returns 0 if not present */
-        if (s->qdev.blocksize <= 0) {
-            if (s->qdev.type == TYPE_ROM || s->qdev.type  == TYPE_WORM)
-                s->qdev.blocksize = 2048;
-            else
-                s->qdev.blocksize = 512;
+        if (s->blocksize <= 0) {
+            if (s->type == TYPE_ROM || s->type  == TYPE_WORM) {
+                s->blocksize = 2048;
+            } else {
+                s->blocksize = 512;
+            }
         }
     }
-    DPRINTF("block size %d\n", s->qdev.blocksize);
+
+    DPRINTF("block size %d\n", s->blocksize);
     return 0;
 }
 
@@ -469,13 +459,13 @@ static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
 static SCSIDeviceInfo scsi_generic_info = {
     .qdev.name    = "scsi-generic",
     .qdev.desc    = "pass through generic scsi device (/dev/sg*)",
-    .qdev.size    = sizeof(SCSIGenericState),
+    .qdev.size    = sizeof(SCSIDevice),
     .qdev.reset   = scsi_generic_reset,
     .init         = scsi_generic_initfn,
     .destroy      = scsi_destroy,
     .alloc_req    = scsi_new_request,
     .qdev.props   = (Property[]) {
-        DEFINE_BLOCK_PROPERTIES(SCSIGenericState, qdev.conf),
+        DEFINE_BLOCK_PROPERTIES(SCSIDevice, conf),
         DEFINE_PROP_END_OF_LIST(),
     },
 };

From 6a58a3a6ebb738e4173f93ecc089b45dd57c7574 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 12 Oct 2011 11:54:59 +0200
Subject: [PATCH 40/55] scsi-generic: remove scsi_req_fixup

This is not needed anymore, since asynchronous ioctls were introduced
by commit 221f715 (new scsi-generic abstraction, use SG_IO, 2009-03-28).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-generic.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index 4af0f24356..5ef4825392 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -232,19 +232,6 @@ static uint8_t *scsi_get_buf(SCSIRequest *req)
     return r->buf;
 }
 
-static void scsi_req_fixup(SCSIRequest *req)
-{
-    switch(req->cmd.buf[0]) {
-    case REWIND:
-    case START_STOP:
-        if (req->dev->type == TYPE_TAPE) {
-            /* force IMMED, otherwise qemu waits end of command */
-            req->cmd.buf[1] = 0x01;
-        }
-        break;
-    }
-}
-
 /* Execute a scsi command.  Returns the length of the data expected by the
    command.  This will be Positive for data transfers from the device
    (eg. disk reads), negative for transfers to the device (eg. disk writes),
@@ -256,8 +243,6 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *cmd)
     SCSIDevice *s = r->req.dev;
     int ret;
 
-    scsi_req_fixup(&r->req);
-
     DPRINTF("Command: lun=%d tag=0x%x len %zd data=0x%02x", lun, tag,
             r->req.cmd.xfer, cmd[0]);
 

From fe0ed71279eb442b920f0080c763438dbaa09f74 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 12 Oct 2011 14:49:48 +0200
Subject: [PATCH 41/55] scsi-generic: check ioctl statuses when SG_IO succeeds

A succeeding ioctl does not imply that the SCSI command succeeded.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-generic.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index 5ef4825392..c313749d7e 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -310,9 +310,9 @@ static int get_blocksize(BlockDriverState *bdrv)
     io_header.timeout = 6000; /* XXX */
 
     ret = bdrv_ioctl(bdrv, SG_IO, &io_header);
-    if (ret < 0)
+    if (ret < 0 || io_header.driver_status || io_header.host_status) {
         return -1;
-
+    }
     return (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
 }
 
@@ -341,9 +341,9 @@ static int get_stream_blocksize(BlockDriverState *bdrv)
     io_header.timeout = 6000; /* XXX */
 
     ret = bdrv_ioctl(bdrv, SG_IO, &io_header);
-    if (ret < 0)
+    if (ret < 0 || io_header.driver_status || io_header.host_status) {
         return -1;
-
+    }
     return (buf[9] << 16) | (buf[10] << 8) | buf[11];
 }
 

From a3b16e71ab733f76104231b873dff48d43fb3890 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 12 Oct 2011 14:53:43 +0200
Subject: [PATCH 42/55] scsi-generic: look at host status

Pass down the host status so that failing transport can be detected
by the guest.  Similar treatment of host status could be done in
virtio-blk, too.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-generic.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index c313749d7e..5ad3d573eb 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -39,8 +39,13 @@ do { fprintf(stderr, "scsi-generic: " fmt , ## __VA_ARGS__); } while (0)
 
 #define SCSI_SENSE_BUF_SIZE 96
 
-#define SG_ERR_DRIVER_TIMEOUT 0x06
-#define SG_ERR_DRIVER_SENSE 0x08
+#define SG_ERR_DRIVER_TIMEOUT  0x06
+#define SG_ERR_DRIVER_SENSE    0x08
+
+#define SG_ERR_DID_OK          0x00
+#define SG_ERR_DID_NO_CONNECT  0x01
+#define SG_ERR_DID_BUS_BUSY    0x02
+#define SG_ERR_DID_TIME_OUT    0x03
 
 #ifndef MAX_UINT
 #define MAX_UINT ((unsigned int)-1)
@@ -68,8 +73,9 @@ static void scsi_command_complete(void *opaque, int ret)
     SCSIGenericReq *r = (SCSIGenericReq *)opaque;
 
     r->req.aiocb = NULL;
-    if (r->io_header.driver_status & SG_ERR_DRIVER_SENSE)
+    if (r->io_header.driver_status & SG_ERR_DRIVER_SENSE) {
         r->req.sense_len = r->io_header.sb_len_wr;
+    }
 
     if (ret != 0) {
         switch (ret) {
@@ -86,9 +92,15 @@ static void scsi_command_complete(void *opaque, int ret)
             break;
         }
     } else {
-        if (r->io_header.driver_status & SG_ERR_DRIVER_TIMEOUT) {
+        if (r->io_header.host_status == SG_ERR_DID_NO_CONNECT ||
+            r->io_header.host_status == SG_ERR_DID_BUS_BUSY ||
+            r->io_header.host_status == SG_ERR_DID_TIME_OUT ||
+            (r->io_header.driver_status & SG_ERR_DRIVER_TIMEOUT)) {
             status = BUSY;
             BADF("Driver Timeout\n");
+        } else if (r->io_header.host_status) {
+            status = CHECK_CONDITION;
+            scsi_req_build_sense(&r->req, SENSE_CODE(I_T_NEXUS_LOSS));
         } else if (r->io_header.status) {
             status = r->io_header.status;
         } else if (r->io_header.driver_status & SG_ERR_DRIVER_SENSE) {

From 9b6eef8a319f11f977add658b622b60900877410 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 13 Oct 2011 10:36:27 +0200
Subject: [PATCH 43/55] scsi-generic: snoop READ CAPACITY commands to get block
 size

Instead of "guessing" the block size when there is no medium in the
drive, wait for the guest to send a READ CAPACITY command and snoop
it from there.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-generic.c | 66 ++++++++++++++++++-----------------------------
 1 file changed, 25 insertions(+), 41 deletions(-)

diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index 5ad3d573eb..4d7ad828f1 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -155,6 +155,7 @@ static int execute_command(BlockDriverState *bdrv,
 static void scsi_read_complete(void * opaque, int ret)
 {
     SCSIGenericReq *r = (SCSIGenericReq *)opaque;
+    SCSIDevice *s = r->req.dev;
     int len;
 
     r->req.aiocb = NULL;
@@ -170,6 +171,15 @@ static void scsi_read_complete(void * opaque, int ret)
     if (len == 0) {
         scsi_command_complete(r, 0);
     } else {
+        /* Snoop READ CAPACITY output to set the blocksize.  */
+        if (r->req.cmd.buf[0] == READ_CAPACITY_10) {
+            s->blocksize = ldl_be_p(&r->buf[4]);
+        } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 &&
+                   (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
+            s->blocksize = ldl_be_p(&r->buf[8]);
+        }
+        bdrv_set_buffer_alignment(s->conf.bs, s->blocksize);
+
         scsi_req_data(&r->req, len);
     }
 }
@@ -298,36 +308,6 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *cmd)
     }
 }
 
-static int get_blocksize(BlockDriverState *bdrv)
-{
-    uint8_t cmd[10];
-    uint8_t buf[8];
-    uint8_t sensebuf[8];
-    sg_io_hdr_t io_header;
-    int ret;
-
-    memset(cmd, 0, sizeof(cmd));
-    memset(buf, 0, sizeof(buf));
-    cmd[0] = READ_CAPACITY_10;
-
-    memset(&io_header, 0, sizeof(io_header));
-    io_header.interface_id = 'S';
-    io_header.dxfer_direction = SG_DXFER_FROM_DEV;
-    io_header.dxfer_len = sizeof(buf);
-    io_header.dxferp = buf;
-    io_header.cmdp = cmd;
-    io_header.cmd_len = sizeof(cmd);
-    io_header.mx_sb_len = sizeof(sensebuf);
-    io_header.sbp = sensebuf;
-    io_header.timeout = 6000; /* XXX */
-
-    ret = bdrv_ioctl(bdrv, SG_IO, &io_header);
-    if (ret < 0 || io_header.driver_status || io_header.host_status) {
-        return -1;
-    }
-    return (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
-}
-
 static int get_stream_blocksize(BlockDriverState *bdrv)
 {
     uint8_t cmd[6];
@@ -413,21 +393,25 @@ static int scsi_generic_initfn(SCSIDevice *s)
     /* define device state */
     s->type = scsiid.scsi_type;
     DPRINTF("device type %d\n", s->type);
-    if (s->type == TYPE_TAPE) {
+    switch (s->type) {
+    case TYPE_TAPE:
         s->blocksize = get_stream_blocksize(s->conf.bs);
         if (s->blocksize == -1) {
             s->blocksize = 0;
         }
-    } else {
-        s->blocksize = get_blocksize(s->conf.bs);
-        /* removable media returns 0 if not present */
-        if (s->blocksize <= 0) {
-            if (s->type == TYPE_ROM || s->type  == TYPE_WORM) {
-                s->blocksize = 2048;
-            } else {
-                s->blocksize = 512;
-            }
-        }
+        break;
+
+        /* Make a guess for block devices, we'll fix it when the guest sends.
+         * READ CAPACITY.  If they don't, they likely would assume these sizes
+         * anyway. (TODO: they could also send MODE SENSE).
+         */
+    case TYPE_ROM:
+    case TYPE_WORM:
+        s->blocksize = 2048;
+        break;
+    default:
+        s->blocksize = 512;
+        break;
     }
 
     DPRINTF("block size %d\n", s->blocksize);

From 44740c38168bd46e2a99094a42cbdbb781299604 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 12 Oct 2011 12:54:16 +0200
Subject: [PATCH 44/55] scsi-disk: do not duplicate BlockDriverState member

Same as for scsi-generic, avoid duplication even if it causes longer
lines.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 92 ++++++++++++++++++++++++--------------------------
 1 file changed, 45 insertions(+), 47 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index e800ab358a..8a05031300 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -65,7 +65,6 @@ typedef struct SCSIDiskReq {
 struct SCSIDiskState
 {
     SCSIDevice qdev;
-    BlockDriverState *bs;
     /* The qemu block layer uses a fixed 512 byte sector size.
        This is the number of 512 byte blocks in a single scsi sector.  */
     int cluster_size;
@@ -119,7 +118,7 @@ static uint32_t scsi_init_iovec(SCSIDiskReq *r)
 
     if (!r->iov.iov_base) {
         r->buflen = SCSI_DMA_BUF_SIZE;
-        r->iov.iov_base = qemu_blockalign(s->bs, r->buflen);
+        r->iov.iov_base = qemu_blockalign(s->qdev.conf.bs, r->buflen);
     }
     r->iov.iov_len = MIN(r->sector_count * 512, r->buflen);
     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
@@ -134,7 +133,7 @@ static void scsi_read_complete(void * opaque, int ret)
 
     if (r->req.aiocb != NULL) {
         r->req.aiocb = NULL;
-        bdrv_acct_done(s->bs, &r->acct);
+        bdrv_acct_done(s->qdev.conf.bs, &r->acct);
     }
 
     if (ret) {
@@ -158,7 +157,7 @@ static void scsi_flush_complete(void * opaque, int ret)
 
     if (r->req.aiocb != NULL) {
         r->req.aiocb = NULL;
-        bdrv_acct_done(s->bs, &r->acct);
+        bdrv_acct_done(s->qdev.conf.bs, &r->acct);
     }
 
     if (ret < 0) {
@@ -203,8 +202,8 @@ static void scsi_read_data(SCSIRequest *req)
         scsi_read_complete(r, -ENOMEDIUM);
     }
     n = scsi_init_iovec(r);
-    bdrv_acct_start(s->bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
-    r->req.aiocb = bdrv_aio_readv(s->bs, r->sector, &r->qiov, n,
+    bdrv_acct_start(s->qdev.conf.bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
+    r->req.aiocb = bdrv_aio_readv(s->qdev.conf.bs, r->sector, &r->qiov, n,
                               scsi_read_complete, r);
     if (r->req.aiocb == NULL) {
         scsi_read_complete(r, -EIO);
@@ -215,10 +214,10 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type)
 {
     int is_read = (type == SCSI_REQ_STATUS_RETRY_READ);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
-    BlockErrorAction action = bdrv_get_on_error(s->bs, is_read);
+    BlockErrorAction action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
 
     if (action == BLOCK_ERR_IGNORE) {
-        bdrv_mon_event(s->bs, BDRV_ACTION_IGNORE, is_read);
+        bdrv_mon_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read);
         return 0;
     }
 
@@ -228,9 +227,9 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type)
         type &= SCSI_REQ_STATUS_RETRY_TYPE_MASK;
         r->status |= SCSI_REQ_STATUS_RETRY | type;
 
-        bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read);
+        bdrv_mon_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read);
         vm_stop(RUN_STATE_IO_ERROR);
-        bdrv_iostatus_set_err(s->bs, error);
+        bdrv_iostatus_set_err(s->qdev.conf.bs, error);
     } else {
         switch (error) {
         case ENOMEDIUM:
@@ -246,7 +245,7 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type)
             scsi_check_condition(r, SENSE_CODE(IO_ERROR));
             break;
         }
-        bdrv_mon_event(s->bs, BDRV_ACTION_REPORT, is_read);
+        bdrv_mon_event(s->qdev.conf.bs, BDRV_ACTION_REPORT, is_read);
     }
     return 1;
 }
@@ -259,7 +258,7 @@ static void scsi_write_complete(void * opaque, int ret)
 
     if (r->req.aiocb != NULL) {
         r->req.aiocb = NULL;
-        bdrv_acct_done(s->bs, &r->acct);
+        bdrv_acct_done(s->qdev.conf.bs, &r->acct);
     }
 
     if (ret) {
@@ -300,8 +299,8 @@ static void scsi_write_data(SCSIRequest *req)
         if (s->tray_open) {
             scsi_write_complete(r, -ENOMEDIUM);
         }
-        bdrv_acct_start(s->bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_WRITE);
-        r->req.aiocb = bdrv_aio_writev(s->bs, r->sector, &r->qiov, n,
+        bdrv_acct_start(s->qdev.conf.bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_WRITE);
+        r->req.aiocb = bdrv_aio_writev(s->qdev.conf.bs, r->sector, &r->qiov, n,
                                        scsi_write_complete, r);
         if (r->req.aiocb == NULL) {
             scsi_write_complete(r, -ENOMEM);
@@ -440,7 +439,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
         case 0x83: /* Device identification page, mandatory */
         {
             int max_len = 255 - 8;
-            int id_len = strlen(bdrv_get_device_name(s->bs));
+            int id_len = strlen(bdrv_get_device_name(s->qdev.conf.bs));
 
             if (id_len > max_len) {
                 id_len = max_len;
@@ -454,7 +453,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             outbuf[buflen++] = 0;   // reserved
             outbuf[buflen++] = id_len; // length of data following
 
-            memcpy(outbuf+buflen, bdrv_get_device_name(s->bs), id_len);
+            memcpy(outbuf+buflen, bdrv_get_device_name(s->qdev.conf.bs), id_len);
             buflen += id_len;
             break;
         }
@@ -571,10 +570,10 @@ static inline bool media_is_dvd(SCSIDiskState *s)
     if (s->qdev.type != TYPE_ROM) {
         return false;
     }
-    if (!bdrv_is_inserted(s->bs)) {
+    if (!bdrv_is_inserted(s->qdev.conf.bs)) {
         return false;
     }
-    bdrv_get_geometry(s->bs, &nb_sectors);
+    bdrv_get_geometry(s->qdev.conf.bs, &nb_sectors);
     return nb_sectors > CD_MAX_SECTORS;
 }
 
@@ -584,10 +583,10 @@ static inline bool media_is_cd(SCSIDiskState *s)
     if (s->qdev.type != TYPE_ROM) {
         return false;
     }
-    if (!bdrv_is_inserted(s->bs)) {
+    if (!bdrv_is_inserted(s->qdev.conf.bs)) {
         return false;
     }
-    bdrv_get_geometry(s->bs, &nb_sectors);
+    bdrv_get_geometry(s->qdev.conf.bs, &nb_sectors);
     return nb_sectors <= CD_MAX_SECTORS;
 }
 
@@ -615,7 +614,7 @@ static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
     }
 
     if (format != 0xff) {
-        if (s->tray_open || !bdrv_is_inserted(s->bs)) {
+        if (s->tray_open || !bdrv_is_inserted(s->qdev.conf.bs)) {
             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
             return -1;
         }
@@ -637,7 +636,7 @@ static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
         if (layer != 0) {
             goto fail;
         }
-        bdrv_get_geometry(s->bs, &nb_sectors);
+        bdrv_get_geometry(s->qdev.conf.bs, &nb_sectors);
 
         outbuf[4] = 1;   /* DVD-ROM, part version 1 */
         outbuf[5] = 0xf; /* 120mm disc, minimum rate unspecified */
@@ -692,7 +691,7 @@ static int scsi_event_status_media(SCSIDiskState *s, uint8_t *outbuf)
     media_status = 0;
     if (s->tray_open) {
         media_status = MS_TRAY_OPEN;
-    } else if (bdrv_is_inserted(s->bs)) {
+    } else if (bdrv_is_inserted(s->qdev.conf.bs)) {
         media_status = MS_MEDIA_PRESENT;
     }
 
@@ -795,7 +794,7 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         [MODE_PAGE_CAPABILITIES]           = (1 << TYPE_ROM),
     };
 
-    BlockDriverState *bdrv = s->bs;
+    BlockDriverState *bdrv = s->qdev.conf.bs;
     int cylinders, heads, secs;
     uint8_t *p = *p_outbuf;
 
@@ -887,7 +886,7 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         if (page_control == 1) { /* Changeable Values */
             break;
         }
-        if (bdrv_enable_write_cache(s->bs)) {
+        if (bdrv_enable_write_cache(s->qdev.conf.bs)) {
             p[2] = 4; /* WCE */
         }
         break;
@@ -959,7 +958,7 @@ static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
     memset(outbuf, 0, r->req.cmd.xfer);
     p = outbuf;
 
-    if (bdrv_is_read_only(s->bs)) {
+    if (bdrv_is_read_only(s->qdev.conf.bs)) {
         dev_specific_param = 0x80; /* Readonly.  */
     } else {
         dev_specific_param = 0x00;
@@ -977,7 +976,7 @@ static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
         p += 8;
     }
 
-    bdrv_get_geometry(s->bs, &nb_sectors);
+    bdrv_get_geometry(s->qdev.conf.bs, &nb_sectors);
     if (!dbd && nb_sectors) {
         if (r->req.cmd.buf[0] == MODE_SENSE) {
             outbuf[3] = 8; /* Block descriptor length  */
@@ -1043,7 +1042,7 @@ static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
     msf = req->cmd.buf[1] & 2;
     format = req->cmd.buf[2] & 0xf;
     start_track = req->cmd.buf[6];
-    bdrv_get_geometry(s->bs, &nb_sectors);
+    bdrv_get_geometry(s->qdev.conf.bs, &nb_sectors);
     DPRINTF("Read TOC (track %d format %d msf %d)\n", start_track, format, msf >> 1);
     nb_sectors /= s->cluster_size;
     switch (format) {
@@ -1080,12 +1079,12 @@ static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
     if (s->qdev.type == TYPE_ROM && loej) {
         if (!start && !s->tray_open && s->tray_locked) {
             scsi_check_condition(r,
-                                 bdrv_is_inserted(s->bs)
+                                 bdrv_is_inserted(s->qdev.conf.bs)
                                  ? SENSE_CODE(ILLEGAL_REQ_REMOVAL_PREVENTED)
                                  : SENSE_CODE(NOT_READY_REMOVAL_PREVENTED));
             return -1;
         }
-        bdrv_eject(s->bs, !start);
+        bdrv_eject(s->qdev.conf.bs, !start);
         s->tray_open = !start;
     }
     return 0;
@@ -1112,13 +1111,13 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
             goto illegal_request;
         }
         r->buflen = MAX(4096, req->cmd.xfer);
-        r->iov.iov_base = qemu_blockalign(s->bs, r->buflen);
+        r->iov.iov_base = qemu_blockalign(s->qdev.conf.bs, r->buflen);
     }
 
     outbuf = r->iov.iov_base;
     switch (req->cmd.buf[0]) {
     case TEST_UNIT_READY:
-        if (s->tray_open || !bdrv_is_inserted(s->bs)) {
+        if (s->tray_open || !bdrv_is_inserted(s->qdev.conf.bs)) {
             goto not_ready;
         }
         break;
@@ -1168,12 +1167,12 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         break;
     case ALLOW_MEDIUM_REMOVAL:
         s->tray_locked = req->cmd.buf[4] & 1;
-        bdrv_lock_medium(s->bs, req->cmd.buf[4] & 1);
+        bdrv_lock_medium(s->qdev.conf.bs, req->cmd.buf[4] & 1);
         break;
     case READ_CAPACITY_10:
         /* The normal LEN field for this command is zero.  */
         memset(outbuf, 0, 8);
-        bdrv_get_geometry(s->bs, &nb_sectors);
+        bdrv_get_geometry(s->qdev.conf.bs, &nb_sectors);
         if (!nb_sectors) {
             goto not_ready;
         }
@@ -1228,7 +1227,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
             DPRINTF("SAI READ CAPACITY(16)\n");
             memset(outbuf, 0, req->cmd.xfer);
-            bdrv_get_geometry(s->bs, &nb_sectors);
+            bdrv_get_geometry(s->qdev.conf.bs, &nb_sectors);
             if (!nb_sectors) {
                 goto not_ready;
             }
@@ -1275,7 +1274,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
     return buflen;
 
 not_ready:
-    if (s->tray_open || !bdrv_is_inserted(s->bs)) {
+    if (s->tray_open || !bdrv_is_inserted(s->qdev.conf.bs)) {
         scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
     } else {
         scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
@@ -1342,8 +1341,8 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
         r->iov.iov_len = rc;
         break;
     case SYNCHRONIZE_CACHE:
-        bdrv_acct_start(s->bs, &r->acct, 0, BDRV_ACCT_FLUSH);
-        r->req.aiocb = bdrv_aio_flush(s->bs, scsi_flush_complete, r);
+        bdrv_acct_start(s->qdev.conf.bs, &r->acct, 0, BDRV_ACCT_FLUSH);
+        r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_flush_complete, r);
         if (r->req.aiocb == NULL) {
             scsi_flush_complete(r, -EIO);
         }
@@ -1418,7 +1417,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
             goto fail;
         }
 
-        rc = bdrv_discard(s->bs, r->req.cmd.lba * s->cluster_size,
+        rc = bdrv_discard(s->qdev.conf.bs, r->req.cmd.lba * s->cluster_size,
                           len * s->cluster_size);
         if (rc < 0) {
             /* XXX: better error code ?*/
@@ -1460,7 +1459,7 @@ static void scsi_disk_reset(DeviceState *dev)
 
     scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
 
-    bdrv_get_geometry(s->bs, &nb_sectors);
+    bdrv_get_geometry(s->qdev.conf.bs, &nb_sectors);
     nb_sectors /= s->cluster_size;
     if (nb_sectors) {
         nb_sectors--;
@@ -1530,16 +1529,15 @@ static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type)
         error_report("scsi-disk: drive property not set");
         return -1;
     }
-    s->bs = s->qdev.conf.bs;
 
-    if (scsi_type == TYPE_DISK && !bdrv_is_inserted(s->bs)) {
+    if (scsi_type == TYPE_DISK && !bdrv_is_inserted(s->qdev.conf.bs)) {
         error_report("Device needs media, but drive is empty");
         return -1;
     }
 
     if (!s->serial) {
         /* try to fall back to value set with legacy -drive serial=... */
-        dinfo = drive_get_by_blockdev(s->bs);
+        dinfo = drive_get_by_blockdev(s->qdev.conf.bs);
         if (*dinfo->serial) {
             s->serial = g_strdup(dinfo->serial);
         }
@@ -1549,13 +1547,13 @@ static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type)
         s->version = g_strdup(QEMU_VERSION);
     }
 
-    if (bdrv_is_sg(s->bs)) {
+    if (bdrv_is_sg(s->qdev.conf.bs)) {
         error_report("scsi-disk: unwanted /dev/sg*");
         return -1;
     }
 
     if (scsi_type == TYPE_ROM) {
-        bdrv_set_dev_ops(s->bs, &scsi_cd_block_ops, s);
+        bdrv_set_dev_ops(s->qdev.conf.bs, &scsi_cd_block_ops, s);
         s->qdev.blocksize = 2048;
     } else if (scsi_type == TYPE_DISK) {
         s->qdev.blocksize = s->qdev.conf.logical_block_size;
@@ -1564,11 +1562,11 @@ static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type)
         return -1;
     }
     s->cluster_size = s->qdev.blocksize / 512;
-    bdrv_set_buffer_alignment(s->bs, s->qdev.blocksize);
+    bdrv_set_buffer_alignment(s->qdev.conf.bs, s->qdev.blocksize);
 
     s->qdev.type = scsi_type;
     qemu_add_vm_change_state_handler(scsi_dma_restart_cb, s);
-    bdrv_iostatus_enable(s->bs);
+    bdrv_iostatus_enable(s->qdev.conf.bs);
     add_boot_device_path(s->qdev.conf.bootindex, &dev->qdev, ",0");
     return 0;
 }

From 69377307b25f12235560d21050808a8de5793199 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 13 Oct 2011 10:35:46 +0200
Subject: [PATCH 45/55] scsi-disk: remove cluster_size

This field is redundant, and having it makes it more complicated
to share reqops between the upcoming scsi-block and scsi-generic.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 8a05031300..f89e6c575f 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -65,9 +65,6 @@ typedef struct SCSIDiskReq {
 struct SCSIDiskState
 {
     SCSIDevice qdev;
-    /* The qemu block layer uses a fixed 512 byte sector size.
-       This is the number of 512 byte blocks in a single scsi sector.  */
-    int cluster_size;
     uint32_t removable;
     uint64_t max_lba;
     bool media_changed;
@@ -854,7 +851,7 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         bdrv_get_geometry_hint(bdrv, &cylinders, &heads, &secs);
         p[4] = heads & 0xff;
         p[5] = secs & 0xff;
-        p[6] = s->cluster_size * 2;
+        p[6] = s->qdev.blocksize >> 8;
         p[8] = (cylinders >> 8) & 0xff;
         p[9] = cylinders & 0xff;
         /* Write precomp start cylinder, disabled */
@@ -983,7 +980,7 @@ static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
         } else { /* MODE_SENSE_10 */
             outbuf[7] = 8; /* Block descriptor length  */
         }
-        nb_sectors /= s->cluster_size;
+        nb_sectors /= (s->qdev.blocksize / 512);
         if (nb_sectors > 0xffffff) {
             nb_sectors = 0;
         }
@@ -993,7 +990,7 @@ static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
         p[3] = nb_sectors & 0xff;
         p[4] = 0; /* reserved */
         p[5] = 0; /* bytes 5-7 are the sector size in bytes */
-        p[6] = s->cluster_size * 2;
+        p[6] = s->qdev.blocksize >> 8;
         p[7] = 0;
         p += 8;
     }
@@ -1044,7 +1041,7 @@ static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
     start_track = req->cmd.buf[6];
     bdrv_get_geometry(s->qdev.conf.bs, &nb_sectors);
     DPRINTF("Read TOC (track %d format %d msf %d)\n", start_track, format, msf >> 1);
-    nb_sectors /= s->cluster_size;
+    nb_sectors /= s->qdev.blocksize / 512;
     switch (format) {
     case 0:
         toclen = cdrom_read_toc(nb_sectors, outbuf, msf, start_track);
@@ -1179,7 +1176,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         if ((req->cmd.buf[8] & 1) == 0 && req->cmd.lba) {
             goto illegal_request;
         }
-        nb_sectors /= s->cluster_size;
+        nb_sectors /= s->qdev.blocksize / 512;
         /* Returned value is the address of the last sector.  */
         nb_sectors--;
         /* Remember the new size for read/write sanity checking. */
@@ -1194,7 +1191,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         outbuf[3] = nb_sectors & 0xff;
         outbuf[4] = 0;
         outbuf[5] = 0;
-        outbuf[6] = s->cluster_size * 2;
+        outbuf[6] = s->qdev.blocksize >> 8;
         outbuf[7] = 0;
         buflen = 8;
         break;
@@ -1234,7 +1231,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
             if ((req->cmd.buf[14] & 1) == 0 && req->cmd.lba) {
                 goto illegal_request;
             }
-            nb_sectors /= s->cluster_size;
+            nb_sectors /= s->qdev.blocksize / 512;
             /* Returned value is the address of the last sector.  */
             nb_sectors--;
             /* Remember the new size for read/write sanity checking. */
@@ -1249,7 +1246,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
             outbuf[7] = nb_sectors & 0xff;
             outbuf[8] = 0;
             outbuf[9] = 0;
-            outbuf[10] = s->cluster_size * 2;
+            outbuf[10] = s->qdev.blocksize >> 8;
             outbuf[11] = 0;
             outbuf[12] = 0;
             outbuf[13] = get_physical_block_exp(&s->qdev.conf);
@@ -1356,8 +1353,8 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
         if (r->req.cmd.lba > s->max_lba) {
             goto illegal_lba;
         }
-        r->sector = r->req.cmd.lba * s->cluster_size;
-        r->sector_count = len * s->cluster_size;
+        r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
+        r->sector_count = len * (s->qdev.blocksize / 512);
         break;
     case WRITE_6:
     case WRITE_10:
@@ -1373,8 +1370,8 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
         if (r->req.cmd.lba > s->max_lba) {
             goto illegal_lba;
         }
-        r->sector = r->req.cmd.lba * s->cluster_size;
-        r->sector_count = len * s->cluster_size;
+        r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
+        r->sector_count = len * (s->qdev.blocksize / 512);
         break;
     case MODE_SELECT:
         DPRINTF("Mode Select(6) (len %lu)\n", (long)r->req.cmd.xfer);
@@ -1417,8 +1414,9 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
             goto fail;
         }
 
-        rc = bdrv_discard(s->qdev.conf.bs, r->req.cmd.lba * s->cluster_size,
-                          len * s->cluster_size);
+        rc = bdrv_discard(s->qdev.conf.bs,
+                          r->req.cmd.lba * (s->qdev.blocksize / 512),
+                          len * (s->qdev.blocksize / 512));
         if (rc < 0) {
             /* XXX: better error code ?*/
             goto fail;
@@ -1460,7 +1458,7 @@ static void scsi_disk_reset(DeviceState *dev)
     scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
 
     bdrv_get_geometry(s->qdev.conf.bs, &nb_sectors);
-    nb_sectors /= s->cluster_size;
+    nb_sectors /= s->qdev.blocksize / 512;
     if (nb_sectors) {
         nb_sectors--;
     }
@@ -1561,7 +1559,6 @@ static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type)
         error_report("scsi-disk: Unhandled SCSI type %02x", scsi_type);
         return -1;
     }
-    s->cluster_size = s->qdev.blocksize / 512;
     bdrv_set_buffer_alignment(s->qdev.conf.bs, s->qdev.blocksize);
 
     s->qdev.type = scsi_type;

From e39be4823215e511d0e85aa33b2a5ade71064c72 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 12 Oct 2011 14:27:40 +0200
Subject: [PATCH 46/55] scsi-disk: small clean up to INQUIRY

Set s->removable, s->qdev.blocksize and s->qdev.type in the callers
of scsi_initfn.

With this in place, s->qdev.type is allowed, and we can just reuse it
as the first byte in VPD data (just like we do in standard INQUIRY data).
Also set s->removable is set consistently and we can use it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 46 +++++++++++++++++++++-------------------------
 1 file changed, 21 insertions(+), 25 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index f89e6c575f..af1001de32 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -381,11 +381,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             return -1;
         }
 
-        if (s->qdev.type == TYPE_ROM) {
-            outbuf[buflen++] = 5;
-        } else {
-            outbuf[buflen++] = 0;
-        }
+        outbuf[buflen++] = s->qdev.type & 0x1f;
         outbuf[buflen++] = page_code ; // this page
         outbuf[buflen++] = 0x00;
 
@@ -529,11 +525,10 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
     memset(outbuf, 0, buflen);
 
     outbuf[0] = s->qdev.type & 0x1f;
+    outbuf[1] = s->removable ? 0x80 : 0;
     if (s->qdev.type == TYPE_ROM) {
-        outbuf[1] = 0x80;
         memcpy(&outbuf[16], "QEMU CD-ROM     ", 16);
     } else {
-        outbuf[1] = s->removable ? 0x80 : 0;
         memcpy(&outbuf[16], "QEMU HARDDISK   ", 16);
     }
     memcpy(&outbuf[8], "QEMU    ", 8);
@@ -1518,7 +1513,7 @@ static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
     }
 }
 
-static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type)
+static int scsi_initfn(SCSIDevice *dev)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
     DriveInfo *dinfo;
@@ -1528,7 +1523,7 @@ static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type)
         return -1;
     }
 
-    if (scsi_type == TYPE_DISK && !bdrv_is_inserted(s->qdev.conf.bs)) {
+    if (!s->removable && !bdrv_is_inserted(s->qdev.conf.bs)) {
         error_report("Device needs media, but drive is empty");
         return -1;
     }
@@ -1550,18 +1545,11 @@ static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type)
         return -1;
     }
 
-    if (scsi_type == TYPE_ROM) {
+    if (s->removable) {
         bdrv_set_dev_ops(s->qdev.conf.bs, &scsi_cd_block_ops, s);
-        s->qdev.blocksize = 2048;
-    } else if (scsi_type == TYPE_DISK) {
-        s->qdev.blocksize = s->qdev.conf.logical_block_size;
-    } else {
-        error_report("scsi-disk: Unhandled SCSI type %02x", scsi_type);
-        return -1;
     }
     bdrv_set_buffer_alignment(s->qdev.conf.bs, s->qdev.blocksize);
 
-    s->qdev.type = scsi_type;
     qemu_add_vm_change_state_handler(scsi_dma_restart_cb, s);
     bdrv_iostatus_enable(s->qdev.conf.bs);
     add_boot_device_path(s->qdev.conf.bootindex, &dev->qdev, ",0");
@@ -1570,27 +1558,35 @@ static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type)
 
 static int scsi_hd_initfn(SCSIDevice *dev)
 {
-    return scsi_initfn(dev, TYPE_DISK);
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
+    s->qdev.blocksize = s->qdev.conf.logical_block_size;
+    s->qdev.type = TYPE_DISK;
+    return scsi_initfn(&s->qdev);
 }
 
 static int scsi_cd_initfn(SCSIDevice *dev)
 {
-    return scsi_initfn(dev, TYPE_ROM);
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
+    s->qdev.blocksize = 2048;
+    s->qdev.type = TYPE_ROM;
+    s->removable = true;
+    return scsi_initfn(&s->qdev);
 }
 
 static int scsi_disk_initfn(SCSIDevice *dev)
 {
     DriveInfo *dinfo;
-    uint8_t scsi_type;
 
     if (!dev->conf.bs) {
-        scsi_type = TYPE_DISK;  /* will die in scsi_initfn() */
-    } else {
-        dinfo = drive_get_by_blockdev(dev->conf.bs);
-        scsi_type = dinfo->media_cd ? TYPE_ROM : TYPE_DISK;
+        return scsi_initfn(dev);  /* ... and die there */
     }
 
-    return scsi_initfn(dev, scsi_type);
+    dinfo = drive_get_by_blockdev(dev->conf.bs);
+    if (dinfo->media_cd) {
+        return scsi_cd_initfn(dev);
+    } else {
+        return scsi_hd_initfn(dev);
+    }
 }
 
 static SCSIReqOps scsi_disk_reqops = {

From 7877903aa0ef318017441c32605bc64650e6a326 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 13 Oct 2011 10:39:50 +0200
Subject: [PATCH 47/55] scsi: move max_lba to SCSIDevice

The field is only in scsi-disk for now.  Moving it up to SCSIDevice makes
it easier to reuse the scsi-generic reqops elsewhere.

At the same time, make scsi-generic get max_lba from snooped READ CAPACITY
commands as well.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c    | 15 +++++++--------
 hw/scsi-generic.c |  2 ++
 hw/scsi.h         |  1 +
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index af1001de32..74990a8299 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -66,7 +66,6 @@ struct SCSIDiskState
 {
     SCSIDevice qdev;
     uint32_t removable;
-    uint64_t max_lba;
     bool media_changed;
     bool media_event;
     QEMUBH *bh;
@@ -1175,7 +1174,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         /* Returned value is the address of the last sector.  */
         nb_sectors--;
         /* Remember the new size for read/write sanity checking. */
-        s->max_lba = nb_sectors;
+        s->qdev.max_lba = nb_sectors;
         /* Clip to 2TB, instead of returning capacity modulo 2TB. */
         if (nb_sectors > UINT32_MAX) {
             nb_sectors = UINT32_MAX;
@@ -1230,7 +1229,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
             /* Returned value is the address of the last sector.  */
             nb_sectors--;
             /* Remember the new size for read/write sanity checking. */
-            s->max_lba = nb_sectors;
+            s->qdev.max_lba = nb_sectors;
             outbuf[0] = (nb_sectors >> 56) & 0xff;
             outbuf[1] = (nb_sectors >> 48) & 0xff;
             outbuf[2] = (nb_sectors >> 40) & 0xff;
@@ -1345,7 +1344,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
     case READ_16:
         len = r->req.cmd.xfer / s->qdev.blocksize;
         DPRINTF("Read (sector %" PRId64 ", count %d)\n", r->req.cmd.lba, len);
-        if (r->req.cmd.lba > s->max_lba) {
+        if (r->req.cmd.lba > s->qdev.max_lba) {
             goto illegal_lba;
         }
         r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
@@ -1362,7 +1361,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
         DPRINTF("Write %s(sector %" PRId64 ", count %d)\n",
                 (command & 0xe) == 0xe ? "And Verify " : "",
                 r->req.cmd.lba, len);
-        if (r->req.cmd.lba > s->max_lba) {
+        if (r->req.cmd.lba > s->qdev.max_lba) {
             goto illegal_lba;
         }
         r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
@@ -1388,7 +1387,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
     case SEEK_10:
         DPRINTF("Seek(%d) (sector %" PRId64 ")\n", command == SEEK_6 ? 6 : 10,
                 r->req.cmd.lba);
-        if (r->req.cmd.lba > s->max_lba) {
+        if (r->req.cmd.lba > s->qdev.max_lba) {
             goto illegal_lba;
         }
         break;
@@ -1398,7 +1397,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
         DPRINTF("WRITE SAME(16) (sector %" PRId64 ", count %d)\n",
                 r->req.cmd.lba, len);
 
-        if (r->req.cmd.lba > s->max_lba) {
+        if (r->req.cmd.lba > s->qdev.max_lba) {
             goto illegal_lba;
         }
 
@@ -1457,7 +1456,7 @@ static void scsi_disk_reset(DeviceState *dev)
     if (nb_sectors) {
         nb_sectors--;
     }
-    s->max_lba = nb_sectors;
+    s->qdev.max_lba = nb_sectors;
 }
 
 static void scsi_destroy(SCSIDevice *dev)
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index 4d7ad828f1..fcf23cd8cd 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -174,9 +174,11 @@ static void scsi_read_complete(void * opaque, int ret)
         /* Snoop READ CAPACITY output to set the blocksize.  */
         if (r->req.cmd.buf[0] == READ_CAPACITY_10) {
             s->blocksize = ldl_be_p(&r->buf[4]);
+            s->max_lba = ldl_be_p(&r->buf[0]);
         } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 &&
                    (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
             s->blocksize = ldl_be_p(&r->buf[8]);
+            s->max_lba = ldq_be_p(&r->buf[0]);
         }
         bdrv_set_buffer_alignment(s->conf.bs, s->blocksize);
 
diff --git a/hw/scsi.h b/hw/scsi.h
index c8649cfa9a..d56e875672 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -70,6 +70,7 @@ struct SCSIDevice
     uint32_t lun;
     int blocksize;
     int type;
+    uint64_t max_lba;
 };
 
 /* cdrom.c */

From adcf2754b99dcab33c5c0c523eb3b970c1a24f6c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 12 Oct 2011 12:57:59 +0200
Subject: [PATCH 48/55] scsi: make reqops const

Also delete a stale occurrence of SCSIReqOps inside SCSIDeviceInfo.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-bus.c     | 10 +++++-----
 hw/scsi-disk.c    |  2 +-
 hw/scsi-generic.c |  2 +-
 hw/scsi.h         |  7 +++----
 4 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index bdd6e947f2..1f38ac88e5 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -160,7 +160,7 @@ static int32_t scsi_invalid_command(SCSIRequest *req, uint8_t *buf)
     return 0;
 }
 
-struct SCSIReqOps reqops_invalid_opcode = {
+static const struct SCSIReqOps reqops_invalid_opcode = {
     .size         = sizeof(SCSIRequest),
     .send_command = scsi_invalid_command
 };
@@ -178,7 +178,7 @@ static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf)
     return 0;
 }
 
-struct SCSIReqOps reqops_unit_attention = {
+static const struct SCSIReqOps reqops_unit_attention = {
     .size         = sizeof(SCSIRequest),
     .send_command = scsi_unit_attention
 };
@@ -386,7 +386,7 @@ static uint8_t *scsi_target_get_buf(SCSIRequest *req)
     return r->buf;
 }
 
-struct SCSIReqOps reqops_target_command = {
+static const struct SCSIReqOps reqops_target_command = {
     .size         = sizeof(SCSITargetReq),
     .send_command = scsi_target_send_command,
     .read_data    = scsi_target_read_data,
@@ -394,8 +394,8 @@ struct SCSIReqOps reqops_target_command = {
 };
 
 
-SCSIRequest *scsi_req_alloc(SCSIReqOps *reqops, SCSIDevice *d, uint32_t tag,
-                            uint32_t lun, void *hba_private)
+SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d,
+                            uint32_t tag, uint32_t lun, void *hba_private)
 {
     SCSIRequest *req;
 
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 74990a8299..77673f251a 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -1588,7 +1588,7 @@ static int scsi_disk_initfn(SCSIDevice *dev)
     }
 }
 
-static SCSIReqOps scsi_disk_reqops = {
+static const SCSIReqOps scsi_disk_reqops = {
     .size         = sizeof(SCSIDiskReq),
     .free_req     = scsi_free_request,
     .send_command = scsi_send_command,
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index fcf23cd8cd..a5e77cb109 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -420,7 +420,7 @@ static int scsi_generic_initfn(SCSIDevice *s)
     return 0;
 }
 
-static SCSIReqOps scsi_generic_req_ops = {
+static const SCSIReqOps scsi_generic_req_ops = {
     .size         = sizeof(SCSIGenericReq),
     .free_req     = scsi_free_request,
     .send_command = scsi_send_command,
diff --git a/hw/scsi.h b/hw/scsi.h
index d56e875672..af558c3a16 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -41,7 +41,7 @@ struct SCSICommand {
 struct SCSIRequest {
     SCSIBus           *bus;
     SCSIDevice        *dev;
-    SCSIReqOps        *ops;
+    const SCSIReqOps  *ops;
     uint32_t          refcount;
     uint32_t          tag;
     uint32_t          lun;
@@ -96,7 +96,6 @@ struct SCSIDeviceInfo {
     SCSIRequest *(*alloc_req)(SCSIDevice *s, uint32_t tag, uint32_t lun,
                               void *hba_private);
     void (*unit_attention_reported)(SCSIDevice *s);
-    SCSIReqOps reqops;
 };
 
 struct SCSIBusInfo {
@@ -176,8 +175,8 @@ extern const struct SCSISense sense_code_DEVICE_INTERNAL_RESET;
 
 int scsi_sense_valid(SCSISense sense);
 
-SCSIRequest *scsi_req_alloc(SCSIReqOps *reqops, SCSIDevice *d, uint32_t tag,
-                            uint32_t lun, void *hba_private);
+SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d,
+                            uint32_t tag, uint32_t lun, void *hba_private);
 SCSIRequest *scsi_req_new(SCSIDevice *d, uint32_t tag, uint32_t lun,
                           uint8_t *buf, void *hba_private);
 int32_t scsi_req_enqueue(SCSIRequest *req);

From 765d1525a6c727674c5f6f459da4fdaeeda91162 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 12 Oct 2011 12:54:31 +0200
Subject: [PATCH 49/55] scsi: export scsi_generic_reqops

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-generic.c | 2 +-
 hw/scsi.h         | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index a5e77cb109..32f50cda3e 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -420,7 +420,7 @@ static int scsi_generic_initfn(SCSIDevice *s)
     return 0;
 }
 
-static const SCSIReqOps scsi_generic_req_ops = {
+const SCSIReqOps scsi_generic_req_ops = {
     .size         = sizeof(SCSIGenericReq),
     .free_req     = scsi_free_request,
     .send_command = scsi_send_command,
diff --git a/hw/scsi.h b/hw/scsi.h
index af558c3a16..01c6655f60 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -197,4 +197,7 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense);
 int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed);
 SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int target, int lun);
 
+/* scsi-generic.c. */
+extern const SCSIReqOps scsi_generic_req_ops;
+
 #endif

From 63db0f0eee3c0b2cc3a06b36daf50c4e7801ea1b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 12 Oct 2011 12:58:31 +0200
Subject: [PATCH 50/55] scsi: pass cdb to alloc_req

This will let scsi-block choose between passthrough and emulation.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-bus.c     | 2 +-
 hw/scsi-disk.c    | 4 ++--
 hw/scsi-generic.c | 2 +-
 hw/scsi.h         | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 1f38ac88e5..3cf571eea4 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -451,7 +451,7 @@ SCSIRequest *scsi_req_new(SCSIDevice *d, uint32_t tag, uint32_t lun,
             req = scsi_req_alloc(&reqops_target_command, d, tag, lun,
                                  hba_private);
         } else {
-            req = d->info->alloc_req(d, tag, lun, hba_private);
+            req = d->info->alloc_req(d, tag, lun, buf, hba_private);
         }
     }
 
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 77673f251a..415f81d1db 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -1598,8 +1598,8 @@ static const SCSIReqOps scsi_disk_reqops = {
     .get_buf      = scsi_get_buf,
 };
 
-static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag,
-                                     uint32_t lun, void *hba_private)
+static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
+                                     uint8_t *buf, void *hba_private)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
     SCSIRequest *req;
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index 32f50cda3e..2f95f2d967 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -431,7 +431,7 @@ const SCSIReqOps scsi_generic_req_ops = {
 };
 
 static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
-                                     void *hba_private)
+                                     uint8_t *buf, void *hba_private)
 {
     SCSIRequest *req;
 
diff --git a/hw/scsi.h b/hw/scsi.h
index 01c6655f60..8ea744a392 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -94,7 +94,7 @@ struct SCSIDeviceInfo {
     scsi_qdev_initfn init;
     void (*destroy)(SCSIDevice *s);
     SCSIRequest *(*alloc_req)(SCSIDevice *s, uint32_t tag, uint32_t lun,
-                              void *hba_private);
+                              uint8_t *buf, void *hba_private);
     void (*unit_attention_reported)(SCSIDevice *s);
 };
 

From e88c591d63ed1bc8520f4f276bebd77c22e4ec72 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2011 12:53:33 +0200
Subject: [PATCH 51/55] scsi: do not call transfer_data after canceling a
 request

Otherwise, if cancellation is "faked" by the AIO layer and goes
through qemu_aio_flush, the whole request is completed synchronously
during scsi_req_cancel.

Using the enqueued flag would work here, but not in the next patches,
so I'm introducing a new io_canceled flag.  That's because scsi_req_data
is a synchronous callback and the enqueued flag might be reset by the
time it returns.  scsi-disk cannot unref the request until after calling
scsi_req_data.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-bus.c | 23 +++++++++++++++++++----
 hw/scsi.h     |  1 +
 trace-events  |  1 +
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 3cf571eea4..dfce5fbeeb 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -1107,8 +1107,12 @@ void scsi_req_continue(SCSIRequest *req)
    Once it completes, calling scsi_req_continue will restart I/O.  */
 void scsi_req_data(SCSIRequest *req, int len)
 {
-    trace_scsi_req_data(req->dev->id, req->lun, req->tag, len);
-    req->bus->info->transfer_data(req, len);
+    if (req->io_canceled) {
+        trace_scsi_req_data_canceled(req->dev->id, req->lun, req->tag, len);
+    } else {
+        trace_scsi_req_data(req->dev->id, req->lun, req->tag, len);
+        req->bus->info->transfer_data(req, len);
+    }
 }
 
 void scsi_req_print(SCSIRequest *req)
@@ -1173,11 +1177,15 @@ void scsi_req_complete(SCSIRequest *req, int status)
 
 void scsi_req_cancel(SCSIRequest *req)
 {
-    if (req->ops->cancel_io) {
-        req->ops->cancel_io(req);
+    if (!req->enqueued) {
+        return;
     }
     scsi_req_ref(req);
     scsi_req_dequeue(req);
+    req->io_canceled = true;
+    if (req->ops->cancel_io) {
+        req->ops->cancel_io(req);
+    }
     if (req->bus->info->cancel) {
         req->bus->info->cancel(req);
     }
@@ -1186,10 +1194,17 @@ void scsi_req_cancel(SCSIRequest *req)
 
 void scsi_req_abort(SCSIRequest *req, int status)
 {
+    if (!req->enqueued) {
+        return;
+    }
+    scsi_req_ref(req);
+    scsi_req_dequeue(req);
+    req->io_canceled = true;
     if (req->ops->cancel_io) {
         req->ops->cancel_io(req);
     }
     scsi_req_complete(req, status);
+    scsi_req_unref(req);
 }
 
 void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
diff --git a/hw/scsi.h b/hw/scsi.h
index 8ea744a392..fcc345581d 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -51,6 +51,7 @@ struct SCSIRequest {
     uint8_t sense[SCSI_SENSE_BUF_SIZE];
     uint32_t sense_len;
     bool enqueued;
+    bool io_canceled;
     void *hba_private;
     QTAILQ_ENTRY(SCSIRequest) next;
 };
diff --git a/trace-events b/trace-events
index a8880550b2..56443af727 100644
--- a/trace-events
+++ b/trace-events
@@ -278,6 +278,7 @@ usb_host_claim_port(int bus, int hub, int port) "bus %d, hub addr %d, port %d"
 # hw/scsi-bus.c
 scsi_req_alloc(int target, int lun, int tag) "target %d lun %d tag %d"
 scsi_req_data(int target, int lun, int tag, int len) "target %d lun %d tag %d len %d"
+scsi_req_data_canceled(int target, int lun, int tag, int len) "target %d lun %d tag %d len %d"
 scsi_req_dequeue(int target, int lun, int tag) "target %d lun %d tag %d"
 scsi_req_continue(int target, int lun, int tag) "target %d lun %d tag %d"
 scsi_req_parsed(int target, int lun, int tag, int cmd, int mode, int xfer) "target %d lun %d tag %d command %d dir %d length %d"

From c7bae6a75b5b43d560f5b18386447842f9bfec37 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2011 12:53:34 +0200
Subject: [PATCH 52/55] scsi-disk: bump SCSIRequest reference count until aio
 completion runs

In some cases a request may be canceled before the completion callback
runs.  Keep a reference to the request between starting an AIO operation
and the corresponding scsi_req_cancel or scsi_*_complete.

When a request has to be retried, the request can be dropped because
scsi_dma_restart_bh only looks at requests that are enqueued.  As such,
they always have at least a reference.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 48 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 415f81d1db..bb07737439 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -102,8 +102,14 @@ static void scsi_cancel_io(SCSIRequest *req)
     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
 
     DPRINTF("Cancel tag=0x%x\n", req->tag);
+    r->status &= ~(SCSI_REQ_STATUS_RETRY | SCSI_REQ_STATUS_RETRY_TYPE_MASK);
     if (r->req.aiocb) {
         bdrv_aio_cancel(r->req.aiocb);
+
+        /* This reference was left in by scsi_*_data.  We take ownership of
+         * it the moment scsi_req_cancel is called, independent of whether
+         * bdrv_aio_cancel completes the request or not.  */
+        scsi_req_unref(&r->req);
     }
     r->req.aiocb = NULL;
 }
@@ -134,7 +140,7 @@ static void scsi_read_complete(void * opaque, int ret)
 
     if (ret) {
         if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_READ)) {
-            return;
+            goto done;
         }
     }
 
@@ -144,6 +150,11 @@ static void scsi_read_complete(void * opaque, int ret)
     r->sector += n;
     r->sector_count -= n;
     scsi_req_data(&r->req, r->qiov.size);
+
+done:
+    if (!r->req.io_canceled) {
+        scsi_req_unref(&r->req);
+    }
 }
 
 static void scsi_flush_complete(void * opaque, int ret)
@@ -158,11 +169,16 @@ static void scsi_flush_complete(void * opaque, int ret)
 
     if (ret < 0) {
         if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_FLUSH)) {
-            return;
+            goto done;
         }
     }
 
     scsi_req_complete(&r->req, GOOD);
+
+done:
+    if (!r->req.io_canceled) {
+        scsi_req_unref(&r->req);
+    }
 }
 
 /* Read more data from scsi device into buffer.  */
@@ -188,6 +204,8 @@ static void scsi_read_data(SCSIRequest *req)
     /* No data transfer may already be in progress */
     assert(r->req.aiocb == NULL);
 
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
         DPRINTF("Data transfer direction invalid\n");
         scsi_read_complete(r, -EINVAL);
@@ -196,7 +214,9 @@ static void scsi_read_data(SCSIRequest *req)
 
     if (s->tray_open) {
         scsi_read_complete(r, -ENOMEDIUM);
+        return;
     }
+
     n = scsi_init_iovec(r);
     bdrv_acct_start(s->qdev.conf.bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
     r->req.aiocb = bdrv_aio_readv(s->qdev.conf.bs, r->sector, &r->qiov, n,
@@ -206,6 +226,13 @@ static void scsi_read_data(SCSIRequest *req)
     }
 }
 
+/*
+ * scsi_handle_rw_error has two return values.  0 means that the error
+ * must be ignored, 1 means that the error has been processed and the
+ * caller should not do anything else for this request.  Note that
+ * scsi_handle_rw_error always manages its reference counts, independent
+ * of the return value.
+ */
 static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type)
 {
     int is_read = (type == SCSI_REQ_STATUS_RETRY_READ);
@@ -226,6 +253,11 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type)
         bdrv_mon_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read);
         vm_stop(RUN_STATE_IO_ERROR);
         bdrv_iostatus_set_err(s->qdev.conf.bs, error);
+
+        /* No need to save a reference, because scsi_dma_restart_bh just
+         * looks at the request list.  If a request is canceled, the
+         * retry request is just dropped.
+         */
     } else {
         switch (error) {
         case ENOMEDIUM:
@@ -259,7 +291,7 @@ static void scsi_write_complete(void * opaque, int ret)
 
     if (ret) {
         if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_WRITE)) {
-            return;
+            goto done;
         }
     }
 
@@ -273,6 +305,11 @@ static void scsi_write_complete(void * opaque, int ret)
         DPRINTF("Write complete tag=0x%x more=%d\n", r->req.tag, r->qiov.size);
         scsi_req_data(&r->req, r->qiov.size);
     }
+
+done:
+    if (!r->req.io_canceled) {
+        scsi_req_unref(&r->req);
+    }
 }
 
 static void scsi_write_data(SCSIRequest *req)
@@ -284,6 +321,8 @@ static void scsi_write_data(SCSIRequest *req)
     /* No data transfer may already be in progress */
     assert(r->req.aiocb == NULL);
 
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
     if (r->req.cmd.mode != SCSI_XFER_TO_DEV) {
         DPRINTF("Data transfer direction invalid\n");
         scsi_write_complete(r, -EINVAL);
@@ -294,6 +333,7 @@ static void scsi_write_data(SCSIRequest *req)
     if (n) {
         if (s->tray_open) {
             scsi_write_complete(r, -ENOMEDIUM);
+            return;
         }
         bdrv_acct_start(s->qdev.conf.bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_WRITE);
         r->req.aiocb = bdrv_aio_writev(s->qdev.conf.bs, r->sector, &r->qiov, n,
@@ -1332,6 +1372,8 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
         r->iov.iov_len = rc;
         break;
     case SYNCHRONIZE_CACHE:
+        /* The request is used as the AIO opaque value, so add a ref.  */
+        scsi_req_ref(&r->req);
         bdrv_acct_start(s->qdev.conf.bs, &r->acct, 0, BDRV_ACCT_FLUSH);
         r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_flush_complete, r);
         if (r->req.aiocb == NULL) {

From c9501c951c3dbe007dfba9328156be2d931f6d94 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2011 12:53:35 +0200
Subject: [PATCH 53/55] scsi-generic: bump SCSIRequest reference count until
 aio completion runs

Same as before, but for scsi-generic.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-generic.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index 2f95f2d967..9594cc1276 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -113,6 +113,9 @@ static void scsi_command_complete(void *opaque, int ret)
             r, r->req.tag, status);
 
     scsi_req_complete(&r->req, status);
+    if (!r->req.io_canceled) {
+        scsi_req_unref(&r->req);
+    }
 }
 
 /* Cancel a pending data transfer.  */
@@ -123,6 +126,11 @@ static void scsi_cancel_io(SCSIRequest *req)
     DPRINTF("Cancel tag=0x%x\n", req->tag);
     if (r->req.aiocb) {
         bdrv_aio_cancel(r->req.aiocb);
+
+        /* This reference was left in by scsi_*_data.  We take ownership of
+         * it independent of whether bdrv_aio_cancel completes the request
+         * or not.  */
+        scsi_req_unref(&r->req);
     }
     r->req.aiocb = NULL;
 }
@@ -183,6 +191,9 @@ static void scsi_read_complete(void * opaque, int ret)
         bdrv_set_buffer_alignment(s->conf.bs, s->blocksize);
 
         scsi_req_data(&r->req, len);
+        if (!r->req.io_canceled) {
+            scsi_req_unref(&r->req);
+        }
     }
 }
 
@@ -194,6 +205,9 @@ static void scsi_read_data(SCSIRequest *req)
     int ret;
 
     DPRINTF("scsi_read_data 0x%x\n", req->tag);
+
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
     if (r->len == -1) {
         scsi_command_complete(r, 0);
         return;
@@ -242,6 +256,8 @@ static void scsi_write_data(SCSIRequest *req)
         return;
     }
 
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
     ret = execute_command(s->conf.bs, r, SG_DXFER_TO_DEV, scsi_write_complete);
     if (ret < 0) {
         scsi_command_complete(r, ret);
@@ -285,6 +301,8 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *cmd)
             g_free(r->buf);
         r->buflen = 0;
         r->buf = NULL;
+        /* The request is used as the AIO opaque value, so add a ref.  */
+        scsi_req_ref(&r->req);
         ret = execute_command(s->conf.bs, r, SG_DXFER_NONE, scsi_command_complete);
         if (ret < 0) {
             scsi_command_complete(r, ret);

From 71544d30a6f8b91574552a61cd5bd122a77e82d1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2011 12:53:36 +0200
Subject: [PATCH 54/55] scsi: push request restart to SCSIDevice

The request restart mechanism is generic and could be reused for
scsi-generic.  In the meanwhile, pushing it to SCSIDevice avoids
that scsi_dma_restart_bh looks at SCSIGenericReqs when working on
a scsi-block device.

The code is the same that is already in hw/scsi-disk.c, with
the type flags replaced by req->cmd.mode and a more generic way to
requeue SCSI_XFER_NONE commands.

I also added a missing call to qemu_del_vm_change_state_handler.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-bus.c  | 56 +++++++++++++++++++++++++++++++++++++
 hw/scsi-disk.c | 76 +++++---------------------------------------------
 hw/scsi.h      |  5 ++++
 3 files changed, 68 insertions(+), 69 deletions(-)

diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index dfce5fbeeb..e6ebbd594e 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -8,6 +8,7 @@
 
 static char *scsibus_get_fw_dev_path(DeviceState *dev);
 static int scsi_req_parse(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf);
+static void scsi_req_dequeue(SCSIRequest *req);
 static int scsi_build_sense(uint8_t *in_buf, int in_len,
                             uint8_t *buf, int len, bool fixed);
 
@@ -33,6 +34,53 @@ void scsi_bus_new(SCSIBus *bus, DeviceState *host, const SCSIBusInfo *info)
     bus->qbus.allow_hotplug = 1;
 }
 
+static void scsi_dma_restart_bh(void *opaque)
+{
+    SCSIDevice *s = opaque;
+    SCSIRequest *req, *next;
+
+    qemu_bh_delete(s->bh);
+    s->bh = NULL;
+
+    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
+        scsi_req_ref(req);
+        if (req->retry) {
+            req->retry = false;
+            switch (req->cmd.mode) {
+            case SCSI_XFER_FROM_DEV:
+            case SCSI_XFER_TO_DEV:
+                scsi_req_continue(req);
+                break;
+            case SCSI_XFER_NONE:
+                scsi_req_dequeue(req);
+                scsi_req_enqueue(req);
+                break;
+            }
+        }
+        scsi_req_unref(req);
+    }
+}
+
+void scsi_req_retry(SCSIRequest *req)
+{
+    /* No need to save a reference, because scsi_dma_restart_bh just
+     * looks at the request list.  */
+    req->retry = true;
+}
+
+static void scsi_dma_restart_cb(void *opaque, int running, RunState state)
+{
+    SCSIDevice *s = opaque;
+
+    if (!running) {
+        return;
+    }
+    if (!s->bh) {
+        s->bh = qemu_bh_new(scsi_dma_restart_bh, s);
+        qemu_bh_schedule(s->bh);
+    }
+}
+
 static int scsi_qdev_init(DeviceState *qdev, DeviceInfo *base)
 {
     SCSIDevice *dev = DO_UPCAST(SCSIDevice, qdev, qdev);
@@ -83,6 +131,10 @@ static int scsi_qdev_init(DeviceState *qdev, DeviceInfo *base)
     dev->info = info;
     QTAILQ_INIT(&dev->requests);
     rc = dev->info->init(dev);
+    if (rc == 0) {
+        dev->vmsentry = qemu_add_vm_change_state_handler(scsi_dma_restart_cb,
+                                                         dev);
+    }
 
 err:
     return rc;
@@ -92,6 +144,9 @@ static int scsi_qdev_exit(DeviceState *qdev)
 {
     SCSIDevice *dev = DO_UPCAST(SCSIDevice, qdev, qdev);
 
+    if (dev->vmsentry) {
+        qemu_del_vm_change_state_handler(dev->vmsentry);
+    }
     if (dev->info->destroy) {
         dev->info->destroy(dev);
     }
@@ -586,6 +641,7 @@ int32_t scsi_req_enqueue(SCSIRequest *req)
 static void scsi_req_dequeue(SCSIRequest *req)
 {
     trace_scsi_req_dequeue(req->dev->id, req->lun, req->tag);
+    req->retry = false;
     if (req->enqueued) {
         QTAILQ_REMOVE(&req->dev->requests, req, next);
         req->enqueued = false;
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index bb07737439..352d41fc2b 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -42,12 +42,6 @@ do { fprintf(stderr, "scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
 #define SCSI_DMA_BUF_SIZE    131072
 #define SCSI_MAX_INQUIRY_LEN 256
 
-#define SCSI_REQ_STATUS_RETRY           0x01
-#define SCSI_REQ_STATUS_RETRY_TYPE_MASK 0x06
-#define SCSI_REQ_STATUS_RETRY_READ      0x00
-#define SCSI_REQ_STATUS_RETRY_WRITE     0x02
-#define SCSI_REQ_STATUS_RETRY_FLUSH     0x04
-
 typedef struct SCSIDiskState SCSIDiskState;
 
 typedef struct SCSIDiskReq {
@@ -58,7 +52,6 @@ typedef struct SCSIDiskReq {
     uint32_t buflen;
     struct iovec iov;
     QEMUIOVector qiov;
-    uint32_t status;
     BlockAcctCookie acct;
 } SCSIDiskReq;
 
@@ -75,8 +68,7 @@ struct SCSIDiskState
     bool tray_locked;
 };
 
-static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type);
-static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf);
+static int scsi_handle_rw_error(SCSIDiskReq *r, int error);
 
 static void scsi_free_request(SCSIRequest *req)
 {
@@ -102,7 +94,6 @@ static void scsi_cancel_io(SCSIRequest *req)
     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
 
     DPRINTF("Cancel tag=0x%x\n", req->tag);
-    r->status &= ~(SCSI_REQ_STATUS_RETRY | SCSI_REQ_STATUS_RETRY_TYPE_MASK);
     if (r->req.aiocb) {
         bdrv_aio_cancel(r->req.aiocb);
 
@@ -139,7 +130,7 @@ static void scsi_read_complete(void * opaque, int ret)
     }
 
     if (ret) {
-        if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_READ)) {
+        if (scsi_handle_rw_error(r, -ret)) {
             goto done;
         }
     }
@@ -168,7 +159,7 @@ static void scsi_flush_complete(void * opaque, int ret)
     }
 
     if (ret < 0) {
-        if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_FLUSH)) {
+        if (scsi_handle_rw_error(r, -ret)) {
             goto done;
         }
     }
@@ -233,9 +224,9 @@ static void scsi_read_data(SCSIRequest *req)
  * scsi_handle_rw_error always manages its reference counts, independent
  * of the return value.
  */
-static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type)
+static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
 {
-    int is_read = (type == SCSI_REQ_STATUS_RETRY_READ);
+    int is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     BlockErrorAction action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
 
@@ -247,17 +238,10 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type)
     if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
             || action == BLOCK_ERR_STOP_ANY) {
 
-        type &= SCSI_REQ_STATUS_RETRY_TYPE_MASK;
-        r->status |= SCSI_REQ_STATUS_RETRY | type;
-
         bdrv_mon_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read);
         vm_stop(RUN_STATE_IO_ERROR);
         bdrv_iostatus_set_err(s->qdev.conf.bs, error);
-
-        /* No need to save a reference, because scsi_dma_restart_bh just
-         * looks at the request list.  If a request is canceled, the
-         * retry request is just dropped.
-         */
+        scsi_req_retry(&r->req);
     } else {
         switch (error) {
         case ENOMEDIUM:
@@ -290,7 +274,7 @@ static void scsi_write_complete(void * opaque, int ret)
     }
 
     if (ret) {
-        if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_WRITE)) {
+        if (scsi_handle_rw_error(r, -ret)) {
             goto done;
         }
     }
@@ -347,51 +331,6 @@ static void scsi_write_data(SCSIRequest *req)
     }
 }
 
-static void scsi_dma_restart_bh(void *opaque)
-{
-    SCSIDiskState *s = opaque;
-    SCSIRequest *req;
-    SCSIDiskReq *r;
-
-    qemu_bh_delete(s->bh);
-    s->bh = NULL;
-
-    QTAILQ_FOREACH(req, &s->qdev.requests, next) {
-        r = DO_UPCAST(SCSIDiskReq, req, req);
-        if (r->status & SCSI_REQ_STATUS_RETRY) {
-            int status = r->status;
-
-            r->status &=
-                ~(SCSI_REQ_STATUS_RETRY | SCSI_REQ_STATUS_RETRY_TYPE_MASK);
-
-            switch (status & SCSI_REQ_STATUS_RETRY_TYPE_MASK) {
-            case SCSI_REQ_STATUS_RETRY_READ:
-                scsi_read_data(&r->req);
-                break;
-            case SCSI_REQ_STATUS_RETRY_WRITE:
-                scsi_write_data(&r->req);
-                break;
-            case SCSI_REQ_STATUS_RETRY_FLUSH:
-                scsi_send_command(&r->req, r->req.cmd.buf);
-                break;
-            }
-        }
-    }
-}
-
-static void scsi_dma_restart_cb(void *opaque, int running, RunState state)
-{
-    SCSIDiskState *s = opaque;
-
-    if (!running) {
-        return;
-    }
-    if (!s->bh) {
-        s->bh = qemu_bh_new(scsi_dma_restart_bh, s);
-        qemu_bh_schedule(s->bh);
-    }
-}
-
 /* Return a pointer to the data buffer.  */
 static uint8_t *scsi_get_buf(SCSIRequest *req)
 {
@@ -1591,7 +1530,6 @@ static int scsi_initfn(SCSIDevice *dev)
     }
     bdrv_set_buffer_alignment(s->qdev.conf.bs, s->qdev.blocksize);
 
-    qemu_add_vm_change_state_handler(scsi_dma_restart_cb, s);
     bdrv_iostatus_enable(s->qdev.conf.bs);
     add_boot_device_path(s->qdev.conf.bootindex, &dev->qdev, ",0");
     return 0;
diff --git a/hw/scsi.h b/hw/scsi.h
index fcc345581d..ff8fdd0962 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -3,6 +3,7 @@
 
 #include "qdev.h"
 #include "block.h"
+#include "sysemu.h"
 
 #define MAX_SCSI_DEVS	255
 
@@ -52,6 +53,7 @@ struct SCSIRequest {
     uint32_t sense_len;
     bool enqueued;
     bool io_canceled;
+    bool retry;
     void *hba_private;
     QTAILQ_ENTRY(SCSIRequest) next;
 };
@@ -59,6 +61,8 @@ struct SCSIRequest {
 struct SCSIDevice
 {
     DeviceState qdev;
+    VMChangeStateEntry *vmsentry;
+    QEMUBH *bh;
     uint32_t id;
     BlockConf conf;
     SCSIDeviceInfo *info;
@@ -194,6 +198,7 @@ uint8_t *scsi_req_get_buf(SCSIRequest *req);
 int scsi_req_get_sense(SCSIRequest *req, uint8_t *buf, int len);
 void scsi_req_abort(SCSIRequest *req, int status);
 void scsi_req_cancel(SCSIRequest *req);
+void scsi_req_retry(SCSIRequest *req);
 void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense);
 int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed);
 SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int target, int lun);

From 336a6915bc7089fb20fea4ba99972ad9a97c5f52 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2011 12:53:37 +0200
Subject: [PATCH 55/55] scsi-disk: add scsi-block for device passthrough

scsi-block is a new device that supports device passthrough of Linux
block devices (i.e. /dev/sda, not /dev/sg0).  It uses SG_IO for commands
other than I/O commands, and regular AIO read/writes for I/O commands.
Besides being simpler to configure (no mapping required to scsi-generic
device names), this removes the need for a large bounce buffer and,
in the future, will get scatter/gather support for free from scsi-disk.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi-disk.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 352d41fc2b..1c04872af7 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -39,6 +39,10 @@ do { fprintf(stderr, "scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
 #include "blockdev.h"
 #include "block_int.h"
 
+#ifdef __linux
+#include <scsi/sg.h>
+#endif
+
 #define SCSI_DMA_BUF_SIZE    131072
 #define SCSI_MAX_INQUIRY_LEN 256
 
@@ -1588,6 +1592,105 @@ static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
     return req;
 }
 
+#ifdef __linux__
+static int get_device_type(SCSIDiskState *s)
+{
+    BlockDriverState *bdrv = s->qdev.conf.bs;
+    uint8_t cmd[16];
+    uint8_t buf[36];
+    uint8_t sensebuf[8];
+    sg_io_hdr_t io_header;
+    int ret;
+
+    memset(cmd, 0, sizeof(cmd));
+    memset(buf, 0, sizeof(buf));
+    cmd[0] = INQUIRY;
+    cmd[4] = sizeof(buf);
+
+    memset(&io_header, 0, sizeof(io_header));
+    io_header.interface_id = 'S';
+    io_header.dxfer_direction = SG_DXFER_FROM_DEV;
+    io_header.dxfer_len = sizeof(buf);
+    io_header.dxferp = buf;
+    io_header.cmdp = cmd;
+    io_header.cmd_len = sizeof(cmd);
+    io_header.mx_sb_len = sizeof(sensebuf);
+    io_header.sbp = sensebuf;
+    io_header.timeout = 6000; /* XXX */
+
+    ret = bdrv_ioctl(bdrv, SG_IO, &io_header);
+    if (ret < 0 || io_header.driver_status || io_header.host_status) {
+        return -1;
+    }
+    s->qdev.type = buf[0];
+    s->removable = (buf[1] & 0x80) != 0;
+    return 0;
+}
+
+static int scsi_block_initfn(SCSIDevice *dev)
+{
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
+    int sg_version;
+    int rc;
+
+    if (!s->qdev.conf.bs) {
+        error_report("scsi-block: drive property not set");
+        return -1;
+    }
+
+    /* check we are using a driver managing SG_IO (version 3 and after) */
+    if (bdrv_ioctl(s->qdev.conf.bs, SG_GET_VERSION_NUM, &sg_version) < 0 ||
+        sg_version < 30000) {
+        error_report("scsi-block: scsi generic interface too old");
+        return -1;
+    }
+
+    /* get device type from INQUIRY data */
+    rc = get_device_type(s);
+    if (rc < 0) {
+        error_report("scsi-block: INQUIRY failed");
+        return -1;
+    }
+
+    /* Make a guess for the block size, we'll fix it when the guest sends.
+     * READ CAPACITY.  If they don't, they likely would assume these sizes
+     * anyway. (TODO: check in /sys).
+     */
+    if (s->qdev.type == TYPE_ROM || s->qdev.type == TYPE_WORM) {
+        s->qdev.blocksize = 2048;
+    } else {
+        s->qdev.blocksize = 512;
+    }
+    return scsi_initfn(&s->qdev);
+}
+
+static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
+                                           uint32_t lun, uint8_t *buf,
+                                           void *hba_private)
+{
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
+
+    switch (buf[0]) {
+    case READ_6:
+    case READ_10:
+    case READ_12:
+    case READ_16:
+    case WRITE_6:
+    case WRITE_10:
+    case WRITE_12:
+    case WRITE_16:
+    case WRITE_VERIFY_10:
+    case WRITE_VERIFY_12:
+    case WRITE_VERIFY_16:
+        return scsi_req_alloc(&scsi_disk_reqops, &s->qdev, tag, lun,
+                              hba_private);
+    }
+
+    return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun,
+                          hba_private);
+}
+#endif
+
 #define DEFINE_SCSI_DISK_PROPERTIES()                           \
     DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf),          \
     DEFINE_PROP_STRING("ver",  SCSIDiskState, version),         \
@@ -1623,6 +1726,21 @@ static SCSIDeviceInfo scsi_disk_info[] = {
             DEFINE_SCSI_DISK_PROPERTIES(),
             DEFINE_PROP_END_OF_LIST(),
         },
+#ifdef __linux__
+    },{
+        .qdev.name    = "scsi-block",
+        .qdev.fw_name = "disk",
+        .qdev.desc    = "SCSI block device passthrough",
+        .qdev.size    = sizeof(SCSIDiskState),
+        .qdev.reset   = scsi_disk_reset,
+        .init         = scsi_block_initfn,
+        .destroy      = scsi_destroy,
+        .alloc_req    = scsi_block_new_request,
+        .qdev.props   = (Property[]) {
+            DEFINE_SCSI_DISK_PROPERTIES(),
+            DEFINE_PROP_END_OF_LIST(),
+        },
+#endif
     },{
         .qdev.name    = "scsi-disk", /* legacy -device scsi-disk */
         .qdev.fw_name = "disk",