2017-05-17 08:48:07 +08:00
|
|
|
/*
|
|
|
|
* vfio based subchannel assignment support
|
|
|
|
*
|
|
|
|
* Copyright 2017 IBM Corp.
|
2019-05-07 23:47:33 +08:00
|
|
|
* Copyright 2019 Red Hat, Inc.
|
|
|
|
*
|
2017-05-17 08:48:07 +08:00
|
|
|
* Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
|
|
|
|
* Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
|
|
|
|
* Pierre Morel <pmorel@linux.vnet.ibm.com>
|
2019-05-07 23:47:33 +08:00
|
|
|
* Cornelia Huck <cohuck@redhat.com>
|
2017-05-17 08:48:07 +08:00
|
|
|
*
|
2018-02-28 01:25:41 +08:00
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or (at
|
|
|
|
* your option) any later version. See the COPYING file in the top-level
|
2017-05-17 08:48:07 +08:00
|
|
|
* directory.
|
|
|
|
*/
|
|
|
|
|
2017-10-18 00:43:53 +08:00
|
|
|
#include "qemu/osdep.h"
|
2017-05-17 08:48:07 +08:00
|
|
|
#include <linux/vfio.h>
|
2017-05-17 08:48:08 +08:00
|
|
|
#include <linux/vfio_ccw.h>
|
2017-05-17 08:48:07 +08:00
|
|
|
#include <sys/ioctl.h>
|
|
|
|
|
|
|
|
#include "qapi/error.h"
|
|
|
|
#include "hw/sysbus.h"
|
|
|
|
#include "hw/vfio/vfio.h"
|
|
|
|
#include "hw/vfio/vfio-common.h"
|
|
|
|
#include "hw/s390x/s390-ccw.h"
|
2019-04-04 22:34:20 +08:00
|
|
|
#include "hw/s390x/vfio-ccw.h"
|
2019-08-12 13:23:51 +08:00
|
|
|
#include "hw/qdev-properties.h"
|
2017-05-17 08:48:07 +08:00
|
|
|
#include "hw/s390x/ccw-device.h"
|
2018-05-29 07:26:59 +08:00
|
|
|
#include "exec/address-spaces.h"
|
2017-05-17 08:48:09 +08:00
|
|
|
#include "qemu/error-report.h"
|
Include qemu/main-loop.h less
In my "build everything" tree, changing qemu/main-loop.h triggers a
recompile of some 5600 out of 6600 objects (not counting tests and
objects that don't depend on qemu/osdep.h). It includes block/aio.h,
which in turn includes qemu/event_notifier.h, qemu/notify.h,
qemu/processor.h, qemu/qsp.h, qemu/queue.h, qemu/thread-posix.h,
qemu/thread.h, qemu/timer.h, and a few more.
Include qemu/main-loop.h only where it's needed. Touching it now
recompiles only some 1700 objects. For block/aio.h and
qemu/event_notifier.h, these numbers drop from 5600 to 2800. For the
others, they shrink only slightly.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20190812052359.30071-21-armbru@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2019-08-12 13:23:50 +08:00
|
|
|
#include "qemu/main-loop.h"
|
2019-05-23 22:35:07 +08:00
|
|
|
#include "qemu/module.h"
|
2017-05-17 08:48:07 +08:00
|
|
|
|
2019-04-04 22:34:20 +08:00
|
|
|
struct VFIOCCWDevice {
|
2017-05-17 08:48:07 +08:00
|
|
|
S390CCWDevice cdev;
|
|
|
|
VFIODevice vdev;
|
2017-05-17 08:48:08 +08:00
|
|
|
uint64_t io_region_size;
|
|
|
|
uint64_t io_region_offset;
|
|
|
|
struct ccw_io_region *io_region;
|
2019-05-07 23:47:33 +08:00
|
|
|
uint64_t async_cmd_region_size;
|
|
|
|
uint64_t async_cmd_region_offset;
|
|
|
|
struct ccw_cmd_region *async_cmd_region;
|
2017-05-17 08:48:09 +08:00
|
|
|
EventNotifier io_notifier;
|
2018-05-25 01:58:27 +08:00
|
|
|
bool force_orb_pfch;
|
|
|
|
bool warned_orb_pfch;
|
2019-04-04 22:34:20 +08:00
|
|
|
};
|
2017-05-17 08:48:07 +08:00
|
|
|
|
2018-05-25 01:58:27 +08:00
|
|
|
/*
 * Report a PFCH-related warning @msg for subchannel @sch, prefixed with
 * the subchannel's cssid.ssid.devno.  vcdev->warned_orb_pfch is passed to
 * warn_report_once_cond() as its condition flag.
 */
static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch,
                                  const char *msg)
{
    bool *warned = &vcdev->warned_orb_pfch;

    warn_report_once_cond(warned, "vfio-ccw (devno %x.%x.%04x): %s",
                          sch->cssid, sch->ssid, sch->devno, msg);
}
|
|
|
|
|
2017-05-17 08:48:07 +08:00
|
|
|
/*
 * VFIODeviceOps.vfio_compute_needs_reset callback: unconditionally marks
 * @vdev as not needing a reset.
 */
static void vfio_ccw_compute_needs_reset(VFIODevice *vdev)
{
    vdev->needs_reset = false;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't need vfio_hot_reset_multi and vfio_eoi operations for
|
|
|
|
* vfio_ccw device now.
|
|
|
|
*/
|
|
|
|
struct VFIODeviceOps vfio_ccw_ops = {
|
|
|
|
.vfio_compute_needs_reset = vfio_ccw_compute_needs_reset,
|
|
|
|
};
|
|
|
|
|
2017-10-17 22:04:49 +08:00
|
|
|
/*
 * Pass the start request held in @sch (its ORB and current SCSW) to the
 * host by writing the I/O region to the vfio device fd.
 *
 * The outcome is taken from region->ret_code after a complete write, or
 * from the write failure itself:
 *   0                -> IOINST_CC_EXPECTED
 *   -EBUSY           -> IOINST_CC_BUSY
 *   -ENODEV/-EACCES  -> IOINST_CC_NOT_OPERATIONAL
 *   anything else    -> a unit exception is generated and an I/O
 *                       interrupt injected; IOINST_CC_EXPECTED is returned
 */
static IOInstEnding vfio_ccw_handle_request(SubchDev *sch)
{
    S390CCWDevice *cdev = sch->driver_data;
    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
    struct ccw_io_region *region = vcdev->io_region;
    ssize_t written;
    int ret;

    if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH) && vcdev->force_orb_pfch) {
        sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH;
        warn_once_pfch(vcdev, sch, "PFCH flag forced");
    }

    QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB));
    QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW));
    QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB));

    memset(region, 0, sizeof(*region));

    memcpy(region->orb_area, &sch->orb, sizeof(ORB));
    memcpy(region->scsw_area, &sch->curr_status.scsw, sizeof(SCSW));

    /* Retry only when the write itself failed with EAGAIN. */
    do {
        written = pwrite(vcdev->vdev.fd, region,
                         vcdev->io_region_size, vcdev->io_region_offset);
    } while (written < 0 && errno == EAGAIN);

    if (written < 0) {
        /* Save errno first: error_report() may overwrite it. */
        int saved_errno = errno;

        error_report("vfio-ccw: write I/O region failed with errno=%d",
                     saved_errno);
        ret = saved_errno ? -saved_errno : -EFAULT;
    } else if ((uint64_t)written != vcdev->io_region_size) {
        /*
         * Short write: errno was not set by pwrite(), so it must not be
         * consulted.  Never let this case look like success.
         */
        error_report("vfio-ccw: short write to I/O region (%zd of %" PRIu64
                     " bytes)", written, vcdev->io_region_size);
        ret = -EFAULT;
    } else {
        ret = region->ret_code;
    }

    switch (ret) {
    case 0:
        return IOINST_CC_EXPECTED;
    case -EBUSY:
        return IOINST_CC_BUSY;
    case -ENODEV:
    case -EACCES:
        return IOINST_CC_NOT_OPERATIONAL;
    case -EFAULT:
    default:
        sch_gen_unit_exception(sch);
        css_inject_io_interrupt(sch);
        return IOINST_CC_EXPECTED;
    }
}
|
|
|
|
|
2019-05-07 23:47:33 +08:00
|
|
|
/*
 * Forward a clear request for @sch to the host by writing a
 * VFIO_CCW_ASYNC_CMD_CSCH command to the async command region.
 *
 * Returns -ENOSYS when no async command region is available (the caller
 * is expected to fall back to emulation), 0 otherwise.  A result of 0,
 * -ENODEV or -EACCES is returned as-is; any other outcome additionally
 * generates a unit exception and injects an I/O interrupt first.
 */
static int vfio_ccw_handle_clear(SubchDev *sch)
{
    S390CCWDevice *cdev = sch->driver_data;
    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
    struct ccw_cmd_region *region = vcdev->async_cmd_region;
    ssize_t written;
    int ret;

    if (!vcdev->async_cmd_region) {
        /* Async command region not available, fall back to emulation */
        return -ENOSYS;
    }

    memset(region, 0, sizeof(*region));
    region->command = VFIO_CCW_ASYNC_CMD_CSCH;

    /* Retry only when the write itself failed with EAGAIN. */
    do {
        written = pwrite(vcdev->vdev.fd, region,
                         vcdev->async_cmd_region_size,
                         vcdev->async_cmd_region_offset);
    } while (written < 0 && errno == EAGAIN);

    if (written < 0) {
        /* Save errno first: error_report() may overwrite it. */
        int saved_errno = errno;

        error_report("vfio-ccw: write cmd region failed with errno=%d",
                     saved_errno);
        ret = saved_errno ? -saved_errno : -EFAULT;
    } else if ((uint64_t)written != vcdev->async_cmd_region_size) {
        /* Short write: errno is stale here, do not treat it as success. */
        error_report("vfio-ccw: short write to cmd region (%zd of %" PRIu64
                     " bytes)", written, vcdev->async_cmd_region_size);
        ret = -EFAULT;
    } else {
        ret = region->ret_code;
    }

    switch (ret) {
    case 0:
    case -ENODEV:
    case -EACCES:
        return 0;
    case -EFAULT:
    default:
        sch_gen_unit_exception(sch);
        css_inject_io_interrupt(sch);
        return 0;
    }
}
|
|
|
|
|
|
|
|
/*
 * Forward a halt request for @sch to the host by writing a
 * VFIO_CCW_ASYNC_CMD_HSCH command to the async command region.
 *
 * Returns -ENOSYS when no async command region is available (the caller
 * is expected to fall back to emulation), 0 otherwise.  A result of 0,
 * -EBUSY, -ENODEV or -EACCES is returned as-is; any other outcome
 * additionally generates a unit exception and injects an I/O interrupt
 * first.
 */
static int vfio_ccw_handle_halt(SubchDev *sch)
{
    S390CCWDevice *cdev = sch->driver_data;
    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
    struct ccw_cmd_region *region = vcdev->async_cmd_region;
    ssize_t written;
    int ret;

    if (!vcdev->async_cmd_region) {
        /* Async command region not available, fall back to emulation */
        return -ENOSYS;
    }

    memset(region, 0, sizeof(*region));
    region->command = VFIO_CCW_ASYNC_CMD_HSCH;

    /* Retry only when the write itself failed with EAGAIN. */
    do {
        written = pwrite(vcdev->vdev.fd, region,
                         vcdev->async_cmd_region_size,
                         vcdev->async_cmd_region_offset);
    } while (written < 0 && errno == EAGAIN);

    if (written < 0) {
        /* Save errno first: error_report() may overwrite it. */
        int saved_errno = errno;

        error_report("vfio-ccw: write cmd region failed with errno=%d",
                     saved_errno);
        ret = saved_errno ? -saved_errno : -EFAULT;
    } else if ((uint64_t)written != vcdev->async_cmd_region_size) {
        /* Short write: errno is stale here, do not treat it as success. */
        error_report("vfio-ccw: short write to cmd region (%zd of %" PRIu64
                     " bytes)", written, vcdev->async_cmd_region_size);
        ret = -EFAULT;
    } else {
        ret = region->ret_code;
    }

    switch (ret) {
    case 0:
    case -EBUSY:
    case -ENODEV:
    case -EACCES:
        return 0;
    case -EFAULT:
    default:
        sch_gen_unit_exception(sch);
        css_inject_io_interrupt(sch);
        return 0;
    }
}
|
|
|
|
|
2017-05-17 08:48:07 +08:00
|
|
|
/*
 * DeviceClass reset handler: issue VFIO_DEVICE_RESET on the vfio device
 * fd belonging to @dev.  The ioctl's return value is not checked.
 */
static void vfio_ccw_reset(DeviceState *dev)
{
    CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
    S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
    int device_fd = vcdev->vdev.fd;

    ioctl(device_fd, VFIO_DEVICE_RESET);
}
|
|
|
|
|
2017-05-17 08:48:09 +08:00
|
|
|
static void vfio_ccw_io_notifier_handler(void *opaque)
|
|
|
|
{
|
|
|
|
VFIOCCWDevice *vcdev = opaque;
|
2017-05-17 08:48:10 +08:00
|
|
|
struct ccw_io_region *region = vcdev->io_region;
|
|
|
|
S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev);
|
|
|
|
CcwDevice *ccw_dev = CCW_DEVICE(cdev);
|
|
|
|
SubchDev *sch = ccw_dev->sch;
|
2019-03-29 19:11:01 +08:00
|
|
|
SCHIB *schib = &sch->curr_status;
|
|
|
|
SCSW s;
|
2017-05-17 08:48:10 +08:00
|
|
|
IRB irb;
|
|
|
|
int size;
|
2017-05-17 08:48:09 +08:00
|
|
|
|
|
|
|
if (!event_notifier_test_and_clear(&vcdev->io_notifier)) {
|
|
|
|
return;
|
|
|
|
}
|
2017-05-17 08:48:10 +08:00
|
|
|
|
|
|
|
size = pread(vcdev->vdev.fd, region, vcdev->io_region_size,
|
|
|
|
vcdev->io_region_offset);
|
|
|
|
if (size == -1) {
|
|
|
|
switch (errno) {
|
|
|
|
case ENODEV:
|
|
|
|
/* Generate a deferred cc 3 condition. */
|
2019-03-29 19:11:01 +08:00
|
|
|
schib->scsw.flags |= SCSW_FLAGS_MASK_CC;
|
|
|
|
schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL;
|
|
|
|
schib->scsw.ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND);
|
2017-05-17 08:48:10 +08:00
|
|
|
goto read_err;
|
|
|
|
case EFAULT:
|
|
|
|
/* Memory problem, generate channel data check. */
|
2019-03-29 19:11:01 +08:00
|
|
|
schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND;
|
|
|
|
schib->scsw.cstat = SCSW_CSTAT_DATA_CHECK;
|
|
|
|
schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL;
|
|
|
|
schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
|
2017-05-17 08:48:10 +08:00
|
|
|
SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
|
|
|
|
goto read_err;
|
|
|
|
default:
|
|
|
|
/* Error, generate channel program check. */
|
2019-03-29 19:11:01 +08:00
|
|
|
schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND;
|
|
|
|
schib->scsw.cstat = SCSW_CSTAT_PROG_CHECK;
|
|
|
|
schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL;
|
|
|
|
schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
|
2017-05-17 08:48:10 +08:00
|
|
|
SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
|
|
|
|
goto read_err;
|
|
|
|
}
|
|
|
|
} else if (size != vcdev->io_region_size) {
|
|
|
|
/* Information transfer error, generate channel-control check. */
|
2019-03-29 19:11:01 +08:00
|
|
|
schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND;
|
|
|
|
schib->scsw.cstat = SCSW_CSTAT_CHN_CTRL_CHK;
|
|
|
|
schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL;
|
|
|
|
schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
|
2017-05-17 08:48:10 +08:00
|
|
|
SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
|
|
|
|
goto read_err;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(&irb, region->irb_area, sizeof(IRB));
|
|
|
|
|
|
|
|
/* Update control block via irb. */
|
2019-03-29 19:11:01 +08:00
|
|
|
s = schib->scsw;
|
|
|
|
copy_scsw_to_guest(&s, &irb.scsw);
|
|
|
|
schib->scsw = s;
|
2017-05-17 08:48:10 +08:00
|
|
|
|
2017-05-17 08:48:12 +08:00
|
|
|
/* If a uint check is pending, copy sense data. */
|
2019-03-29 19:11:01 +08:00
|
|
|
if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) &&
|
|
|
|
(schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) {
|
2017-05-17 08:48:12 +08:00
|
|
|
memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw));
|
|
|
|
}
|
|
|
|
|
2017-05-17 08:48:10 +08:00
|
|
|
read_err:
|
|
|
|
css_inject_io_interrupt(sch);
|
2017-05-17 08:48:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Set up the I/O notifier of @vcdev: query the I/O irq, create the event
 * notifier, install vfio_ccw_io_notifier_handler() on its fd and hand the
 * fd to vfio as the trigger for VFIO_CCW_IO_IRQ_INDEX.
 *
 * On failure @errp is set and nothing stays registered: the fd handler is
 * removed and the notifier cleaned up again if enabling the signaling
 * fails.
 */
static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
{
    VFIODevice *vdev = &vcdev->vdev;
    /* Fixed-size query structure; all other fields start out zeroed. */
    struct vfio_irq_info irq_info = {
        .argsz = sizeof(irq_info),
        .index = VFIO_CCW_IO_IRQ_INDEX,
    };
    int fd;

    if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) {
        error_setg(errp, "vfio: unexpected number of io irqs %u",
                   vdev->num_irqs);
        return;
    }

    if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) {
        error_setg_errno(errp, errno, "vfio: Error getting irq info");
        return;
    }

    /*
     * The ioctl succeeded, so errno carries no information about this
     * failure; report it without an errno suffix.
     */
    if (irq_info.count < 1) {
        error_setg(errp, "vfio: Error getting irq info: no io irq available");
        return;
    }

    if (event_notifier_init(&vcdev->io_notifier, 0)) {
        error_setg_errno(errp, errno,
                         "vfio: Unable to init event notifier for IO");
        return;
    }

    fd = event_notifier_get_fd(&vcdev->io_notifier);
    qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev);

    if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0,
                               VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
        /* Undo the handler installation and the notifier setup. */
        qemu_set_fd_handler(fd, NULL, NULL, vcdev);
        event_notifier_cleanup(&vcdev->io_notifier);
    }
}
|
|
|
|
|
|
|
|
/*
 * Tear down the I/O notifier of @vcdev: pass fd -1 as the trigger for
 * VFIO_CCW_IO_IRQ_INDEX (a failure is only reported, not propagated),
 * then remove the fd handler and clean up the event notifier.
 */
static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev)
{
    EventNotifier *notifier = &vcdev->io_notifier;
    Error *local_err = NULL;
    int rc;

    rc = vfio_set_irq_signaling(&vcdev->vdev, VFIO_CCW_IO_IRQ_INDEX, 0,
                                VFIO_IRQ_SET_ACTION_TRIGGER, -1, &local_err);
    if (rc) {
        error_reportf_err(local_err, VFIO_MSG_PREFIX, vcdev->vdev.name);
    }

    qemu_set_fd_handler(event_notifier_get_fd(notifier), NULL, NULL, vcdev);
    event_notifier_cleanup(notifier);
}
|
|
|
|
|
2017-05-17 08:48:08 +08:00
|
|
|
/*
 * Look up the regions of @vcdev and allocate the local buffers for them.
 *
 * The I/O region (VFIO_CCW_CONFIG_REGION_INDEX) is mandatory; the async
 * command region is optional, and vcdev->async_cmd_region is simply not
 * allocated when the lookup for it fails.  The reported size of each
 * region must match the size of the corresponding structure.
 *
 * On failure @errp is set, any buffer allocated here is freed again and
 * the buffer pointers are reset to NULL.
 */
static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
{
    VFIODevice *vdev = &vcdev->vdev;
    struct vfio_region_info *info = NULL;
    int ret;

    /* Sanity check device */
    if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) {
        error_setg(errp, "vfio: Um, this isn't a vfio-ccw device");
        return;
    }

    /*
     * We always expect at least the I/O region to be present. We also
     * may have a variable number of regions governed by capabilities.
     */
    if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) {
        error_setg(errp, "vfio: too few regions (%u), expected at least %u",
                   vdev->num_regions, VFIO_CCW_CONFIG_REGION_INDEX + 1);
        return;
    }

    ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info);
    if (ret) {
        error_setg_errno(errp, -ret, "vfio: Error getting config info");
        return;
    }

    vcdev->io_region_size = info->size;
    if (sizeof(*vcdev->io_region) != vcdev->io_region_size) {
        error_setg(errp, "vfio: Unexpected size of the I/O region");
        goto out_err;
    }

    vcdev->io_region_offset = info->offset;
    vcdev->io_region = g_malloc0(info->size);

    /*
     * Done with the I/O region info.  Release it before the pointer is
     * reused for the next lookup, otherwise this structure is leaked.
     */
    g_free(info);
    info = NULL;

    /* check for the optional async command region */
    ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW,
                                   VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD, &info);
    if (!ret) {
        vcdev->async_cmd_region_size = info->size;
        if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) {
            error_setg(errp, "vfio: Unexpected size of the async cmd region");
            goto out_err;
        }
        vcdev->async_cmd_region_offset = info->offset;
        vcdev->async_cmd_region = g_malloc0(info->size);
    }

    g_free(info);
    return;

out_err:
    /* Do not leave dangling pointers behind in the device state. */
    g_free(vcdev->async_cmd_region);
    vcdev->async_cmd_region = NULL;
    g_free(vcdev->io_region);
    vcdev->io_region = NULL;
    g_free(info);
}
|
|
|
|
|
|
|
|
/*
 * Free the region buffers of @vcdev.
 *
 * The pointers are reset to NULL afterwards so that no dangling pointer
 * remains in the device state; in particular the halt/clear handlers use
 * a NULL async_cmd_region to detect that the region is unavailable.
 */
static void vfio_ccw_put_region(VFIOCCWDevice *vcdev)
{
    g_free(vcdev->async_cmd_region);
    vcdev->async_cmd_region = NULL;
    g_free(vcdev->io_region);
    vcdev->io_region = NULL;
}
|
|
|
|
|
2018-04-09 18:15:10 +08:00
|
|
|
/*
 * Release the vfio base device of @vcdev: free the name string set up by
 * vfio_ccw_get_device(), then put the base device.
 */
static void vfio_ccw_put_device(VFIOCCWDevice *vcdev)
{
    VFIODevice *vbasedev = &vcdev->vdev;

    g_free(vbasedev->name);
    vfio_put_base_device(vbasedev);
}
|
|
|
|
|
2018-04-09 18:15:10 +08:00
|
|
|
static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
char *name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid,
|
|
|
|
vcdev->cdev.hostid.ssid,
|
|
|
|
vcdev->cdev.hostid.devid);
|
|
|
|
VFIODevice *vbasedev;
|
|
|
|
|
|
|
|
QLIST_FOREACH(vbasedev, &group->device_list, next) {
|
|
|
|
if (strcmp(vbasedev->name, name) == 0) {
|
|
|
|
error_setg(errp, "vfio: subchannel %s has already been attached",
|
|
|
|
name);
|
|
|
|
goto out_err;
|
|
|
|
}
|
|
|
|
}
|
2018-08-17 23:27:16 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* All vfio-ccw devices are believed to operate in a way compatible with
|
|
|
|
* memory ballooning, ie. pages pinned in the host are in the current
|
|
|
|
* working set of the guest driver and therefore never overlap with pages
|
|
|
|
* available to the guest balloon driver. This needs to be set before
|
|
|
|
* vfio_get_device() for vfio common to handle the balloon inhibitor.
|
|
|
|
*/
|
|
|
|
vcdev->vdev.balloon_allowed = true;
|
2018-04-09 18:15:10 +08:00
|
|
|
|
|
|
|
if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) {
|
|
|
|
goto out_err;
|
|
|
|
}
|
|
|
|
|
|
|
|
vcdev->vdev.ops = &vfio_ccw_ops;
|
|
|
|
vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW;
|
|
|
|
vcdev->vdev.name = name;
|
|
|
|
vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj;
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
out_err:
|
|
|
|
g_free(name);
|
|
|
|
}
|
|
|
|
|
2017-05-17 08:48:07 +08:00
|
|
|
/*
 * Find the vfio group of @cdev.
 *
 * The group id is the numeric last path component of the target of the
 * device's sysfs "iommu_group" symlink.  Returns the result of
 * vfio_get_group() for that id, or NULL with @errp set when the link
 * cannot be read or its last component is not a number.
 */
static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp)
{
    char group_path[PATH_MAX];
    char *link_name;
    ssize_t nbytes;
    int groupid;

    link_name = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group",
                                cdev->hostid.cssid, cdev->hostid.ssid,
                                cdev->hostid.devid, cdev->mdevid);
    nbytes = readlink(link_name, group_path, sizeof(group_path));
    g_free(link_name);

    /* A target filling the whole buffer leaves no room for the NUL. */
    if (nbytes <= 0 || nbytes >= sizeof(group_path)) {
        error_setg(errp, "vfio: no iommu_group found");
        return NULL;
    }

    /* readlink() does not terminate the string itself. */
    group_path[nbytes] = 0;

    if (sscanf(basename(group_path), "%d", &groupid) != 1) {
        error_setg(errp, "vfio: failed to read %s", group_path);
        return NULL;
    }

    return vfio_get_group(groupid, &address_space_memory, errp);
}
|
|
|
|
|
|
|
|
/*
 * DeviceClass realize handler.
 *
 * Runs the class realize hook (if any), then acquires, in order, the vfio
 * group, the vfio device, the regions and the I/O notifier.  When a step
 * fails, everything acquired before it is released in reverse order and
 * the error is propagated to @errp.
 */
static void vfio_ccw_realize(DeviceState *dev, Error **errp)
{
    CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
    S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
    S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
    Error *local_err = NULL;
    VFIOGroup *group;

    /* Call the class init function for subchannel. */
    if (cdc->realize) {
        cdc->realize(cdev, vcdev->vdev.sysfsdev, &local_err);
        if (local_err) {
            goto propagate;
        }
    }

    group = vfio_ccw_get_group(cdev, &local_err);
    if (!group) {
        goto undo_class_realize;
    }

    vfio_ccw_get_device(group, vcdev, &local_err);
    if (local_err) {
        goto undo_group;
    }

    vfio_ccw_get_region(vcdev, &local_err);
    if (local_err) {
        goto undo_device;
    }

    vfio_ccw_register_io_notifier(vcdev, &local_err);
    if (local_err) {
        goto undo_region;
    }

    return;

undo_region:
    vfio_ccw_put_region(vcdev);
undo_device:
    vfio_ccw_put_device(vcdev);
undo_group:
    vfio_put_group(group);
undo_class_realize:
    if (cdc->unrealize) {
        cdc->unrealize(cdev);
    }
propagate:
    error_propagate(errp, local_err);
}
|
|
|
|
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize method no longer ignores such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 23:29:24 +08:00
|
|
|
/*
 * DeviceClass unrealize handler: release what vfio_ccw_realize() set up,
 * in reverse order (I/O notifier, regions, vfio device, vfio group), and
 * finally run the class unrealize hook if there is one.
 */
static void vfio_ccw_unrealize(DeviceState *dev)
{
    CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
    S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
    S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
    /* Fetched up front, before the device is put below. */
    VFIOGroup *group = vcdev->vdev.group;

    vfio_ccw_unregister_io_notifier(vcdev);
    vfio_ccw_put_region(vcdev);
    vfio_ccw_put_device(vcdev);
    vfio_put_group(group);

    if (!cdc->unrealize) {
        return;
    }
    cdc->unrealize(cdev);
}
|
|
|
|
|
|
|
|
static Property vfio_ccw_properties[] = {
|
|
|
|
DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev),
|
2018-05-25 01:58:27 +08:00
|
|
|
DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false),
|
2017-05-17 08:48:07 +08:00
|
|
|
DEFINE_PROP_END_OF_LIST(),
|
|
|
|
};
|
|
|
|
|
|
|
|
static const VMStateDescription vfio_ccw_vmstate = {
|
2019-05-21 23:15:41 +08:00
|
|
|
.name = "vfio-ccw",
|
2017-05-17 08:48:07 +08:00
|
|
|
.unmigratable = 1,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Class initializer: fills in the generic device class (description,
 * category, properties, vmstate, realize/unrealize/reset) and the
 * s390-ccw callbacks for start, halt and clear requests.
 */
static void vfio_ccw_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass);

    /* Generic device class setup. */
    dc->desc = "VFIO-based subchannel assignment";
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    device_class_set_props(dc, vfio_ccw_properties);
    dc->vmsd = &vfio_ccw_vmstate;

    /* Device lifecycle. */
    dc->realize = vfio_ccw_realize;
    dc->unrealize = vfio_ccw_unrealize;
    dc->reset = vfio_ccw_reset;

    /* Subchannel request handlers. */
    cdc->handle_request = vfio_ccw_handle_request;
    cdc->handle_halt = vfio_ccw_handle_halt;
    cdc->handle_clear = vfio_ccw_handle_clear;
}
|
|
|
|
|
|
|
|
static const TypeInfo vfio_ccw_info = {
|
|
|
|
.name = TYPE_VFIO_CCW,
|
|
|
|
.parent = TYPE_S390_CCW,
|
|
|
|
.instance_size = sizeof(VFIOCCWDevice),
|
|
|
|
.class_init = vfio_ccw_class_init,
|
|
|
|
};
|
|
|
|
|
|
|
|
static void register_vfio_ccw_type(void)
|
|
|
|
{
|
|
|
|
type_register_static(&vfio_ccw_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
type_init(register_vfio_ccw_type)
|