/*
 * common header for vfio based device assignment support
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Based on qemu-kvm device-assignment:
 *  Adapted for KVM by Qumranet.
 *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
 *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
 *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
 *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
 *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
 */
|
2016-06-29 21:29:06 +08:00
|
|
|
|
2014-12-23 00:54:51 +08:00
|
|
|
#ifndef HW_VFIO_VFIO_COMMON_H
|
|
|
|
#define HW_VFIO_VFIO_COMMON_H
|
|
|
|
|
|
|
|
#include "qemu-common.h"
|
|
|
|
#include "exec/address-spaces.h"
|
|
|
|
#include "exec/memory.h"
|
|
|
|
#include "qemu/queue.h"
|
|
|
|
#include "qemu/notify.h"
|
2016-03-11 00:39:07 +08:00
|
|
|
#ifdef CONFIG_LINUX
|
|
|
|
#include <linux/vfio.h>
|
|
|
|
#endif
|
2014-12-23 00:54:51 +08:00
|
|
|
|
|
|
|
/*#define DEBUG_VFIO*/
|
|
|
|
/*
 * Debug tracing.
 *
 * DPRINTF(fmt, ...) writes "vfio: " followed by the formatted message to
 * stderr when DEBUG_VFIO is defined, and produces no output otherwise.
 *
 * The call is always handed to the compiler, guarded by a constant
 * condition, instead of being preprocessed away.  That way the format
 * string and its arguments keep being type-checked in non-debug builds
 * and cannot silently bit-rot, while the arguments are still never
 * evaluated at run time when debugging is off.
 */
#ifdef DEBUG_VFIO
#define DEBUG_VFIO_ENABLED 1
#else
#define DEBUG_VFIO_ENABLED 0
#endif

#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_VFIO_ENABLED) { \
            fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); \
        } \
    } while (0)
|
|
|
|
|
|
|
|
/* Device type tags; the numeric values are spelled out explicitly. */
enum {
    VFIO_DEVICE_TYPE_PCI      = 0,  /* PCI device */
    VFIO_DEVICE_TYPE_PLATFORM = 1,  /* platform device */
};
|
|
|
|
|
2016-03-11 00:39:07 +08:00
|
|
|
typedef struct VFIOMmap {
|
|
|
|
MemoryRegion mem;
|
|
|
|
void *mmap;
|
|
|
|
off_t offset;
|
|
|
|
size_t size;
|
|
|
|
} VFIOMmap;
|
|
|
|
|
2014-12-23 00:54:51 +08:00
|
|
|
typedef struct VFIORegion {
|
|
|
|
struct VFIODevice *vbasedev;
|
|
|
|
off_t fd_offset; /* offset of region within device fd */
|
2016-03-11 00:39:07 +08:00
|
|
|
MemoryRegion *mem; /* slow, read/write access */
|
2014-12-23 00:54:51 +08:00
|
|
|
size_t size;
|
|
|
|
uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
|
2016-03-11 00:39:07 +08:00
|
|
|
uint32_t nr_mmaps;
|
|
|
|
VFIOMmap *mmaps;
|
2014-12-23 00:54:51 +08:00
|
|
|
uint8_t nr; /* cache the region number for debug */
|
|
|
|
} VFIORegion;
|
|
|
|
|
|
|
|
typedef struct VFIOAddressSpace {
|
|
|
|
AddressSpace *as;
|
|
|
|
QLIST_HEAD(, VFIOContainer) containers;
|
|
|
|
QLIST_ENTRY(VFIOAddressSpace) list;
|
|
|
|
} VFIOAddressSpace;
|
|
|
|
|
|
|
|
struct VFIOGroup;
|
|
|
|
|
|
|
|
typedef struct VFIOContainer {
|
|
|
|
VFIOAddressSpace *space;
|
|
|
|
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
|
2015-09-30 10:13:51 +08:00
|
|
|
MemoryListener listener;
|
2016-07-04 11:33:04 +08:00
|
|
|
MemoryListener prereg_listener;
|
|
|
|
unsigned iommu_type;
|
2015-09-30 10:13:51 +08:00
|
|
|
int error;
|
|
|
|
bool initialized;
|
vfio: Check guest IOVA ranges against host IOMMU capabilities
The current vfio core code assumes that the host IOMMU is capable of
mapping any IOVA the guest wants to use to where we need. However, real
IOMMUs generally only support translating a certain range of IOVAs (the
"DMA window") not a full 64-bit address space.
The common x86 IOMMUs support a wide enough range that guests are very
unlikely to go beyond it in practice, however the IOMMU used on IBM Power
machines - in the default configuration - supports only a much more limited
IOVA range, usually 0..2GiB.
If the guest attempts to set up an IOVA range that the host IOMMU can't
map, qemu won't report an error until it actually attempts to map a bad
IOVA. If guest RAM is being mapped directly into the IOMMU (i.e. no guest
visible IOMMU) then this will show up very quickly. If there is a guest
visible IOMMU, however, the problem might not show up until much later when
the guest actually attempt to DMA with an IOVA the host can't handle.
This patch adds a test so that we will detect earlier if the guest is
attempting to use IOVA ranges that the host IOMMU won't be able to deal
with.
For now, we assume that "Type1" (x86) IOMMUs can support any IOVA, this is
incorrect, but no worse than what we have already. We can't do better for
now because the Type1 kernel interface doesn't tell us what IOVA range the
IOMMU actually supports.
For the Power "sPAPR TCE" IOMMU, however, we can retrieve the supported
IOVA range and validate guest IOVA ranges against it, and this patch does
so.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2015-09-30 10:13:53 +08:00
|
|
|
/*
|
|
|
|
* This assumes the host IOMMU can support only a single
|
|
|
|
* contiguous IOVA window. We may need to generalize that in
|
|
|
|
* future
|
|
|
|
*/
|
2014-12-23 00:54:51 +08:00
|
|
|
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
2016-07-04 11:33:05 +08:00
|
|
|
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
2014-12-23 00:54:51 +08:00
|
|
|
QLIST_HEAD(, VFIOGroup) group_list;
|
|
|
|
QLIST_ENTRY(VFIOContainer) next;
|
|
|
|
} VFIOContainer;
|
|
|
|
|
|
|
|
typedef struct VFIOGuestIOMMU {
|
|
|
|
VFIOContainer *container;
|
|
|
|
MemoryRegion *iommu;
|
2016-05-26 23:43:23 +08:00
|
|
|
hwaddr iommu_offset;
|
2014-12-23 00:54:51 +08:00
|
|
|
Notifier n;
|
|
|
|
QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
|
|
|
|
} VFIOGuestIOMMU;
|
|
|
|
|
2016-07-04 11:33:05 +08:00
|
|
|
typedef struct VFIOHostDMAWindow {
|
|
|
|
hwaddr min_iova;
|
|
|
|
hwaddr max_iova;
|
|
|
|
uint64_t iova_pgsizes;
|
|
|
|
QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
|
|
|
|
} VFIOHostDMAWindow;
|
|
|
|
|
2014-12-23 00:54:51 +08:00
|
|
|
typedef struct VFIODeviceOps VFIODeviceOps;
|
|
|
|
|
|
|
|
typedef struct VFIODevice {
|
|
|
|
QLIST_ENTRY(VFIODevice) next;
|
|
|
|
struct VFIOGroup *group;
|
vfio: Add sysfsdev property for pci & platform
vfio-pci currently requires a host= parameter, which comes in the
form of a PCI address in [domain:]<bus:slot.function> notation. We
expect to find a matching entry in sysfs for that under
/sys/bus/pci/devices/. vfio-platform takes a similar approach, but
defines the host= parameter to be a string, which can be matched
directly under /sys/bus/platform/devices/. On the PCI side, we have
some interest in using vfio to expose vGPU devices. These are not
actual discrete PCI devices, so they don't have a compatible host PCI
bus address or a device link where QEMU wants to look for it. There's
also really no requirement that vfio can only be used to expose
physical devices, a new vfio bus and iommu driver could expose a
completely emulated device. To fit within the vfio framework, it
would need a kernel struct device and associated IOMMU group, but
those are easy constraints to manage.
To support such devices, which would include vGPUs, that honor the
VFIO PCI programming API, but are not necessarily backed by a unique
PCI address, add support for specifying any device in sysfs. The
vfio API already has support for probing the device type to ensure
compatibility with either vfio-pci or vfio-platform.
With this, a vfio-pci device could either be specified as:
-device vfio-pci,host=02:00.0
or
-device vfio-pci,sysfsdev=/sys/devices/pci0000:00/0000:00:1c.0/0000:02:00.0
or even
-device vfio-pci,sysfsdev=/sys/bus/pci/devices/0000:02:00.0
When vGPU support comes along, this might look something more like:
-device vfio-pci,sysfsdev=/sys/devices/virtual/intel-vgpu/vgpu0@0000:00:02.0
NB - This is only a made up example path
The same change is made for vfio-platform, specifying sysfsdev has
precedence over the old host option.
Tested-by: Eric Auger <eric.auger@linaro.org>
Reviewed-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2016-03-11 00:39:07 +08:00
|
|
|
char *sysfsdev;
|
2014-12-23 00:54:51 +08:00
|
|
|
char *name;
|
|
|
|
int fd;
|
|
|
|
int type;
|
|
|
|
bool reset_works;
|
|
|
|
bool needs_reset;
|
2015-09-24 03:04:44 +08:00
|
|
|
bool no_mmap;
|
2014-12-23 00:54:51 +08:00
|
|
|
VFIODeviceOps *ops;
|
|
|
|
unsigned int num_irqs;
|
|
|
|
unsigned int num_regions;
|
|
|
|
unsigned int flags;
|
|
|
|
} VFIODevice;
|
|
|
|
|
|
|
|
struct VFIODeviceOps {
|
|
|
|
void (*vfio_compute_needs_reset)(VFIODevice *vdev);
|
|
|
|
int (*vfio_hot_reset_multi)(VFIODevice *vdev);
|
|
|
|
void (*vfio_eoi)(VFIODevice *vdev);
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef struct VFIOGroup {
|
|
|
|
int fd;
|
|
|
|
int groupid;
|
|
|
|
VFIOContainer *container;
|
|
|
|
QLIST_HEAD(, VFIODevice) device_list;
|
|
|
|
QLIST_ENTRY(VFIOGroup) next;
|
|
|
|
QLIST_ENTRY(VFIOGroup) container_next;
|
|
|
|
} VFIOGroup;
|
|
|
|
|
|
|
|
void vfio_put_base_device(VFIODevice *vbasedev);
|
|
|
|
void vfio_disable_irqindex(VFIODevice *vbasedev, int index);
|
|
|
|
void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index);
|
|
|
|
void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index);
|
|
|
|
void vfio_region_write(void *opaque, hwaddr addr,
|
|
|
|
uint64_t data, unsigned size);
|
|
|
|
uint64_t vfio_region_read(void *opaque,
|
|
|
|
hwaddr addr, unsigned size);
|
2016-03-11 00:39:07 +08:00
|
|
|
/*
 * VFIORegion lifecycle.  Declarations only; the int-returning functions
 * presumably report failure through their return value - see the
 * definitions for the exact convention.
 */
int vfio_region_setup(Object *obj, VFIODevice *vbasedev,
                      VFIORegion *region, int index, const char *name);
int vfio_region_mmap(VFIORegion *region);
void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
void vfio_region_exit(VFIORegion *region);
void vfio_region_finalize(VFIORegion *region);
|
2014-12-23 00:54:51 +08:00
|
|
|
void vfio_reset_handler(void *opaque);
|
|
|
|
VFIOGroup *vfio_get_group(int groupid, AddressSpace *as);
|
|
|
|
void vfio_put_group(VFIOGroup *group);
|
|
|
|
int vfio_get_device(VFIOGroup *group, const char *name,
|
|
|
|
VFIODevice *vbasedev);
|
|
|
|
|
|
|
|
extern const MemoryRegionOps vfio_region_ops;
|
|
|
|
extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list;
|
|
|
|
extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces;
|
|
|
|
|
2016-03-11 00:39:07 +08:00
|
|
|
/*
 * Region info lookups.  struct vfio_region_info comes from <linux/vfio.h>,
 * which this header only includes under CONFIG_LINUX, so the prototypes
 * share that guard.  The result is handed back through *info.
 */
#ifdef CONFIG_LINUX
int vfio_get_region_info(VFIODevice *vbasedev, int index,
                         struct vfio_region_info **info);
int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
                             uint32_t subtype,
                             struct vfio_region_info **info);
#endif
|
2016-07-04 11:33:04 +08:00
|
|
|
/* Memory listener object defined elsewhere (cf. VFIOContainer.prereg_listener). */
extern const MemoryListener vfio_prereg_listener;
|
|
|
|
|
2016-07-04 11:33:06 +08:00
|
|
|
int vfio_spapr_create_window(VFIOContainer *container,
|
|
|
|
MemoryRegionSection *section,
|
|
|
|
hwaddr *pgsize);
|
|
|
|
int vfio_spapr_remove_window(VFIOContainer *container,
|
|
|
|
hwaddr offset_within_address_space);
|
|
|
|
|
2016-06-29 21:29:06 +08:00
|
|
|
#endif /* HW_VFIO_VFIO_COMMON_H */
|