2012-07-31 22:16:24 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
|
|
|
|
* Author: Alex Williamson <alex.williamson@redhat.com>
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* Derived from original vfio:
|
|
|
|
* Copyright 2010 Cisco Systems, Inc. All rights reserved.
|
|
|
|
* Author: Tom Lyon, pugs@cisco.com
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/mutex.h>
|
|
|
|
#include <linux/pci.h>
|
2015-09-18 22:29:50 +08:00
|
|
|
#include <linux/irqbypass.h>
|
2016-02-23 07:02:39 +08:00
|
|
|
#include <linux/types.h>
|
2012-07-31 22:16:24 +08:00
|
|
|
|
|
|
|
#ifndef VFIO_PCI_PRIVATE_H
|
|
|
|
#define VFIO_PCI_PRIVATE_H
|
|
|
|
|
|
|
|
/*
 * Offset encoding: bits at and above VFIO_PCI_OFFSET_SHIFT of a 64-bit
 * offset carry an index, the bits below it (VFIO_PCI_OFFSET_MASK) carry the
 * offset within that index.
 */
#define VFIO_PCI_OFFSET_SHIFT	40

/* Arguments are parenthesized so expression arguments (e.g. a | b) expand safely. */
#define VFIO_PCI_OFFSET_TO_INDEX(off)	((off) >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK	(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
|
|
|
|
|
2016-02-23 07:02:41 +08:00
|
|
|
/* Special capability IDs with predefined access */
#define PCI_CAP_ID_INVALID		0xFF	/* default raw access */
#define PCI_CAP_ID_INVALID_VIRT		0xFE	/* default virt access */
|
|
|
|
|
2018-03-22 02:46:21 +08:00
|
|
|
/* Upper bound on the number of ioeventfds per device (arbitrary value) */
#define VFIO_PCI_IOEVENTFD_MAX		1000
|
|
|
|
|
|
|
|
struct vfio_pci_ioeventfd {
|
|
|
|
struct list_head next;
|
|
|
|
struct virqfd *virqfd;
|
|
|
|
void __iomem *addr;
|
|
|
|
uint64_t data;
|
|
|
|
loff_t pos;
|
|
|
|
int bar;
|
|
|
|
int count;
|
|
|
|
};
|
|
|
|
|
2012-07-31 22:16:24 +08:00
|
|
|
struct vfio_pci_irq_ctx {
|
|
|
|
struct eventfd_ctx *trigger;
|
|
|
|
struct virqfd *unmask;
|
|
|
|
struct virqfd *mask;
|
|
|
|
char *name;
|
|
|
|
bool masked;
|
2015-09-18 22:29:50 +08:00
|
|
|
struct irq_bypass_producer producer;
|
2012-07-31 22:16:24 +08:00
|
|
|
};
|
|
|
|
|
2016-02-23 07:02:39 +08:00
|
|
|
/* Forward declarations: struct vfio_pci_regops refers to both by pointer. */
struct vfio_pci_device;
struct vfio_pci_region;
|
|
|
|
|
|
|
|
struct vfio_pci_regops {
|
|
|
|
size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
|
|
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
|
|
void (*release)(struct vfio_pci_device *vdev,
|
|
|
|
struct vfio_pci_region *region);
|
2018-12-19 16:52:30 +08:00
|
|
|
int (*mmap)(struct vfio_pci_device *vdev,
|
|
|
|
struct vfio_pci_region *region,
|
|
|
|
struct vm_area_struct *vma);
|
2018-12-19 16:52:31 +08:00
|
|
|
int (*add_capability)(struct vfio_pci_device *vdev,
|
|
|
|
struct vfio_pci_region *region,
|
|
|
|
struct vfio_info_cap *caps);
|
2016-02-23 07:02:39 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct vfio_pci_region {
|
|
|
|
u32 type;
|
|
|
|
u32 subtype;
|
|
|
|
const struct vfio_pci_regops *ops;
|
|
|
|
void *data;
|
|
|
|
size_t size;
|
|
|
|
u32 flags;
|
|
|
|
};
|
|
|
|
|
2016-06-30 15:21:24 +08:00
|
|
|
struct vfio_pci_dummy_resource {
|
|
|
|
struct resource resource;
|
|
|
|
int index;
|
|
|
|
struct list_head res_next;
|
|
|
|
};
|
|
|
|
|
2018-12-13 03:51:07 +08:00
|
|
|
struct vfio_pci_reflck {
|
|
|
|
struct kref kref;
|
|
|
|
struct mutex lock;
|
|
|
|
};
|
|
|
|
|
2012-07-31 22:16:24 +08:00
|
|
|
struct vfio_pci_device {
|
|
|
|
struct pci_dev *pdev;
|
|
|
|
void __iomem *barmap[PCI_STD_RESOURCE_END + 1];
|
2016-06-30 15:21:24 +08:00
|
|
|
bool bar_mmap_supported[PCI_STD_RESOURCE_END + 1];
|
2012-07-31 22:16:24 +08:00
|
|
|
u8 *pci_config_map;
|
|
|
|
u8 *vconfig;
|
|
|
|
struct perm_bits *msi_perm;
|
|
|
|
spinlock_t irqlock;
|
|
|
|
struct mutex igate;
|
|
|
|
struct vfio_pci_irq_ctx *ctx;
|
|
|
|
int num_ctx;
|
|
|
|
int irq_type;
|
2016-02-23 07:02:39 +08:00
|
|
|
int num_regions;
|
|
|
|
struct vfio_pci_region *region;
|
2012-07-31 22:16:24 +08:00
|
|
|
u8 msi_qmax;
|
|
|
|
u8 msix_bar;
|
|
|
|
u16 msix_size;
|
|
|
|
u32 msix_offset;
|
|
|
|
u32 rbar[7];
|
|
|
|
bool pci_2_3;
|
|
|
|
bool virq_disabled;
|
|
|
|
bool reset_works;
|
|
|
|
bool extended_caps;
|
|
|
|
bool bardirty;
|
2013-02-19 01:11:13 +08:00
|
|
|
bool has_vga;
|
2014-08-08 01:12:07 +08:00
|
|
|
bool needs_reset;
|
vfio/pci: Hide broken INTx support from user
INTx masking has two components, the first is that we need the ability
to prevent the device from continuing to assert INTx. This is
provided via the DisINTx bit in the command register and is the only
thing we can really probe for when testing if INTx masking is
supported. The second component is that the device needs to indicate
if INTx is asserted via the interrupt status bit in the device status
register. With these two features we can generically determine if one
of the devices we own is asserting INTx, signal the user, and mask the
interrupt while the user services the device.
Generally if one or both of these components is broken we resort to
APIC level interrupt masking, which requires an exclusive interrupt
since we have no way to determine the source of the interrupt in a
shared configuration. This often makes it difficult or impossible to
configure the system for userspace use of the device, for an interrupt
mode that the user may not need.
One possible configuration of broken INTx masking is that the DisINTx
support is fully functional, but the interrupt status bit never
signals interrupt assertion. In this case we do have the ability to
prevent the device from asserting INTx, but lack the ability to
identify the interrupt source. For this case we can simply pretend
that the device lacks INTx support entirely, keeping DisINTx set on
the physical device, virtualizing this bit for the user, and
virtualizing the interrupt pin register to indicate no INTx support.
We already support virtualization of the DisINTx bit and already
virtualize the interrupt pin for platforms without INTx support. By
tying these components together, setting DisINTx on open and reset,
and identifying devices broken in this particular way, we can provide
support for them w/o the handicap of APIC level INTx masking.
Intel i40e (XL710/X710) 10/20/40GbE NICs have been identified as being
broken in this specific way. We leave the vfio-pci.nointxmask option
as a mechanism to bypass this support, enabling INTx on the device
with all the requirements of APIC level masking.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: John Ronciak <john.ronciak@intel.com>
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
2016-03-25 03:05:18 +08:00
|
|
|
bool nointx;
|
2012-07-31 22:16:24 +08:00
|
|
|
struct pci_saved_state *pci_saved_state;
|
2018-12-13 03:51:07 +08:00
|
|
|
struct vfio_pci_reflck *reflck;
|
2014-08-08 01:12:04 +08:00
|
|
|
int refcnt;
|
2018-03-22 02:46:21 +08:00
|
|
|
int ioeventfds_nr;
|
2013-03-11 23:31:22 +08:00
|
|
|
struct eventfd_ctx *err_trigger;
|
2015-02-07 06:05:08 +08:00
|
|
|
struct eventfd_ctx *req_trigger;
|
2016-06-30 15:21:24 +08:00
|
|
|
struct list_head dummy_resources_list;
|
2018-03-22 02:46:21 +08:00
|
|
|
struct mutex ioeventfds_lock;
|
|
|
|
struct list_head ioeventfds_list;
|
2012-07-31 22:16:24 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Predicates on vdev->irq_type.  Macro arguments are parenthesized so that
 * expression arguments (e.g. "base + 1" or a conditional expression) expand
 * with the intended precedence.  Note that is_irq_none() evaluates its
 * argument more than once.
 */
#define is_intx(vdev)	((vdev)->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
#define is_msi(vdev)	((vdev)->irq_type == VFIO_PCI_MSI_IRQ_INDEX)
#define is_msix(vdev)	((vdev)->irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
#define is_irq_none(vdev)	(!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev)))
#define irq_is(vdev, type)	((vdev)->irq_type == (type))
|
|
|
|
|
|
|
|
/* INTx mask/unmask entry points; only declared here, defined elsewhere. */
extern void vfio_pci_intx_mask(struct vfio_pci_device *vdev);
extern void vfio_pci_intx_unmask(struct vfio_pci_device *vdev);
|
|
|
|
|
|
|
|
/* Only declared here, defined elsewhere. */
extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev,
				   uint32_t flags, unsigned int index,
				   unsigned int start, unsigned int count,
				   void *data);
|
|
|
|
|
2013-02-15 05:02:12 +08:00
|
|
|
extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev,
|
|
|
|
char __user *buf, size_t count,
|
|
|
|
loff_t *ppos, bool iswrite);
|
|
|
|
|
|
|
|
/*
 * BAR read/write helper; only declared here, defined elsewhere.
 * @buf is a user-space pointer, @iswrite selects the direction.
 */
extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
			       size_t count, loff_t *ppos, bool iswrite);
|
2012-07-31 22:16:24 +08:00
|
|
|
|
2013-02-19 01:11:13 +08:00
|
|
|
/*
 * VGA read/write helper; only declared here, defined elsewhere.
 * @buf is a user-space pointer, @iswrite selects the direction.
 */
extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
			       size_t count, loff_t *ppos, bool iswrite);
|
|
|
|
|
2018-03-22 02:46:21 +08:00
|
|
|
/* Only declared here, defined elsewhere. */
extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
			       uint64_t data, int count, int fd);
|
|
|
|
|
2012-07-31 22:16:24 +08:00
|
|
|
/* Setup/teardown pair for the perm bits; only declared here, defined elsewhere. */
extern int vfio_pci_init_perm_bits(void);
extern void vfio_pci_uninit_perm_bits(void);
|
|
|
|
|
|
|
|
/* Per-device config init/free pair; only declared here, defined elsewhere. */
extern int vfio_config_init(struct vfio_pci_device *vdev);
extern void vfio_config_free(struct vfio_pci_device *vdev);
|
2016-02-23 07:02:39 +08:00
|
|
|
|
|
|
|
/*
 * Only declared here, defined elsewhere.  The arguments after @vdev carry
 * the same names as the members of struct vfio_pci_region.
 */
extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
					unsigned int type, unsigned int subtype,
					const struct vfio_pci_regops *ops,
					size_t size, u32 flags, void *data);
|
2016-02-23 07:02:43 +08:00
|
|
|
/*
 * With CONFIG_VFIO_PCI_IGD set, vfio_pci_igd_init() is only declared here and
 * defined elsewhere.  Without it, the inline stub below returns -ENODEV so
 * callers need no #ifdef of their own.
 */
#ifdef CONFIG_VFIO_PCI_IGD
extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
#else
static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}
#endif
|
2018-12-20 09:10:36 +08:00
|
|
|
/*
 * With CONFIG_VFIO_PCI_NVLINK2 set, both init functions are only declared
 * here and defined elsewhere.  Without it, the inline stubs below return
 * -ENODEV so callers need no #ifdef of their own.
 */
#ifdef CONFIG_VFIO_PCI_NVLINK2
extern int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev);
extern int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev);
#else
static inline int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}

static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}
#endif
|
2012-07-31 22:16:24 +08:00
|
|
|
#endif /* VFIO_PCI_PRIVATE_H */
|