Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "First batch of KVM changes for 4.4.

  s390:
     A bunch of fixes and optimizations for interrupt and time handling.

  PPC:
     Mostly bug fixes.

  ARM:
     No big features, but many small fixes and prerequisites including:

      - a number of fixes for the arch-timer

      - introducing proper level-triggered semantics for the arch-timers

      - a series of patches to synchronously halt a guest (prerequisite
        for IRQ forwarding)

      - some tracepoint improvements

      - a tweak for the EL2 panic handlers

      - some more VGIC cleanups getting rid of redundant state

  x86:
     Quite a few changes:

      - support for VT-d posted interrupts (i.e. PCI devices can inject
        interrupts directly into vCPUs).  This introduces a new
        component (in virt/lib/) that connects VFIO and KVM together.
        The same infrastructure will be used for ARM interrupt
        forwarding as well.

      - more Hyper-V features, though the main one, the Hyper-V
        synthetic interrupt controller, will have to wait for 4.5.
        These will let KVM expose Hyper-V devices.

      - nested virtualization now supports VPID (same as PCID but for
        vCPUs) which makes it quite a bit faster

      - for future hardware that supports NVDIMM, there is support for
        clflushopt, clwb, pcommit

      - support for "split irqchip", i.e. LAPIC in kernel +
        IOAPIC/PIC/PIT in userspace, which reduces the attack surface
        of the hypervisor

      - obligatory smattering of SMM fixes

      - on the guest side, stable scheduler clock support was rewritten
        to not require help from the hypervisor"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (123 commits)
  KVM: VMX: Fix commit which broke PML
  KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()
  KVM: x86: allow RSM from 64-bit mode
  KVM: VMX: fix SMEP and SMAP without EPT
  KVM: x86: move kvm_set_irq_inatomic to legacy device assignment
  KVM: device assignment: remove pointless #ifdefs
  KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic
  KVM: x86: zero apic_arb_prio on reset
  drivers/hv: share Hyper-V SynIC constants with userspace
  KVM: x86: handle SMBASE as physical address in RSM
  KVM: x86: add read_phys to x86_emulate_ops
  KVM: x86: removing unused variable
  KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs
  KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()
  KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings
  KVM: arm/arm64: Optimize away redundant LR tracking
  KVM: s390: use simple switch statement as multiplexer
  KVM: s390: drop useless newline in debugging data
  KVM: s390: SCA must not cross page boundaries
  KVM: arm: Do not indent the arguments of DECLARE_BITMAP
  ...
commit 933425fb00
@@ -1585,6 +1585,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
	nosid	disable Source ID checking
	no_x2apic_optout
		BIOS x2APIC opt-out request will be ignored
	nopost	disable Interrupt Posting

iomem=	Disable strict checking of access to MMIO memory
	strict	regions from userspace.
@@ -401,10 +401,9 @@ Capability: basic
Architectures: x86, ppc, mips
Type: vcpu ioctl
Parameters: struct kvm_interrupt (in)
Returns: 0 on success, -1 on error
Returns: 0 on success, negative on failure.

Queues a hardware interrupt vector to be injected. This is only
useful if in-kernel local APIC or equivalent is not used.
Queues a hardware interrupt vector to be injected.

/* for KVM_INTERRUPT */
struct kvm_interrupt {

@@ -414,7 +413,14 @@ struct kvm_interrupt {

X86:

Note 'irq' is an interrupt vector, not an interrupt pin or line.
Returns: 0 on success,
         -EEXIST if an interrupt is already enqueued
         -EINVAL if the irq number is invalid
         -ENXIO if the PIC is in the kernel
         -EFAULT if the pointer is invalid

Note 'irq' is an interrupt vector, not an interrupt pin or line. This
ioctl is useful if the in-kernel PIC is not used.

PPC:
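For context, a minimal userspace sketch of driving this ioctl on x86 (assuming a vCPU fd already obtained via KVM_CREATE_VCPU and no in-kernel local APIC in use; the vector value 32 is only an example):

```c
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

/* Inject interrupt vector 32 into a vCPU that does not use the
 * in-kernel local APIC.  vcpu_fd is assumed to come from
 * KVM_CREATE_VCPU; the vector is an illustrative value. */
static int inject_vector(int vcpu_fd)
{
	struct kvm_interrupt irq = { .irq = 32 };

	if (ioctl(vcpu_fd, KVM_INTERRUPT, &irq) < 0) {
		perror("KVM_INTERRUPT");
		return -1;
	}
	return 0;
}
```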
@@ -1598,7 +1604,7 @@ provided event instead of triggering an exit.
struct kvm_ioeventfd {
	__u64 datamatch;
	__u64 addr;        /* legal pio/mmio address */
	__u32 len;         /* 1, 2, 4, or 8 bytes    */
	__u32 len;         /* 0, 1, 2, 4, or 8 bytes    */
	__s32 fd;
	__u32 flags;
	__u8  pad[36];

@@ -1621,6 +1627,10 @@ to the registered address is equal to datamatch in struct kvm_ioeventfd.
For virtio-ccw devices, addr contains the subchannel id and datamatch the
virtqueue index.

With KVM_CAP_IOEVENTFD_ANY_LENGTH, a zero length ioeventfd is allowed, and
the kernel will ignore the length of the guest write and may get a faster vmexit.
The speedup may only apply to specific architectures, but the ioeventfd will
work anyway.
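As a rough illustration (not part of the patch), registering a zero-length ioeventfd once KVM_CAP_IOEVENTFD_ANY_LENGTH is available might look like the sketch below; the MMIO address and the eventfd are placeholders:

```c
#include <linux/kvm.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

/* Register an eventfd that fires on writes of any length to an
 * example MMIO address.  vm_fd is assumed to come from
 * KVM_CREATE_VM. */
static int register_any_length_ioeventfd(int vm_fd, __u64 mmio_addr)
{
	int efd = eventfd(0, 0);
	struct kvm_ioeventfd io;

	if (efd < 0)
		return -1;

	io = (struct kvm_ioeventfd) {
		.addr  = mmio_addr,
		.len   = 0,	/* 0 = match writes of any length */
		.fd    = efd,
		.flags = 0,
	};
	return ioctl(vm_fd, KVM_IOEVENTFD, &io);
}
```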

4.60 KVM_DIRTY_TLB
@@ -3309,6 +3319,18 @@ Valid values for 'type' are:
to ignore the request, or to gather VM memory core dump and/or
reset/shutdown of the VM.

/* KVM_EXIT_IOAPIC_EOI */
struct {
	__u8 vector;
} eoi;

Indicates that the VCPU's in-kernel local APIC received an EOI for a
level-triggered IOAPIC interrupt. This exit only triggers when the
IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
the userspace IOAPIC should process the EOI and retrigger the interrupt if
it is still asserted. Vector is the LAPIC interrupt vector for which the
EOI was received.

/* Fix the size of the union. */
char padding[256];
};
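A hedged sketch of how a userspace IOAPIC model might consume this exit in its vCPU run loop; resample_ioapic_pin() is a hypothetical VMM helper (not a KVM API) standing in for whatever the VMM uses to re-evaluate the interrupt line:

```c
#include <linux/kvm.h>

/* Hypothetical VMM helper: clear Remote IRR for the vector in the
 * userspace IOAPIC and re-inject the interrupt if the line is still
 * asserted. */
extern void resample_ioapic_pin(unsigned char vector);

/* Fragment of a vCPU run loop handling the new exit reason. */
void handle_exit(struct kvm_run *run)
{
	switch (run->exit_reason) {
	case KVM_EXIT_IOAPIC_EOI:
		resample_ioapic_pin(run->eoi.vector);
		break;
	/* ... other exit reasons ... */
	}
}
```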
@@ -3627,6 +3649,26 @@ struct {

KVM handlers should exit to userspace with rc = -EREMOTE.

7.5 KVM_CAP_SPLIT_IRQCHIP

Architectures: x86
Parameters: args[0] - number of routes reserved for userspace IOAPICs
Returns: 0 on success, -1 on error

Create a local apic for each processor in the kernel. This can be used
instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the
IOAPIC and PIC (and also the PIT, even though this has to be enabled
separately).

This capability also enables in kernel routing of interrupt requests;
when KVM_CAP_SPLIT_IRQCHIP is enabled, only routes of KVM_IRQ_ROUTING_MSI
type are used in the IRQ routing table. The first args[0] MSI routes are
reserved for the IOAPIC pins. Whenever the LAPIC receives an EOI for these
routes, a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.

Fails if VCPU has already been created, or if the irqchip is already in the
kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
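For illustration only (not part of the patch), enabling the capability from userspace follows the usual KVM_ENABLE_CAP pattern on the VM fd; the number of reserved IOAPIC routes here (24) is just the conventional pin count:

```c
#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Ask KVM to create only the in-kernel LAPICs and reserve 24 MSI
 * routes for a userspace IOAPIC.  Must be called before any vCPU is
 * created and before KVM_CREATE_IRQCHIP. */
static int enable_split_irqchip(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_SPLIT_IRQCHIP,
		.args[0] = 24,
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}
```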

8. Other capabilities.
----------------------

@@ -0,0 +1,187 @@
KVM/ARM VGIC Forwarded Physical Interrupts
==========================================

The KVM/ARM code implements software support for the ARM Generic
Interrupt Controller's (GIC's) hardware support for virtualization by
allowing software to inject virtual interrupts to a VM, which the guest
OS sees as regular interrupts. The code is famously known as the VGIC.

Some of these virtual interrupts, however, correspond to physical
interrupts from real physical devices. One example could be the
architected timer, which itself supports virtualization, and therefore
lets a guest OS program the hardware device directly to raise an
interrupt at some point in time. When such an interrupt is raised, the
host OS initially handles the interrupt and must somehow signal this
event as a virtual interrupt to the guest. Another example could be a
passthrough device, where the physical interrupts are initially handled
by the host, but the device driver for the device lives in the guest OS
and KVM must therefore somehow inject a virtual interrupt on behalf of
the physical one to the guest OS.

These virtual interrupts corresponding to a physical interrupt on the
host are called forwarded physical interrupts, but are also sometimes
referred to as 'virtualized physical interrupts' and 'mapped interrupts'.

Forwarded physical interrupts are handled slightly differently compared
to virtual interrupts generated purely by a software emulated device.


The HW bit
----------
Virtual interrupts are signalled to the guest by programming the List
Registers (LRs) on the GIC before running a VCPU. The LR is programmed
with the virtual IRQ number and the state of the interrupt (Pending,
Active, or Pending+Active). When the guest ACKs and EOIs a virtual
interrupt, the LR state moves from Pending to Active, and finally to
inactive.

The LRs include an extra bit, called the HW bit. When this bit is set,
KVM must also program an additional field in the LR, the physical IRQ
number, to link the virtual with the physical IRQ.

When the HW bit is set, KVM must EITHER set the Pending OR the Active
bit, never both at the same time.

Setting the HW bit causes the hardware to deactivate the physical
interrupt on the physical distributor when the guest deactivates the
corresponding virtual interrupt.
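To make the fields concrete, here is a simplified, non-authoritative software model of what one LR carries; the real GICv2/GICv3 LR encodings and KVM's internal vgic bookkeeping differ in detail:

```c
/* Illustrative only: a simplified software view of one List Register.
 * The actual GIC LR bit layout and KVM's vgic structures differ. */
enum vgic_lr_state {
	LR_STATE_INVALID,
	LR_STATE_PENDING,
	LR_STATE_ACTIVE,
	LR_STATE_PENDING_ACTIVE,	/* never used together with the HW bit */
};

struct vgic_lr_model {
	unsigned int virt_irq;	/* virtual IRQ number presented to the guest */
	unsigned int phys_irq;	/* only meaningful when hw is set */
	enum vgic_lr_state state;
	_Bool hw;		/* HW bit: guest deactivation also deactivates
				 * phys_irq on the physical distributor */
};
```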

Forwarded Physical Interrupts Life Cycle
----------------------------------------

The state of forwarded physical interrupts is managed in the following way:

  - The physical interrupt is acked by the host, and becomes active on
    the physical distributor (*).
  - KVM sets the LR.Pending bit, because this is the only way the GICV
    interface is going to present it to the guest.
  - LR.Pending will stay set as long as the guest has not acked the interrupt.
  - LR.Pending transitions to LR.Active on the guest read of the IAR, as
    expected.
  - On guest EOI, the *physical distributor* active bit gets cleared,
    but the LR.Active is left untouched (set).
  - KVM clears the LR on VM exits when the physical distributor
    active state has been cleared.

(*): The host handling is slightly more complicated. For some forwarded
interrupts (shared), KVM directly sets the active state on the physical
distributor before entering the guest, because the interrupt is never actually
handled on the host (see details on the timer as an example below). For other
forwarded interrupts (non-shared) the host does not deactivate the interrupt
when the host ISR completes, but leaves the interrupt active until the guest
deactivates it. Leaving the interrupt active is allowed, because Linux
configures the physical GIC with EOIMode=1, which causes EOI operations to
perform a priority drop allowing the GIC to receive other interrupts of the
default priority.


Forwarded Edge and Level Triggered PPIs and SPIs
------------------------------------------------
Forwarded physical interrupts injected should always be active on the
physical distributor when injected to a guest.

Level-triggered interrupts will keep the interrupt line to the GIC
asserted, typically until the guest programs the device to deassert the
line. This means that the interrupt will remain pending on the physical
distributor until the guest has reprogrammed the device. Since we
always run the VM with interrupts enabled on the CPU, a pending
interrupt will exit the guest as soon as we switch into the guest,
preventing the guest from ever making progress as the process repeats
over and over. Therefore, the active state on the physical distributor
must be set when entering the guest, preventing the GIC from forwarding
the pending interrupt to the CPU. As soon as the guest deactivates the
interrupt, the physical line is sampled by the hardware again and the host
takes a new interrupt if and only if the physical line is still asserted.

Edge-triggered interrupts do not exhibit the same problem with
preventing guest execution that level-triggered interrupts do. One
option is to not use the HW bit at all, and inject edge-triggered interrupts
from a physical device as pure virtual interrupts. But that would
potentially slow down handling of the interrupt in the guest, because a
physical interrupt occurring in the middle of the guest ISR would
preempt the guest for the host to handle the interrupt. Additionally,
if you configure the system to handle interrupts on a separate physical
core from that running your VCPU, you still have to interrupt the VCPU
to queue the pending state onto the LR, even though the guest won't use
this information until the guest ISR completes. Therefore, the HW
bit should always be set for forwarded edge-triggered interrupts. With
the HW bit set, the virtual interrupt is injected and additional
physical interrupts occurring before the guest deactivates the interrupt
simply mark the state on the physical distributor as Pending+Active. As
soon as the guest deactivates the interrupt, the host takes another
interrupt if and only if there was a physical interrupt between injecting
the forwarded interrupt to the guest and the guest deactivating the
interrupt.

Consequently, whenever we schedule a VCPU with one or more LRs with the
HW bit set, the interrupt must also be active on the physical
distributor.


Forwarded LPIs
--------------
LPIs, introduced in GICv3, are always edge-triggered and do not have an
active state. They become pending when a device signals them, and as
soon as they are acked by the CPU, they are inactive again.

It therefore doesn't make sense, and is not supported, to set the HW bit
for physical LPIs that are forwarded to a VM as virtual interrupts,
typically virtual SPIs.

For LPIs, there is no other choice than to preempt the VCPU thread if
necessary, and queue the pending state onto the LR.


Putting It Together: The Architected Timer
------------------------------------------
The architected timer is a device that signals interrupts with level
triggered semantics. The timer hardware is directly accessed by VCPUs
which program the timer to fire at some point in time. Each VCPU on a
system programs the timer to fire at different times, and therefore the
hardware is multiplexed between multiple VCPUs. This is implemented by
context-switching the timer state along with each VCPU thread.

However, this means that a scenario like the following is entirely
possible, and in fact, typical:

  1.  KVM runs the VCPU
  2.  The guest programs the timer to fire in T+100
  3.  The guest is idle and calls WFI (wait-for-interrupts)
  4.  The hardware traps to the host
  5.  KVM stores the timer state to memory and disables the hardware timer
  6.  KVM schedules a soft timer to fire in T+(100 - time since step 2)
  7.  KVM puts the VCPU thread to sleep (on a waitqueue)
  8.  The soft timer fires, waking up the VCPU thread
  9.  KVM reprograms the timer hardware with the VCPU's values
  10. KVM marks the timer interrupt as active on the physical distributor
  11. KVM injects a forwarded physical interrupt to the guest
  12. KVM runs the VCPU

Notice that KVM injects a forwarded physical interrupt in step 11 without
the corresponding interrupt having actually fired on the host. That is
exactly why we mark the timer interrupt as active in step 10, because
the active state on the physical distributor is part of the state
belonging to the timer hardware, which is context-switched along with
the VCPU thread.

If the guest does not idle because it is busy, the flow looks like this
instead:

  1.  KVM runs the VCPU
  2.  The guest programs the timer to fire in T+100
  3.  At T+100 the timer fires and a physical IRQ causes the VM to exit
      (note that this initially only traps to EL2 and does not run the host ISR
      until KVM has returned to the host).
  4.  With interrupts still disabled on the CPU coming back from the guest, KVM
      stores the virtual timer state to memory and disables the virtual hw timer.
  5.  KVM looks at the timer state (in memory) and injects a forwarded physical
      interrupt because it concludes the timer has expired.
  6.  KVM marks the timer interrupt as active on the physical distributor
  7.  KVM enables the timer, enables interrupts, and runs the VCPU

Notice that again the forwarded physical interrupt is injected to the
guest without having actually been handled on the host. In this case it
is because the physical interrupt is never actually seen by the host because the
timer is disabled upon guest return, and the virtual forwarded interrupt is
injected on the KVM guest entry path.

@@ -44,28 +44,29 @@ Groups:
Attributes:
	The attr field of kvm_device_attr encodes two values:
	bits:   | 63 .... 40 | 39 .. 32  | 31 .... 0 |
	values: |  reserved  |   cpu id  |  offset   |
	values: |  reserved  | vcpu_index|  offset   |

	All distributor regs are (rw, 32-bit)

	The offset is relative to the "Distributor base address" as defined in the
	GICv2 specs. Getting or setting such a register has the same effect as
	reading or writing the register on the actual hardware from the cpu
	specified with cpu id field. Note that most distributor fields are not
	banked, but return the same value regardless of the cpu id used to access
	the register.
	reading or writing the register on the actual hardware from the cpu whose
	index is specified with the vcpu_index field. Note that most distributor
	fields are not banked, but return the same value regardless of the
	vcpu_index used to access the register.
Limitations:
	- Priorities are not implemented, and registers are RAZ/WI
	- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
Errors:
	-ENODEV: Getting or setting this register is not yet supported
	-ENXIO: Getting or setting this register is not yet supported
	-EBUSY: One or more VCPUs are running
	-EINVAL: Invalid vcpu_index supplied

KVM_DEV_ARM_VGIC_GRP_CPU_REGS
Attributes:
	The attr field of kvm_device_attr encodes two values:
	bits:   | 63 .... 40 | 39 .. 32  | 31 .... 0 |
	values: |  reserved  |   cpu id  |  offset   |
	values: |  reserved  | vcpu_index|  offset   |

	All CPU interface regs are (rw, 32-bit)

@@ -91,8 +92,9 @@ Groups:
	- Priorities are not implemented, and registers are RAZ/WI
	- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
Errors:
	-ENODEV: Getting or setting this register is not yet supported
	-ENXIO: Getting or setting this register is not yet supported
	-EBUSY: One or more VCPUs are running
	-EINVAL: Invalid vcpu_index supplied

KVM_DEV_ARM_VGIC_GRP_NR_IRQS
Attributes:
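A small sketch (mine, not from the patch) of how userspace builds the attr value under the new layout, with vcpu_index in bits 39..32 and the register offset in bits 31..0:

```c
#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Read one 32-bit distributor register of a vgic-v2 device.
 * vgic_fd is the device fd returned by KVM_CREATE_DEVICE; offset is
 * relative to the "Distributor base address" as described above. */
static int vgic_dist_read(int vgic_fd, unsigned int vcpu_index,
			  unsigned int offset, __u32 *val)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
		.attr  = ((__u64)vcpu_index << 32) | offset,
		.addr  = (__u64)(unsigned long)val,
	};

	return ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr);
}
```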

@@ -166,3 +166,15 @@ Comment: The srcu read lock must be held while accessing memslots (e.g.
		MMIO/PIO address->device structure mapping (kvm->buses).
		The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
		if it is needed by multiple functions.

Name:		blocked_vcpu_on_cpu_lock
Type:		spinlock_t
Arch:		x86
Protects:	blocked_vcpu_on_cpu
Comment:	This is a per-CPU lock and it is used for VT-d posted-interrupts.
		When VT-d posted-interrupts are supported and the VM has assigned
		devices, a blocked vCPU is put on the blocked_vcpu_on_cpu list,
		protected by blocked_vcpu_on_cpu_lock. When the VT-d hardware
		issues a wakeup notification event (because an external interrupt
		arrived from an assigned device), the vCPU is looked up on this
		list and woken up.
@@ -11348,6 +11348,13 @@ L: netdev@vger.kernel.org
S:	Maintained
F:	drivers/net/ethernet/via/via-velocity.*

VIRT LIB
M:	Alex Williamson <alex.williamson@redhat.com>
M:	Paolo Bonzini <pbonzini@redhat.com>
L:	kvm@vger.kernel.org
S:	Supported
F:	virt/lib/

VIVID VIRTUAL VIDEO DRIVER
M:	Hans Verkuil <hverkuil@xs4all.nl>
L:	linux-media@vger.kernel.org
Makefile
@@ -550,6 +550,7 @@ drivers-y := drivers/ sound/ firmware/
net-y		:= net/
libs-y		:= lib/
core-y		:= usr/
virt-y		:= virt/
endif # KBUILD_EXTMOD

ifeq ($(dot-config),1)

@@ -882,10 +883,10 @@ core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/

vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
		     $(net-y) $(net-m) $(libs-y) $(libs-m)))
		     $(net-y) $(net-m) $(libs-y) $(libs-m) $(virt-y)))

vmlinux-alldirs	:= $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \
		     $(init-) $(core-) $(drivers-) $(net-) $(libs-))))
		     $(init-) $(core-) $(drivers-) $(net-) $(libs-) $(virt-))))

init-y		:= $(patsubst %/, %/built-in.o, $(init-y))
core-y		:= $(patsubst %/, %/built-in.o, $(core-y))

@@ -894,14 +895,15 @@ net-y := $(patsubst %/, %/built-in.o, $(net-y))
libs-y1		:= $(patsubst %/, %/lib.a, $(libs-y))
libs-y2		:= $(patsubst %/, %/built-in.o, $(libs-y))
libs-y		:= $(libs-y1) $(libs-y2)
virt-y		:= $(patsubst %/, %/built-in.o, $(virt-y))

# Externally visible symbols (used by link-vmlinux.sh)
export KBUILD_VMLINUX_INIT := $(head-y) $(init-y)
export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y)
export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) $(virt-y)
export KBUILD_LDS          := arch/$(SRCARCH)/kernel/vmlinux.lds
export LDFLAGS_vmlinux
# used by scripts/package/Makefile
export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools virt)
export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools)

vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN)
@@ -218,4 +218,24 @@
#define HSR_DABT_CM		(1U << 8)
#define HSR_DABT_EA		(1U << 9)

#define kvm_arm_exception_type	\
	{0, "RESET" }, 		\
	{1, "UNDEFINED" },	\
	{2, "SOFTWARE" },	\
	{3, "PREF_ABORT" },	\
	{4, "DATA_ABORT" },	\
	{5, "IRQ" },		\
	{6, "FIQ" },		\
	{7, "HVC" }

#define HSRECN(x) { HSR_EC_##x, #x }

#define kvm_arm_exception_class \
	HSRECN(UNKNOWN), HSRECN(WFI), HSRECN(CP15_32), HSRECN(CP15_64), \
	HSRECN(CP14_MR), HSRECN(CP14_LS), HSRECN(CP_0_13), HSRECN(CP10_ID), \
	HSRECN(JAZELLE), HSRECN(BXJ), HSRECN(CP14_64), HSRECN(SVC_HYP), \
	HSRECN(HVC), HSRECN(SMC), HSRECN(IABT), HSRECN(IABT_HYP), \
	HSRECN(DABT), HSRECN(DABT_HYP)


#endif /* __ARM_KVM_ARM_H__ */
@@ -126,7 +126,10 @@ struct kvm_vcpu_arch {
	 * here.
	 */

	/* Don't run the guest on this vcpu */
	/* vcpu power-off state */
	bool power_off;

	/* Don't run the guest (internal implementation need) */
	bool pause;

	/* IO related fields */
@@ -46,4 +46,6 @@ config KVM_ARM_HOST
	---help---
	  Provides host support for ARM processors.

source drivers/vhost/Kconfig

endif # VIRTUALIZATION
@@ -271,6 +271,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
	return kvm_timer_should_fire(vcpu);
}

void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	kvm_timer_schedule(vcpu);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	kvm_timer_unschedule(vcpu);
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	/* Force users to call KVM_ARM_VCPU_INIT */

@@ -308,7 +318,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	if (vcpu->arch.pause)
	if (vcpu->arch.power_off)
		mp_state->mp_state = KVM_MP_STATE_STOPPED;
	else
		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;

@@ -321,10 +331,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		vcpu->arch.pause = false;
		vcpu->arch.power_off = false;
		break;
	case KVM_MP_STATE_STOPPED:
		vcpu->arch.pause = true;
		vcpu->arch.power_off = true;
		break;
	default:
		return -EINVAL;

@@ -342,7 +352,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
	return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v);
	return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
		&& !v->arch.power_off && !v->arch.pause);
}

/* Just ensure a guest exit from a particular CPU */

@@ -468,11 +479,38 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
	return vgic_initialized(kvm);
}

static void vcpu_pause(struct kvm_vcpu *vcpu)
static void kvm_arm_halt_guest(struct kvm *kvm) __maybe_unused;
static void kvm_arm_resume_guest(struct kvm *kvm) __maybe_unused;

static void kvm_arm_halt_guest(struct kvm *kvm)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.pause = true;
	force_vm_exit(cpu_all_mask);
}

static void kvm_arm_resume_guest(struct kvm *kvm)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);

		vcpu->arch.pause = false;
		wake_up_interruptible(wq);
	}
}

static void vcpu_sleep(struct kvm_vcpu *vcpu)
{
	wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);

	wait_event_interruptible(*wq, !vcpu->arch.pause);
	wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
				       (!vcpu->arch.pause)));
}

static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)

@@ -522,8 +560,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)

		update_vttbr(vcpu->kvm);

		if (vcpu->arch.pause)
			vcpu_pause(vcpu);
		if (vcpu->arch.power_off || vcpu->arch.pause)
			vcpu_sleep(vcpu);

		/*
		 * Disarming the background timer must be done in a

@@ -549,11 +587,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
			run->exit_reason = KVM_EXIT_INTR;
		}

		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
			vcpu->arch.power_off || vcpu->arch.pause) {
			local_irq_enable();
			kvm_timer_sync_hwstate(vcpu);
			kvm_vgic_sync_hwstate(vcpu);
			preempt_enable();
			kvm_timer_sync_hwstate(vcpu);
			continue;
		}

@@ -596,14 +635,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
		 * guest time.
		 */
		kvm_guest_exit();
		trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));

		/*
		 * We must sync the timer state before the vgic state so that
		 * the vgic can properly sample the updated state of the
		 * interrupt line.
		 */
		kvm_timer_sync_hwstate(vcpu);

		kvm_vgic_sync_hwstate(vcpu);

		preempt_enable();

		kvm_timer_sync_hwstate(vcpu);

		ret = handle_exit(vcpu, run, ret);
	}

@@ -765,12 +809,12 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
	vcpu_reset_hcr(vcpu);

	/*
	 * Handle the "start in power-off" case by marking the VCPU as paused.
	 * Handle the "start in power-off" case.
	 */
	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
		vcpu->arch.pause = true;
		vcpu->arch.power_off = true;
	else
		vcpu->arch.pause = false;
		vcpu->arch.power_off = false;

	return 0;
}
@@ -63,7 +63,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)

static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pause = true;
	vcpu->arch.power_off = true;
}

static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)

@@ -87,7 +87,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
	 */
	if (!vcpu)
		return PSCI_RET_INVALID_PARAMS;
	if (!vcpu->arch.pause) {
	if (!vcpu->arch.power_off) {
		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
			return PSCI_RET_ALREADY_ON;
		else

@@ -115,7 +115,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
	 * the general purpose registers are undefined upon CPU_ON.
	 */
	*vcpu_reg(vcpu, 0) = context_id;
	vcpu->arch.pause = false;
	vcpu->arch.power_off = false;
	smp_mb();		/* Make sure the above is visible */

	wq = kvm_arch_vcpu_wq(vcpu);

@@ -153,7 +153,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
		if ((mpidr & target_affinity_mask) == target_affinity) {
			matching_cpus++;
			if (!tmp->arch.pause)
			if (!tmp->arch.power_off)
				return PSCI_0_2_AFFINITY_LEVEL_ON;
		}
	}

@@ -179,7 +179,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
	 * re-initialized.
	 */
	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
		tmp->arch.pause = true;
		tmp->arch.power_off = true;
		kvm_vcpu_kick(tmp);
	}
@@ -25,21 +25,25 @@ TRACE_EVENT(kvm_entry,
);

TRACE_EVENT(kvm_exit,
	TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc),
	TP_ARGS(exit_reason, vcpu_pc),
	TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
	TP_ARGS(idx, exit_reason, vcpu_pc),

	TP_STRUCT__entry(
		__field(	int,		idx		)
		__field(	unsigned int,	exit_reason	)
		__field(	unsigned long,	vcpu_pc		)
	),

	TP_fast_assign(
		__entry->idx		= idx;
		__entry->exit_reason	= exit_reason;
		__entry->vcpu_pc	= vcpu_pc;
	),

	TP_printk("HSR_EC: 0x%04x, PC: 0x%08lx",
	TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
		  __print_symbolic(__entry->idx, kvm_arm_exception_type),
		  __entry->exit_reason,
		  __print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
		  __entry->vcpu_pc)
);
@@ -200,4 +200,20 @@
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK	(~UL(0xf))

#define kvm_arm_exception_type	\
	{0, "IRQ" },		\
	{1, "TRAP" }

#define ECN(x) { ESR_ELx_EC_##x, #x }

#define kvm_arm_exception_class \
	ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
	ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \
	ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \
	ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
	ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
	ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
	ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
	ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)

#endif /* __ARM64_KVM_ARM_H__ */
@@ -149,7 +149,10 @@ struct kvm_vcpu_arch {
		u32	mdscr_el1;
	} guest_debug_preserved;

	/* Don't run the guest */
	/* vcpu power-off state */
	bool power_off;

	/* Don't run the guest (internal implementation need) */
	bool pause;

	/* IO related fields */
@@ -48,4 +48,6 @@ config KVM_ARM_HOST
	---help---
	  Provides host support for ARM processors.

source drivers/vhost/Kconfig

endif # VIRTUALIZATION
@@ -880,6 +880,14 @@ __kvm_hyp_panic:

	bl __restore_sysregs

	/*
	 * Make sure we have a valid host stack, and don't leave junk in the
	 * frame pointer that will give us a misleading host stack unwinding.
	 */
	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
	msr	sp_el1, x22
	mov	x29, xzr

1:	adr	x0, __hyp_panic_str
	adr	x1, 2f
	ldp	x2, x3, [x1]
@@ -847,5 +847,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
		struct kvm_memory_slot *slot) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}

#endif /* __MIPS_KVM_HOST_H__ */
@@ -42,6 +42,11 @@ static inline unsigned int get_dcrn(u32 inst)
	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
}

static inline unsigned int get_tmrn(u32 inst)
{
	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
}

static inline unsigned int get_rt(u32 inst)
{
	return (inst >> 21) & 0x1f;
@@ -716,5 +716,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_exit(void) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}

#endif /* __POWERPC_KVM_HOST_H__ */
@@ -742,6 +742,12 @@
#define MMUBE1_VBE4		0x00000002
#define MMUBE1_VBE5		0x00000001

#define TMRN_TMCFG0		16	/* Thread Management Configuration Register 0 */
#define TMRN_TMCFG0_NPRIBITS	0x003f0000 /* Bits of thread priority */
#define TMRN_TMCFG0_NPRIBITS_SHIFT	16
#define TMRN_TMCFG0_NATHRD	0x00003f00 /* Number of active threads */
#define TMRN_TMCFG0_NATHRD_SHIFT	8
#define TMRN_TMCFG0_NTHRD	0x0000003f /* Number of threads */
#define TMRN_IMSR0		0x120	/* Initial MSR Register 0 (e6500) */
#define TMRN_IMSR1		0x121	/* Initial MSR Register 1 (e6500) */
#define TMRN_INIA0		0x140	/* Next Instruction Address Register 0 */
@@ -70,7 +70,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
	}

	/* Lastly try successively smaller sizes from the page allocator */
	while (!hpt && order > PPC_MIN_HPT_ORDER) {
	/* Only do this if userspace didn't specify a size via ioctl */
	while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) {
		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
				       __GFP_NOWARN, order - PAGE_SHIFT);
		if (!hpt)
@@ -470,6 +470,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
	note_hpte_modification(kvm, rev);
	unlock_hpte(hpte, 0);

	if (v & HPTE_V_ABSENT)
		v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
	hpret[0] = v;
	hpret[1] = r;
	return H_SUCCESS;
@@ -150,6 +150,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
	beq	11f
	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
	beq	15f	/* Invoke the H_DOORBELL handler */
	cmpwi	cr2, r12, BOOK3S_INTERRUPT_HMI
	beq	cr2, 14f	/* HMI check */

@@ -174,6 +176,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
	mtspr	SPRN_HSRR1, r7
	b	hmi_exception_after_realmode

15:	mtspr	SPRN_HSRR0, r8
	mtspr	SPRN_HSRR1, r7
	ba	0xe80

kvmppc_primary_no_guest:
	/* We handle this much like a ceded vcpu */
	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */

@@ -2377,7 +2383,6 @@ machine_check_realmode:
	mr	r3, r9		/* get vcpu pointer */
	bl	kvmppc_realmode_machine_check
	nop
	cmpdi	r3, 0		/* Did we handle MCE ? */
	ld	r9, HSTATE_KVM_VCPU(r13)
	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
	/*

@@ -2390,13 +2395,18 @@ machine_check_realmode:
	 * The old code used to return to host for unhandled errors which
	 * was causing guest to hang with soft lockups inside guest and
	 * makes it difficult to recover guest instance.
	 *
	 * if we receive machine check with MSR(RI=0) then deliver it to
	 * guest as machine check causing guest to crash.
	 */
	ld	r10, VCPU_PC(r9)
	ld	r11, VCPU_MSR(r9)
	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
	beq	1f			/* Deliver a machine check to guest */
	ld	r10, VCPU_PC(r9)
	cmpdi	r3, 0		/* Did we handle MCE ? */
	bne	2f	/* Continue guest execution. */
	/* If not, deliver a machine check.  SRR0/1 are already set */
	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
	ld	r11, VCPU_MSR(r9)
1:	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
	bl	kvmppc_msr_interrupt
2:	b	fast_interrupt_c_return

@@ -2436,14 +2446,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

	/* hypervisor doorbell */
3:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL

	/*
	 * Clear the doorbell as we will invoke the handler
	 * explicitly in the guest exit path.
	 */
	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
	PPC_MSGCLR(6)
	/* see if it's a host IPI */
	li	r3, 1
	lbz	r0, HSTATE_HOST_IPI(r13)
	cmpwi	r0, 0
	bnelr
	/* if not, clear it and return -1 */
	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
	PPC_MSGCLR(6)
	/* if not, return -1 */
	li	r3, -1
	blr
@@ -237,7 +237,8 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
			   struct kvm_book3e_206_tlb_entry *gtlbe)
{
	struct vcpu_id_table *idt = vcpu_e500->idt;
	unsigned int pr, tid, ts, pid;
	unsigned int pr, tid, ts;
	int pid;
	u32 val, eaddr;
	unsigned long flags;
@@ -15,6 +15,7 @@
#include <asm/kvm_ppc.h>
#include <asm/disassemble.h>
#include <asm/dbell.h>
#include <asm/reg_booke.h>

#include "booke.h"
#include "e500.h"

@@ -22,6 +23,7 @@
#define XOP_DCBTLS  166
#define XOP_MSGSND  206
#define XOP_MSGCLR  238
#define XOP_MFTMR   366
#define XOP_TLBIVAX 786
#define XOP_TLBSX   914
#define XOP_TLBRE   946

@@ -113,6 +115,19 @@ static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu)
	return EMULATE_DONE;
}

static int kvmppc_e500_emul_mftmr(struct kvm_vcpu *vcpu, unsigned int inst,
				  int rt)
{
	/* Expose one thread per vcpu */
	if (get_tmrn(inst) == TMRN_TMCFG0) {
		kvmppc_set_gpr(vcpu, rt,
			       1 | (1 << TMRN_TMCFG0_NATHRD_SHIFT));
		return EMULATE_DONE;
	}

	return EMULATE_FAIL;
}

int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
				unsigned int inst, int *advance)
{

@@ -165,6 +180,10 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
			emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
			break;

		case XOP_MFTMR:
			emulated = kvmppc_e500_emul_mftmr(vcpu, inst, rt);
			break;

		case XOP_EHPRIV:
			emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
							   advance);
@@ -406,7 +406,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,

		for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
			unsigned long gfn_start, gfn_end;
			tsize_pages = 1 << (tsize - 2);
			tsize_pages = 1UL << (tsize - 2);

			gfn_start = gfn & ~(tsize_pages - 1);
			gfn_end = gfn_start + tsize_pages;

@@ -447,7 +447,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
	}

	if (likely(!pfnmap)) {
		tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
		tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT);
		pfn = gfn_to_pfn_memslot(slot, gfn);
		if (is_error_noslot_pfn(pfn)) {
			if (printk_ratelimit())
@@ -559,6 +559,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
		else
			r = num_online_cpus();
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
@@ -644,5 +644,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
		struct kvm_memory_slot *slot) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}

#endif
@@ -336,28 +336,28 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
	return -EOPNOTSUPP;
}

static const intercept_handler_t intercept_funcs[] = {
	[0x00 >> 2] = handle_noop,
	[0x04 >> 2] = handle_instruction,
	[0x08 >> 2] = handle_prog,
	[0x10 >> 2] = handle_noop,
	[0x14 >> 2] = handle_external_interrupt,
	[0x18 >> 2] = handle_noop,
	[0x1C >> 2] = kvm_s390_handle_wait,
	[0x20 >> 2] = handle_validity,
	[0x28 >> 2] = handle_stop,
	[0x38 >> 2] = handle_partial_execution,
};

int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
	intercept_handler_t func;
	u8 code = vcpu->arch.sie_block->icptcode;

	if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs))
	switch (vcpu->arch.sie_block->icptcode) {
	case 0x00:
	case 0x10:
	case 0x18:
		return handle_noop(vcpu);
	case 0x04:
		return handle_instruction(vcpu);
	case 0x08:
		return handle_prog(vcpu);
	case 0x14:
		return handle_external_interrupt(vcpu);
	case 0x1c:
		return kvm_s390_handle_wait(vcpu);
	case 0x20:
		return handle_validity(vcpu);
	case 0x28:
		return handle_stop(vcpu);
	case 0x38:
		return handle_partial_execution(vcpu);
	default:
		return -EOPNOTSUPP;
	func = intercept_funcs[code >> 2];
	if (func)
		return func(vcpu);
	return -EOPNOTSUPP;
	}
}
@@ -51,11 +51,9 @@ static int psw_mchk_disabled(struct kvm_vcpu *vcpu)

static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
{
	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
	    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) ||
	    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT))
		return 0;
	return 1;
	return psw_extint_disabled(vcpu) &&
	       psw_ioint_disabled(vcpu) &&
	       psw_mchk_disabled(vcpu);
}

static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)

@@ -71,13 +69,8 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)

static int ckc_irq_pending(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	if (!(vcpu->arch.sie_block->ckc <
	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
		preempt_enable();
	if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm))
		return 0;
	}
	preempt_enable();
	return ckc_interrupts_enabled(vcpu);
}

@@ -109,14 +102,10 @@ static inline u8 int_word_to_isc(u32 int_word)
	return (int_word & 0x38000000) >> 27;
}

static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu)
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
{
	return vcpu->kvm->arch.float_int.pending_irqs;
}

static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.local_int.pending_irqs;
	return vcpu->kvm->arch.float_int.pending_irqs |
	       vcpu->arch.local_int.pending_irqs;
}

static unsigned long disable_iscs(struct kvm_vcpu *vcpu,

@@ -135,8 +124,7 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
{
	unsigned long active_mask;

	active_mask = pending_local_irqs(vcpu);
	active_mask |= pending_floating_irqs(vcpu);
	active_mask = pending_irqs(vcpu);
	if (!active_mask)
		return 0;

@@ -204,7 +192,7 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)

static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
{
	if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK))
	if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
		return;
	else if (psw_ioint_disabled(vcpu))
		__set_cpuflag(vcpu, CPUSTAT_IO_INT);

@@ -214,7 +202,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)

static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
{
	if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
	if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK))
		return;
	if (psw_extint_disabled(vcpu))
		__set_cpuflag(vcpu, CPUSTAT_EXT_INT);

@@ -224,7 +212,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)

static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
{
	if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
	if (!(pending_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
		return;
	if (psw_mchk_disabled(vcpu))
		vcpu->arch.sie_block->ictl |= ICTL_LPSW;

@@ -815,23 +803,21 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)

int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
{
	int rc;
	if (deliverable_irqs(vcpu))
		return 1;

	rc = !!deliverable_irqs(vcpu);

	if (!rc && kvm_cpu_has_pending_timer(vcpu))
		rc = 1;
	if (kvm_cpu_has_pending_timer(vcpu))
		return 1;

	/* external call pending and deliverable */
	if (!rc && kvm_s390_ext_call_pending(vcpu) &&
	if (kvm_s390_ext_call_pending(vcpu) &&
	    !psw_extint_disabled(vcpu) &&
	    (vcpu->arch.sie_block->gcr[0] & 0x2000ul))
		rc = 1;
		return 1;

	if (!rc && !exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
		rc = 1;

	return rc;
	if (!exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
		return 1;
	return 0;
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)

@@ -846,7 +832,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
	vcpu->stat.exit_wait_state++;

	/* fast path */
	if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu))
	if (kvm_arch_vcpu_runnable(vcpu))
		return 0;

	if (psw_interrupts_disabled(vcpu)) {

@@ -860,9 +846,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
		goto no_timer;
	}

	preempt_disable();
	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
	preempt_enable();
	now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);

	/* underflow */

@@ -901,9 +885,7 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
	u64 now, sltime;

	vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
	preempt_disable();
	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
	preempt_enable();
	now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);

	/*

@@ -981,39 +963,30 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
				   irq->u.pgm.code, 0);

	li->irq.pgm = irq->u.pgm;
	if (irq->u.pgm.code == PGM_PER) {
		li->irq.pgm.code |= PGM_PER;
		/* only modify PER related information */
		li->irq.pgm.per_address = irq->u.pgm.per_address;
		li->irq.pgm.per_code = irq->u.pgm.per_code;
		li->irq.pgm.per_atmid = irq->u.pgm.per_atmid;
		li->irq.pgm.per_access_id = irq->u.pgm.per_access_id;
	} else if (!(irq->u.pgm.code & PGM_PER)) {
		li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) |
				   irq->u.pgm.code;
		/* only modify non-PER information */
		li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code;
		li->irq.pgm.mon_code = irq->u.pgm.mon_code;
		li->irq.pgm.data_exc_code = irq->u.pgm.data_exc_code;
		li->irq.pgm.mon_class_nr = irq->u.pgm.mon_class_nr;
		li->irq.pgm.exc_access_id = irq->u.pgm.exc_access_id;
		li->irq.pgm.op_access_id = irq->u.pgm.op_access_id;
	} else {
		li->irq.pgm = irq->u.pgm;
	}
	set_bit(IRQ_PEND_PROG, &li->pending_irqs);
	return 0;
}

int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
{
	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
	struct kvm_s390_irq irq;

	spin_lock(&li->lock);
	irq.u.pgm.code = code;
	__inject_prog(vcpu, &irq);
	BUG_ON(waitqueue_active(li->wq));
	spin_unlock(&li->lock);
	return 0;
}

int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
			     struct kvm_s390_pgm_info *pgm_info)
{
	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
	struct kvm_s390_irq irq;
	int rc;

	spin_lock(&li->lock);
	irq.u.pgm = *pgm_info;
	rc = __inject_prog(vcpu, &irq);
	BUG_ON(waitqueue_active(li->wq));
	spin_unlock(&li->lock);
	return rc;
}

static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
{
	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;

@@ -1390,12 +1363,9 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)

static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
{
	struct kvm_s390_float_interrupt *fi;
	u64 type = READ_ONCE(inti->type);
	int rc;

	fi = &kvm->arch.float_int;

	switch (type) {
	case KVM_S390_MCHK:
		rc = __inject_float_mchk(kvm, inti);
@ -514,35 +514,20 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
|
|||
|
||||
if (gtod_high != 0)
|
||||
return -EINVAL;
|
||||
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
|
||||
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_vcpu *cur_vcpu;
|
||||
unsigned int vcpu_idx;
|
||||
u64 host_tod, gtod;
|
||||
int r;
|
||||
u64 gtod;
|
||||
|
||||
if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
|
||||
return -EFAULT;
|
||||
|
||||
r = store_tod_clock(&host_tod);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
preempt_disable();
|
||||
kvm->arch.epoch = gtod - host_tod;
|
||||
kvm_s390_vcpu_block_all(kvm);
|
||||
kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
|
||||
cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
|
||||
kvm_s390_vcpu_unblock_all(kvm);
|
||||
preempt_enable();
|
||||
mutex_unlock(&kvm->lock);
|
||||
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
|
||||
kvm_s390_set_tod_clock(kvm, gtod);
|
||||
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -574,26 +559,19 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
|
|||
if (copy_to_user((void __user *)attr->addr, >od_high,
|
||||
sizeof(gtod_high)))
|
||||
return -EFAULT;
|
||||
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
|
||||
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
u64 host_tod, gtod;
|
||||
int r;
|
||||
u64 gtod;
|
||||
|
||||
r = store_tod_clock(&host_tod);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
preempt_disable();
|
||||
gtod = host_tod + kvm->arch.epoch;
|
||||
preempt_enable();
|
||||
gtod = kvm_s390_get_tod_clock_fast(kvm);
|
||||
if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
|
||||
return -EFAULT;
|
||||
VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
|
||||
VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@@ -1120,7 +1098,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
    if (!kvm->arch.sca)
        goto out_err;
    spin_lock(&kvm_lock);
    sca_offset = (sca_offset + 16) & 0x7f0;
    sca_offset += 16;
    if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
        sca_offset = 0;
    kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
    spin_unlock(&kvm_lock);

@@ -1911,6 +1891,22 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
    return 0;
}

void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
    struct kvm_vcpu *vcpu;
    int i;

    mutex_lock(&kvm->lock);
    preempt_disable();
    kvm->arch.epoch = tod - get_tod_clock();
    kvm_s390_vcpu_block_all(kvm);
    kvm_for_each_vcpu(i, vcpu, kvm)
        vcpu->arch.sie_block->epoch = kvm->arch.epoch;
    kvm_s390_vcpu_unblock_all(kvm);
    preempt_enable();
    mutex_unlock(&kvm->lock);
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu

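The hunks above (and the SCK handler further down) funnel all guest TOD updates through the new kvm_s390_set_tod_clock() helper: the guest clock is never stored directly, only as an epoch delta against the host TOD, so reads become host TOD plus epoch. As a rough illustration of that arithmetic, here is a minimal stand-alone sketch, not kernel code; all names in it are invented for the example, and the unsigned wraparound is intentional, as in the kernel:

#include <stdint.h>
#include <stdio.h>

static uint64_t host_tod = 1000;   /* stand-in for get_tod_clock() */
static uint64_t epoch;             /* stand-in for kvm->arch.epoch */

static void set_guest_tod(uint64_t gtod)
{
    epoch = gtod - host_tod;       /* same shape as tod - get_tod_clock() */
}

static uint64_t get_guest_tod(void)
{
    return host_tod + epoch;       /* same shape as get_tod_clock_fast() + epoch */
}

int main(void)
{
    set_guest_tod(250);            /* guest clock set behind the host */
    host_tod += 40;                /* host time advances */
    printf("guest TOD: %llu\n", (unsigned long long)get_guest_tod()); /* prints 290 */
    return 0;
}
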
@@ -175,6 +175,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
    return kvm->arch.user_cpu_state_ctrl != 0;
}

/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
@@ -185,7 +186,25 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm,
                                    struct kvm_s390_interrupt *s390int);
int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
                                      struct kvm_s390_irq *irq);
int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
static inline int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
                                           struct kvm_s390_pgm_info *pgm_info)
{
    struct kvm_s390_irq irq = {
        .type = KVM_S390_PROGRAM_INT,
        .u.pgm = *pgm_info,
    };

    return kvm_s390_inject_vcpu(vcpu, &irq);
}
static inline int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
{
    struct kvm_s390_irq irq = {
        .type = KVM_S390_PROGRAM_INT,
        .u.pgm.code = code,
    };

    return kvm_s390_inject_vcpu(vcpu, &irq);
}
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                    u64 isc_mask, u32 schid);
int kvm_s390_reinject_io_int(struct kvm *kvm,
@@ -212,6 +231,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);

/* implemented in kvm-s390.c */
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
@@ -231,9 +251,6 @@ extern unsigned long kvm_s390_fac_list_mask[];

/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
/* implemented in interrupt.c */
int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
                             struct kvm_s390_pgm_info *pgm_info);

static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
{
@@ -254,6 +271,16 @@ static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
        kvm_s390_vcpu_unblock(vcpu);
}

static inline u64 kvm_s390_get_tod_clock_fast(struct kvm *kvm)
{
    u64 rc;

    preempt_disable();
    rc = get_tod_clock_fast() + kvm->arch.epoch;
    preempt_enable();
    return rc;
}

/**
 * kvm_s390_inject_prog_cond - conditionally inject a program check
 * @vcpu: virtual cpu

@@ -33,11 +33,9 @@
/* Handle SCK (SET CLOCK) interception */
static int handle_set_clock(struct kvm_vcpu *vcpu)
{
    struct kvm_vcpu *cpup;
    s64 hostclk, val;
    int i, rc;
    int rc;
    ar_t ar;
    u64 op2;
    u64 op2, val;

    if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
        return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -49,19 +47,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
    if (rc)
        return kvm_s390_inject_prog_cond(vcpu, rc);

    if (store_tod_clock(&hostclk)) {
        kvm_s390_set_psw_cc(vcpu, 3);
        return 0;
    }
    VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
    val = (val - hostclk) & ~0x3fUL;

    mutex_lock(&vcpu->kvm->lock);
    preempt_disable();
    kvm_for_each_vcpu(i, cpup, vcpu->kvm)
        cpup->arch.sie_block->epoch = val;
    preempt_enable();
    mutex_unlock(&vcpu->kvm->lock);
    kvm_s390_set_tod_clock(vcpu->kvm, val);

    kvm_s390_set_psw_cc(vcpu, 0);
    return 0;

@@ -33,6 +33,11 @@ enum irq_remap_cap {
    IRQ_POSTING_CAP = 0,
};

struct vcpu_data {
    u64 pi_desc_addr;   /* Physical address of PI Descriptor */
    u32 vector;         /* Guest vector of the interrupt */
};

#ifdef CONFIG_IRQ_REMAP

extern bool irq_remapping_cap(enum irq_remap_cap cap);
@@ -58,11 +63,6 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void)
    return x86_vector_domain;
}

struct vcpu_data {
    u64 pi_desc_addr;   /* Physical address of PI Descriptor */
    u32 vector;         /* Guest vector of the interrupt */
};

#else /* CONFIG_IRQ_REMAP */

static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; }

@@ -111,6 +111,16 @@ struct x86_emulate_ops {
                    unsigned int bytes,
                    struct x86_exception *fault);

    /*
     * read_phys: Read bytes of standard (non-emulated/special) memory.
     *            Used for descriptor reading.
     *  @addr:  [IN ] Physical address from which to read.
     *  @val:   [OUT] Value read from memory.
     *  @bytes: [IN ] Number of bytes to read from memory.
     */
    int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr,
                     void *val, unsigned int bytes);

    /*
     * write_std: Write bytes of standard (non-emulated/special) memory.
     *            Used for descriptor writing.

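The new read_phys callback bypasses segmentation and paging entirely and is what RSM uses below to pull state out of SMRAM. A minimal stand-alone sketch of a callback with that shape, backed by a flat buffer, is shown here; the buffer and helper names are invented for the example and this is not KVM's actual implementation:

#include <string.h>

#define X86EMUL_CONTINUE      0
#define X86EMUL_UNHANDLEABLE  1

static unsigned char guest_ram[1 << 20];    /* pretend guest physical memory */

static int demo_read_phys(void *ctxt, unsigned long addr,
                          void *val, unsigned int bytes)
{
    (void)ctxt;
    if (addr + bytes > sizeof(guest_ram))
        return X86EMUL_UNHANDLEABLE;        /* out of range: emulation fails */
    memcpy(val, guest_ram + addr, bytes);   /* physical address, no translation */
    return X86EMUL_CONTINUE;
}
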
@ -24,6 +24,7 @@
|
|||
#include <linux/perf_event.h>
|
||||
#include <linux/pvclock_gtod.h>
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/irqbypass.h>
|
||||
|
||||
#include <asm/pvclock-abi.h>
|
||||
#include <asm/desc.h>
|
||||
|
@ -176,6 +177,8 @@ enum {
|
|||
*/
|
||||
#define KVM_APIC_PV_EOI_PENDING 1
|
||||
|
||||
struct kvm_kernel_irq_routing_entry;
|
||||
|
||||
/*
|
||||
* We don't want allocation failures within the mmu code, so we preallocate
|
||||
* enough memory for a single page fault in a cache.
|
||||
|
@ -374,6 +377,7 @@ struct kvm_mtrr {
|
|||
/* Hyper-V per vcpu emulation context */
|
||||
struct kvm_vcpu_hv {
|
||||
u64 hv_vapic;
|
||||
s64 runtime_offset;
|
||||
};
|
||||
|
||||
struct kvm_vcpu_arch {
|
||||
|
@ -396,6 +400,7 @@ struct kvm_vcpu_arch {
|
|||
u64 efer;
|
||||
u64 apic_base;
|
||||
struct kvm_lapic *apic; /* kernel irqchip context */
|
||||
u64 eoi_exit_bitmap[4];
|
||||
unsigned long apic_attention;
|
||||
int32_t apic_arb_prio;
|
||||
int mp_state;
|
||||
|
@ -573,6 +578,9 @@ struct kvm_vcpu_arch {
|
|||
struct {
|
||||
bool pv_unhalted;
|
||||
} pv;
|
||||
|
||||
int pending_ioapic_eoi;
|
||||
int pending_external_vector;
|
||||
};
|
||||
|
||||
struct kvm_lpage_info {
|
||||
|
@ -683,6 +691,9 @@ struct kvm_arch {
|
|||
u32 bsp_vcpu_id;
|
||||
|
||||
u64 disabled_quirks;
|
||||
|
||||
bool irqchip_split;
|
||||
u8 nr_reserved_ioapic_pins;
|
||||
};
|
||||
|
||||
struct kvm_vm_stat {
|
||||
|
@ -819,10 +830,10 @@ struct kvm_x86_ops {
|
|||
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
|
||||
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
|
||||
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
|
||||
int (*vm_has_apicv)(struct kvm *kvm);
|
||||
int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu);
|
||||
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
|
||||
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
|
||||
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
|
||||
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu);
|
||||
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
|
||||
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
|
||||
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
|
||||
|
@ -887,6 +898,20 @@ struct kvm_x86_ops {
|
|||
gfn_t offset, unsigned long mask);
|
||||
/* pmu operations of sub-arch */
|
||||
const struct kvm_pmu_ops *pmu_ops;
|
||||
|
||||
/*
|
||||
* Architecture specific hooks for vCPU blocking due to
|
||||
* HLT instruction.
|
||||
* Returns for .pre_block():
|
||||
* - 0 means continue to block the vCPU.
|
||||
* - 1 means we cannot block the vCPU since some event
|
||||
* happens during this period, such as, 'ON' bit in
|
||||
* posted-interrupts descriptor is set.
|
||||
*/
|
||||
int (*pre_block)(struct kvm_vcpu *vcpu);
|
||||
void (*post_block)(struct kvm_vcpu *vcpu);
|
||||
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
|
||||
uint32_t guest_irq, bool set);
|
||||
};
|
||||
|
||||
struct kvm_arch_async_pf {
|
||||
|
@ -1231,4 +1256,13 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
|
|||
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
|
||||
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
|
||||
|
||||
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
|
||||
struct kvm_vcpu **dest_vcpu);
|
||||
|
||||
void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm_lapic_irq *irq);
|
||||
|
||||
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
#endif /* _ASM_X86_KVM_HOST_H */
|
||||
|
|
|
@ -72,7 +72,7 @@
|
|||
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
|
||||
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
|
||||
#define SECONDARY_EXEC_XSAVES 0x00100000
|
||||
|
||||
#define SECONDARY_EXEC_PCOMMIT 0x00200000
|
||||
|
||||
#define PIN_BASED_EXT_INTR_MASK 0x00000001
|
||||
#define PIN_BASED_NMI_EXITING 0x00000008
|
||||
|
@ -416,6 +416,7 @@ enum vmcs_field {
|
|||
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
|
||||
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
|
||||
|
||||
#define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */
|
||||
#define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */
|
||||
#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */
|
||||
|
||||
|
|
|
@ -153,6 +153,12 @@
|
|||
/* MSR used to provide vcpu index */
|
||||
#define HV_X64_MSR_VP_INDEX 0x40000002
|
||||
|
||||
/* MSR used to reset the guest OS. */
|
||||
#define HV_X64_MSR_RESET 0x40000003
|
||||
|
||||
/* MSR used to provide vcpu runtime in 100ns units */
|
||||
#define HV_X64_MSR_VP_RUNTIME 0x40000010
|
||||
|
||||
/* MSR used to read the per-partition time reference counter */
|
||||
#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
|
||||
|
||||
|
@ -251,4 +257,16 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
|
|||
__s64 tsc_offset;
|
||||
} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
|
||||
|
||||
/* Define the number of synthetic interrupt sources. */
|
||||
#define HV_SYNIC_SINT_COUNT (16)
|
||||
/* Define the expected SynIC version. */
|
||||
#define HV_SYNIC_VERSION_1 (0x1)
|
||||
|
||||
#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
|
||||
#define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
|
||||
#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0)
|
||||
#define HV_SYNIC_SINT_MASKED (1ULL << 16)
|
||||
#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17)
|
||||
#define HV_SYNIC_SINT_VECTOR_MASK (0xFF)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -78,6 +78,7 @@
|
|||
#define EXIT_REASON_PML_FULL 62
|
||||
#define EXIT_REASON_XSAVES 63
|
||||
#define EXIT_REASON_XRSTORS 64
|
||||
#define EXIT_REASON_PCOMMIT 65
|
||||
|
||||
#define VMX_EXIT_REASONS \
|
||||
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
|
||||
|
@ -126,7 +127,8 @@
|
|||
{ EXIT_REASON_INVVPID, "INVVPID" }, \
|
||||
{ EXIT_REASON_INVPCID, "INVPCID" }, \
|
||||
{ EXIT_REASON_XSAVES, "XSAVES" }, \
|
||||
{ EXIT_REASON_XRSTORS, "XRSTORS" }
|
||||
{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
|
||||
{ EXIT_REASON_PCOMMIT, "PCOMMIT" }
|
||||
|
||||
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
|
||||
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
|
||||
|
|
|
@@ -32,6 +32,7 @@
static int kvmclock = 1;
static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
static cycle_t kvm_sched_clock_offset;

static int parse_no_kvmclock(char *arg)
{
@@ -92,6 +93,29 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
    return kvm_clock_read();
}

static cycle_t kvm_sched_clock_read(void)
{
    return kvm_clock_read() - kvm_sched_clock_offset;
}

static inline void kvm_sched_clock_init(bool stable)
{
    if (!stable) {
        pv_time_ops.sched_clock = kvm_clock_read;
        return;
    }

    kvm_sched_clock_offset = kvm_clock_read();
    pv_time_ops.sched_clock = kvm_sched_clock_read;
    set_sched_clock_stable();

    printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n",
           kvm_sched_clock_offset);

    BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) >
                 sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time));
}

/*
 * If we don't do that, there is the possibility that the guest
 * will calibrate under heavy load - thus, getting a lower lpj -

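This is the guest-side rewrite mentioned in the changelog: instead of relying on the hypervisor to start the clock at zero, the guest snapshots the raw kvmclock value once at init and subtracts it on every sched_clock() read, so the scheduler clock counts from boot. A tiny user-space sketch of that offset idea, with a plain counter standing in for kvm_clock_read() (all names below are invented for the example):

#include <stdint.h>
#include <stdio.h>

static uint64_t fake_kvmclock = 5000;   /* raw clock, already running at "boot" */
static uint64_t sched_clock_offset;

static void sched_clock_init(void)
{
    sched_clock_offset = fake_kvmclock; /* snapshot taken once, like kvm_sched_clock_init() */
}

static uint64_t sched_clock(void)
{
    return fake_kvmclock - sched_clock_offset;  /* counts from zero at init */
}

int main(void)
{
    sched_clock_init();
    fake_kvmclock += 123;
    printf("sched_clock() = %llu\n", (unsigned long long)sched_clock()); /* prints 123 */
    return 0;
}
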
@ -248,7 +272,17 @@ void __init kvmclock_init(void)
|
|||
memblock_free(mem, size);
|
||||
return;
|
||||
}
|
||||
pv_time_ops.sched_clock = kvm_clock_read;
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
|
||||
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
|
||||
|
||||
cpu = get_cpu();
|
||||
vcpu_time = &hv_clock[cpu].pvti;
|
||||
flags = pvclock_read_flags(vcpu_time);
|
||||
|
||||
kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT);
|
||||
put_cpu();
|
||||
|
||||
x86_platform.calibrate_tsc = kvm_get_tsc_khz;
|
||||
x86_platform.get_wallclock = kvm_get_wallclock;
|
||||
x86_platform.set_wallclock = kvm_set_wallclock;
|
||||
|
@ -265,16 +299,6 @@ void __init kvmclock_init(void)
|
|||
kvm_get_preset_lpj();
|
||||
clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
|
||||
pv_info.name = "KVM";
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
|
||||
pvclock_set_flags(~0);
|
||||
|
||||
cpu = get_cpu();
|
||||
vcpu_time = &hv_clock[cpu].pvti;
|
||||
flags = pvclock_read_flags(vcpu_time);
|
||||
if (flags & PVCLOCK_COUNTS_FROM_ZERO)
|
||||
set_sched_clock_stable();
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
int __init kvm_setup_vsyscall_timeinfo(void)
|
||||
|
|
|
@ -28,6 +28,8 @@ config KVM
|
|||
select ANON_INODES
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_IRQFD
|
||||
select IRQ_BYPASS_MANAGER
|
||||
select HAVE_KVM_IRQ_BYPASS
|
||||
select HAVE_KVM_IRQ_ROUTING
|
||||
select HAVE_KVM_EVENTFD
|
||||
select KVM_APIC_ARCHITECTURE
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <linux/fs.h>
|
||||
#include "irq.h"
|
||||
#include "assigned-dev.h"
|
||||
#include "trace/events/kvm.h"
|
||||
|
||||
struct kvm_assigned_dev_kernel {
|
||||
struct kvm_irq_ack_notifier ack_notifier;
|
||||
|
@ -131,7 +132,42 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
|
|||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
#ifdef __KVM_HAVE_MSI
|
||||
/*
|
||||
* Deliver an IRQ in an atomic context if we can, or return a failure,
|
||||
* user can retry in a process context.
|
||||
* Return value:
|
||||
* -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
|
||||
* Other values - No need to retry.
|
||||
*/
|
||||
static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
|
||||
int level)
|
||||
{
|
||||
struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
|
||||
struct kvm_kernel_irq_routing_entry *e;
|
||||
int ret = -EINVAL;
|
||||
int idx;
|
||||
|
||||
trace_kvm_set_irq(irq, level, irq_source_id);
|
||||
|
||||
/*
|
||||
* Injection into either PIC or IOAPIC might need to scan all CPUs,
|
||||
* which would need to be retried from thread context; when same GSI
|
||||
* is connected to both PIC and IOAPIC, we'd have to report a
|
||||
* partial failure here.
|
||||
* Since there's no easy way to do this, we only support injecting MSI
|
||||
* which is limited to 1:1 GSI mapping.
|
||||
*/
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
|
||||
e = &entries[0];
|
||||
ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
|
||||
irq, level);
|
||||
}
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
|
||||
|
@ -150,9 +186,7 @@ static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
|
|||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
|
||||
|
@ -183,7 +217,6 @@ static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
|
|||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Ack the irq line for an assigned device */
|
||||
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
|
||||
|
@ -386,7 +419,6 @@ static int assigned_device_enable_host_intx(struct kvm *kvm,
|
|||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __KVM_HAVE_MSI
|
||||
static int assigned_device_enable_host_msi(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *dev)
|
||||
{
|
||||
|
@ -408,9 +440,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
|
|||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
static int assigned_device_enable_host_msix(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *dev)
|
||||
{
|
||||
|
@ -443,8 +473,6 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
|
|||
return r;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static int assigned_device_enable_guest_intx(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *dev,
|
||||
struct kvm_assigned_irq *irq)
|
||||
|
@ -454,7 +482,6 @@ static int assigned_device_enable_guest_intx(struct kvm *kvm,
|
|||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __KVM_HAVE_MSI
|
||||
static int assigned_device_enable_guest_msi(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *dev,
|
||||
struct kvm_assigned_irq *irq)
|
||||
|
@ -463,9 +490,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
|
|||
dev->ack_notifier.gsi = -1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
static int assigned_device_enable_guest_msix(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *dev,
|
||||
struct kvm_assigned_irq *irq)
|
||||
|
@ -474,7 +499,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
|
|||
dev->ack_notifier.gsi = -1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int assign_host_irq(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *dev,
|
||||
|
@ -492,16 +516,12 @@ static int assign_host_irq(struct kvm *kvm,
|
|||
case KVM_DEV_IRQ_HOST_INTX:
|
||||
r = assigned_device_enable_host_intx(kvm, dev);
|
||||
break;
|
||||
#ifdef __KVM_HAVE_MSI
|
||||
case KVM_DEV_IRQ_HOST_MSI:
|
||||
r = assigned_device_enable_host_msi(kvm, dev);
|
||||
break;
|
||||
#endif
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
case KVM_DEV_IRQ_HOST_MSIX:
|
||||
r = assigned_device_enable_host_msix(kvm, dev);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
@ -534,16 +554,12 @@ static int assign_guest_irq(struct kvm *kvm,
|
|||
case KVM_DEV_IRQ_GUEST_INTX:
|
||||
r = assigned_device_enable_guest_intx(kvm, dev, irq);
|
||||
break;
|
||||
#ifdef __KVM_HAVE_MSI
|
||||
case KVM_DEV_IRQ_GUEST_MSI:
|
||||
r = assigned_device_enable_guest_msi(kvm, dev, irq);
|
||||
break;
|
||||
#endif
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
case KVM_DEV_IRQ_GUEST_MSIX:
|
||||
r = assigned_device_enable_guest_msix(kvm, dev, irq);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
@ -826,7 +842,6 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
|
|||
}
|
||||
|
||||
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
|
||||
struct kvm_assigned_msix_nr *entry_nr)
|
||||
{
|
||||
|
@ -906,7 +921,6 @@ static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
|
|||
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
|
||||
struct kvm_assigned_pci_dev *assigned_dev)
|
||||
|
@ -1012,7 +1026,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
|||
goto out;
|
||||
break;
|
||||
}
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
case KVM_ASSIGN_SET_MSIX_NR: {
|
||||
struct kvm_assigned_msix_nr entry_nr;
|
||||
r = -EFAULT;
|
||||
|
@ -1033,7 +1046,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
|||
goto out;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case KVM_ASSIGN_SET_INTX_MASK: {
|
||||
struct kvm_assigned_pci_dev assigned_dev;
|
||||
|
||||
|
|
|
@ -348,7 +348,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
|||
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
|
||||
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
|
||||
F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
|
||||
F(AVX512CD);
|
||||
F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
|
||||
|
||||
/* cpuid 0xD.1.eax */
|
||||
const u32 kvm_supported_word10_x86_features =
|
||||
|
|
|
@ -133,4 +133,41 @@ static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
|
|||
best = kvm_find_cpuid_entry(vcpu, 7, 0);
|
||||
return best && (best->ebx & bit(X86_FEATURE_MPX));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 7, 0);
|
||||
return best && (best->ebx & bit(X86_FEATURE_PCOMMIT));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
|
||||
return best && (best->edx & bit(X86_FEATURE_RDTSCP));
|
||||
}
|
||||
|
||||
/*
|
||||
* NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
|
||||
*/
|
||||
#define BIT_NRIPS 3
|
||||
|
||||
static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0);
|
||||
|
||||
/*
|
||||
* NRIPS is a scattered cpuid feature, so we can't use
|
||||
* X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit
|
||||
* position 8, not 3).
|
||||
*/
|
||||
return best && (best->edx & bit(BIT_NRIPS));
|
||||
}
|
||||
#undef BIT_NRIPS
|
||||
|
||||
#endif
|
||||
|
|
|
@@ -2272,8 +2272,8 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
#define GET_SMSTATE(type, smbase, offset)                               \
    ({                                                                  \
     type __val;                                                        \
     int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val,         \
                                 sizeof(__val), NULL);                  \
     int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val,        \
                                  sizeof(__val));                       \
     if (r != X86EMUL_CONTINUE)                                         \
         return X86EMUL_UNHANDLEABLE;                                   \
     __val;                                                             \

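GET_SMSTATE is a GCC statement expression: it reads a typed value at smbase + offset and, on failure, returns from the *calling* function. A stand-alone analogue of that pattern is sketched below; the buffer, offset, and helper names are all invented for the example and are not the kernel's SMRAM layout:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static unsigned char smram[0x10000];

static int read_phys(unsigned long addr, void *val, unsigned int bytes)
{
    if (addr + bytes > sizeof(smram))
        return -1;
    memcpy(val, smram + addr, bytes);
    return 0;
}

/* Reads a typed value; on failure the enclosing function returns -1. */
#define GET_STATE(type, smbase, offset) ({                              \
    type __val;                                                         \
    if (read_phys((smbase) + (offset), &__val, sizeof(__val)))          \
        return -1;                                                      \
    __val;                                                              \
})

static int load_state(unsigned long smbase)
{
    uint32_t reg = GET_STATE(uint32_t, smbase, 0x7f00); /* hypothetical offset */
    printf("restored register: %u\n", (unsigned)reg);
    return 0;
}

int main(void)
{
    smram[0x7f00] = 42;
    return load_state(0);
}
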
@ -2484,17 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
|
|||
|
||||
/*
|
||||
* Get back to real mode, to prepare a safe state in which to load
|
||||
* CR0/CR3/CR4/EFER. Also this will ensure that addresses passed
|
||||
* to read_std/write_std are not virtual.
|
||||
*
|
||||
* CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
|
||||
* CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
|
||||
* supports long mode.
|
||||
*/
|
||||
cr4 = ctxt->ops->get_cr(ctxt, 4);
|
||||
if (emulator_has_longmode(ctxt)) {
|
||||
struct desc_struct cs_desc;
|
||||
|
||||
/* Zero CR4.PCIDE before CR0.PG. */
|
||||
if (cr4 & X86_CR4_PCIDE) {
|
||||
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
|
||||
cr4 &= ~X86_CR4_PCIDE;
|
||||
}
|
||||
|
||||
/* A 32-bit code segment is required to clear EFER.LMA. */
|
||||
memset(&cs_desc, 0, sizeof(cs_desc));
|
||||
cs_desc.type = 0xb;
|
||||
cs_desc.s = cs_desc.g = cs_desc.p = 1;
|
||||
ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
|
||||
}
|
||||
|
||||
/* For the 64-bit case, this will clear EFER.LMA. */
|
||||
cr0 = ctxt->ops->get_cr(ctxt, 0);
|
||||
if (cr0 & X86_CR0_PE)
|
||||
ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
|
||||
cr4 = ctxt->ops->get_cr(ctxt, 4);
|
||||
|
||||
/* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
|
||||
if (cr4 & X86_CR4_PAE)
|
||||
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
|
||||
|
||||
/* And finally go back to 32-bit mode. */
|
||||
efer = 0;
|
||||
ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
|
||||
|
||||
|
@ -4455,7 +4474,7 @@ static const struct opcode twobyte_table[256] = {
|
|||
F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
|
||||
/* 0xA8 - 0xAF */
|
||||
I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
|
||||
II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
|
||||
II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
|
||||
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
|
||||
F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
|
||||
F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
|
||||
|
|
|
@ -41,6 +41,7 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
|
|||
case HV_X64_MSR_TIME_REF_COUNT:
|
||||
case HV_X64_MSR_CRASH_CTL:
|
||||
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
|
||||
case HV_X64_MSR_RESET:
|
||||
r = true;
|
||||
break;
|
||||
}
|
||||
|
@ -163,6 +164,12 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
|
|||
data);
|
||||
case HV_X64_MSR_CRASH_CTL:
|
||||
return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
|
||||
case HV_X64_MSR_RESET:
|
||||
if (data == 1) {
|
||||
vcpu_debug(vcpu, "hyper-v reset requested\n");
|
||||
kvm_make_request(KVM_REQ_HV_RESET, vcpu);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
|
||||
msr, data);
|
||||
|
@@ -171,7 +178,16 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
    return 0;
}

static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
/* Calculate cpu time spent by current task in 100ns units */
static u64 current_task_runtime_100ns(void)
{
    cputime_t utime, stime;

    task_cputime_adjusted(current, &utime, &stime);
    return div_u64(cputime_to_nsecs(utime + stime), 100);
}

static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
    struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;

@@ -205,6 +221,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
        return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
    case HV_X64_MSR_TPR:
        return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
    case HV_X64_MSR_VP_RUNTIME:
        if (!host)
            return 1;
        hv->runtime_offset = data - current_task_runtime_100ns();
        break;
    default:
        vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
                    msr, data);

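The VP_RUNTIME MSR is emulated with the same offset trick used elsewhere in this series: only a delta against the task's accumulated CPU time is stored, so a host-side write records data minus the current runtime and a read returns the current runtime plus that offset. A small user-space sketch of the arithmetic, with invented names standing in for the kernel helpers:

#include <stdint.h>
#include <stdio.h>

static uint64_t task_runtime_100ns = 7000; /* stand-in for current_task_runtime_100ns() */
static uint64_t runtime_offset;            /* stand-in for hv->runtime_offset */

static void set_vp_runtime(uint64_t data)
{
    runtime_offset = data - task_runtime_100ns;
}

static uint64_t get_vp_runtime(void)
{
    return task_runtime_100ns + runtime_offset;
}

int main(void)
{
    set_vp_runtime(100);              /* MSR written with 100 */
    task_runtime_100ns += 50;         /* task accumulates more CPU time */
    printf("VP_RUNTIME = %llu\n", (unsigned long long)get_vp_runtime()); /* prints 150 */
    return 0;
}
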
@ -241,6 +262,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|||
pdata);
|
||||
case HV_X64_MSR_CRASH_CTL:
|
||||
return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
|
||||
case HV_X64_MSR_RESET:
|
||||
data = 0;
|
||||
break;
|
||||
default:
|
||||
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
|
||||
return 1;
|
||||
|
@ -277,6 +301,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|||
case HV_X64_MSR_APIC_ASSIST_PAGE:
|
||||
data = hv->hv_vapic;
|
||||
break;
|
||||
case HV_X64_MSR_VP_RUNTIME:
|
||||
data = current_task_runtime_100ns() + hv->runtime_offset;
|
||||
break;
|
||||
default:
|
||||
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
|
||||
return 1;
|
||||
|
@ -295,7 +322,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
|||
mutex_unlock(&vcpu->kvm->lock);
|
||||
return r;
|
||||
} else
|
||||
return kvm_hv_set_msr(vcpu, msr, data);
|
||||
return kvm_hv_set_msr(vcpu, msr, data, host);
|
||||
}
|
||||
|
||||
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include <linux/kvm_host.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "ioapic.h"
|
||||
#include "irq.h"
|
||||
#include "i8254.h"
|
||||
#include "x86.h"
|
||||
|
@ -333,7 +334,8 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
|
|||
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
|
||||
s64 interval;
|
||||
|
||||
if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
|
||||
if (!ioapic_in_kernel(kvm) ||
|
||||
ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
|
||||
return;
|
||||
|
||||
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
|
||||
|
|
|
@ -233,21 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
|
|||
}
|
||||
|
||||
|
||||
static void update_handled_vectors(struct kvm_ioapic *ioapic)
|
||||
{
|
||||
DECLARE_BITMAP(handled_vectors, 256);
|
||||
int i;
|
||||
|
||||
memset(handled_vectors, 0, sizeof(handled_vectors));
|
||||
for (i = 0; i < IOAPIC_NUM_PINS; ++i)
|
||||
__set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
|
||||
memcpy(ioapic->handled_vectors, handled_vectors,
|
||||
sizeof(handled_vectors));
|
||||
smp_wmb();
|
||||
}
|
||||
|
||||
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
|
||||
u32 *tmr)
|
||||
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
|
||||
{
|
||||
struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
|
||||
union kvm_ioapic_redirect_entry *e;
|
||||
|
@ -260,13 +246,11 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
|
|||
kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
|
||||
index == RTC_GSI) {
|
||||
if (kvm_apic_match_dest(vcpu, NULL, 0,
|
||||
e->fields.dest_id, e->fields.dest_mode)) {
|
||||
e->fields.dest_id, e->fields.dest_mode) ||
|
||||
(e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
|
||||
kvm_apic_pending_eoi(vcpu, e->fields.vector)))
|
||||
__set_bit(e->fields.vector,
|
||||
(unsigned long *)eoi_exit_bitmap);
|
||||
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG)
|
||||
__set_bit(e->fields.vector,
|
||||
(unsigned long *)tmr);
|
||||
}
|
||||
}
|
||||
}
|
||||
spin_unlock(&ioapic->lock);
|
||||
|
@ -315,7 +299,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
|
|||
e->bits |= (u32) val;
|
||||
e->fields.remote_irr = 0;
|
||||
}
|
||||
update_handled_vectors(ioapic);
|
||||
mask_after = e->fields.mask;
|
||||
if (mask_before != mask_after)
|
||||
kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
|
||||
|
@ -599,7 +582,6 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
|
|||
ioapic->id = 0;
|
||||
memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
|
||||
rtc_irq_eoi_tracking_reset(ioapic);
|
||||
update_handled_vectors(ioapic);
|
||||
}
|
||||
|
||||
static const struct kvm_io_device_ops ioapic_mmio_ops = {
|
||||
|
@ -628,8 +610,10 @@ int kvm_ioapic_init(struct kvm *kvm)
|
|||
if (ret < 0) {
|
||||
kvm->arch.vioapic = NULL;
|
||||
kfree(ioapic);
|
||||
return ret;
|
||||
}
|
||||
|
||||
kvm_vcpu_request_scan_ioapic(kvm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -666,7 +650,6 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
|
|||
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
|
||||
ioapic->irr = 0;
|
||||
ioapic->irr_delivered = 0;
|
||||
update_handled_vectors(ioapic);
|
||||
kvm_vcpu_request_scan_ioapic(kvm);
|
||||
kvm_ioapic_inject_all(ioapic, state->irr);
|
||||
spin_unlock(&ioapic->lock);
|
||||
|
|
|
@ -9,6 +9,7 @@ struct kvm;
|
|||
struct kvm_vcpu;
|
||||
|
||||
#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
|
||||
#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
|
||||
#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
|
||||
#define IOAPIC_EDGE_TRIG 0
|
||||
#define IOAPIC_LEVEL_TRIG 1
|
||||
|
@ -73,7 +74,6 @@ struct kvm_ioapic {
|
|||
struct kvm *kvm;
|
||||
void (*ack_notifier)(void *opaque, int irq);
|
||||
spinlock_t lock;
|
||||
DECLARE_BITMAP(handled_vectors, 256);
|
||||
struct rtc_status rtc_status;
|
||||
struct delayed_work eoi_inject;
|
||||
u32 irq_eoi[IOAPIC_NUM_PINS];
|
||||
|
@ -98,11 +98,12 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
|
|||
return kvm->arch.vioapic;
|
||||
}
|
||||
|
||||
static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
|
||||
static inline int ioapic_in_kernel(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
|
||||
smp_rmb();
|
||||
return test_bit(vector, ioapic->handled_vectors);
|
||||
int ret;
|
||||
|
||||
ret = (ioapic_irqchip(kvm) != NULL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
|
||||
|
@ -120,7 +121,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
|||
struct kvm_lapic_irq *irq, unsigned long *dest_map);
|
||||
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
|
||||
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
|
||||
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
|
||||
u32 *tmr);
|
||||
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
|
||||
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -37,15 +37,28 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
|
||||
|
||||
/*
|
||||
* check if there is a pending userspace external interrupt
|
||||
*/
|
||||
static int pending_userspace_extint(struct kvm_vcpu *v)
|
||||
{
|
||||
return v->arch.pending_external_vector != -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* check if there is pending interrupt from
|
||||
* non-APIC source without intack.
|
||||
*/
|
||||
static int kvm_cpu_has_extint(struct kvm_vcpu *v)
|
||||
{
|
||||
if (kvm_apic_accept_pic_intr(v))
|
||||
return pic_irqchip(v->kvm)->output; /* PIC */
|
||||
else
|
||||
u8 accept = kvm_apic_accept_pic_intr(v);
|
||||
|
||||
if (accept) {
|
||||
if (irqchip_split(v->kvm))
|
||||
return pending_userspace_extint(v);
|
||||
else
|
||||
return pic_irqchip(v->kvm)->output;
|
||||
} else
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -57,13 +70,13 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
|
|||
*/
|
||||
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
|
||||
{
|
||||
if (!irqchip_in_kernel(v->kvm))
|
||||
if (!lapic_in_kernel(v))
|
||||
return v->arch.interrupt.pending;
|
||||
|
||||
if (kvm_cpu_has_extint(v))
|
||||
return 1;
|
||||
|
||||
if (kvm_apic_vid_enabled(v->kvm))
|
||||
if (kvm_vcpu_apic_vid_enabled(v))
|
||||
return 0;
|
||||
|
||||
return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
|
||||
|
@ -75,7 +88,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
|
|||
*/
|
||||
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
|
||||
{
|
||||
if (!irqchip_in_kernel(v->kvm))
|
||||
if (!lapic_in_kernel(v))
|
||||
return v->arch.interrupt.pending;
|
||||
|
||||
if (kvm_cpu_has_extint(v))
|
||||
|
@ -91,9 +104,16 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
|
|||
*/
|
||||
static int kvm_cpu_get_extint(struct kvm_vcpu *v)
|
||||
{
|
||||
if (kvm_cpu_has_extint(v))
|
||||
return kvm_pic_read_irq(v->kvm); /* PIC */
|
||||
return -1;
|
||||
if (kvm_cpu_has_extint(v)) {
|
||||
if (irqchip_split(v->kvm)) {
|
||||
int vector = v->arch.pending_external_vector;
|
||||
|
||||
v->arch.pending_external_vector = -1;
|
||||
return vector;
|
||||
} else
|
||||
return kvm_pic_read_irq(v->kvm); /* PIC */
|
||||
} else
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -103,7 +123,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
|
|||
{
|
||||
int vector;
|
||||
|
||||
if (!irqchip_in_kernel(v->kvm))
|
||||
if (!lapic_in_kernel(v))
|
||||
return v->arch.interrupt.nr;
|
||||
|
||||
vector = kvm_cpu_get_extint(v);
|
||||
|
|
|
@@ -83,13 +83,38 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
    return kvm->arch.vpic;
}

static inline int pic_in_kernel(struct kvm *kvm)
{
    int ret;

    ret = (pic_irqchip(kvm) != NULL);
    return ret;
}

static inline int irqchip_split(struct kvm *kvm)
{
    return kvm->arch.irqchip_split;
}

static inline int irqchip_in_kernel(struct kvm *kvm)
{
    struct kvm_pic *vpic = pic_irqchip(kvm);
    bool ret;

    ret = (vpic != NULL);
    ret |= irqchip_split(kvm);

    /* Read vpic before kvm->irq_routing. */
    smp_rmb();
    return vpic != NULL;
    return ret;
}

static inline int lapic_in_kernel(struct kvm_vcpu *vcpu)
{
    /* Same as irqchip_in_kernel(vcpu->kvm), but with less
     * pointer chasing and no unnecessary memory barriers.
     */
    return vcpu->arch.apic != NULL;
}

void kvm_pic_reset(struct kvm_kpic_state *s);

@ -91,8 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
|||
return r;
|
||||
}
|
||||
|
||||
static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm_lapic_irq *irq)
|
||||
void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm_lapic_irq *irq)
|
||||
{
|
||||
trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
|
||||
|
||||
|
@ -108,6 +108,7 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
|
|||
irq->level = 1;
|
||||
irq->shorthand = 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
|
||||
|
||||
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm, int irq_source_id, int level, bool line_status)
|
||||
|
@ -123,12 +124,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
|
|||
}
|
||||
|
||||
|
||||
static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm)
|
||||
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm, int irq_source_id, int level,
|
||||
bool line_status)
|
||||
{
|
||||
struct kvm_lapic_irq irq;
|
||||
int r;
|
||||
|
||||
if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
|
||||
return -EWOULDBLOCK;
|
||||
|
||||
kvm_set_msi_irq(e, &irq);
|
||||
|
||||
if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
|
||||
|
@ -137,42 +142,6 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
|
|||
return -EWOULDBLOCK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Deliver an IRQ in an atomic context if we can, or return a failure,
|
||||
* user can retry in a process context.
|
||||
* Return value:
|
||||
* -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
|
||||
* Other values - No need to retry.
|
||||
*/
|
||||
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
|
||||
{
|
||||
struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
|
||||
struct kvm_kernel_irq_routing_entry *e;
|
||||
int ret = -EINVAL;
|
||||
int idx;
|
||||
|
||||
trace_kvm_set_irq(irq, level, irq_source_id);
|
||||
|
||||
/*
|
||||
* Injection into either PIC or IOAPIC might need to scan all CPUs,
|
||||
* which would need to be retried from thread context; when same GSI
|
||||
* is connected to both PIC and IOAPIC, we'd have to report a
|
||||
* partial failure here.
|
||||
* Since there's no easy way to do this, we only support injecting MSI
|
||||
* which is limited to 1:1 GSI mapping.
|
||||
*/
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
|
||||
e = &entries[0];
|
||||
if (likely(e->type == KVM_IRQ_ROUTING_MSI))
|
||||
ret = kvm_set_msi_inatomic(e, kvm);
|
||||
else
|
||||
ret = -EWOULDBLOCK;
|
||||
}
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_request_irq_source_id(struct kvm *kvm)
|
||||
{
|
||||
unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
|
||||
|
@ -208,7 +177,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
|
|||
goto unlock;
|
||||
}
|
||||
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
|
||||
if (!irqchip_in_kernel(kvm))
|
||||
if (!ioapic_in_kernel(kvm))
|
||||
goto unlock;
|
||||
|
||||
kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
|
||||
|
@ -297,6 +266,33 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
|
|||
return r;
|
||||
}
|
||||
|
||||
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
|
||||
struct kvm_vcpu **dest_vcpu)
|
||||
{
|
||||
int i, r = 0;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
|
||||
return true;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
if (!kvm_apic_present(vcpu))
|
||||
continue;
|
||||
|
||||
if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
|
||||
irq->dest_id, irq->dest_mode))
|
||||
continue;
|
||||
|
||||
if (++r == 2)
|
||||
return false;
|
||||
|
||||
*dest_vcpu = vcpu;
|
||||
}
|
||||
|
||||
return r == 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
|
||||
|
||||
#define IOAPIC_ROUTING_ENTRY(irq) \
|
||||
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
|
||||
.u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
|
||||
|
@ -328,3 +324,54 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
|
|||
return kvm_set_irq_routing(kvm, default_routing,
|
||||
ARRAY_SIZE(default_routing), 0);
|
||||
}
|
||||
|
||||
static const struct kvm_irq_routing_entry empty_routing[] = {};
|
||||
|
||||
int kvm_setup_empty_irq_routing(struct kvm *kvm)
|
||||
{
|
||||
return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
|
||||
}
|
||||
|
||||
void kvm_arch_irq_routing_update(struct kvm *kvm)
|
||||
{
|
||||
if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
|
||||
return;
|
||||
kvm_make_scan_ioapic_request(kvm);
|
||||
}
|
||||
|
||||
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_kernel_irq_routing_entry *entry;
|
||||
struct kvm_irq_routing_table *table;
|
||||
u32 i, nr_ioapic_pins;
|
||||
int idx;
|
||||
|
||||
/* kvm->irq_routing must be read after clearing
|
||||
* KVM_SCAN_IOAPIC. */
|
||||
smp_mb();
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
|
||||
nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
|
||||
kvm->arch.nr_reserved_ioapic_pins);
|
||||
for (i = 0; i < nr_ioapic_pins; ++i) {
|
||||
hlist_for_each_entry(entry, &table->map[i], link) {
|
||||
u32 dest_id, dest_mode;
|
||||
bool level;
|
||||
|
||||
if (entry->type != KVM_IRQ_ROUTING_MSI)
|
||||
continue;
|
||||
dest_id = (entry->msi.address_lo >> 12) & 0xff;
|
||||
dest_mode = (entry->msi.address_lo >> 2) & 0x1;
|
||||
level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL;
|
||||
if (level && kvm_apic_match_dest(vcpu, NULL, 0,
|
||||
dest_id, dest_mode)) {
|
||||
u32 vector = entry->msi.data & 0xff;
|
||||
|
||||
__set_bit(vector,
|
||||
(unsigned long *) eoi_exit_bitmap);
|
||||
}
|
||||
}
|
||||
}
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
}
|
||||
|
|
|
@ -209,7 +209,7 @@ static void recalculate_apic_map(struct kvm *kvm)
|
|||
if (old)
|
||||
kfree_rcu(old, rcu);
|
||||
|
||||
kvm_vcpu_request_scan_ioapic(kvm);
|
||||
kvm_make_scan_ioapic_request(kvm);
|
||||
}
|
||||
|
||||
static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
|
||||
|
@ -348,6 +348,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
|
|||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
__kvm_apic_update_irr(pir, apic->regs);
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
|
||||
|
||||
|
@ -390,7 +392,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
|
|||
|
||||
vcpu = apic->vcpu;
|
||||
|
||||
if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
|
||||
if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) {
|
||||
/* try to update RVI */
|
||||
apic_clear_vector(vec, apic->regs + APIC_IRR);
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
|
@ -551,15 +553,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
|
|||
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
|
||||
}
|
||||
|
||||
void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
|
||||
}
|
||||
|
||||
static void apic_update_ppr(struct kvm_lapic *apic)
|
||||
{
|
||||
u32 tpr, isrv, ppr, old_ppr;
|
||||
|
@ -764,6 +757,65 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
|
|||
return ret;
|
||||
}
|
||||
|
||||
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
|
||||
struct kvm_vcpu **dest_vcpu)
|
||||
{
|
||||
struct kvm_apic_map *map;
|
||||
bool ret = false;
|
||||
struct kvm_lapic *dst = NULL;
|
||||
|
||||
if (irq->shorthand)
|
||||
return false;
|
||||
|
||||
rcu_read_lock();
|
||||
map = rcu_dereference(kvm->arch.apic_map);
|
||||
|
||||
if (!map)
|
||||
goto out;
|
||||
|
||||
if (irq->dest_mode == APIC_DEST_PHYSICAL) {
|
||||
if (irq->dest_id == 0xFF)
|
||||
goto out;
|
||||
|
||||
if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
|
||||
goto out;
|
||||
|
||||
dst = map->phys_map[irq->dest_id];
|
||||
if (dst && kvm_apic_present(dst->vcpu))
|
||||
*dest_vcpu = dst->vcpu;
|
||||
else
|
||||
goto out;
|
||||
} else {
|
||||
u16 cid;
|
||||
unsigned long bitmap = 1;
|
||||
int i, r = 0;
|
||||
|
||||
if (!kvm_apic_logical_map_valid(map))
|
||||
goto out;
|
||||
|
||||
apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
|
||||
|
||||
if (cid >= ARRAY_SIZE(map->logical_map))
|
||||
goto out;
|
||||
|
||||
for_each_set_bit(i, &bitmap, 16) {
|
||||
dst = map->logical_map[cid][i];
|
||||
if (++r == 2)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (dst && kvm_apic_present(dst->vcpu))
|
||||
*dest_vcpu = dst->vcpu;
|
||||
else
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = true;
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a pending IRQ into lapic.
|
||||
* Return 1 if successfully added and 0 if discarded.
|
||||
|
@ -781,6 +833,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
|
|||
case APIC_DM_LOWEST:
|
||||
vcpu->arch.apic_arb_prio++;
|
||||
case APIC_DM_FIXED:
|
||||
if (unlikely(trig_mode && !level))
|
||||
break;
|
||||
|
||||
/* FIXME add logic for vcpu on reset */
|
||||
if (unlikely(!apic_enabled(apic)))
|
||||
break;
|
||||
|
@ -790,6 +845,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
|
|||
if (dest_map)
|
||||
__set_bit(vcpu->vcpu_id, dest_map);
|
||||
|
||||
if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
|
||||
if (trig_mode)
|
||||
apic_set_vector(vector, apic->regs + APIC_TMR);
|
||||
else
|
||||
apic_clear_vector(vector, apic->regs + APIC_TMR);
|
||||
}
|
||||
|
||||
if (kvm_x86_ops->deliver_posted_interrupt)
|
||||
kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
|
||||
else {
|
||||
|
@ -868,16 +930,32 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
|
|||
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
|
||||
}
|
||||
|
||||
static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
|
||||
{
|
||||
return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap);
|
||||
}
|
||||
|
||||
static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
|
||||
{
|
||||
if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
|
||||
int trigger_mode;
|
||||
if (apic_test_vector(vector, apic->regs + APIC_TMR))
|
||||
trigger_mode = IOAPIC_LEVEL_TRIG;
|
||||
else
|
||||
trigger_mode = IOAPIC_EDGE_TRIG;
|
||||
kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
|
||||
int trigger_mode;
|
||||
|
||||
/* Eoi the ioapic only if the ioapic doesn't own the vector. */
|
||||
if (!kvm_ioapic_handles_vector(apic, vector))
|
||||
return;
|
||||
|
||||
/* Request a KVM exit to inform the userspace IOAPIC. */
|
||||
if (irqchip_split(apic->vcpu->kvm)) {
|
||||
apic->vcpu->arch.pending_ioapic_eoi = vector;
|
||||
kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
|
||||
return;
|
||||
}
|
||||
|
||||
if (apic_test_vector(vector, apic->regs + APIC_TMR))
|
||||
trigger_mode = IOAPIC_LEVEL_TRIG;
|
||||
else
|
||||
trigger_mode = IOAPIC_EDGE_TRIG;
|
||||
|
||||
kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
|
||||
}
|
||||
|
||||
static int apic_set_eoi(struct kvm_lapic *apic)
|
||||
|
@ -1615,7 +1693,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
|
||||
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
|
||||
}
|
||||
apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
|
||||
apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu);
|
||||
apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0;
|
||||
apic->highest_isr_cache = -1;
|
||||
update_divide_count(apic);
|
||||
|
@ -1838,7 +1916,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
|
|||
kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
|
||||
apic_find_highest_isr(apic));
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
kvm_rtc_eoi_tracking_restore_one(vcpu);
|
||||
if (ioapic_in_kernel(vcpu->kvm))
|
||||
kvm_rtc_eoi_tracking_restore_one(vcpu);
|
||||
|
||||
vcpu->arch.apic_arb_prio = 0;
|
||||
}
|
||||
|
||||
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
|
||||
|
@ -1922,7 +2003,7 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
|
|||
/* Cache not set: could be safe but we don't bother. */
|
||||
apic->highest_isr_cache == -1 ||
|
||||
/* Need EOI to update ioapic. */
|
||||
kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
|
||||
kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
|
||||
/*
|
||||
* PV EOI was disabled by apic_sync_pv_eoi_from_guest
|
||||
* so we need not do anything here.
|
||||
|
@ -1978,7 +2059,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
u32 reg = (msr - APIC_BASE_MSR) << 4;
|
||||
|
||||
if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
|
||||
if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
|
||||
return 1;
|
||||
|
||||
if (reg == APIC_ICR2)
|
||||
|
@ -1995,7 +2076,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
|
|||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
|
||||
|
||||
if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
|
||||
if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
|
||||
return 1;
|
||||
|
||||
if (reg == APIC_DFR || reg == APIC_ICR2) {
|
||||
|
|
|
@ -57,7 +57,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
|
|||
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
|
||||
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
|
||||
void __kvm_apic_update_irr(u32 *pir, void *regs);
|
||||
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
|
||||
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
|
||||
|
@ -144,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
|
|||
return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
|
||||
}
|
||||
|
||||
static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
|
||||
static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_x86_ops->vm_has_apicv(kvm);
|
||||
return kvm_x86_ops->cpu_uses_apicv(vcpu);
|
||||
}
|
||||
|
||||
static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
|
||||
|
@ -169,4 +168,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
|
|||
|
||||
void wait_lapic_expire(struct kvm_vcpu *vcpu);
|
||||
|
||||
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
|
||||
struct kvm_vcpu **dest_vcpu);
|
||||
#endif
|
||||
|
|
|
@ -818,14 +818,11 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
|
|||
kvm->arch.indirect_shadow_pages--;
|
||||
}
|
||||
|
||||
static int has_wrprotected_page(struct kvm_vcpu *vcpu,
|
||||
gfn_t gfn,
|
||||
int level)
|
||||
static int __has_wrprotected_page(gfn_t gfn, int level,
|
||||
struct kvm_memory_slot *slot)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
struct kvm_lpage_info *linfo;
|
||||
|
||||
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
|
||||
if (slot) {
|
||||
linfo = lpage_info_slot(gfn, slot, level);
|
||||
return linfo->write_count;
|
||||
|
@ -834,6 +831,14 @@ static int has_wrprotected_page(struct kvm_vcpu *vcpu,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
|
||||
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
|
||||
return __has_wrprotected_page(gfn, level, slot);
|
||||
}
|
||||
|
||||
static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
unsigned long page_size;
|
||||
|
@ -851,6 +856,17 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot,
|
||||
bool no_dirty_log)
|
||||
{
|
||||
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
|
||||
return false;
|
||||
if (no_dirty_log && slot->dirty_bitmap)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct kvm_memory_slot *
|
||||
gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
|
||||
bool no_dirty_log)
|
||||
|
@ -858,21 +874,25 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
|
|||
struct kvm_memory_slot *slot;
|
||||
|
||||
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
|
||||
if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
|
||||
(no_dirty_log && slot->dirty_bitmap))
|
||||
if (!memslot_valid_for_gpte(slot, no_dirty_log))
|
||||
slot = NULL;
|
||||
|
||||
return slot;
|
||||
}
|
||||
|
||||
static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn)
|
||||
{
|
||||
return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true);
|
||||
}
|
||||
|
||||
static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
|
||||
static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
|
||||
bool *force_pt_level)
|
||||
{
|
||||
int host_level, level, max_level;
|
||||
struct kvm_memory_slot *slot;
|
||||
|
||||
if (unlikely(*force_pt_level))
|
||||
return PT_PAGE_TABLE_LEVEL;
|
||||
|
||||
slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn);
|
||||
*force_pt_level = !memslot_valid_for_gpte(slot, true);
|
||||
if (unlikely(*force_pt_level))
|
||||
return PT_PAGE_TABLE_LEVEL;
|
||||
|
||||
host_level = host_mapping_level(vcpu->kvm, large_gfn);
|
||||
|
||||
|
@ -882,7 +902,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
|
|||
max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
|
||||
|
||||
for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
|
||||
if (has_wrprotected_page(vcpu, large_gfn, level))
|
||||
if (__has_wrprotected_page(large_gfn, level, slot))
|
||||
break;
|
||||
|
||||
return level - 1;
|
||||
|
@ -2962,14 +2982,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
|
|||
{
|
||||
int r;
|
||||
int level;
|
||||
int force_pt_level;
|
||||
bool force_pt_level = false;
|
||||
pfn_t pfn;
|
||||
unsigned long mmu_seq;
|
||||
bool map_writable, write = error_code & PFERR_WRITE_MASK;
|
||||
|
||||
force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
|
||||
level = mapping_level(vcpu, gfn, &force_pt_level);
|
||||
if (likely(!force_pt_level)) {
|
||||
level = mapping_level(vcpu, gfn);
|
||||
/*
|
||||
* This path builds a PAE pagetable - so we can map
|
||||
* 2mb pages at maximum. Therefore check if the level
|
||||
|
@ -2979,8 +2998,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
|
|||
level = PT_DIRECTORY_LEVEL;
|
||||
|
||||
gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
|
||||
} else
|
||||
level = PT_PAGE_TABLE_LEVEL;
|
||||
}
|
||||
|
||||
if (fast_page_fault(vcpu, v, level, error_code))
|
||||
return 0;
|
||||
|
@ -3427,7 +3445,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
|
|||
|
||||
static bool can_do_async_pf(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
|
||||
if (unlikely(!lapic_in_kernel(vcpu) ||
|
||||
kvm_event_needs_reinjection(vcpu)))
|
||||
return false;
|
||||
|
||||
|
@ -3476,7 +3494,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
|
|||
pfn_t pfn;
|
||||
int r;
|
||||
int level;
|
||||
int force_pt_level;
|
||||
bool force_pt_level;
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
unsigned long mmu_seq;
|
||||
int write = error_code & PFERR_WRITE_MASK;
|
||||
|
@ -3495,20 +3513,15 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
if (mapping_level_dirty_bitmap(vcpu, gfn) ||
|
||||
!check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL))
|
||||
force_pt_level = 1;
|
||||
else
|
||||
force_pt_level = 0;
|
||||
|
||||
force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
|
||||
PT_DIRECTORY_LEVEL);
|
||||
level = mapping_level(vcpu, gfn, &force_pt_level);
|
||||
if (likely(!force_pt_level)) {
|
||||
level = mapping_level(vcpu, gfn);
|
||||
if (level > PT_DIRECTORY_LEVEL &&
|
||||
!check_hugepage_cache_consistency(vcpu, gfn, level))
|
||||
level = PT_DIRECTORY_LEVEL;
|
||||
gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
|
||||
} else
|
||||
level = PT_PAGE_TABLE_LEVEL;
|
||||
}
|
||||
|
||||
if (fast_page_fault(vcpu, gpa, level, error_code))
|
||||
return 0;
|
||||
|
@ -3706,7 +3719,7 @@ static void
|
|||
__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
|
||||
int maxphyaddr, bool execonly)
|
||||
{
|
||||
int pte;
|
||||
u64 bad_mt_xwr;
|
||||
|
||||
rsvd_check->rsvd_bits_mask[0][3] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
|
||||
|
@ -3724,14 +3737,16 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
|
|||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
|
||||
rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
|
||||
|
||||
for (pte = 0; pte < 64; pte++) {
|
||||
int rwx_bits = pte & 7;
|
||||
int mt = pte >> 3;
|
||||
if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
|
||||
rwx_bits == 0x2 || rwx_bits == 0x6 ||
|
||||
(rwx_bits == 0x4 && !execonly))
|
||||
rsvd_check->bad_mt_xwr |= (1ull << pte);
|
||||
bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */
|
||||
bad_mt_xwr |= 0xFFull << (3 * 8); /* bits 3..5 must not be 3 */
|
||||
bad_mt_xwr |= 0xFFull << (7 * 8); /* bits 3..5 must not be 7 */
|
||||
bad_mt_xwr |= REPEAT_BYTE(1ull << 2); /* bits 0..2 must not be 010 */
|
||||
bad_mt_xwr |= REPEAT_BYTE(1ull << 6); /* bits 0..2 must not be 110 */
|
||||
if (!execonly) {
|
||||
/* bits 0..2 must not be 100 unless VMX capabilities allow it */
|
||||
bad_mt_xwr |= REPEAT_BYTE(1ull << 4);
|
||||
}
|
||||
rsvd_check->bad_mt_xwr = bad_mt_xwr;
|
||||
}
|
||||
|
||||
static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
|
||||
|
|
|
@ -698,7 +698,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
|
|||
int r;
|
||||
pfn_t pfn;
|
||||
int level = PT_PAGE_TABLE_LEVEL;
|
||||
int force_pt_level;
|
||||
bool force_pt_level = false;
|
||||
unsigned long mmu_seq;
|
||||
bool map_writable, is_self_change_mapping;
|
||||
|
||||
|
@ -743,15 +743,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
|
|||
is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
|
||||
&walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);
|
||||
|
||||
if (walker.level >= PT_DIRECTORY_LEVEL)
|
||||
force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn)
|
||||
|| is_self_change_mapping;
|
||||
else
|
||||
force_pt_level = 1;
|
||||
if (!force_pt_level) {
|
||||
level = min(walker.level, mapping_level(vcpu, walker.gfn));
|
||||
walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
|
||||
}
|
||||
if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) {
|
||||
level = mapping_level(vcpu, walker.gfn, &force_pt_level);
|
||||
if (likely(!force_pt_level)) {
|
||||
level = min(walker.level, level);
|
||||
walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
|
||||
}
|
||||
} else
|
||||
force_pt_level = true;
|
||||
|
||||
mmu_seq = vcpu->kvm->mmu_notifier_seq;
|
||||
smp_rmb();
|
||||
|
|
|
@ -159,6 +159,9 @@ struct vcpu_svm {
|
|||
u32 apf_reason;
|
||||
|
||||
u64 tsc_ratio;
|
||||
|
||||
/* cached guest cpuid flags for faster access */
|
||||
bool nrips_enabled : 1;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(u64, current_tsc_ratio);
|
||||
|
@ -1086,7 +1089,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
|
|||
return target_tsc - tsc;
|
||||
}
|
||||
|
||||
static void init_vmcb(struct vcpu_svm *svm, bool init_event)
|
||||
static void init_vmcb(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
struct vmcb_save_area *save = &svm->vmcb->save;
|
||||
|
@ -1157,8 +1160,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
|
|||
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
|
||||
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
|
||||
|
||||
if (!init_event)
|
||||
svm_set_efer(&svm->vcpu, 0);
|
||||
svm_set_efer(&svm->vcpu, 0);
|
||||
save->dr6 = 0xffff0ff0;
|
||||
kvm_set_rflags(&svm->vcpu, 2);
|
||||
save->rip = 0x0000fff0;
|
||||
|
@ -1212,7 +1214,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
|
||||
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
|
||||
}
|
||||
init_vmcb(svm, init_event);
|
||||
init_vmcb(svm);
|
||||
|
||||
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
|
||||
|
@ -1268,7 +1270,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
|||
clear_page(svm->vmcb);
|
||||
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
|
||||
svm->asid_generation = 0;
|
||||
init_vmcb(svm, false);
|
||||
init_vmcb(svm);
|
||||
|
||||
svm_init_osvw(&svm->vcpu);
|
||||
|
||||
|
@ -1890,7 +1892,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
|
|||
* so reinitialize it.
|
||||
*/
|
||||
clear_page(svm->vmcb);
|
||||
init_vmcb(svm, false);
|
||||
init_vmcb(svm);
|
||||
|
||||
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
|
||||
return 0;
|
||||
|
@ -2365,7 +2367,9 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
|
|||
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
|
||||
nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
|
||||
nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
|
||||
nested_vmcb->control.next_rip = vmcb->control.next_rip;
|
||||
|
||||
if (svm->nrips_enabled)
|
||||
nested_vmcb->control.next_rip = vmcb->control.next_rip;
|
||||
|
||||
/*
|
||||
* If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
|
||||
|
@ -3060,7 +3064,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
|
|||
u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
|
||||
/* instruction emulation calls kvm_set_cr8() */
|
||||
r = cr_interception(svm);
|
||||
if (irqchip_in_kernel(svm->vcpu.kvm))
|
||||
if (lapic_in_kernel(&svm->vcpu))
|
||||
return r;
|
||||
if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
|
||||
return r;
|
||||
|
@ -3294,24 +3298,11 @@ static int msr_interception(struct vcpu_svm *svm)
|
|||
|
||||
static int interrupt_window_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_run *kvm_run = svm->vcpu.run;
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
|
||||
svm_clear_vintr(svm);
|
||||
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
|
||||
mark_dirty(svm->vmcb, VMCB_INTR);
|
||||
++svm->vcpu.stat.irq_window_exits;
|
||||
/*
|
||||
* If the user space waits to inject interrupts, exit as soon as
|
||||
* possible
|
||||
*/
|
||||
if (!irqchip_in_kernel(svm->vcpu.kvm) &&
|
||||
kvm_run->request_interrupt_window &&
|
||||
!kvm_cpu_has_interrupt(&svm->vcpu)) {
|
||||
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -3659,12 +3650,12 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
|
|||
return;
|
||||
}
|
||||
|
||||
static int svm_vm_has_apicv(struct kvm *kvm)
|
||||
static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
|
||||
static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -4098,6 +4089,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
|
|||
|
||||
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
/* Update nrips enabled cache */
|
||||
svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
|
||||
}
|
||||
|
||||
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
|
||||
|
@ -4425,7 +4420,7 @@ static struct kvm_x86_ops svm_x86_ops = {
|
|||
.enable_irq_window = enable_irq_window,
|
||||
.update_cr8_intercept = update_cr8_intercept,
|
||||
.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
|
||||
.vm_has_apicv = svm_vm_has_apicv,
|
||||
.cpu_uses_apicv = svm_cpu_uses_apicv,
|
||||
.load_eoi_exitmap = svm_load_eoi_exitmap,
|
||||
.sync_pir_to_irr = svm_sync_pir_to_irr,
|
||||
|
||||
|
|
|
@@ -128,6 +128,24 @@ TRACE_EVENT(kvm_pio,
		  __entry->count > 1 ? "(...)" : "")
);

/*
 * Tracepoint for fast mmio.
 */
TRACE_EVENT(kvm_fast_mmio,
	TP_PROTO(u64 gpa),
	TP_ARGS(gpa),

	TP_STRUCT__entry(
		__field(u64, gpa)
	),

	TP_fast_assign(
		__entry->gpa = gpa;
	),

	TP_printk("fast mmio at gpa 0x%llx", __entry->gpa)
);

/*
 * Tracepoint for cpuid.
 */
@ -974,6 +992,39 @@ TRACE_EVENT(kvm_enter_smm,
|
|||
__entry->smbase)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for VT-d posted-interrupts.
|
||||
*/
|
||||
TRACE_EVENT(kvm_pi_irte_update,
|
||||
TP_PROTO(unsigned int vcpu_id, unsigned int gsi,
|
||||
unsigned int gvec, u64 pi_desc_addr, bool set),
|
||||
TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, vcpu_id )
|
||||
__field( unsigned int, gsi )
|
||||
__field( unsigned int, gvec )
|
||||
__field( u64, pi_desc_addr )
|
||||
__field( bool, set )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_id = vcpu_id;
|
||||
__entry->gsi = gsi;
|
||||
__entry->gvec = gvec;
|
||||
__entry->pi_desc_addr = pi_desc_addr;
|
||||
__entry->set = set;
|
||||
),
|
||||
|
||||
TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, "
|
||||
"gvec: 0x%x, pi_desc_addr: 0x%llx",
|
||||
__entry->set ? "enabled and being updated" : "disabled",
|
||||
__entry->vcpu_id,
|
||||
__entry->gsi,
|
||||
__entry->gvec,
|
||||
__entry->pi_desc_addr)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_KVM_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
(File diff suppressed because it is too large)
@ -51,6 +51,8 @@
|
|||
#include <linux/pci.h>
|
||||
#include <linux/timekeeper_internal.h>
|
||||
#include <linux/pvclock_gtod.h>
|
||||
#include <linux/kvm_irqfd.h>
|
||||
#include <linux/irqbypass.h>
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
|
@ -64,6 +66,7 @@
|
|||
#include <asm/fpu/internal.h> /* Ugh! */
|
||||
#include <asm/pvclock.h>
|
||||
#include <asm/div64.h>
|
||||
#include <asm/irq_remapping.h>
|
||||
|
||||
#define MAX_IO_MSRS 256
|
||||
#define KVM_MAX_MCE_BANKS 32
|
||||
|
@ -622,7 +625,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
|||
if ((cr0 ^ old_cr0) & update_bits)
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
|
||||
if ((cr0 ^ old_cr0) & X86_CR0_CD)
|
||||
if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
|
||||
kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
|
||||
!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
|
||||
kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
|
||||
|
||||
return 0;
|
||||
|
@ -789,7 +794,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
|
|||
{
|
||||
if (cr8 & CR8_RESERVED_BITS)
|
||||
return 1;
|
||||
if (irqchip_in_kernel(vcpu->kvm))
|
||||
if (lapic_in_kernel(vcpu))
|
||||
kvm_lapic_set_tpr(vcpu, cr8);
|
||||
else
|
||||
vcpu->arch.cr8 = cr8;
|
||||
|
@ -799,7 +804,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8);
|
|||
|
||||
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (irqchip_in_kernel(vcpu->kvm))
|
||||
if (lapic_in_kernel(vcpu))
|
||||
return kvm_lapic_get_cr8(vcpu);
|
||||
else
|
||||
return vcpu->arch.cr8;
|
||||
|
@ -953,6 +958,9 @@ static u32 emulated_msrs[] = {
|
|||
HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
|
||||
HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
|
||||
HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
|
||||
HV_X64_MSR_RESET,
|
||||
HV_X64_MSR_VP_INDEX,
|
||||
HV_X64_MSR_VP_RUNTIME,
|
||||
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
|
||||
MSR_KVM_PV_EOI_EN,
|
||||
|
||||
|
@ -1898,6 +1906,8 @@ static void accumulate_steal_time(struct kvm_vcpu *vcpu)
|
|||
|
||||
static void record_steal_time(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
accumulate_steal_time(vcpu);
|
||||
|
||||
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
|
||||
return;
|
||||
|
||||
|
@ -2048,12 +2058,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
if (!(data & KVM_MSR_ENABLED))
|
||||
break;
|
||||
|
||||
vcpu->arch.st.last_steal = current->sched_info.run_delay;
|
||||
|
||||
preempt_disable();
|
||||
accumulate_steal_time(vcpu);
|
||||
preempt_enable();
|
||||
|
||||
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
|
||||
|
||||
break;
|
||||
|
@ -2449,6 +2453,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_ENABLE_CAP_VM:
|
||||
case KVM_CAP_DISABLE_QUIRKS:
|
||||
case KVM_CAP_SET_BOOT_CPU_ID:
|
||||
case KVM_CAP_SPLIT_IRQCHIP:
|
||||
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
|
||||
case KVM_CAP_ASSIGN_DEV_IRQ:
|
||||
case KVM_CAP_PCI_2_3:
|
||||
|
@ -2628,7 +2633,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||
vcpu->cpu = cpu;
|
||||
}
|
||||
|
||||
accumulate_steal_time(vcpu);
|
||||
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
|
||||
}
|
||||
|
||||
|
@ -2662,12 +2666,24 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
|
|||
{
|
||||
if (irq->irq >= KVM_NR_INTERRUPTS)
|
||||
return -EINVAL;
|
||||
if (irqchip_in_kernel(vcpu->kvm))
|
||||
|
||||
if (!irqchip_in_kernel(vcpu->kvm)) {
|
||||
kvm_queue_interrupt(vcpu, irq->irq, false);
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* With in-kernel LAPIC, we only use this to inject EXTINT, so
|
||||
* fail for in-kernel 8259.
|
||||
*/
|
||||
if (pic_in_kernel(vcpu->kvm))
|
||||
return -ENXIO;
|
||||
|
||||
kvm_queue_interrupt(vcpu, irq->irq, false);
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
if (vcpu->arch.pending_external_vector != -1)
|
||||
return -EEXIST;
|
||||
|
||||
vcpu->arch.pending_external_vector = irq->irq;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3176,7 +3192,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
|||
struct kvm_vapic_addr va;
|
||||
|
||||
r = -EINVAL;
|
||||
if (!irqchip_in_kernel(vcpu->kvm))
|
||||
if (!lapic_in_kernel(vcpu))
|
||||
goto out;
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&va, argp, sizeof va))
|
||||
|
@ -3425,41 +3441,35 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
|
|||
|
||||
static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
mutex_lock(&kvm->arch.vpit->pit_state.lock);
|
||||
memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
|
||||
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
mutex_lock(&kvm->arch.vpit->pit_state.lock);
|
||||
memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
|
||||
kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
|
||||
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
mutex_lock(&kvm->arch.vpit->pit_state.lock);
|
||||
memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
|
||||
sizeof(ps->channels));
|
||||
ps->flags = kvm->arch.vpit->pit_state.flags;
|
||||
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
|
||||
memset(&ps->reserved, 0, sizeof(ps->reserved));
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
|
||||
{
|
||||
int r = 0, start = 0;
|
||||
int start = 0;
|
||||
u32 prev_legacy, cur_legacy;
|
||||
mutex_lock(&kvm->arch.vpit->pit_state.lock);
|
||||
prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
|
||||
|
@ -3471,7 +3481,7 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
|
|||
kvm->arch.vpit->pit_state.flags = ps->flags;
|
||||
kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
|
||||
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_reinject(struct kvm *kvm,
|
||||
|
@@ -3556,6 +3566,28 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
		kvm->arch.disabled_quirks = cap->args[0];
		r = 0;
		break;
	case KVM_CAP_SPLIT_IRQCHIP: {
		mutex_lock(&kvm->lock);
		r = -EINVAL;
		if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
			goto split_irqchip_unlock;
		r = -EEXIST;
		if (irqchip_in_kernel(kvm))
			goto split_irqchip_unlock;
		if (atomic_read(&kvm->online_vcpus))
			goto split_irqchip_unlock;
		r = kvm_setup_empty_irq_routing(kvm);
		if (r)
			goto split_irqchip_unlock;
		/* Pairs with irqchip_in_kernel. */
		smp_wmb();
		kvm->arch.irqchip_split = true;
		kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
		r = 0;
split_irqchip_unlock:
		mutex_unlock(&kvm->lock);
		break;
	}
	default:
		r = -EINVAL;
		break;
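A rough userspace sketch of how the new capability would be enabled (not part of this patch; the helper name, the 24-pin reservation and the minimal error handling are illustrative assumptions):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Hypothetical helper: request a split irqchip (in-kernel LAPIC only,
 * IOAPIC/PIC/PIT left to userspace) on a freshly created VM fd. */
static int enable_split_irqchip(int vm_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_SPLIT_IRQCHIP;
	cap.args[0] = 24;	/* IOAPIC pins reserved for userspace routing */

	/* Per the checks above, the kernel refuses this with EEXIST once an
	 * in-kernel irqchip exists or any vCPU has been created, and with
	 * EINVAL when args[0] exceeds the reserved-pin limit. */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}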
@ -3669,7 +3701,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
}
|
||||
|
||||
r = -ENXIO;
|
||||
if (!irqchip_in_kernel(kvm))
|
||||
if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
|
||||
goto get_irqchip_out;
|
||||
r = kvm_vm_ioctl_get_irqchip(kvm, chip);
|
||||
if (r)
|
||||
|
@ -3693,7 +3725,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
}
|
||||
|
||||
r = -ENXIO;
|
||||
if (!irqchip_in_kernel(kvm))
|
||||
if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
|
||||
goto set_irqchip_out;
|
||||
r = kvm_vm_ioctl_set_irqchip(kvm, chip);
|
||||
if (r)
|
||||
|
@ -4060,6 +4092,15 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
|
|||
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
|
||||
}
|
||||
|
||||
static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
|
||||
unsigned long addr, void *val, unsigned int bytes)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
|
||||
int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
|
||||
|
||||
return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
|
||||
gva_t addr, void *val,
|
||||
unsigned int bytes,
|
||||
|
@ -4795,6 +4836,7 @@ static const struct x86_emulate_ops emulate_ops = {
|
|||
.write_gpr = emulator_write_gpr,
|
||||
.read_std = kvm_read_guest_virt_system,
|
||||
.write_std = kvm_write_guest_virt_system,
|
||||
.read_phys = kvm_read_guest_phys_system,
|
||||
.fetch = kvm_fetch_guest_virt,
|
||||
.read_emulated = emulator_read_emulated,
|
||||
.write_emulated = emulator_write_emulated,
|
||||
|
@ -5667,7 +5709,7 @@ void kvm_arch_exit(void)
|
|||
int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
++vcpu->stat.halt_exits;
|
||||
if (irqchip_in_kernel(vcpu->kvm)) {
|
||||
if (lapic_in_kernel(vcpu)) {
|
||||
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
|
||||
return 1;
|
||||
} else {
|
||||
|
@@ -5774,9 +5816,15 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 */
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
{
	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
		vcpu->run->request_interrupt_window &&
		kvm_arch_interrupt_allowed(vcpu));
	if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm))
		return false;

	if (kvm_cpu_has_interrupt(vcpu))
		return false;

	return (irqchip_split(vcpu->kvm)
		? kvm_apic_accept_pic_intr(vcpu)
		: kvm_arch_interrupt_allowed(vcpu));
}

static void post_kvm_run_save(struct kvm_vcpu *vcpu)

@@ -5787,13 +5835,17 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
	kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
	kvm_run->cr8 = kvm_get_cr8(vcpu);
	kvm_run->apic_base = kvm_get_apic_base(vcpu);
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_run->ready_for_interrupt_injection = 1;
	else
	if (!irqchip_in_kernel(vcpu->kvm))
		kvm_run->ready_for_interrupt_injection =
			kvm_arch_interrupt_allowed(vcpu) &&
			!kvm_cpu_has_interrupt(vcpu) &&
			!kvm_event_needs_reinjection(vcpu);
	else if (!pic_in_kernel(vcpu->kvm))
		kvm_run->ready_for_interrupt_injection =
			kvm_apic_accept_pic_intr(vcpu) &&
			!kvm_cpu_has_interrupt(vcpu);
	else
		kvm_run->ready_for_interrupt_injection = 1;
}
static void update_cr8_intercept(struct kvm_vcpu *vcpu)
|
||||
|
@ -6144,18 +6196,18 @@ static void process_smi(struct kvm_vcpu *vcpu)
|
|||
|
||||
static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 eoi_exit_bitmap[4];
|
||||
u32 tmr[8];
|
||||
|
||||
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
|
||||
return;
|
||||
|
||||
memset(eoi_exit_bitmap, 0, 32);
|
||||
memset(tmr, 0, 32);
|
||||
memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8);
|
||||
|
||||
kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
|
||||
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
|
||||
kvm_apic_update_tmr(vcpu, tmr);
|
||||
if (irqchip_split(vcpu->kvm))
|
||||
kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap);
|
||||
else {
|
||||
kvm_x86_ops->sync_pir_to_irr(vcpu);
|
||||
kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
|
||||
}
|
||||
kvm_x86_ops->load_eoi_exitmap(vcpu);
|
||||
}
|
||||
|
||||
static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
|
||||
|
@ -6168,7 +6220,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct page *page = NULL;
|
||||
|
||||
if (!irqchip_in_kernel(vcpu->kvm))
|
||||
if (!lapic_in_kernel(vcpu))
|
||||
return;
|
||||
|
||||
if (!kvm_x86_ops->set_apic_access_page_addr)
|
||||
|
@ -6206,7 +6258,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
|
|||
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
|
||||
bool req_int_win = !lapic_in_kernel(vcpu) &&
|
||||
vcpu->run->request_interrupt_window;
|
||||
bool req_immediate_exit = false;
|
||||
|
||||
|
@ -6258,6 +6310,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
kvm_pmu_handle_event(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_PMI, vcpu))
|
||||
kvm_pmu_deliver_pmi(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
|
||||
BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
|
||||
if (test_bit(vcpu->arch.pending_ioapic_eoi,
|
||||
(void *) vcpu->arch.eoi_exit_bitmap)) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
|
||||
vcpu->run->eoi.vector =
|
||||
vcpu->arch.pending_ioapic_eoi;
|
||||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
|
||||
vcpu_scan_ioapic(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
|
||||
|
@ -6268,6 +6331,26 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
|
||||
vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
|
||||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* KVM_REQ_EVENT is not set when posted interrupts are set by
|
||||
* VT-d hardware, so we have to update RVI unconditionally.
|
||||
*/
|
||||
if (kvm_lapic_enabled(vcpu)) {
|
||||
/*
|
||||
* Update architecture specific hints for APIC
|
||||
* virtual interrupt delivery.
|
||||
*/
|
||||
if (kvm_x86_ops->hwapic_irr_update)
|
||||
kvm_x86_ops->hwapic_irr_update(vcpu,
|
||||
kvm_lapic_find_highest_irr(vcpu));
|
||||
}
|
||||
|
||||
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
|
||||
|
@ -6286,13 +6369,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
kvm_x86_ops->enable_irq_window(vcpu);
|
||||
|
||||
if (kvm_lapic_enabled(vcpu)) {
|
||||
/*
|
||||
* Update architecture specific hints for APIC
|
||||
* virtual interrupt delivery.
|
||||
*/
|
||||
if (kvm_x86_ops->hwapic_irr_update)
|
||||
kvm_x86_ops->hwapic_irr_update(vcpu,
|
||||
kvm_lapic_find_highest_irr(vcpu));
|
||||
update_cr8_intercept(vcpu);
|
||||
kvm_lapic_sync_to_vapic(vcpu);
|
||||
}
|
||||
|
@ -6428,10 +6504,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
|
||||
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!kvm_arch_vcpu_runnable(vcpu)) {
|
||||
if (!kvm_arch_vcpu_runnable(vcpu) &&
|
||||
(!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
|
||||
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_block(vcpu);
|
||||
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
if (kvm_x86_ops->post_block)
|
||||
kvm_x86_ops->post_block(vcpu);
|
||||
|
||||
if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
|
||||
return 1;
|
||||
}
|
||||
|
@ -6468,10 +6549,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
|
|||
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
for (;;) {
|
||||
if (kvm_vcpu_running(vcpu))
|
||||
if (kvm_vcpu_running(vcpu)) {
|
||||
r = vcpu_enter_guest(vcpu);
|
||||
else
|
||||
} else {
|
||||
r = vcpu_block(kvm, vcpu);
|
||||
}
|
||||
|
||||
if (r <= 0)
|
||||
break;
|
||||
|
||||
|
@ -6480,8 +6563,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
|
|||
kvm_inject_pending_timer_irqs(vcpu);
|
||||
|
||||
if (dm_request_for_irq_injection(vcpu)) {
|
||||
r = -EINTR;
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTR;
|
||||
r = 0;
|
||||
vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
|
||||
++vcpu->stat.request_irq_exits;
|
||||
break;
|
||||
}
|
||||
|
@ -6608,7 +6691,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|||
}
|
||||
|
||||
/* re-sync apic's tpr */
|
||||
if (!irqchip_in_kernel(vcpu->kvm)) {
|
||||
if (!lapic_in_kernel(vcpu)) {
|
||||
if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
|
@ -7308,7 +7391,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
|
|||
|
||||
bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
|
||||
return irqchip_in_kernel(vcpu->kvm) == lapic_in_kernel(vcpu);
|
||||
}
|
||||
|
||||
struct static_key kvm_no_apic_vcpu __read_mostly;
|
||||
|
@ -7377,6 +7460,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
|||
kvm_async_pf_hash_reset(vcpu);
|
||||
kvm_pmu_init(vcpu);
|
||||
|
||||
vcpu->arch.pending_external_vector = -1;
|
||||
|
||||
return 0;
|
||||
|
||||
fail_free_mce_banks:
|
||||
|
@ -7402,7 +7487,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
|
|||
kvm_mmu_destroy(vcpu);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
||||
free_page((unsigned long)vcpu->arch.pio_data);
|
||||
if (!irqchip_in_kernel(vcpu->kvm))
|
||||
if (!lapic_in_kernel(vcpu))
|
||||
static_key_slow_dec(&kvm_no_apic_vcpu);
|
||||
}
|
||||
|
||||
|
@ -8029,7 +8114,59 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
|
||||
|
||||
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
|
||||
struct irq_bypass_producer *prod)
|
||||
{
|
||||
struct kvm_kernel_irqfd *irqfd =
|
||||
container_of(cons, struct kvm_kernel_irqfd, consumer);
|
||||
|
||||
if (kvm_x86_ops->update_pi_irte) {
|
||||
irqfd->producer = prod;
|
||||
return kvm_x86_ops->update_pi_irte(irqfd->kvm,
|
||||
prod->irq, irqfd->gsi, 1);
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
|
||||
struct irq_bypass_producer *prod)
|
||||
{
|
||||
int ret;
|
||||
struct kvm_kernel_irqfd *irqfd =
|
||||
container_of(cons, struct kvm_kernel_irqfd, consumer);
|
||||
|
||||
if (!kvm_x86_ops->update_pi_irte) {
|
||||
WARN_ON(irqfd->producer != NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
WARN_ON(irqfd->producer != prod);
|
||||
irqfd->producer = NULL;
|
||||
|
||||
	/*
	 * When the producer of a consumer is unregistered, we change back to
	 * remapped mode, so we can re-use the current implementation
	 * when the irq is masked/disabled or the consumer side (KVM
	 * in this case) doesn't want to receive the interrupts.
	 */
|
||||
ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
|
||||
if (ret)
|
||||
printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
|
||||
" fails: %d\n", irqfd->consumer.token, ret);
|
||||
}
|
||||
|
||||
int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
|
||||
uint32_t guest_irq, bool set)
|
||||
{
|
||||
if (!kvm_x86_ops->update_pi_irte)
|
||||
return -EINVAL;
|
||||
|
||||
return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
|
||||
}
|
||||
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
|
||||
|
@ -8044,3 +8181,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
|
|||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
|
||||
|
|
|
@ -63,9 +63,6 @@ enum hv_cpuid_function {
|
|||
/* Define version of the synthetic interrupt controller. */
|
||||
#define HV_SYNIC_VERSION (1)
|
||||
|
||||
/* Define the expected SynIC version. */
|
||||
#define HV_SYNIC_VERSION_1 (0x1)
|
||||
|
||||
/* Define synthetic interrupt controller message constants. */
|
||||
#define HV_MESSAGE_SIZE (256)
|
||||
#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
|
||||
|
@ -105,8 +102,6 @@ enum hv_message_type {
|
|||
HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
|
||||
};
|
||||
|
||||
/* Define the number of synthetic interrupt sources. */
|
||||
#define HV_SYNIC_SINT_COUNT (16)
|
||||
#define HV_SYNIC_STIMER_COUNT (4)
|
||||
|
||||
/* Define invalid partition identifier. */
|
||||
|
|
|
@ -22,7 +22,7 @@ int irq_remap_broken;
|
|||
int disable_sourceid_checking;
|
||||
int no_x2apic_optout;
|
||||
|
||||
int disable_irq_post = 1;
|
||||
int disable_irq_post = 0;
|
||||
|
||||
static int disable_irq_remap;
|
||||
static struct irq_remap_ops *remap_ops;
|
||||
|
@ -58,14 +58,18 @@ static __init int setup_irqremap(char *str)
|
|||
return -EINVAL;
|
||||
|
||||
while (*str) {
|
||||
if (!strncmp(str, "on", 2))
|
||||
if (!strncmp(str, "on", 2)) {
|
||||
disable_irq_remap = 0;
|
||||
else if (!strncmp(str, "off", 3))
|
||||
disable_irq_post = 0;
|
||||
} else if (!strncmp(str, "off", 3)) {
|
||||
disable_irq_remap = 1;
|
||||
else if (!strncmp(str, "nosid", 5))
|
||||
disable_irq_post = 1;
|
||||
} else if (!strncmp(str, "nosid", 5))
|
||||
disable_sourceid_checking = 1;
|
||||
else if (!strncmp(str, "no_x2apic_optout", 16))
|
||||
no_x2apic_optout = 1;
|
||||
else if (!strncmp(str, "nopost", 6))
|
||||
disable_irq_post = 1;
|
||||
|
||||
str += strcspn(str, ",");
|
||||
while (*str == ',')
|
||||
|
|
|
@@ -33,3 +33,4 @@ menuconfig VFIO

source "drivers/vfio/pci/Kconfig"
source "drivers/vfio/platform/Kconfig"
source "virt/lib/Kconfig"

@@ -2,6 +2,7 @@ config VFIO_PCI
	tristate "VFIO support for PCI devices"
	depends on VFIO && PCI && EVENTFD
	select VFIO_VIRQFD
	select IRQ_BYPASS_MANAGER
	help
	  Support for the PCI VFIO bus driver. This is required to make
	  use of PCI drivers using the VFIO framework.

@@ -319,6 +319,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,

	if (vdev->ctx[vector].trigger) {
		free_irq(irq, vdev->ctx[vector].trigger);
		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
		kfree(vdev->ctx[vector].name);
		eventfd_ctx_put(vdev->ctx[vector].trigger);
		vdev->ctx[vector].trigger = NULL;

@@ -360,6 +361,14 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
		return ret;
	}

	vdev->ctx[vector].producer.token = trigger;
	vdev->ctx[vector].producer.irq = irq;
	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
	if (unlikely(ret))
		dev_info(&pdev->dev,
		"irq bypass producer (token %p) registration fails: %d\n",
		vdev->ctx[vector].producer.token, ret);

	vdev->ctx[vector].trigger = trigger;

	return 0;
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/irqbypass.h>
|
||||
|
||||
#ifndef VFIO_PCI_PRIVATE_H
|
||||
#define VFIO_PCI_PRIVATE_H
|
||||
|
@ -29,6 +30,7 @@ struct vfio_pci_irq_ctx {
|
|||
struct virqfd *mask;
|
||||
char *name;
|
||||
bool masked;
|
||||
struct irq_bypass_producer producer;
|
||||
};
|
||||
|
||||
struct vfio_pci_device {
|
||||
|
|
|
@ -51,7 +51,7 @@ struct arch_timer_cpu {
|
|||
bool armed;
|
||||
|
||||
/* Timer IRQ */
|
||||
const struct kvm_irq_level *irq;
|
||||
struct kvm_irq_level irq;
|
||||
|
||||
/* VGIC mapping */
|
||||
struct irq_phys_map *map;
|
||||
|
@ -71,5 +71,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
|
|||
int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
|
||||
|
||||
bool kvm_timer_should_fire(struct kvm_vcpu *vcpu);
|
||||
void kvm_timer_schedule(struct kvm_vcpu *vcpu);
|
||||
void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -112,7 +112,6 @@ struct vgic_vmcr {
|
|||
struct vgic_ops {
|
||||
struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int);
|
||||
void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
|
||||
void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
|
||||
u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
|
||||
u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
|
||||
void (*clear_eisr)(struct kvm_vcpu *vcpu);
|
||||
|
@ -159,7 +158,6 @@ struct irq_phys_map {
|
|||
u32 virt_irq;
|
||||
u32 phys_irq;
|
||||
u32 irq;
|
||||
bool active;
|
||||
};
|
||||
|
||||
struct irq_phys_map_entry {
|
||||
|
@ -296,22 +294,16 @@ struct vgic_v3_cpu_if {
|
|||
};
|
||||
|
||||
struct vgic_cpu {
|
||||
/* per IRQ to LR mapping */
|
||||
u8 *vgic_irq_lr_map;
|
||||
|
||||
/* Pending/active/both interrupts on this VCPU */
|
||||
DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
|
||||
DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS);
|
||||
DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
|
||||
DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS);
|
||||
DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS);
|
||||
DECLARE_BITMAP(pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
|
||||
|
||||
/* Pending/active/both shared interrupts, dynamically sized */
|
||||
unsigned long *pending_shared;
|
||||
unsigned long *active_shared;
|
||||
unsigned long *pend_act_shared;
|
||||
|
||||
/* Bitmap of used/free list registers */
|
||||
DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
|
||||
|
||||
/* Number of list registers on this CPU */
|
||||
int nr_lr;
|
||||
|
||||
|
@ -354,8 +346,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
|
|||
struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
|
||||
int virt_irq, int irq);
|
||||
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
|
||||
bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
|
||||
void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
|
||||
|
||||
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
|
||||
#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define _HYPERV_H
|
||||
|
||||
#include <uapi/linux/hyperv.h>
|
||||
#include <uapi/asm/hyperv.h>
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/scatterlist.h>
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* IRQ offload/bypass manager
|
||||
*
|
||||
* Copyright (C) 2015 Red Hat, Inc.
|
||||
* Copyright (c) 2015 Linaro Ltd.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#ifndef IRQBYPASS_H
|
||||
#define IRQBYPASS_H
|
||||
|
||||
#include <linux/list.h>
|
||||
|
||||
struct irq_bypass_consumer;
|
||||
|
||||
/*
 * Theory of operation
 *
 * The IRQ bypass manager is a simple set of lists and callbacks that allows
 * IRQ producers (ex. physical interrupt sources) to be matched to IRQ
 * consumers (ex. virtualization hardware that allows IRQ bypass or offload)
 * via a shared token (ex. eventfd_ctx). Producers and consumers register
 * independently. When a token match is found, the optional @stop callback
 * will be called for each participant. The pair will then be connected via
 * the @add_* callbacks, and finally the optional @start callback will allow
 * any final coordination. When either participant is unregistered, the
 * process is repeated using the @del_* callbacks in place of the @add_*
 * callbacks. Match tokens must be unique per producer/consumer, 1:N pairings
 * are not supported.
 */
|
||||
|
||||
/**
|
||||
* struct irq_bypass_producer - IRQ bypass producer definition
|
||||
* @node: IRQ bypass manager private list management
|
||||
* @token: opaque token to match between producer and consumer
|
||||
* @irq: Linux IRQ number for the producer device
|
||||
* @add_consumer: Connect the IRQ producer to an IRQ consumer (optional)
|
||||
* @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional)
|
||||
* @stop: Perform any quiesce operations necessary prior to add/del (optional)
|
||||
* @start: Perform any startup operations necessary after add/del (optional)
|
||||
*
|
||||
* The IRQ bypass producer structure represents an interrupt source for
|
||||
* participation in possible host bypass, for instance an interrupt vector
|
||||
* for a physical device assigned to a VM.
|
||||
*/
|
||||
struct irq_bypass_producer {
|
||||
struct list_head node;
|
||||
void *token;
|
||||
int irq;
|
||||
int (*add_consumer)(struct irq_bypass_producer *,
|
||||
struct irq_bypass_consumer *);
|
||||
void (*del_consumer)(struct irq_bypass_producer *,
|
||||
struct irq_bypass_consumer *);
|
||||
void (*stop)(struct irq_bypass_producer *);
|
||||
void (*start)(struct irq_bypass_producer *);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct irq_bypass_consumer - IRQ bypass consumer definition
|
||||
* @node: IRQ bypass manager private list management
|
||||
* @token: opaque token to match between producer and consumer
|
||||
* @add_producer: Connect the IRQ consumer to an IRQ producer
|
||||
* @del_producer: Disconnect the IRQ consumer from an IRQ producer
|
||||
* @stop: Perform any quiesce operations necessary prior to add/del (optional)
|
||||
* @start: Perform any startup operations necessary after add/del (optional)
|
||||
*
|
||||
* The IRQ bypass consumer structure represents an interrupt sink for
|
||||
* participation in possible host bypass, for instance a hypervisor may
|
||||
* support offloads to allow bypassing the host entirely or offload
|
||||
* portions of the interrupt handling to the VM.
|
||||
*/
|
||||
struct irq_bypass_consumer {
|
||||
struct list_head node;
|
||||
void *token;
|
||||
int (*add_producer)(struct irq_bypass_consumer *,
|
||||
struct irq_bypass_producer *);
|
||||
void (*del_producer)(struct irq_bypass_consumer *,
|
||||
struct irq_bypass_producer *);
|
||||
void (*stop)(struct irq_bypass_consumer *);
|
||||
void (*start)(struct irq_bypass_consumer *);
|
||||
};
|
||||
|
||||
int irq_bypass_register_producer(struct irq_bypass_producer *);
void irq_bypass_unregister_producer(struct irq_bypass_producer *);
int irq_bypass_register_consumer(struct irq_bypass_consumer *);
void irq_bypass_unregister_consumer(struct irq_bypass_consumer *);

#endif /* IRQBYPASS_H */
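As a hedged illustration of the interfaces declared above, a hypothetical consumer could pair with a producer (such as the VFIO MSI producer registered elsewhere in this series) through a shared token; every name below other than the irq_bypass_* symbols is made up:

#include <linux/irqbypass.h>

/* Imaginary consumer-side state; real consumers (e.g. KVM irqfds) embed
 * struct irq_bypass_consumer in their own per-interrupt context. */
struct demo_consumer {
	struct irq_bypass_consumer consumer;
};

static int demo_add_producer(struct irq_bypass_consumer *cons,
			     struct irq_bypass_producer *prod)
{
	/* Program the bypass/posting hardware for prod->irq here. */
	return 0;
}

static void demo_del_producer(struct irq_bypass_consumer *cons,
			      struct irq_bypass_producer *prod)
{
	/* Tear the bypass path down and fall back to normal injection. */
}

static int demo_register(struct demo_consumer *demo, void *token)
{
	demo->consumer.token = token;	/* must match the producer's token */
	demo->consumer.add_producer = demo_add_producer;
	demo->consumer.del_producer = demo_del_producer;
	/* stop/start are optional and left NULL in this sketch. */
	return irq_bypass_register_consumer(&demo->consumer);
}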
|
|
@ -24,6 +24,7 @@
|
|||
#include <linux/err.h>
|
||||
#include <linux/irqflags.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/irqbypass.h>
|
||||
#include <asm/signal.h>
|
||||
|
||||
#include <linux/kvm.h>
|
||||
|
@ -140,6 +141,8 @@ static inline bool is_error_page(struct page *page)
|
|||
#define KVM_REQ_APIC_PAGE_RELOAD 25
|
||||
#define KVM_REQ_SMI 26
|
||||
#define KVM_REQ_HV_CRASH 27
|
||||
#define KVM_REQ_IOAPIC_EOI_EXIT 28
|
||||
#define KVM_REQ_HV_RESET 29
|
||||
|
||||
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
|
||||
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
|
||||
|
@ -231,6 +234,9 @@ struct kvm_vcpu {
|
|||
unsigned long requests;
|
||||
unsigned long guest_debug;
|
||||
|
||||
int pre_pcpu;
|
||||
struct list_head blocked_vcpu_list;
|
||||
|
||||
struct mutex mutex;
|
||||
struct kvm_run *run;
|
||||
|
||||
|
@ -329,6 +335,18 @@ struct kvm_kernel_irq_routing_entry {
|
|||
struct hlist_node link;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
|
||||
struct kvm_irq_routing_table {
|
||||
int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
|
||||
u32 nr_rt_entries;
|
||||
/*
|
||||
* Array indexed by gsi. Each entry contains list of irq chips
|
||||
* the gsi is connected to.
|
||||
*/
|
||||
struct hlist_head map[0];
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifndef KVM_PRIVATE_MEM_SLOTS
|
||||
#define KVM_PRIVATE_MEM_SLOTS 0
|
||||
#endif
|
||||
|
@ -455,10 +473,14 @@ void vcpu_put(struct kvm_vcpu *vcpu);
|
|||
|
||||
#ifdef __KVM_HAVE_IOAPIC
|
||||
void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
|
||||
void kvm_arch_irq_routing_update(struct kvm *kvm);
|
||||
#else
|
||||
static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
|
||||
{
|
||||
}
|
||||
static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
|
@ -625,6 +647,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
|
|||
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
|
||||
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
|
||||
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
|
||||
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
|
||||
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
|
||||
|
@ -803,10 +827,13 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
|
|||
|
||||
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
|
||||
bool line_status);
|
||||
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
|
||||
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
|
||||
int irq_source_id, int level, bool line_status);
|
||||
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm, int irq_source_id,
|
||||
int level, bool line_status);
|
||||
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
|
||||
void kvm_notify_acked_gsi(struct kvm *kvm, int gsi);
|
||||
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
|
||||
void kvm_register_irq_ack_notifier(struct kvm *kvm,
|
||||
struct kvm_irq_ack_notifier *kian);
|
||||
|
@ -1002,6 +1029,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
|
|||
#endif
|
||||
|
||||
int kvm_setup_default_irq_routing(struct kvm *kvm);
|
||||
int kvm_setup_empty_irq_routing(struct kvm *kvm);
|
||||
int kvm_set_irq_routing(struct kvm *kvm,
|
||||
const struct kvm_irq_routing_entry *entries,
|
||||
unsigned nr,
|
||||
|
@ -1144,5 +1172,15 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
|
|||
{
|
||||
}
|
||||
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
|
||||
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
|
||||
struct irq_bypass_producer *);
|
||||
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
|
||||
struct irq_bypass_producer *);
|
||||
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
|
||||
void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
|
||||
int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
|
||||
uint32_t guest_irq, bool set);
|
||||
#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* irqfd: Allows an fd to be used to inject an interrupt to the guest
|
||||
* Credit goes to Avi Kivity for the original idea.
|
||||
*/
|
||||
|
||||
#ifndef __LINUX_KVM_IRQFD_H
|
||||
#define __LINUX_KVM_IRQFD_H
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/poll.h>
|
||||
|
||||
/*
|
||||
* Resampling irqfds are a special variety of irqfds used to emulate
|
||||
* level triggered interrupts. The interrupt is asserted on eventfd
|
||||
* trigger. On acknowledgment through the irq ack notifier, the
|
||||
* interrupt is de-asserted and userspace is notified through the
|
||||
* resamplefd. All resamplers on the same gsi are de-asserted
|
||||
* together, so we don't need to track the state of each individual
|
||||
* user. We can also therefore share the same irq source ID.
|
||||
*/
|
||||
struct kvm_kernel_irqfd_resampler {
|
||||
struct kvm *kvm;
|
||||
/*
|
||||
* List of resampling struct _irqfd objects sharing this gsi.
|
||||
* RCU list modified under kvm->irqfds.resampler_lock
|
||||
*/
|
||||
struct list_head list;
|
||||
struct kvm_irq_ack_notifier notifier;
|
||||
/*
|
||||
* Entry in list of kvm->irqfd.resampler_list. Use for sharing
|
||||
* resamplers among irqfds on the same gsi.
|
||||
* Accessed and modified under kvm->irqfds.resampler_lock
|
||||
*/
|
||||
struct list_head link;
|
||||
};
|
||||
|
||||
struct kvm_kernel_irqfd {
|
||||
/* Used for MSI fast-path */
|
||||
struct kvm *kvm;
|
||||
wait_queue_t wait;
|
||||
/* Update side is protected by irqfds.lock */
|
||||
struct kvm_kernel_irq_routing_entry irq_entry;
|
||||
seqcount_t irq_entry_sc;
|
||||
/* Used for level IRQ fast-path */
|
||||
int gsi;
|
||||
struct work_struct inject;
|
||||
/* The resampler used by this irqfd (resampler-only) */
|
||||
struct kvm_kernel_irqfd_resampler *resampler;
|
||||
/* Eventfd notified on resample (resampler-only) */
|
||||
struct eventfd_ctx *resamplefd;
|
||||
/* Entry in list of irqfds for a resampler (resampler-only) */
|
||||
struct list_head resampler_link;
|
||||
/* Used for setup/shutdown */
|
||||
struct eventfd_ctx *eventfd;
|
||||
struct list_head list;
|
||||
poll_table pt;
|
||||
struct work_struct shutdown;
|
||||
struct irq_bypass_consumer consumer;
|
||||
struct irq_bypass_producer *producer;
|
||||
};
|
||||
|
||||
#endif /* __LINUX_KVM_IRQFD_H */
|
|
@@ -183,6 +183,7 @@ struct kvm_s390_skeys {
#define KVM_EXIT_EPR 23
#define KVM_EXIT_SYSTEM_EVENT 24
#define KVM_EXIT_S390_STSI 25
#define KVM_EXIT_IOAPIC_EOI 26

/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */

@@ -333,6 +334,10 @@ struct kvm_run {
			__u8 sel1;
			__u16 sel2;
		} s390_stsi;
		/* KVM_EXIT_IOAPIC_EOI */
		struct {
			__u8 vector;
		} eoi;
		/* Fix the size of the union. */
		char padding[256];
	};

@@ -824,6 +829,8 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_MULTI_ADDRESS_SPACE 118
#define KVM_CAP_GUEST_DEBUG_HW_BPS 119
#define KVM_CAP_GUEST_DEBUG_HW_WPS 120
#define KVM_CAP_SPLIT_IRQCHIP 121
#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122

#ifdef KVM_CAP_IRQ_ROUTING
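For context, a minimal userspace sketch of consuming the new exit reason (the run-loop shape and the handle_ioapic_eoi callback are invented; only KVM_RUN, KVM_EXIT_IOAPIC_EOI and run->eoi.vector come from the ABI above):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Run the vCPU once and let a userspace IOAPIC model observe EOIs of
 * level-triggered interrupts, as required with a split irqchip. */
static int run_vcpu_once(int vcpu_fd, struct kvm_run *run,
			 void (*handle_ioapic_eoi)(unsigned char vector))
{
	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
		return -1;

	switch (run->exit_reason) {
	case KVM_EXIT_IOAPIC_EOI:
		/* Clear Remote IRR / resample the line in the IOAPIC model. */
		handle_ioapic_eoi(run->eoi.vector);
		break;
	default:
		break;
	}
	return 0;
}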
@@ -444,6 +444,7 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
	*ut = p->utime;
	*st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{

@@ -652,6 +653,7 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
	task_cputime(p, &cputime.utime, &cputime.stime);
	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{

@@ -0,0 +1 @@
obj-y += lib/

@@ -46,4 +46,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT

config KVM_COMPAT
	def_bool y
	depends on COMPAT && !S390
	depends on KVM && COMPAT && !S390

config HAVE_KVM_IRQ_BYPASS
	bool
@ -28,6 +28,8 @@
|
|||
#include <kvm/arm_vgic.h>
|
||||
#include <kvm/arm_arch_timer.h>
|
||||
|
||||
#include "trace.h"
|
||||
|
||||
static struct timecounter *timecounter;
|
||||
static struct workqueue_struct *wqueue;
|
||||
static unsigned int host_vtimer_irq;
|
||||
|
@ -59,18 +61,6 @@ static void timer_disarm(struct arch_timer_cpu *timer)
|
|||
}
|
||||
}
|
||||
|
||||
static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int ret;
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
|
||||
kvm_vgic_set_phys_irq_active(timer->map, true);
|
||||
ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
|
||||
timer->map,
|
||||
timer->irq->level);
|
||||
WARN_ON(ret);
|
||||
}
|
||||
|
||||
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
|
||||
|
@ -111,14 +101,20 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
|
|||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
|
||||
return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
|
||||
(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
|
||||
}
|
||||
|
||||
bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
cycle_t cval, now;
|
||||
|
||||
if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
|
||||
!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
|
||||
kvm_vgic_get_phys_irq_active(timer->map))
|
||||
if (!kvm_timer_irq_can_fire(vcpu))
|
||||
return false;
|
||||
|
||||
cval = timer->cntv_cval;
|
||||
|
@ -127,12 +123,94 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
|
|||
return cval <= now;
|
||||
}
|
||||
|
||||
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
|
||||
{
|
||||
int ret;
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
|
||||
BUG_ON(!vgic_initialized(vcpu->kvm));
|
||||
|
||||
timer->irq.level = new_level;
|
||||
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
|
||||
timer->irq.level);
|
||||
ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
|
||||
timer->map,
|
||||
timer->irq.level);
|
||||
WARN_ON(ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if there was a change in the timer state (should we raise or lower
|
||||
* the line level to the GIC).
|
||||
*/
|
||||
static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
|
||||
/*
|
||||
* If userspace modified the timer registers via SET_ONE_REG before
|
||||
* the vgic was initialized, we mustn't set the timer->irq.level value
|
||||
* because the guest would never see the interrupt. Instead wait
|
||||
* until we call this function from kvm_timer_flush_hwstate.
|
||||
*/
|
||||
if (!vgic_initialized(vcpu->kvm))
|
||||
return;
|
||||
|
||||
if (kvm_timer_should_fire(vcpu) != timer->irq.level)
|
||||
kvm_timer_update_irq(vcpu, !timer->irq.level);
|
||||
}
|
||||
|
||||
/*
|
||||
* Schedule the background timer before calling kvm_vcpu_block, so that this
|
||||
* thread is removed from its waitqueue and made runnable when there's a timer
|
||||
* interrupt to handle.
|
||||
*/
|
||||
void kvm_timer_schedule(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
u64 ns;
|
||||
cycle_t cval, now;
|
||||
|
||||
BUG_ON(timer_is_armed(timer));
|
||||
|
||||
/*
|
||||
* No need to schedule a background timer if the guest timer has
|
||||
* already expired, because kvm_vcpu_block will return before putting
|
||||
* the thread to sleep.
|
||||
*/
|
||||
if (kvm_timer_should_fire(vcpu))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the timer is not capable of raising interrupts (disabled or
|
||||
* masked), then there's no more work for us to do.
|
||||
*/
|
||||
if (!kvm_timer_irq_can_fire(vcpu))
|
||||
return;
|
||||
|
||||
/* The timer has not yet expired, schedule a background timer */
|
||||
cval = timer->cntv_cval;
|
||||
now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
|
||||
|
||||
ns = cyclecounter_cyc2ns(timecounter->cc,
|
||||
cval - now,
|
||||
timecounter->mask,
|
||||
&timecounter->frac);
|
||||
timer_arm(timer, ns);
|
||||
}
|
||||
|
||||
void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
timer_disarm(timer);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
|
||||
* @vcpu: The vcpu pointer
|
||||
*
|
||||
* Disarm any pending soft timers, since the world-switch code will write the
|
||||
* virtual timer state back to the physical CPU.
|
||||
* Check if the virtual timer has expired while we were running in the host,
|
||||
* and inject an interrupt if that was the case.
|
||||
*/
|
||||
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
@@ -140,28 +218,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
	bool phys_active;
	int ret;

	/*
	 * We're about to run this vcpu again, so there is no need to
	 * keep the background timer running, as we're about to
	 * populate the CPU timer again.
	 */
	timer_disarm(timer);
	kvm_timer_update_state(vcpu);

	/*
	 * If the timer expired while we were not scheduled, now is the time
	 * to inject it.
	 * If we enter the guest with the virtual input level to the VGIC
	 * asserted, then we have already told the VGIC what we need to, and
	 * we don't need to exit from the guest until the guest deactivates
	 * the already injected interrupt, so therefore we should set the
	 * hardware active state to prevent unnecessary exits from the guest.
	 *
	 * Conversely, if the virtual input level is deasserted, then always
	 * clear the hardware active state to ensure that hardware interrupts
	 * from the timer triggers a guest exit.
	 */
	if (kvm_timer_should_fire(vcpu))
		kvm_timer_inject_irq(vcpu);

	/*
	 * We keep track of whether the edge-triggered interrupt has been
	 * signalled to the vgic/guest, and if so, we mask the interrupt and
	 * the physical distributor to prevent the timer from raising a
	 * physical interrupt whenever we run a guest, preventing forward
	 * VCPU progress.
	 */
	if (kvm_vgic_get_phys_irq_active(timer->map))
	if (timer->irq.level)
		phys_active = true;
	else
		phys_active = false;
|
@ -176,32 +246,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
|
|||
* kvm_timer_sync_hwstate - sync timer state from cpu
|
||||
* @vcpu: The vcpu pointer
|
||||
*
|
||||
* Check if the virtual timer was armed and either schedule a corresponding
|
||||
* soft timer or inject directly if already expired.
|
||||
* Check if the virtual timer has expired while we were running in the guest,
|
||||
* and inject an interrupt if that was the case.
|
||||
*/
|
||||
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
cycle_t cval, now;
|
||||
u64 ns;
|
||||
|
||||
BUG_ON(timer_is_armed(timer));
|
||||
|
||||
if (kvm_timer_should_fire(vcpu)) {
|
||||
/*
|
||||
* Timer has already expired while we were not
|
||||
* looking. Inject the interrupt and carry on.
|
||||
*/
|
||||
kvm_timer_inject_irq(vcpu);
|
||||
return;
|
||||
}
|
||||
|
||||
cval = timer->cntv_cval;
|
||||
now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
|
||||
|
||||
ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask,
|
||||
&timecounter->frac);
|
||||
timer_arm(timer, ns);
|
||||
/*
|
||||
* The guest could have modified the timer registers or the timer
|
||||
* could have expired, update the timer state.
|
||||
*/
|
||||
kvm_timer_update_state(vcpu);
|
||||
}
|
||||
|
||||
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
|
||||
|
@ -216,7 +274,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
|
|||
* kvm_vcpu_set_target(). To handle this, we determine
|
||||
* vcpu timer irq number when the vcpu is reset.
|
||||
*/
|
||||
timer->irq = irq;
|
||||
timer->irq.irq = irq->irq;
|
||||
|
||||
/*
|
||||
* The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
|
||||
|
@ -225,6 +283,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
|
|||
* the ARMv7 architecture.
|
||||
*/
|
||||
timer->cntv_ctl = 0;
|
||||
kvm_timer_update_state(vcpu);
|
||||
|
||||
/*
|
||||
* Tell the VGIC that the virtual interrupt is tied to a
|
||||
|
@ -269,6 +328,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
|
|||
default:
|
||||
return -1;
|
||||
}
|
||||
|
||||
kvm_timer_update_state(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@@ -0,0 +1,63 @@
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H

#include <linux/tracepoint.h>

#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm

/*
 * Tracepoints for vgic
 */
TRACE_EVENT(vgic_update_irq_pending,
	TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
	TP_ARGS(vcpu_id, irq, level),

	TP_STRUCT__entry(
		__field( unsigned long, vcpu_id )
		__field( __u32,         irq )
		__field( bool,          level )
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu_id;
		__entry->irq     = irq;
		__entry->level   = level;
	),

	TP_printk("VCPU: %ld, IRQ %d, level: %d",
		  __entry->vcpu_id, __entry->irq, __entry->level)
);

/*
 * Tracepoints for arch_timer
 */
TRACE_EVENT(kvm_timer_update_irq,
	TP_PROTO(unsigned long vcpu_id, __u32 irq, int level),
	TP_ARGS(vcpu_id, irq, level),

	TP_STRUCT__entry(
		__field( unsigned long, vcpu_id )
		__field( __u32,         irq )
		__field( int,           level )
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu_id;
		__entry->irq     = irq;
		__entry->level   = level;
	),

	TP_printk("VCPU: %ld, IRQ %d, level %d",
		  __entry->vcpu_id, __entry->irq, __entry->level)
);

#endif /* _TRACE_KVM_H */

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace

/* This part must be outside protection */
#include <trace/define_trace.h>
|
|
@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
|
|||
lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
|
||||
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
|
||||
}
|
||||
|
||||
static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
|
||||
struct vgic_lr lr_desc)
|
||||
{
|
||||
if (!(lr_desc.state & LR_STATE_MASK))
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
|
||||
else
|
||||
|
@ -158,6 +154,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
|
|||
* anyway.
|
||||
*/
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
|
||||
|
||||
/* Get the show on the road... */
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
|
||||
|
@ -166,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
|
|||
static const struct vgic_ops vgic_v2_ops = {
|
||||
.get_lr = vgic_v2_get_lr,
|
||||
.set_lr = vgic_v2_set_lr,
|
||||
.sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
|
||||
.get_elrsr = vgic_v2_get_elrsr,
|
||||
.get_eisr = vgic_v2_get_eisr,
|
||||
.clear_eisr = vgic_v2_clear_eisr,
|
||||
|
|
|
@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
|
|||
}
|
||||
|
||||
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
|
||||
}
|
||||
|
||||
static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
|
||||
struct vgic_lr lr_desc)
|
||||
{
|
||||
if (!(lr_desc.state & LR_STATE_MASK))
|
||||
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
|
||||
else
|
||||
|
@ -193,6 +189,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
|
|||
* anyway.
|
||||
*/
|
||||
vgic_v3->vgic_vmcr = 0;
|
||||
vgic_v3->vgic_elrsr = ~0;
|
||||
|
||||
/*
|
||||
* If we are emulating a GICv3, we do it in a non-GICv2-compatible
|
||||
|
@ -211,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
|
|||
static const struct vgic_ops vgic_v3_ops = {
|
||||
.get_lr = vgic_v3_get_lr,
|
||||
.set_lr = vgic_v3_set_lr,
|
||||
.sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
|
||||
.get_elrsr = vgic_v3_get_elrsr,
|
||||
.get_eisr = vgic_v3_get_eisr,
|
||||
.clear_eisr = vgic_v3_clear_eisr,
|
||||
|
|
|
@ -34,6 +34,9 @@
|
|||
#include <asm/kvm.h>
|
||||
#include <kvm/iodev.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
|
||||
/*
|
||||
* How the whole thing works (courtesy of Christoffer Dall):
|
||||
*
|
||||
|
@ -102,11 +105,13 @@
|
|||
#include "vgic.h"
|
||||
|
||||
static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
|
||||
static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
|
||||
static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu);
|
||||
static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
|
||||
static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
|
||||
static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
|
||||
static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
|
||||
int virt_irq);
|
||||
static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
|
||||
|
||||
static const struct vgic_ops *vgic_ops;
|
||||
static const struct vgic_params *vgic;
|
||||
|
@ -357,6 +362,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
|
|||
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
|
||||
vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
|
||||
if (!vgic_dist_irq_get_level(vcpu, irq)) {
|
||||
vgic_dist_irq_clear_pending(vcpu, irq);
|
||||
if (!compute_pending_for_cpu(vcpu))
|
||||
clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
|
||||
}
|
||||
}
|
||||
|
||||
static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
|
||||
|
@ -531,34 +541,6 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm,
|
|||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* If a mapped interrupt's state has been modified by the guest such that it
|
||||
* is no longer active or pending, without it have gone through the sync path,
|
||||
* then the map->active field must be cleared so the interrupt can be taken
|
||||
* again.
|
||||
*/
|
||||
static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct list_head *root;
|
||||
struct irq_phys_map_entry *entry;
|
||||
struct irq_phys_map *map;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
/* Check for PPIs */
|
||||
root = &vgic_cpu->irq_phys_map_list;
|
||||
list_for_each_entry_rcu(entry, root, entry) {
|
||||
map = &entry->map;
|
||||
|
||||
if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) &&
|
||||
!vgic_irq_is_active(vcpu, map->virt_irq))
|
||||
map->active = false;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
bool vgic_handle_clear_pending_reg(struct kvm *kvm,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset, int vcpu_id)
|
||||
|
@ -589,7 +571,6 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm,
|
|||
vcpu_id, offset);
|
||||
vgic_reg_access(mmio, reg, offset, mode);
|
||||
|
||||
vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
|
||||
vgic_update_state(kvm);
|
||||
return true;
|
||||
}
|
||||
|
@ -627,7 +608,6 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm,
|
|||
ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
|
||||
|
||||
if (mmio->is_write) {
|
||||
vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
|
||||
vgic_update_state(kvm);
|
||||
return true;
|
||||
}
|
||||
|
@ -684,10 +664,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
|
|||
vgic_reg_access(mmio, &val, offset,
|
||||
ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
|
||||
if (mmio->is_write) {
|
||||
if (offset < 8) {
|
||||
*reg = ~0U; /* Force PPIs/SGIs to 1 */
|
||||
/* Ignore writes to read-only SGI and PPI bits */
|
||||
if (offset < 8)
|
||||
return false;
|
||||
}
|
||||
|
||||
val = vgic_cfg_compress(val);
|
||||
if (offset & 4) {
|
||||
|
@ -713,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
|
|||
void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
u64 elrsr = vgic_get_elrsr(vcpu);
|
||||
unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
|
||||
int i;
|
||||
|
||||
for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
|
||||
for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
|
||||
struct vgic_lr lr = vgic_get_lr(vcpu, i);
|
||||
|
||||
/*
|
||||
|
@ -736,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
|
|||
* interrupt then move the active state to the
|
||||
* distributor tracking bit.
|
||||
*/
|
||||
if (lr.state & LR_STATE_ACTIVE) {
|
||||
if (lr.state & LR_STATE_ACTIVE)
|
||||
vgic_irq_set_active(vcpu, lr.irq);
|
||||
lr.state &= ~LR_STATE_ACTIVE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reestablish the pending state on the distributor and the
|
||||
* CPU interface. It may have already been pending, but that
|
||||
* is fine, then we are only setting a few bits that were
|
||||
* already set.
|
||||
* CPU interface and mark the LR as free for other use.
|
||||
*/
|
||||
if (lr.state & LR_STATE_PENDING) {
|
||||
vgic_dist_irq_set_pending(vcpu, lr.irq);
|
||||
lr.state &= ~LR_STATE_PENDING;
|
||||
}
|
||||
|
||||
vgic_set_lr(vcpu, i, lr);
|
||||
|
||||
/*
|
||||
* Mark the LR as free for other use.
|
||||
*/
|
||||
BUG_ON(lr.state & LR_STATE_MASK);
|
||||
vgic_retire_lr(i, lr.irq, vcpu);
|
||||
vgic_irq_clear_queued(vcpu, lr.irq);
|
||||
vgic_retire_lr(i, vcpu);
|
||||
|
||||
/* Finally update the VGIC state. */
|
||||
vgic_update_state(vcpu->kvm);
|
||||
|
@ -1067,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
|
|||
vgic_ops->set_lr(vcpu, lr, vlr);
|
||||
}
|
||||
|
||||
static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
|
||||
struct vgic_lr vlr)
|
||||
{
|
||||
vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
|
||||
}
|
||||
|
||||
static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vgic_ops->get_elrsr(vcpu);
|
||||
|
@ -1118,25 +1077,23 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu)
|
|||
vgic_ops->enable(vcpu);
|
||||
}
|
||||
|
||||
static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
|
||||
static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
|
||||
|
||||
vgic_irq_clear_queued(vcpu, vlr.irq);
|
||||
|
||||
/*
|
||||
* We must transfer the pending state back to the distributor before
|
||||
* retiring the LR, otherwise we may lose edge-triggered interrupts.
|
||||
*/
|
||||
if (vlr.state & LR_STATE_PENDING) {
|
||||
vgic_dist_irq_set_pending(vcpu, irq);
|
||||
vgic_dist_irq_set_pending(vcpu, vlr.irq);
|
||||
vlr.hwirq = 0;
|
||||
}
|
||||
|
||||
vlr.state = 0;
|
||||
vgic_set_lr(vcpu, lr_nr, vlr);
|
||||
clear_bit(lr_nr, vgic_cpu->lr_used);
|
||||
vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
|
||||
vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1150,17 +1107,15 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
|
|||
*/
|
||||
static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
u64 elrsr = vgic_get_elrsr(vcpu);
|
||||
unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
|
||||
int lr;
|
||||
|
||||
for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
|
||||
for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
|
||||
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
|
||||
|
||||
if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
|
||||
vgic_retire_lr(lr, vlr.irq, vcpu);
|
||||
if (vgic_irq_is_queued(vcpu, vlr.irq))
|
||||
vgic_irq_clear_queued(vcpu, vlr.irq);
|
||||
}
|
||||
if (!vgic_irq_is_enabled(vcpu, vlr.irq))
|
||||
vgic_retire_lr(lr, vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1200,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
|
|||
}
|
||||
|
||||
vgic_set_lr(vcpu, lr_nr, vlr);
|
||||
vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1210,8 +1164,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
|
|||
*/
|
||||
bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
u64 elrsr = vgic_get_elrsr(vcpu);
|
||||
unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
|
||||
struct vgic_lr vlr;
|
||||
int lr;
|
||||
|
||||
|
@ -1222,28 +1177,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
|
|||
|
||||
kvm_debug("Queue IRQ%d\n", irq);
|
||||
|
||||
lr = vgic_cpu->vgic_irq_lr_map[irq];
|
||||
|
||||
/* Do we have an active interrupt for the same CPUID? */
|
||||
if (lr != LR_EMPTY) {
|
||||
for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
|
||||
vlr = vgic_get_lr(vcpu, lr);
|
||||
if (vlr.source == sgi_source_id) {
|
||||
if (vlr.irq == irq && vlr.source == sgi_source_id) {
|
||||
kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
|
||||
BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
|
||||
vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Try to use another LR for this interrupt */
|
||||
lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
|
||||
vgic->nr_lr);
|
||||
lr = find_first_bit(elrsr_ptr, vgic->nr_lr);
|
||||
if (lr >= vgic->nr_lr)
|
||||
return false;
|
||||
|
||||
kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
|
||||
vgic_cpu->vgic_irq_lr_map[irq] = lr;
|
||||
set_bit(lr, vgic_cpu->lr_used);
|
||||
|
||||
vlr.irq = irq;
|
||||
vlr.source = sgi_source_id;
|
||||
|
@ -1338,12 +1287,60 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
}
|
||||
|
||||
static int process_queued_irq(struct kvm_vcpu *vcpu,
|
||||
int lr, struct vgic_lr vlr)
|
||||
{
|
||||
int pending = 0;
|
||||
|
||||
/*
|
||||
* If the IRQ was EOIed (called from vgic_process_maintenance) or it
|
||||
* went from active to non-active (called from vgic_sync_hwirq) it was
|
||||
* also ACKed and we therefore assume we can clear the soft pending
* state (should it have been set) for this interrupt.
|
||||
*
|
||||
* Note: if the IRQ soft pending state was set after the IRQ was
|
||||
* acked, it actually shouldn't be cleared, but we have no way of
|
||||
* knowing that unless we start trapping ACKs when the soft-pending
|
||||
* state is set.
|
||||
*/
|
||||
vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
|
||||
|
||||
/*
|
||||
* Tell the gic to start sampling this interrupt again.
|
||||
*/
|
||||
vgic_irq_clear_queued(vcpu, vlr.irq);
|
||||
|
||||
/* Any additional pending interrupt? */
|
||||
if (vgic_irq_is_edge(vcpu, vlr.irq)) {
|
||||
BUG_ON(!(vlr.state & LR_HW));
|
||||
pending = vgic_dist_irq_is_pending(vcpu, vlr.irq);
|
||||
} else {
|
||||
if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
|
||||
vgic_cpu_irq_set(vcpu, vlr.irq);
|
||||
pending = 1;
|
||||
} else {
|
||||
vgic_dist_irq_clear_pending(vcpu, vlr.irq);
|
||||
vgic_cpu_irq_clear(vcpu, vlr.irq);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Despite being EOIed, the LR may not have
|
||||
* been marked as empty.
|
||||
*/
|
||||
vlr.state = 0;
|
||||
vlr.hwirq = 0;
|
||||
vgic_set_lr(vcpu, lr, vlr);
|
||||
|
||||
return pending;
|
||||
}
|
||||
|
||||
static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 status = vgic_get_interrupt_status(vcpu);
|
||||
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
bool level_pending = false;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
int level_pending = 0;
|
||||
|
||||
kvm_debug("STATUS = %08x\n", status);
|
||||
|
||||
|
@ -1358,54 +1355,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
|
|||
|
||||
for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
|
||||
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
|
||||
|
||||
WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
|
||||
|
||||
spin_lock(&dist->lock);
|
||||
vgic_irq_clear_queued(vcpu, vlr.irq);
|
||||
WARN_ON(vlr.state & LR_STATE_MASK);
|
||||
vlr.state = 0;
|
||||
vgic_set_lr(vcpu, lr, vlr);
|
||||
|
||||
/*
|
||||
* If the IRQ was EOIed it was also ACKed and we we
|
||||
* therefore assume we can clear the soft pending
|
||||
* state (should it had been set) for this interrupt.
|
||||
*
|
||||
* Note: if the IRQ soft pending state was set after
|
||||
* the IRQ was acked, it actually shouldn't be
|
||||
* cleared, but we have no way of knowing that unless
|
||||
* we start trapping ACKs when the soft-pending state
|
||||
* is set.
|
||||
*/
|
||||
vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
|
||||
|
||||
/*
|
||||
* kvm_notify_acked_irq calls kvm_set_irq()
|
||||
* to reset the IRQ level. Need to release the
|
||||
* lock for kvm_set_irq to grab it.
|
||||
* to reset the IRQ level, which grabs the dist->lock
|
||||
* so we call this before taking the dist->lock.
|
||||
*/
|
||||
spin_unlock(&dist->lock);
|
||||
|
||||
kvm_notify_acked_irq(kvm, 0,
|
||||
vlr.irq - VGIC_NR_PRIVATE_IRQS);
|
||||
|
||||
spin_lock(&dist->lock);
|
||||
|
||||
/* Any additional pending interrupt? */
|
||||
if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
|
||||
vgic_cpu_irq_set(vcpu, vlr.irq);
|
||||
level_pending = true;
|
||||
} else {
|
||||
vgic_dist_irq_clear_pending(vcpu, vlr.irq);
|
||||
vgic_cpu_irq_clear(vcpu, vlr.irq);
|
||||
}
|
||||
|
||||
level_pending |= process_queued_irq(vcpu, lr, vlr);
|
||||
spin_unlock(&dist->lock);
|
||||
|
||||
/*
|
||||
* Despite being EOIed, the LR may not have
|
||||
* been marked as empty.
|
||||
*/
|
||||
vgic_sync_lr_elrsr(vcpu, lr, vlr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1426,35 +1391,40 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
|
|||
/*
|
||||
* Save the physical active state, and reset it to inactive.
|
||||
*
|
||||
* Return 1 if HW interrupt went from active to inactive, and 0 otherwise.
|
||||
* Return true if there's a pending forwarded interrupt to queue.
|
||||
*/
|
||||
static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
|
||||
static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
|
||||
{
|
||||
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
struct irq_phys_map *map;
|
||||
bool phys_active;
|
||||
bool level_pending;
|
||||
int ret;
|
||||
|
||||
if (!(vlr.state & LR_HW))
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
map = vgic_irq_map_search(vcpu, vlr.irq);
|
||||
BUG_ON(!map);
|
||||
|
||||
ret = irq_get_irqchip_state(map->irq,
|
||||
IRQCHIP_STATE_ACTIVE,
|
||||
&map->active);
|
||||
&phys_active);
|
||||
|
||||
WARN_ON(ret);
|
||||
|
||||
if (map->active)
|
||||
if (phys_active)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
spin_lock(&dist->lock);
|
||||
level_pending = process_queued_irq(vcpu, lr, vlr);
|
||||
spin_unlock(&dist->lock);
|
||||
return level_pending;
|
||||
}
|
||||
|
||||
/* Sync back the VGIC state after a guest run */
|
||||
static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
u64 elrsr;
|
||||
unsigned long *elrsr_ptr;
|
||||
|
@ -1462,40 +1432,18 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
|
|||
bool level_pending;
|
||||
|
||||
level_pending = vgic_process_maintenance(vcpu);
|
||||
elrsr = vgic_get_elrsr(vcpu);
|
||||
elrsr_ptr = u64_to_bitmask(&elrsr);
|
||||
|
||||
/* Deal with HW interrupts, and clear mappings for empty LRs */
|
||||
for (lr = 0; lr < vgic->nr_lr; lr++) {
|
||||
struct vgic_lr vlr;
|
||||
|
||||
if (!test_bit(lr, vgic_cpu->lr_used))
|
||||
continue;
|
||||
|
||||
vlr = vgic_get_lr(vcpu, lr);
|
||||
if (vgic_sync_hwirq(vcpu, vlr)) {
|
||||
/*
|
||||
* So this is a HW interrupt that the guest
|
||||
* EOI-ed. Clean the LR state and allow the
|
||||
* interrupt to be sampled again.
|
||||
*/
|
||||
vlr.state = 0;
|
||||
vlr.hwirq = 0;
|
||||
vgic_set_lr(vcpu, lr, vlr);
|
||||
vgic_irq_clear_queued(vcpu, vlr.irq);
|
||||
set_bit(lr, elrsr_ptr);
|
||||
}
|
||||
|
||||
if (!test_bit(lr, elrsr_ptr))
|
||||
continue;
|
||||
|
||||
clear_bit(lr, vgic_cpu->lr_used);
|
||||
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
|
||||
|
||||
level_pending |= vgic_sync_hwirq(vcpu, lr, vlr);
|
||||
BUG_ON(vlr.irq >= dist->nr_irqs);
|
||||
vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
|
||||
}
|
||||
|
||||
/* Check if we still have something up our sleeve... */
|
||||
elrsr = vgic_get_elrsr(vcpu);
|
||||
elrsr_ptr = u64_to_bitmask(&elrsr);
|
||||
pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
|
||||
if (level_pending || pending < vgic->nr_lr)
|
||||
set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
|
||||
|
@ -1585,6 +1533,8 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
|
|||
int enabled;
|
||||
bool ret = true, can_inject = true;
|
||||
|
||||
trace_vgic_update_irq_pending(cpuid, irq_num, level);
|
||||
|
||||
if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -1863,30 +1813,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
|
|||
kfree(entry);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
|
||||
*
|
||||
* Return the logical active state of a mapped interrupt. This doesn't
|
||||
* necessarily reflects the current HW state.
|
||||
*/
|
||||
bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
|
||||
{
|
||||
BUG_ON(!map);
|
||||
return map->active;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
|
||||
*
|
||||
* Set the logical active state of a mapped interrupt. This doesn't
|
||||
* immediately affects the HW state.
|
||||
*/
|
||||
void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
|
||||
{
|
||||
BUG_ON(!map);
|
||||
map->active = active;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
|
||||
* @vcpu: The VCPU pointer
|
||||
|
@ -1942,12 +1868,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
|
|||
kfree(vgic_cpu->pending_shared);
|
||||
kfree(vgic_cpu->active_shared);
|
||||
kfree(vgic_cpu->pend_act_shared);
|
||||
kfree(vgic_cpu->vgic_irq_lr_map);
|
||||
vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
|
||||
vgic_cpu->pending_shared = NULL;
|
||||
vgic_cpu->active_shared = NULL;
|
||||
vgic_cpu->pend_act_shared = NULL;
|
||||
vgic_cpu->vgic_irq_lr_map = NULL;
|
||||
}
|
||||
|
||||
static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
|
||||
|
@ -1958,18 +1882,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
|
|||
vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
|
||||
vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
|
||||
vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
|
||||
vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
|
||||
|
||||
if (!vgic_cpu->pending_shared
|
||||
|| !vgic_cpu->active_shared
|
||||
|| !vgic_cpu->pend_act_shared
|
||||
|| !vgic_cpu->vgic_irq_lr_map) {
|
||||
|| !vgic_cpu->pend_act_shared) {
|
||||
kvm_vgic_vcpu_destroy(vcpu);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
|
||||
|
||||
/*
|
||||
* Store the number of LRs per vcpu, so we don't have to go
|
||||
* all the way to the distributor structure to find out. Only
|
||||
|
@ -2111,14 +2031,24 @@ int vgic_init(struct kvm *kvm)
|
|||
break;
|
||||
}
|
||||
|
||||
for (i = 0; i < dist->nr_irqs; i++) {
|
||||
if (i < VGIC_NR_PPIS)
|
||||
/*
|
||||
* Enable and configure all SGIs to be edge-triggered and
|
||||
* configure all PPIs as level-triggered.
|
||||
*/
|
||||
for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
|
||||
if (i < VGIC_NR_SGIS) {
|
||||
/* SGIs */
|
||||
vgic_bitmap_set_irq_val(&dist->irq_enabled,
|
||||
vcpu->vcpu_id, i, 1);
|
||||
if (i < VGIC_NR_PRIVATE_IRQS)
|
||||
vgic_bitmap_set_irq_val(&dist->irq_cfg,
|
||||
vcpu->vcpu_id, i,
|
||||
VGIC_CFG_EDGE);
|
||||
} else if (i < VGIC_NR_PRIVATE_IRQS) {
|
||||
/* PPIs */
|
||||
vgic_bitmap_set_irq_val(&dist->irq_cfg,
|
||||
vcpu->vcpu_id, i,
|
||||
VGIC_CFG_LEVEL);
|
||||
}
|
||||
}
|
||||
|
||||
vgic_enable(vcpu);
|
||||
|
|
|
@ -94,6 +94,10 @@ static void async_pf_execute(struct work_struct *work)
|
|||
|
||||
trace_kvm_async_pf_completed(addr, gva);
|
||||
|
||||
/*
|
||||
* This memory barrier pairs with prepare_to_wait's set_current_state()
|
||||
*/
|
||||
smp_mb();
|
||||
if (waitqueue_active(&vcpu->wq))
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_irqfd.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/wait.h>
|
||||
|
@ -34,73 +35,20 @@
|
|||
#include <linux/srcu.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/seqlock.h>
|
||||
#include <linux/irqbypass.h>
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
#include <kvm/iodev.h>
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
/*
|
||||
* --------------------------------------------------------------------
|
||||
* irqfd: Allows an fd to be used to inject an interrupt to the guest
|
||||
*
|
||||
* Credit goes to Avi Kivity for the original idea.
|
||||
* --------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* Resampling irqfds are a special variety of irqfds used to emulate
|
||||
* level triggered interrupts. The interrupt is asserted on eventfd
|
||||
* trigger. On acknowledgement through the irq ack notifier, the
|
||||
* interrupt is de-asserted and userspace is notified through the
|
||||
* resamplefd. All resamplers on the same gsi are de-asserted
|
||||
* together, so we don't need to track the state of each individual
|
||||
* user. We can also therefore share the same irq source ID.
|
||||
*/
|
||||
struct _irqfd_resampler {
|
||||
struct kvm *kvm;
|
||||
/*
|
||||
* List of resampling struct _irqfd objects sharing this gsi.
|
||||
* RCU list modified under kvm->irqfds.resampler_lock
|
||||
*/
|
||||
struct list_head list;
|
||||
struct kvm_irq_ack_notifier notifier;
|
||||
/*
|
||||
* Entry in list of kvm->irqfd.resampler_list. Use for sharing
|
||||
* resamplers among irqfds on the same gsi.
|
||||
* Accessed and modified under kvm->irqfds.resampler_lock
|
||||
*/
|
||||
struct list_head link;
|
||||
};
|
||||
|
||||
struct _irqfd {
|
||||
/* Used for MSI fast-path */
|
||||
struct kvm *kvm;
|
||||
wait_queue_t wait;
|
||||
/* Update side is protected by irqfds.lock */
|
||||
struct kvm_kernel_irq_routing_entry irq_entry;
|
||||
seqcount_t irq_entry_sc;
|
||||
/* Used for level IRQ fast-path */
|
||||
int gsi;
|
||||
struct work_struct inject;
|
||||
/* The resampler used by this irqfd (resampler-only) */
|
||||
struct _irqfd_resampler *resampler;
|
||||
/* Eventfd notified on resample (resampler-only) */
|
||||
struct eventfd_ctx *resamplefd;
|
||||
/* Entry in list of irqfds for a resampler (resampler-only) */
|
||||
struct list_head resampler_link;
|
||||
/* Used for setup/shutdown */
|
||||
struct eventfd_ctx *eventfd;
|
||||
struct list_head list;
|
||||
poll_table pt;
|
||||
struct work_struct shutdown;
|
||||
};
|
||||
|
||||
static struct workqueue_struct *irqfd_cleanup_wq;
|
||||
|
||||
static void
|
||||
irqfd_inject(struct work_struct *work)
|
||||
{
|
||||
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
|
||||
struct kvm_kernel_irqfd *irqfd =
|
||||
container_of(work, struct kvm_kernel_irqfd, inject);
|
||||
struct kvm *kvm = irqfd->kvm;
|
||||
|
||||
if (!irqfd->resampler) {
|
||||
|
@ -121,12 +69,13 @@ irqfd_inject(struct work_struct *work)
|
|||
static void
|
||||
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
|
||||
{
|
||||
struct _irqfd_resampler *resampler;
|
||||
struct kvm_kernel_irqfd_resampler *resampler;
|
||||
struct kvm *kvm;
|
||||
struct _irqfd *irqfd;
|
||||
struct kvm_kernel_irqfd *irqfd;
|
||||
int idx;
|
||||
|
||||
resampler = container_of(kian, struct _irqfd_resampler, notifier);
|
||||
resampler = container_of(kian,
|
||||
struct kvm_kernel_irqfd_resampler, notifier);
|
||||
kvm = resampler->kvm;
|
||||
|
||||
kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
|
||||
|
@ -141,9 +90,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
|
|||
}
|
||||
|
||||
static void
|
||||
irqfd_resampler_shutdown(struct _irqfd *irqfd)
|
||||
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
|
||||
{
|
||||
struct _irqfd_resampler *resampler = irqfd->resampler;
|
||||
struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
|
||||
struct kvm *kvm = resampler->kvm;
|
||||
|
||||
mutex_lock(&kvm->irqfds.resampler_lock);
|
||||
|
@ -168,7 +117,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
|
|||
static void
|
||||
irqfd_shutdown(struct work_struct *work)
|
||||
{
|
||||
struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
|
||||
struct kvm_kernel_irqfd *irqfd =
|
||||
container_of(work, struct kvm_kernel_irqfd, shutdown);
|
||||
u64 cnt;
|
||||
|
||||
/*
|
||||
|
@ -191,6 +141,9 @@ irqfd_shutdown(struct work_struct *work)
|
|||
/*
|
||||
* It is now safe to release the object's resources
|
||||
*/
|
||||
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
|
||||
irq_bypass_unregister_consumer(&irqfd->consumer);
|
||||
#endif
|
||||
eventfd_ctx_put(irqfd->eventfd);
|
||||
kfree(irqfd);
|
||||
}
|
||||
|
@ -198,7 +151,7 @@ irqfd_shutdown(struct work_struct *work)
|
|||
|
||||
/* assumes kvm->irqfds.lock is held */
|
||||
static bool
|
||||
irqfd_is_active(struct _irqfd *irqfd)
|
||||
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
|
||||
{
|
||||
return list_empty(&irqfd->list) ? false : true;
|
||||
}
|
||||
|
@ -209,7 +162,7 @@ irqfd_is_active(struct _irqfd *irqfd)
|
|||
* assumes kvm->irqfds.lock is held
|
||||
*/
|
||||
static void
|
||||
irqfd_deactivate(struct _irqfd *irqfd)
|
||||
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
|
||||
{
|
||||
BUG_ON(!irqfd_is_active(irqfd));
|
||||
|
||||
|
@ -218,13 +171,23 @@ irqfd_deactivate(struct _irqfd *irqfd)
|
|||
queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
|
||||
}
|
||||
|
||||
int __attribute__((weak)) kvm_arch_set_irq_inatomic(
|
||||
struct kvm_kernel_irq_routing_entry *irq,
|
||||
struct kvm *kvm, int irq_source_id,
|
||||
int level,
|
||||
bool line_status)
|
||||
{
|
||||
return -EWOULDBLOCK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with wqh->lock held and interrupts disabled
|
||||
*/
|
||||
static int
|
||||
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
|
||||
{
|
||||
struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
|
||||
struct kvm_kernel_irqfd *irqfd =
|
||||
container_of(wait, struct kvm_kernel_irqfd, wait);
|
||||
unsigned long flags = (unsigned long)key;
|
||||
struct kvm_kernel_irq_routing_entry irq;
|
||||
struct kvm *kvm = irqfd->kvm;
|
||||
|
@ -238,10 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
|
|||
irq = irqfd->irq_entry;
|
||||
} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
|
||||
/* An event has been signaled, inject an interrupt */
|
||||
if (irq.type == KVM_IRQ_ROUTING_MSI)
|
||||
kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
|
||||
false);
|
||||
else
|
||||
if (kvm_arch_set_irq_inatomic(&irq, kvm,
|
||||
KVM_USERSPACE_IRQ_SOURCE_ID, 1,
|
||||
false) == -EWOULDBLOCK)
|
||||
schedule_work(&irqfd->inject);
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
}
|
||||
|
@ -274,37 +236,54 @@ static void
|
|||
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
|
||||
poll_table *pt)
|
||||
{
|
||||
struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
|
||||
struct kvm_kernel_irqfd *irqfd =
|
||||
container_of(pt, struct kvm_kernel_irqfd, pt);
|
||||
add_wait_queue(wqh, &irqfd->wait);
|
||||
}
|
||||
|
||||
/* Must be called under irqfds.lock */
|
||||
static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
|
||||
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
|
||||
{
|
||||
struct kvm_kernel_irq_routing_entry *e;
|
||||
struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
|
||||
int i, n_entries;
|
||||
int n_entries;
|
||||
|
||||
n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
|
||||
|
||||
write_seqcount_begin(&irqfd->irq_entry_sc);
|
||||
|
||||
irqfd->irq_entry.type = 0;
|
||||
|
||||
e = entries;
|
||||
for (i = 0; i < n_entries; ++i, ++e) {
|
||||
/* Only fast-path MSI. */
|
||||
if (e->type == KVM_IRQ_ROUTING_MSI)
|
||||
irqfd->irq_entry = *e;
|
||||
}
|
||||
if (n_entries == 1)
|
||||
irqfd->irq_entry = *e;
|
||||
else
|
||||
irqfd->irq_entry.type = 0;
|
||||
|
||||
write_seqcount_end(&irqfd->irq_entry_sc);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
|
||||
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
|
||||
struct irq_bypass_consumer *cons)
|
||||
{
|
||||
}
|
||||
|
||||
void __attribute__((weak)) kvm_arch_irq_bypass_start(
|
||||
struct irq_bypass_consumer *cons)
|
||||
{
|
||||
}
|
||||
|
||||
int __attribute__((weak)) kvm_arch_update_irqfd_routing(
|
||||
struct kvm *kvm, unsigned int host_irq,
|
||||
uint32_t guest_irq, bool set)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
|
||||
{
|
||||
struct _irqfd *irqfd, *tmp;
|
||||
struct kvm_kernel_irqfd *irqfd, *tmp;
|
||||
struct fd f;
|
||||
struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
|
||||
int ret;
|
||||
|
@ -340,7 +319,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
|
|||
irqfd->eventfd = eventfd;
|
||||
|
||||
if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
|
||||
struct _irqfd_resampler *resampler;
|
||||
struct kvm_kernel_irqfd_resampler *resampler;
|
||||
|
||||
resamplefd = eventfd_ctx_fdget(args->resamplefd);
|
||||
if (IS_ERR(resamplefd)) {
|
||||
|
@ -428,6 +407,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
|
|||
* we might race against the POLLHUP
|
||||
*/
|
||||
fdput(f);
|
||||
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
|
||||
irqfd->consumer.token = (void *)irqfd->eventfd;
|
||||
irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
|
||||
irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
|
||||
irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
|
||||
irqfd->consumer.start = kvm_arch_irq_bypass_start;
|
||||
ret = irq_bypass_register_consumer(&irqfd->consumer);
|
||||
if (ret)
|
||||
pr_info("irq bypass consumer (token %p) registration fails: %d\n",
|
||||
irqfd->consumer.token, ret);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -469,9 +459,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
|
||||
|
||||
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
|
||||
void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
|
||||
{
|
||||
struct kvm_irq_ack_notifier *kian;
|
||||
|
||||
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
|
||||
link)
|
||||
if (kian->gsi == gsi)
|
||||
kian->irq_acked(kian);
|
||||
}
|
||||
|
||||
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
|
||||
{
|
||||
int gsi, idx;
|
||||
|
||||
trace_kvm_ack_irq(irqchip, pin);
|
||||
|
@ -479,10 +478,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
|
|||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
|
||||
if (gsi != -1)
|
||||
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
|
||||
link)
|
||||
if (kian->gsi == gsi)
|
||||
kian->irq_acked(kian);
|
||||
kvm_notify_acked_gsi(kvm, gsi);
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
}
|
||||
|
||||
|
@ -525,7 +521,7 @@ kvm_eventfd_init(struct kvm *kvm)
|
|||
static int
|
||||
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
|
||||
{
|
||||
struct _irqfd *irqfd, *tmp;
|
||||
struct kvm_kernel_irqfd *irqfd, *tmp;
|
||||
struct eventfd_ctx *eventfd;
|
||||
|
||||
eventfd = eventfd_ctx_fdget(args->fd);
|
||||
|
@ -581,7 +577,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
|
|||
void
|
||||
kvm_irqfd_release(struct kvm *kvm)
|
||||
{
|
||||
struct _irqfd *irqfd, *tmp;
|
||||
struct kvm_kernel_irqfd *irqfd, *tmp;
|
||||
|
||||
spin_lock_irq(&kvm->irqfds.lock);
|
||||
|
||||
|
@ -604,13 +600,23 @@ kvm_irqfd_release(struct kvm *kvm)
|
|||
*/
|
||||
void kvm_irq_routing_update(struct kvm *kvm)
|
||||
{
|
||||
struct _irqfd *irqfd;
|
||||
struct kvm_kernel_irqfd *irqfd;
|
||||
|
||||
spin_lock_irq(&kvm->irqfds.lock);
|
||||
|
||||
list_for_each_entry(irqfd, &kvm->irqfds.items, list)
|
||||
list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
|
||||
irqfd_update(kvm, irqfd);
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
|
||||
if (irqfd->producer) {
|
||||
int ret = kvm_arch_update_irqfd_routing(
|
||||
irqfd->kvm, irqfd->producer->irq,
|
||||
irqfd->gsi, 1);
|
||||
WARN_ON(ret);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
spin_unlock_irq(&kvm->irqfds.lock);
|
||||
}
|
||||
|
||||
|
@ -914,9 +920,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
|
|||
return -EINVAL;
|
||||
|
||||
/* ioeventfd with no length can't be combined with DATAMATCH */
|
||||
if (!args->len &&
|
||||
args->flags & (KVM_IOEVENTFD_FLAG_PIO |
|
||||
KVM_IOEVENTFD_FLAG_DATAMATCH))
|
||||
if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
|
||||
return -EINVAL;
|
||||
|
||||
ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
|
||||
|
|
|
@ -31,16 +31,6 @@
|
|||
#include <trace/events/kvm.h>
|
||||
#include "irq.h"
|
||||
|
||||
struct kvm_irq_routing_table {
|
||||
int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
|
||||
u32 nr_rt_entries;
|
||||
/*
|
||||
* Array indexed by gsi. Each entry contains list of irq chips
|
||||
* the gsi is connected to.
|
||||
*/
|
||||
struct hlist_head map[0];
|
||||
};
|
||||
|
||||
int kvm_irq_map_gsi(struct kvm *kvm,
|
||||
struct kvm_kernel_irq_routing_entry *entries, int gsi)
|
||||
{
|
||||
|
@ -154,11 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
|
|||
|
||||
/*
|
||||
* Do not allow GSI to be mapped to the same irqchip more than once.
|
||||
* Allow only one to one mapping between GSI and MSI.
|
||||
* Allow only one to one mapping between GSI and non-irqchip routing.
|
||||
*/
|
||||
hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
|
||||
if (ei->type == KVM_IRQ_ROUTING_MSI ||
|
||||
ue->type == KVM_IRQ_ROUTING_MSI ||
|
||||
if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
|
||||
ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
|
||||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
|
||||
return r;
|
||||
|
||||
|
@ -231,6 +221,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
|
|||
kvm_irq_routing_update(kvm);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
|
||||
kvm_arch_irq_routing_update(kvm);
|
||||
|
||||
synchronize_srcu_expedited(&kvm->irq_srcu);
|
||||
|
||||
new = old;
|
||||
|
|
|
@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
|
|||
init_waitqueue_head(&vcpu->wq);
|
||||
kvm_async_pf_vcpu_init(vcpu);
|
||||
|
||||
vcpu->pre_pcpu = -1;
|
||||
INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
|
||||
|
||||
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||
if (!page) {
|
||||
r = -ENOMEM;
|
||||
|
@ -2018,6 +2021,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
|
|||
} while (single_task_running() && ktime_before(cur, stop));
|
||||
}
|
||||
|
||||
kvm_arch_vcpu_blocking(vcpu);
|
||||
|
||||
for (;;) {
|
||||
prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
|
||||
|
||||
|
@ -2031,6 +2036,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
|
|||
finish_wait(&vcpu->wq, &wait);
|
||||
cur = ktime_get();
|
||||
|
||||
kvm_arch_vcpu_unblocking(vcpu);
|
||||
out:
|
||||
block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
|
||||
|
||||
|
@ -2718,6 +2724,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
|
|||
case KVM_CAP_IRQFD:
|
||||
case KVM_CAP_IRQFD_RESAMPLE:
|
||||
#endif
|
||||
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
|
||||
case KVM_CAP_CHECK_EXTENSION_VM:
|
||||
return 1;
|
||||
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
|
||||
|
@ -3341,7 +3348,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
|||
if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
|
||||
return -ENOSPC;
|
||||
|
||||
new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) *
|
||||
new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) *
|
||||
sizeof(struct kvm_io_range)), GFP_KERNEL);
|
||||
if (!new_bus)
|
||||
return -ENOMEM;
|
||||
|
@ -3373,7 +3380,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) *
|
||||
new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
|
||||
sizeof(struct kvm_io_range)), GFP_KERNEL);
|
||||
if (!new_bus)
|
||||
return -ENOMEM;
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
config IRQ_BYPASS_MANAGER
|
||||
tristate
|
|
@ -0,0 +1 @@
|
|||
obj-$(CONFIG_IRQ_BYPASS_MANAGER) += irqbypass.o
|
|
@ -0,0 +1,257 @@
|
|||
/*
|
||||
* IRQ offload/bypass manager
|
||||
*
|
||||
* Copyright (C) 2015 Red Hat, Inc.
|
||||
* Copyright (c) 2015 Linaro Ltd.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* Various virtualization hardware acceleration techniques allow bypassing or
|
||||
* offloading interrupts received from devices around the host kernel. Posted
|
||||
* Interrupts on Intel VT-d systems can allow interrupts to be received
|
||||
* directly by a virtual machine. ARM IRQ Forwarding allows forwarded physical
|
||||
* interrupts to be directly deactivated by the guest. This manager allows
|
||||
* interrupt producers and consumers to find each other to enable this sort of
|
||||
* bypass.
|
||||
*/
|
||||
|
||||
#include <linux/irqbypass.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_DESCRIPTION("IRQ bypass manager utility module");
|
||||
|
||||
static LIST_HEAD(producers);
|
||||
static LIST_HEAD(consumers);
|
||||
static DEFINE_MUTEX(lock);
|
||||
|
||||
/* @lock must be held when calling connect */
|
||||
static int __connect(struct irq_bypass_producer *prod,
|
||||
struct irq_bypass_consumer *cons)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (prod->stop)
|
||||
prod->stop(prod);
|
||||
if (cons->stop)
|
||||
cons->stop(cons);
|
||||
|
||||
if (prod->add_consumer)
|
||||
ret = prod->add_consumer(prod, cons);
|
||||
|
||||
if (!ret) {
|
||||
ret = cons->add_producer(cons, prod);
|
||||
if (ret && prod->del_consumer)
|
||||
prod->del_consumer(prod, cons);
|
||||
}
|
||||
|
||||
if (cons->start)
|
||||
cons->start(cons);
|
||||
if (prod->start)
|
||||
prod->start(prod);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* @lock must be held when calling disconnect */
|
||||
static void __disconnect(struct irq_bypass_producer *prod,
|
||||
struct irq_bypass_consumer *cons)
|
||||
{
|
||||
if (prod->stop)
|
||||
prod->stop(prod);
|
||||
if (cons->stop)
|
||||
cons->stop(cons);
|
||||
|
||||
cons->del_producer(cons, prod);
|
||||
|
||||
if (prod->del_consumer)
|
||||
prod->del_consumer(prod, cons);
|
||||
|
||||
if (cons->start)
|
||||
cons->start(cons);
|
||||
if (prod->start)
|
||||
prod->start(prod);
|
||||
}
|
||||
|
||||
/**
|
||||
* irq_bypass_register_producer - register IRQ bypass producer
|
||||
* @producer: pointer to producer structure
|
||||
*
|
||||
* Add the provided IRQ producer to the list of producers and connect
|
||||
* with any matching token found on the IRQ consumers list.
|
||||
*/
|
||||
int irq_bypass_register_producer(struct irq_bypass_producer *producer)
|
||||
{
|
||||
struct irq_bypass_producer *tmp;
|
||||
struct irq_bypass_consumer *consumer;
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (!try_module_get(THIS_MODULE))
|
||||
return -ENODEV;
|
||||
|
||||
mutex_lock(&lock);
|
||||
|
||||
list_for_each_entry(tmp, &producers, node) {
|
||||
if (tmp->token == producer->token) {
|
||||
mutex_unlock(&lock);
|
||||
module_put(THIS_MODULE);
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
|
||||
list_for_each_entry(consumer, &consumers, node) {
|
||||
if (consumer->token == producer->token) {
|
||||
int ret = __connect(producer, consumer);
|
||||
if (ret) {
|
||||
mutex_unlock(&lock);
|
||||
module_put(THIS_MODULE);
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
list_add(&producer->node, &producers);
|
||||
|
||||
mutex_unlock(&lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(irq_bypass_register_producer);
|
||||
|
||||
/**
|
||||
* irq_bypass_unregister_producer - unregister IRQ bypass producer
|
||||
* @producer: pointer to producer structure
|
||||
*
|
||||
* Remove a previously registered IRQ producer from the list of producers
|
||||
* and disconnect it from any connected IRQ consumer.
|
||||
*/
|
||||
void irq_bypass_unregister_producer(struct irq_bypass_producer *producer)
|
||||
{
|
||||
struct irq_bypass_producer *tmp;
|
||||
struct irq_bypass_consumer *consumer;
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (!try_module_get(THIS_MODULE))
|
||||
return; /* nothing in the list anyway */
|
||||
|
||||
mutex_lock(&lock);
|
||||
|
||||
list_for_each_entry(tmp, &producers, node) {
|
||||
if (tmp->token != producer->token)
|
||||
continue;
|
||||
|
||||
list_for_each_entry(consumer, &consumers, node) {
|
||||
if (consumer->token == producer->token) {
|
||||
__disconnect(producer, consumer);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
list_del(&producer->node);
|
||||
module_put(THIS_MODULE);
|
||||
break;
|
||||
}
|
||||
|
||||
mutex_unlock(&lock);
|
||||
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(irq_bypass_unregister_producer);
|
||||
|
||||
/**
|
||||
* irq_bypass_register_consumer - register IRQ bypass consumer
|
||||
* @consumer: pointer to consumer structure
|
||||
*
|
||||
* Add the provided IRQ consumer to the list of consumers and connect
|
||||
* with any matching token found on the IRQ producer list.
|
||||
*/
|
||||
int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer)
|
||||
{
|
||||
struct irq_bypass_consumer *tmp;
|
||||
struct irq_bypass_producer *producer;
|
||||
|
||||
if (!consumer->add_producer || !consumer->del_producer)
|
||||
return -EINVAL;
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (!try_module_get(THIS_MODULE))
|
||||
return -ENODEV;
|
||||
|
||||
mutex_lock(&lock);
|
||||
|
||||
list_for_each_entry(tmp, &consumers, node) {
|
||||
if (tmp->token == consumer->token) {
|
||||
mutex_unlock(&lock);
|
||||
module_put(THIS_MODULE);
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
|
||||
list_for_each_entry(producer, &producers, node) {
|
||||
if (producer->token == consumer->token) {
|
||||
int ret = __connect(producer, consumer);
|
||||
if (ret) {
|
||||
mutex_unlock(&lock);
|
||||
module_put(THIS_MODULE);
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
list_add(&consumer->node, &consumers);
|
||||
|
||||
mutex_unlock(&lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(irq_bypass_register_consumer);
|
||||
|
||||
/**
|
||||
* irq_bypass_unregister_consumer - unregister IRQ bypass consumer
|
||||
* @consumer: pointer to consumer structure
|
||||
*
|
||||
* Remove a previously registered IRQ consumer from the list of consumers
|
||||
* and disconnect it from any connected IRQ producer.
|
||||
*/
|
||||
void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer)
|
||||
{
|
||||
struct irq_bypass_consumer *tmp;
|
||||
struct irq_bypass_producer *producer;
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (!try_module_get(THIS_MODULE))
|
||||
return; /* nothing in the list anyway */
|
||||
|
||||
mutex_lock(&lock);
|
||||
|
||||
list_for_each_entry(tmp, &consumers, node) {
|
||||
if (tmp->token != consumer->token)
|
||||
continue;
|
||||
|
||||
list_for_each_entry(producer, &producers, node) {
|
||||
if (producer->token == consumer->token) {
|
||||
__disconnect(producer, consumer);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
list_del(&consumer->node);
|
||||
module_put(THIS_MODULE);
|
||||
break;
|
||||
}
|
||||
|
||||
mutex_unlock(&lock);
|
||||
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(irq_bypass_unregister_consumer);
|