mirror of https://gitee.com/openkylin/linux.git
* s390: ioctl hardening, selftests
* ARM: ITS translation cache; support for 512 vCPUs, various cleanups and bugfixes
* PPC: various minor fixes and preparation
* x86: bugfixes all over the place (posted interrupts, SVM, emulation corner cases, blocked INIT), some IPI optimizations

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.22 (GNU/Linux)

iQEcBAABAgAGBQJdf7fdAAoJEL/70l94x66DJzkIAKDcuWXJB4Qtoto6yUvPiHZm
LYkY/Dn1zulb/DhzrBoXFey/jZXwl9kxMYkVTefnrAl0fRwFGX+G1UYnQrtAL6Gr
ifdTYdy3kZhXCnnp99QAantWDswJHo1THwbmHrlmkxS4MdisEaTHwgjaHrDRZ4/d
FAEwW2isSonP3YJfTtsKFFjL9k2D4iMnwZ/R2B7UOaWvgnerZ1GLmOkilvnzGGEV
IQ89IIkWlkKd4SKgq8RkDKlfW5JrLrSdTK2Uf0DvAxV+J0EFkEaR+WlLsqumra0z
Eg3KwNScfQj0DyT0TzurcOxObcQPoMNSFYXLRbUu1+i0CGgm90XpF1IosiuihgU=
=w6I3
-----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:

 "s390:
   - ioctl hardening
   - selftests

  ARM:
   - ITS translation cache
   - support for 512 vCPUs
   - various cleanups and bugfixes

  PPC:
   - various minor fixes and preparation

  x86:
   - bugfixes all over the place (posted interrupts, SVM, emulation corner cases, blocked INIT)
   - some IPI optimizations"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (75 commits)
  KVM: X86: Use IPI shorthands in kvm guest when support
  KVM: x86: Fix INIT signal handling in various CPU states
  KVM: VMX: Introduce exit reason for receiving INIT signal on guest-mode
  KVM: VMX: Stop the preemption timer during vCPU reset
  KVM: LAPIC: Micro optimize IPI latency
  kvm: Nested KVM MMUs need PAE root too
  KVM: x86: set ctxt->have_exception in x86_decode_insn()
  KVM: x86: always stop emulation on page fault
  KVM: nVMX: trace nested VM-Enter failures detected by H/W
  KVM: nVMX: add tracepoint for failed nested VM-Enter
  x86: KVM: svm: Fix a check in nested_svm_vmrun()
  KVM: x86: Return to userspace with internal error on unexpected exit reason
  KVM: x86: Add kvm_emulate_{rd,wr}msr() to consolidate VXM/SVM code
  KVM: x86: Refactor up kvm_{g,s}et_msr() to simplify callers
  doc: kvm: Fix return description of KVM_SET_MSRS
  KVM: X86: Tune PLE Window tracepoint
  KVM: VMX: Change ple_window type to unsigned int
  KVM: X86: Remove tailing newline for tracepoints
  KVM: X86: Trace vcpu_id for vmexit
  KVM: x86: Manually calculate reserved bits when loading PDPTRS
  ...
This commit is contained in:
commit fe38bd6862
@@ -586,7 +586,7 @@ Capability: basic

 Architectures: x86
 Type: vcpu ioctl
 Parameters: struct kvm_msrs (in)
-Returns: 0 on success, -1 on error
+Returns: number of msrs successfully set (see below), -1 on error

 Writes model-specific registers to the vcpu. See KVM_GET_MSRS for the
 data structures.

@@ -595,6 +595,11 @@ Application code should set the 'nmsrs' member (which indicates the
 size of the entries array), and the 'index' and 'data' members of each
 array entry.

+It tries to set the MSRs in array entries[] one by one. If setting an MSR
+fails, e.g., due to setting reserved bits, the MSR isn't supported/emulated
+by KVM, etc..., it stops processing the MSR list and returns the number of
+MSRs that have been set successfully.
+
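To make the new return convention concrete, here is a rough userspace sketch (not part of this patch; the vcpu file descriptor and the two MSR indices/values are placeholders only): a caller passes two entries and checks how many were applied before the first failure.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <stdlib.h>

/* Try to set two MSRs; KVM_SET_MSRS now returns how many were set. */
static void set_two_msrs(int vcpu_fd)
{
	struct kvm_msrs *msrs;
	int ret;

	msrs = calloc(1, sizeof(*msrs) + 2 * sizeof(msrs->entries[0]));
	if (!msrs)
		return;
	msrs->nmsrs = 2;
	msrs->entries[0].index = 0x10;        /* placeholder: IA32_TIME_STAMP_COUNTER */
	msrs->entries[1].index = 0xc0000080;  /* placeholder: IA32_EFER */

	ret = ioctl(vcpu_fd, KVM_SET_MSRS, msrs);
	if (ret < 0)
		perror("KVM_SET_MSRS");
	else if (ret < 2)
		/* entry 'ret' was rejected; entries before it were applied */
		fprintf(stderr, "only %d of 2 MSRs were set\n", ret);
	free(msrs);
}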
 4.20 KVM_SET_CPUID

@@ -753,8 +758,8 @@ in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
 use PPIs designated for specific cpus. The irq field is interpreted
 like this:

-  bits:  | 31 ... 24 | 23 ... 16 | 15 ... 0 |
-  field: | irq_type  | vcpu_index |  irq_id  |
+  bits:  | 31 ... 28 | 27 ... 24 | 23 ... 16 | 15 ... 0 |
+  field: | vcpu2_index | irq_type | vcpu_index |  irq_id  |

 The irq_type field has the following values:
 - irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ

@@ -766,6 +771,14 @@ The irq_type field has the following values:

 In both cases, level is used to assert/deassert the line.

+When KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 is supported, the target vcpu is
+identified as (256 * vcpu2_index + vcpu_index). Otherwise, vcpu2_index
+must be zero.
+
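As a rough illustration of the encoding described above (not part of this patch), a caller could pack the irq field with the KVM_ARM_IRQ_* shift constants added further down in the uapi headers; the helper name here is hypothetical, and the PPI irq_id is assumed to be in the valid PPI range:

#include <linux/kvm.h>   /* pulls in asm/kvm.h for the KVM_ARM_IRQ_* values */

/*
 * Build the KVM_IRQ_LINE "irq" field for an ARM PPI targeting a given
 * vcpu, splitting the vcpu number as (256 * vcpu2_index + vcpu_index).
 * Non-zero vcpu2_index requires KVM_CAP_ARM_IRQ_LINE_LAYOUT_2.
 */
static __u32 arm_ppi_irq_field(unsigned int target_vcpu, unsigned int irq_id)
{
	__u32 vcpu2_index = target_vcpu / 256;
	__u32 vcpu_index  = target_vcpu % 256;

	return (vcpu2_index << KVM_ARM_IRQ_VCPU2_SHIFT) |
	       (KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT) |
	       (vcpu_index << KVM_ARM_IRQ_VCPU_SHIFT) |
	       (irq_id << KVM_ARM_IRQ_NUM_SHIFT);
}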
+Note that on arm/arm64, the KVM_CAP_IRQCHIP capability only conditions
+injection of interrupts for the in-kernel irqchip. KVM_IRQ_LINE can always
+be used for a userspace interrupt controller.
+
 struct kvm_irq_level {
 	union {
 		__u32 irq;     /* GSI */
@@ -3079,12 +3092,14 @@ This exception is also raised directly at the corresponding VCPU if the
 flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field.

 The start address of the memory region has to be specified in the "gaddr"
-field, and the length of the region in the "size" field. "buf" is the buffer
-supplied by the userspace application where the read data should be written
-to for KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written
-is stored for a KVM_S390_MEMOP_LOGICAL_WRITE. "buf" is unused and can be NULL
-when KVM_S390_MEMOP_F_CHECK_ONLY is specified. "ar" designates the access
-register number to be used.
+field, and the length of the region in the "size" field (which must not
+be 0). The maximum value for "size" can be obtained by checking the
+KVM_CAP_S390_MEM_OP capability. "buf" is the buffer supplied by the
+userspace application where the read data should be written to for
+KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written is
+stored for a KVM_S390_MEMOP_LOGICAL_WRITE. When KVM_S390_MEMOP_F_CHECK_ONLY
+is specified, "buf" is unused and can be NULL. "ar" designates the access
+register number to be used; the valid range is 0..15.

 The "reserved" field is meant for future extensions. It is not used by
 KVM with the currently defined set of flags.

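A minimal userspace sketch of a logical read under the tightened rules documented above (not part of this patch; the wrapper name and the choice of access register 0 are placeholders):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <string.h>

/* Read 'len' bytes from guest logical address 'gaddr' into 'buf'. */
static int s390_memop_read(int vcpu_fd, __u64 gaddr, void *buf, __u32 len)
{
	struct kvm_s390_mem_op op;

	if (!len)                       /* "size" must not be 0 */
		return -1;

	memset(&op, 0, sizeof(op));     /* keep the "reserved" field zero */
	op.gaddr = gaddr;
	op.size  = len;
	op.buf   = (__u64)(unsigned long)buf;
	op.ar    = 0;                   /* access register, valid range 0..15 */
	op.op    = KVM_S390_MEMOP_LOGICAL_READ;

	return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
}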
@@ -294,7 +294,7 @@ Handling a page fault is performed as follows:
  - walk shadow page table
  - check for valid generation number in the spte (see "Fast invalidation of
    MMIO sptes" below)
- - cache the information to vcpu->arch.mmio_gva, vcpu->arch.access and
+ - cache the information to vcpu->arch.mmio_gva, vcpu->arch.mmio_access and
    vcpu->arch.mmio_gfn, and call the emulator
  - If both P bit and R/W bit of error code are set, this could possibly
    be handled as a "fast page fault" (fixed without taking the MMU lock). See

@@ -304,7 +304,7 @@ Handling a page fault is performed as follows:
  - if permissions are insufficient, reflect the fault back to the guest
  - determine the host page
  - if this is an mmio request, there is no host page; cache the info to
-   vcpu->arch.mmio_gva, vcpu->arch.access and vcpu->arch.mmio_gfn
+   vcpu->arch.mmio_gva, vcpu->arch.mmio_access and vcpu->arch.mmio_gfn
  - walk the shadow page table to find the spte for the translation,
    instantiating missing intermediate page tables as necessary
  - If this is an mmio request, cache the mmio info to the spte and set some

@@ -266,8 +266,10 @@ struct kvm_vcpu_events {
 #define   KVM_DEV_ARM_ITS_CTRL_RESET	4

 /* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_VCPU2_SHIFT		28
+#define KVM_ARM_IRQ_VCPU2_MASK		0xf
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
-#define KVM_ARM_IRQ_TYPE_MASK		0xff
+#define KVM_ARM_IRQ_TYPE_MASK		0xf
 #define KVM_ARM_IRQ_VCPU_SHIFT		16
 #define KVM_ARM_IRQ_VCPU_MASK		0xff
 #define KVM_ARM_IRQ_NUM_SHIFT		0

@@ -77,7 +77,7 @@
 })

 #define PAGE_S2			__pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN)
-#define PAGE_S2_DEVICE		__pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PAGE_S2_XN)
+#define PAGE_S2_DEVICE		__pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)

 #define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)

@@ -325,8 +325,10 @@ struct kvm_vcpu_events {
 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER	1

 /* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_VCPU2_SHIFT		28
+#define KVM_ARM_IRQ_VCPU2_MASK		0xf
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
-#define KVM_ARM_IRQ_TYPE_MASK		0xff
+#define KVM_ARM_IRQ_TYPE_MASK		0xf
 #define KVM_ARM_IRQ_VCPU_SHIFT		16
 #define KVM_ARM_IRQ_VCPU_MASK		0xff
 #define KVM_ARM_IRQ_NUM_SHIFT		0

@@ -193,6 +193,18 @@ void __hyp_text __kvm_flush_vm_context(void)
 {
 	dsb(ishst);
 	__tlbi(alle1is);
-	asm volatile("ic ialluis" : : );
+
+	/*
+	 * VIPT and PIPT caches are not affected by VMID, so no maintenance
+	 * is necessary across a VMID rollover.
+	 *
+	 * VPIPT caches constrain lookup and maintenance to the active VMID,
+	 * so we need to invalidate lines with a stale VMID to avoid an ABA
+	 * race after multiple rollovers.
+	 *
+	 */
+	if (icache_is_vpipt())
+		asm volatile("ic ialluis");
+
 	dsb(ish);
 }

@@ -232,11 +232,25 @@ struct revmap_entry {
 };

 /*
- * We use the top bit of each memslot->arch.rmap entry as a lock bit,
- * and bit 32 as a present flag. The bottom 32 bits are the
- * index in the guest HPT of a HPTE that points to the page.
+ * The rmap array of size number of guest pages is allocated for each memslot.
+ * This array is used to store usage specific information about the guest page.
+ * Below are the encodings of the various possible usage types.
  */
-#define KVMPPC_RMAP_LOCK_BIT	63
+/* Free bits which can be used to define a new usage */
+#define KVMPPC_RMAP_TYPE_MASK	0xff00000000000000
+#define KVMPPC_RMAP_NESTED	0xc000000000000000	/* Nested rmap array */
+#define KVMPPC_RMAP_HPT		0x0100000000000000	/* HPT guest */
+
+/*
+ * rmap usage definition for a hash page table (hpt) guest:
+ * 0x0000080000000000	Lock bit
+ * 0x0000018000000000	RC bits
+ * 0x0000000100000000	Present bit
+ * 0x00000000ffffffff	HPT index bits
+ * The bottom 32 bits are the index in the guest HPT of a HPTE that points to
+ * the page.
+ */
+#define KVMPPC_RMAP_LOCK_BIT	43
 #define KVMPPC_RMAP_RC_SHIFT	32
 #define KVMPPC_RMAP_REFERENCED	(HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
 #define KVMPPC_RMAP_PRESENT	0x100000000ul

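For orientation, a sketch of how one rmap word decodes under the new encoding, assuming the definitions above are in scope (kernel context); the helper names here are hypothetical, not taken from the patch:

/*
 * Decode one memslot->arch.rmap word: the top byte (KVMPPC_RMAP_TYPE_MASK)
 * identifies the usage type, and for an HPT guest the low 32 bits are the
 * index of the HPTE that maps the page.
 */
static inline bool rmap_is_hpt(unsigned long rmap)
{
	return (rmap & KVMPPC_RMAP_TYPE_MASK) == KVMPPC_RMAP_HPT;
}

static inline unsigned long rmap_hpt_index(unsigned long rmap)
{
	/* Only meaningful when the present bit is set. */
	if (!(rmap & KVMPPC_RMAP_PRESENT))
		return 0;
	return rmap & 0xfffffffful;	/* HPT index bits */
}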
@@ -598,6 +598,7 @@ extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
 				     union kvmppc_one_reg *val);
 extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
 				     union kvmppc_one_reg *val);
+extern bool kvmppc_xive_native_supported(void);

 #else
 static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,

@@ -46,7 +46,15 @@ struct xive_irq_data {

 	/* Setup/used by frontend */
 	int target;
+	/*
+	 * saved_p means that there is a queue entry for this interrupt
+	 * in some CPU's queue (not including guest vcpu queues), even
+	 * if P is not set in the source ESB.
+	 * stale_p means that there is no queue entry for this interrupt
+	 * in some CPU's queue, even if P is set in the source ESB.
+	 */
 	bool saved_p;
+	bool stale_p;
 };
 #define XIVE_IRQ_FLAG_STORE_EOI	0x01
 #define XIVE_IRQ_FLAG_LSI	0x02

@@ -127,6 +135,7 @@ extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
 extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
 				       u32 qindex);
 extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+extern bool xive_native_has_queue_state_support(void);

 #else

@@ -1083,9 +1083,11 @@ static int kvmppc_book3s_init(void)
 	if (xics_on_xive()) {
 		kvmppc_xive_init_module();
 		kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
-		kvmppc_xive_native_init_module();
-		kvm_register_device_ops(&kvm_xive_native_ops,
-					KVM_DEV_TYPE_XIVE);
+		if (kvmppc_xive_native_supported()) {
+			kvmppc_xive_native_init_module();
+			kvm_register_device_ops(&kvm_xive_native_ops,
+						KVM_DEV_TYPE_XIVE);
+		}
 	} else
 #endif
 	kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);

@ -1678,7 +1678,14 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
|
|||
*val = get_reg_val(id, vcpu->arch.pspb);
|
||||
break;
|
||||
case KVM_REG_PPC_DPDES:
|
||||
*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
|
||||
/*
|
||||
* On POWER9, where we are emulating msgsndp etc.,
|
||||
* we return 1 bit for each vcpu, which can come from
|
||||
* either vcore->dpdes or doorbell_request.
|
||||
* On POWER8, doorbell_request is 0.
|
||||
*/
|
||||
*val = get_reg_val(id, vcpu->arch.vcore->dpdes |
|
||||
vcpu->arch.doorbell_request);
|
||||
break;
|
||||
case KVM_REG_PPC_VTB:
|
||||
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
|
||||
|
@ -2860,7 +2867,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
|
|||
if (!spin_trylock(&pvc->lock))
|
||||
continue;
|
||||
prepare_threads(pvc);
|
||||
if (!pvc->n_runnable) {
|
||||
if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
|
||||
list_del_init(&pvc->preempt_list);
|
||||
if (pvc->runner == NULL) {
|
||||
pvc->vcore_state = VCORE_INACTIVE;
|
||||
|
@ -2881,15 +2888,20 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
|
|||
spin_unlock(&lp->lock);
|
||||
}
|
||||
|
||||
static bool recheck_signals(struct core_info *cip)
|
||||
static bool recheck_signals_and_mmu(struct core_info *cip)
|
||||
{
|
||||
int sub, i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvmppc_vcore *vc;
|
||||
|
||||
for (sub = 0; sub < cip->n_subcores; ++sub)
|
||||
for_each_runnable_thread(i, vcpu, cip->vc[sub])
|
||||
for (sub = 0; sub < cip->n_subcores; ++sub) {
|
||||
vc = cip->vc[sub];
|
||||
if (!vc->kvm->arch.mmu_ready)
|
||||
return true;
|
||||
for_each_runnable_thread(i, vcpu, vc)
|
||||
if (signal_pending(vcpu->arch.run_task))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -3119,7 +3131,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
|||
local_irq_disable();
|
||||
hard_irq_disable();
|
||||
if (lazy_irq_pending() || need_resched() ||
|
||||
recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) {
|
||||
recheck_signals_and_mmu(&core_info)) {
|
||||
local_irq_enable();
|
||||
vc->vcore_state = VCORE_INACTIVE;
|
||||
/* Unlock all except the primary vcore */
|
||||
|
|
|
@ -99,7 +99,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
|
|||
} else {
|
||||
rev->forw = rev->back = pte_index;
|
||||
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
|
||||
pte_index | KVMPPC_RMAP_PRESENT;
|
||||
pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
|
||||
}
|
||||
unlock_rmap(rmap);
|
||||
}
|
||||
|
|
|
@ -942,6 +942,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
|||
ld r11, VCPU_XIVE_SAVED_STATE(r4)
|
||||
li r9, TM_QW1_OS
|
||||
lwz r8, VCPU_XIVE_CAM_WORD(r4)
|
||||
cmpwi r8, 0
|
||||
beq no_xive
|
||||
li r7, TM_QW1_OS + TM_WORD2
|
||||
mfmsr r0
|
||||
andi. r0, r0, MSR_DR /* in real mode? */
|
||||
|
@ -2831,29 +2833,39 @@ kvm_cede_prodded:
|
|||
kvm_cede_exit:
|
||||
ld r9, HSTATE_KVM_VCPU(r13)
|
||||
#ifdef CONFIG_KVM_XICS
|
||||
/* Abort if we still have a pending escalation */
|
||||
lbz r5, VCPU_XIVE_ESC_ON(r9)
|
||||
cmpwi r5, 0
|
||||
beq 1f
|
||||
li r0, 0
|
||||
stb r0, VCPU_CEDED(r9)
|
||||
1: /* Enable XIVE escalation */
|
||||
li r5, XIVE_ESB_SET_PQ_00
|
||||
mfmsr r0
|
||||
andi. r0, r0, MSR_DR /* in real mode? */
|
||||
beq 1f
|
||||
/* are we using XIVE with single escalation? */
|
||||
ld r10, VCPU_XIVE_ESC_VADDR(r9)
|
||||
cmpdi r10, 0
|
||||
beq 3f
|
||||
ldx r0, r10, r5
|
||||
li r6, XIVE_ESB_SET_PQ_00
|
||||
/*
|
||||
* If we still have a pending escalation, abort the cede,
|
||||
* and we must set PQ to 10 rather than 00 so that we don't
|
||||
* potentially end up with two entries for the escalation
|
||||
* interrupt in the XIVE interrupt queue. In that case
|
||||
* we also don't want to set xive_esc_on to 1 here in
|
||||
* case we race with xive_esc_irq().
|
||||
*/
|
||||
lbz r5, VCPU_XIVE_ESC_ON(r9)
|
||||
cmpwi r5, 0
|
||||
beq 4f
|
||||
li r0, 0
|
||||
stb r0, VCPU_CEDED(r9)
|
||||
li r6, XIVE_ESB_SET_PQ_10
|
||||
b 5f
|
||||
4: li r0, 1
|
||||
stb r0, VCPU_XIVE_ESC_ON(r9)
|
||||
/* make sure store to xive_esc_on is seen before xive_esc_irq runs */
|
||||
sync
|
||||
5: /* Enable XIVE escalation */
|
||||
mfmsr r0
|
||||
andi. r0, r0, MSR_DR /* in real mode? */
|
||||
beq 1f
|
||||
ldx r0, r10, r6
|
||||
b 2f
|
||||
1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
|
||||
cmpdi r10, 0
|
||||
beq 3f
|
||||
ldcix r0, r10, r5
|
||||
ldcix r0, r10, r6
|
||||
2: sync
|
||||
li r0, 1
|
||||
stb r0, VCPU_XIVE_ESC_ON(r9)
|
||||
#endif /* CONFIG_KVM_XICS */
|
||||
3: b guest_exit_cont
|
||||
|
||||
|
|
|
@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
|
|||
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
|
||||
u64 pq;
|
||||
|
||||
if (!tima)
|
||||
/*
|
||||
* Nothing to do if the platform doesn't have a XIVE
|
||||
* or this vCPU doesn't have its own XIVE context
|
||||
* (e.g. because it's not using an in-kernel interrupt controller).
|
||||
*/
|
||||
if (!tima || !vcpu->arch.xive_cam_word)
|
||||
return;
|
||||
|
||||
eieio();
|
||||
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
|
||||
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
|
||||
|
@ -160,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
|
|||
*/
|
||||
vcpu->arch.xive_esc_on = false;
|
||||
|
||||
/* This orders xive_esc_on = false vs. subsequent stale_p = true */
|
||||
smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
|
@ -1113,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
|
|||
vcpu->arch.xive_esc_raddr = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* In single escalation mode, the escalation interrupt is marked so
|
||||
* that EOI doesn't re-enable it, but just sets the stale_p flag to
|
||||
* indicate that the P bit has already been dealt with. However, the
|
||||
* assembly code that enters the guest sets PQ to 00 without clearing
|
||||
* stale_p (because it has no easy way to address it). Hence we have
|
||||
* to adjust stale_p before shutting down the interrupt.
|
||||
*/
|
||||
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
|
||||
struct kvmppc_xive_vcpu *xc, int irq)
|
||||
{
|
||||
struct irq_data *d = irq_get_irq_data(irq);
|
||||
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
|
||||
|
||||
/*
|
||||
* This slightly odd sequence gives the right result
|
||||
* (i.e. stale_p set if xive_esc_on is false) even if
|
||||
* we race with xive_esc_irq() and xive_irq_eoi().
|
||||
*/
|
||||
xd->stale_p = false;
|
||||
smp_mb(); /* paired with smb_wmb in xive_esc_irq */
|
||||
if (!vcpu->arch.xive_esc_on)
|
||||
xd->stale_p = true;
|
||||
}
|
||||
|
||||
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
|
||||
|
@ -1134,20 +1168,28 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
|
|||
/* Mask the VP IPI */
|
||||
xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
|
||||
|
||||
/* Disable the VP */
|
||||
xive_native_disable_vp(xc->vp_id);
|
||||
|
||||
/* Free the queues & associated interrupts */
|
||||
/* Free escalations */
|
||||
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
|
||||
struct xive_q *q = &xc->queues[i];
|
||||
|
||||
/* Free the escalation irq */
|
||||
if (xc->esc_virq[i]) {
|
||||
if (xc->xive->single_escalation)
|
||||
xive_cleanup_single_escalation(vcpu, xc,
|
||||
xc->esc_virq[i]);
|
||||
free_irq(xc->esc_virq[i], vcpu);
|
||||
irq_dispose_mapping(xc->esc_virq[i]);
|
||||
kfree(xc->esc_virq_names[i]);
|
||||
}
|
||||
/* Free the queue */
|
||||
}
|
||||
|
||||
/* Disable the VP */
|
||||
xive_native_disable_vp(xc->vp_id);
|
||||
|
||||
/* Clear the cam word so guest entry won't try to push context */
|
||||
vcpu->arch.xive_cam_word = 0;
|
||||
|
||||
/* Free the queues */
|
||||
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
|
||||
struct xive_q *q = &xc->queues[i];
|
||||
|
||||
xive_native_disable_queue(xc->vp_id, q, i);
|
||||
if (q->qpage) {
|
||||
free_pages((unsigned long)q->qpage,
|
||||
|
|
|
@ -282,6 +282,8 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
|
|||
int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
|
||||
bool single_escalation);
|
||||
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
|
||||
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
|
||||
struct kvmppc_xive_vcpu *xc, int irq);
|
||||
|
||||
#endif /* CONFIG_KVM_XICS */
|
||||
#endif /* _KVM_PPC_BOOK3S_XICS_H */
|
||||
|
|
|
@ -67,20 +67,28 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
|
|||
xc->valid = false;
|
||||
kvmppc_xive_disable_vcpu_interrupts(vcpu);
|
||||
|
||||
/* Disable the VP */
|
||||
xive_native_disable_vp(xc->vp_id);
|
||||
|
||||
/* Free the queues & associated interrupts */
|
||||
/* Free escalations */
|
||||
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
|
||||
/* Free the escalation irq */
|
||||
if (xc->esc_virq[i]) {
|
||||
if (xc->xive->single_escalation)
|
||||
xive_cleanup_single_escalation(vcpu, xc,
|
||||
xc->esc_virq[i]);
|
||||
free_irq(xc->esc_virq[i], vcpu);
|
||||
irq_dispose_mapping(xc->esc_virq[i]);
|
||||
kfree(xc->esc_virq_names[i]);
|
||||
xc->esc_virq[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Free the queue */
|
||||
/* Disable the VP */
|
||||
xive_native_disable_vp(xc->vp_id);
|
||||
|
||||
/* Clear the cam word so guest entry won't try to push context */
|
||||
vcpu->arch.xive_cam_word = 0;
|
||||
|
||||
/* Free the queues */
|
||||
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
|
||||
kvmppc_xive_native_cleanup_queue(vcpu, i);
|
||||
}
|
||||
|
||||
|
@ -1171,6 +1179,11 @@ int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
|
|||
return 0;
|
||||
}
|
||||
|
||||
bool kvmppc_xive_native_supported(void)
|
||||
{
|
||||
return xive_native_has_queue_state_support();
|
||||
}
|
||||
|
||||
static int xive_native_debug_show(struct seq_file *m, void *private)
|
||||
{
|
||||
struct kvmppc_xive *xive = m->private;
|
||||
|
|
|
@ -440,6 +440,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
|
|||
struct kvm_vcpu *vcpu;
|
||||
int err;
|
||||
|
||||
BUILD_BUG_ON_MSG(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0,
|
||||
"struct kvm_vcpu must be at offset 0 for arch usercopy region");
|
||||
|
||||
vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
|
||||
if (!vcpu_e500) {
|
||||
err = -ENOMEM;
|
||||
|
|
|
@ -271,6 +271,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
|||
*/
|
||||
if (inst == KVMPPC_INST_SW_BREAKPOINT) {
|
||||
run->exit_reason = KVM_EXIT_DEBUG;
|
||||
run->debug.arch.status = 0;
|
||||
run->debug.arch.address = kvmppc_get_pc(vcpu);
|
||||
emulated = EMULATE_EXIT_USER;
|
||||
advance = 0;
|
||||
|
|
|
@ -89,12 +89,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
|
|||
rs = get_rs(inst);
|
||||
rt = get_rt(inst);
|
||||
|
||||
/*
|
||||
* if mmio_vsx_tx_sx_enabled == 0, copy data between
|
||||
* VSR[0..31] and memory
|
||||
* if mmio_vsx_tx_sx_enabled == 1, copy data between
|
||||
* VSR[32..63] and memory
|
||||
*/
|
||||
vcpu->arch.mmio_vsx_copy_nums = 0;
|
||||
vcpu->arch.mmio_vsx_offset = 0;
|
||||
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
|
||||
|
|
|
@ -561,7 +561,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
* a POWER9 processor) and the PowerNV platform, as
|
||||
* nested is not yet supported.
|
||||
*/
|
||||
r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE);
|
||||
r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) &&
|
||||
kvmppc_xive_native_supported();
|
||||
break;
|
||||
#endif
|
||||
|
||||
|
|
|
@ -135,7 +135,7 @@ static u32 xive_read_eq(struct xive_q *q, bool just_peek)
|
|||
static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
|
||||
{
|
||||
u32 irq = 0;
|
||||
u8 prio;
|
||||
u8 prio = 0;
|
||||
|
||||
/* Find highest pending priority */
|
||||
while (xc->pending_prio != 0) {
|
||||
|
@ -148,8 +148,19 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
|
|||
irq = xive_read_eq(&xc->queue[prio], just_peek);
|
||||
|
||||
/* Found something ? That's it */
|
||||
if (irq)
|
||||
break;
|
||||
if (irq) {
|
||||
if (just_peek || irq_to_desc(irq))
|
||||
break;
|
||||
/*
|
||||
* We should never get here; if we do then we must
|
||||
* have failed to synchronize the interrupt properly
|
||||
* when shutting it down.
|
||||
*/
|
||||
pr_crit("xive: got interrupt %d without descriptor, dropping\n",
|
||||
irq);
|
||||
WARN_ON(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Clear pending bits */
|
||||
xc->pending_prio &= ~(1 << prio);
|
||||
|
@ -307,6 +318,7 @@ static void xive_do_queue_eoi(struct xive_cpu *xc)
|
|||
*/
|
||||
static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
|
||||
{
|
||||
xd->stale_p = false;
|
||||
/* If the XIVE supports the new "store EOI facility, use it */
|
||||
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
|
||||
xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
|
||||
|
@ -350,7 +362,7 @@ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
|
|||
}
|
||||
}
|
||||
|
||||
/* irq_chip eoi callback */
|
||||
/* irq_chip eoi callback, called with irq descriptor lock held */
|
||||
static void xive_irq_eoi(struct irq_data *d)
|
||||
{
|
||||
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
|
||||
|
@ -366,6 +378,8 @@ static void xive_irq_eoi(struct irq_data *d)
|
|||
if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
|
||||
!(xd->flags & XIVE_IRQ_NO_EOI))
|
||||
xive_do_source_eoi(irqd_to_hwirq(d), xd);
|
||||
else
|
||||
xd->stale_p = true;
|
||||
|
||||
/*
|
||||
* Clear saved_p to indicate that it's no longer occupying
|
||||
|
@ -397,11 +411,16 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd,
|
|||
*/
|
||||
if (mask) {
|
||||
val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
|
||||
xd->saved_p = !!(val & XIVE_ESB_VAL_P);
|
||||
} else if (xd->saved_p)
|
||||
if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
|
||||
xd->saved_p = true;
|
||||
xd->stale_p = false;
|
||||
} else if (xd->saved_p) {
|
||||
xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
|
||||
else
|
||||
xd->saved_p = false;
|
||||
} else {
|
||||
xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
|
||||
xd->stale_p = false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -541,6 +560,8 @@ static unsigned int xive_irq_startup(struct irq_data *d)
|
|||
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
|
||||
int target, rc;
|
||||
|
||||
xd->saved_p = false;
|
||||
xd->stale_p = false;
|
||||
pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
|
||||
d->irq, hw_irq, d);
|
||||
|
||||
|
@ -587,6 +608,7 @@ static unsigned int xive_irq_startup(struct irq_data *d)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* called with irq descriptor lock held */
|
||||
static void xive_irq_shutdown(struct irq_data *d)
|
||||
{
|
||||
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
|
||||
|
@ -601,16 +623,6 @@ static void xive_irq_shutdown(struct irq_data *d)
|
|||
/* Mask the interrupt at the source */
|
||||
xive_do_source_set_mask(xd, true);
|
||||
|
||||
/*
|
||||
* The above may have set saved_p. We clear it otherwise it
|
||||
* will prevent re-enabling later on. It is ok to forget the
|
||||
* fact that the interrupt might be in a queue because we are
|
||||
* accounting that already in xive_dec_target_count() and will
|
||||
* be re-routing it to a new queue with proper accounting when
|
||||
* it's started up again
|
||||
*/
|
||||
xd->saved_p = false;
|
||||
|
||||
/*
|
||||
* Mask the interrupt in HW in the IVT/EAS and set the number
|
||||
* to be the "bad" IRQ number
|
||||
|
@ -797,6 +809,10 @@ static int xive_irq_retrigger(struct irq_data *d)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Caller holds the irq descriptor lock, so this won't be called
|
||||
* concurrently with xive_get_irqchip_state on the same interrupt.
|
||||
*/
|
||||
static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
||||
{
|
||||
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
|
||||
|
@ -820,6 +836,10 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
|||
|
||||
/* Set it to PQ=10 state to prevent further sends */
|
||||
pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
|
||||
if (!xd->stale_p) {
|
||||
xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
|
||||
xd->stale_p = !xd->saved_p;
|
||||
}
|
||||
|
||||
/* No target ? nothing to do */
|
||||
if (xd->target == XIVE_INVALID_TARGET) {
|
||||
|
@ -827,7 +847,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
|||
* An untargetted interrupt should have been
|
||||
* also masked at the source
|
||||
*/
|
||||
WARN_ON(pq & 2);
|
||||
WARN_ON(xd->saved_p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -847,9 +867,8 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
|||
* This saved_p is cleared by the host EOI, when we know
|
||||
* for sure the queue slot is no longer in use.
|
||||
*/
|
||||
if (pq & 2) {
|
||||
pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
|
||||
xd->saved_p = true;
|
||||
if (xd->saved_p) {
|
||||
xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
|
||||
|
||||
/*
|
||||
* Sync the XIVE source HW to ensure the interrupt
|
||||
|
@ -862,8 +881,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
|||
*/
|
||||
if (xive_ops->sync_source)
|
||||
xive_ops->sync_source(hw_irq);
|
||||
} else
|
||||
xd->saved_p = false;
|
||||
}
|
||||
} else {
|
||||
irqd_clr_forwarded_to_vcpu(d);
|
||||
|
||||
|
@ -914,6 +932,23 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Called with irq descriptor lock held. */
|
||||
static int xive_get_irqchip_state(struct irq_data *data,
|
||||
enum irqchip_irq_state which, bool *state)
|
||||
{
|
||||
struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
|
||||
|
||||
switch (which) {
|
||||
case IRQCHIP_STATE_ACTIVE:
|
||||
*state = !xd->stale_p &&
|
||||
(xd->saved_p ||
|
||||
!!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P));
|
||||
return 0;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
static struct irq_chip xive_irq_chip = {
|
||||
.name = "XIVE-IRQ",
|
||||
.irq_startup = xive_irq_startup,
|
||||
|
@ -925,6 +960,7 @@ static struct irq_chip xive_irq_chip = {
|
|||
.irq_set_type = xive_irq_set_type,
|
||||
.irq_retrigger = xive_irq_retrigger,
|
||||
.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
|
||||
.irq_get_irqchip_state = xive_get_irqchip_state,
|
||||
};
|
||||
|
||||
bool is_xive_irq(struct irq_chip *chip)
|
||||
|
@ -1337,6 +1373,11 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
|
|||
raw_spin_lock(&desc->lock);
|
||||
xd = irq_desc_get_handler_data(desc);
|
||||
|
||||
/*
|
||||
* Clear saved_p to indicate that it's no longer pending
|
||||
*/
|
||||
xd->saved_p = false;
|
||||
|
||||
/*
|
||||
* For LSIs, we EOI, this will cause a resend if it's
|
||||
* still asserted. Otherwise do an MSI retrigger.
|
||||
|
|
|
@ -800,6 +800,13 @@ int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
|
||||
|
||||
bool xive_native_has_queue_state_support(void)
|
||||
{
|
||||
return opal_check_token(OPAL_XIVE_GET_QUEUE_STATE) &&
|
||||
opal_check_token(OPAL_XIVE_SET_QUEUE_STATE);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support);
|
||||
|
||||
int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
|
||||
{
|
||||
__be64 state;
|
||||
|
|
|
@ -231,6 +231,12 @@ struct kvm_guest_debug_arch {
|
|||
#define KVM_SYNC_GSCB (1UL << 9)
|
||||
#define KVM_SYNC_BPBC (1UL << 10)
|
||||
#define KVM_SYNC_ETOKEN (1UL << 11)
|
||||
|
||||
#define KVM_SYNC_S390_VALID_FIELDS \
|
||||
(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
|
||||
KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
|
||||
KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
|
||||
|
||||
/* length and alignment of the sdnx as a power of two */
|
||||
#define SDNXC 8
|
||||
#define SDNXL (1UL << SDNXC)
|
||||
|
|
|
@ -4000,6 +4000,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|||
if (kvm_run->immediate_exit)
|
||||
return -EINTR;
|
||||
|
||||
if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
|
||||
kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
|
||||
return -EINVAL;
|
||||
|
||||
vcpu_load(vcpu);
|
||||
|
||||
if (guestdbg_exit_pending(vcpu)) {
|
||||
|
@ -4257,7 +4261,7 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
|
|||
const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
|
||||
| KVM_S390_MEMOP_F_CHECK_ONLY;
|
||||
|
||||
if (mop->flags & ~supported_flags)
|
||||
if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
|
||||
return -EINVAL;
|
||||
|
||||
if (mop->size > MEM_OP_MAX_SIZE)
|
||||
|
|
|
@ -229,7 +229,7 @@ struct x86_emulate_ops {
|
|||
int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt,
|
||||
const char *smstate);
|
||||
void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt);
|
||||
|
||||
int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
|
||||
};
|
||||
|
||||
typedef u32 __attribute__((vector_size(16))) sse128_t;
|
||||
|
@ -429,6 +429,7 @@ enum x86_intercept {
|
|||
x86_intercept_ins,
|
||||
x86_intercept_out,
|
||||
x86_intercept_outs,
|
||||
x86_intercept_xsetbv,
|
||||
|
||||
nr_x86_intercepts
|
||||
};
|
||||
|
|
|
@ -718,7 +718,7 @@ struct kvm_vcpu_arch {
|
|||
|
||||
/* Cache MMIO info */
|
||||
u64 mmio_gva;
|
||||
unsigned access;
|
||||
unsigned mmio_access;
|
||||
gfn_t mmio_gfn;
|
||||
u64 mmio_gen;
|
||||
|
||||
|
@ -1072,7 +1072,7 @@ struct kvm_x86_ops {
|
|||
|
||||
void (*run)(struct kvm_vcpu *vcpu);
|
||||
int (*handle_exit)(struct kvm_vcpu *vcpu);
|
||||
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
|
||||
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
|
||||
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
|
||||
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
|
||||
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
|
||||
|
@ -1211,6 +1211,8 @@ struct kvm_x86_ops {
|
|||
uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
|
||||
|
||||
bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
|
||||
|
||||
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
|
||||
};
|
||||
|
||||
struct kvm_arch_async_pf {
|
||||
|
@ -1328,8 +1330,10 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
|
|||
|
||||
void kvm_enable_efer_bits(u64);
|
||||
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
|
||||
int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
|
||||
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
|
||||
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
|
||||
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
|
||||
int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
|
||||
int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
|
||||
|
||||
struct x86_emulate_ctxt;
|
||||
|
||||
|
@ -1583,6 +1587,13 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
|
|||
void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm_lapic_irq *irq);
|
||||
|
||||
static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
|
||||
{
|
||||
/* We can only post Fixed and LowPrio IRQs */
|
||||
return (irq->delivery_mode == dest_Fixed ||
|
||||
irq->delivery_mode == dest_LowestPrio);
|
||||
}
|
||||
|
||||
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_x86_ops->vcpu_blocking)
|
||||
|
|
|
@ -562,6 +562,20 @@ enum vm_instruction_error_number {
|
|||
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28,
|
||||
};
|
||||
|
||||
/*
|
||||
* VM-instruction errors that can be encountered on VM-Enter, used to trace
|
||||
* nested VM-Enter failures reported by hardware. Errors unique to VM-Enter
|
||||
* from a SMI Transfer Monitor are not included as things have gone seriously
|
||||
* sideways if we get one of those...
|
||||
*/
|
||||
#define VMX_VMENTER_INSTRUCTION_ERRORS \
|
||||
{ VMXERR_VMLAUNCH_NONCLEAR_VMCS, "VMLAUNCH_NONCLEAR_VMCS" }, \
|
||||
{ VMXERR_VMRESUME_NONLAUNCHED_VMCS, "VMRESUME_NONLAUNCHED_VMCS" }, \
|
||||
{ VMXERR_VMRESUME_AFTER_VMXOFF, "VMRESUME_AFTER_VMXOFF" }, \
|
||||
{ VMXERR_ENTRY_INVALID_CONTROL_FIELD, "VMENTRY_INVALID_CONTROL_FIELD" }, \
|
||||
{ VMXERR_ENTRY_INVALID_HOST_STATE_FIELD, "VMENTRY_INVALID_HOST_STATE_FIELD" }, \
|
||||
{ VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS, "VMENTRY_EVENTS_BLOCKED_BY_MOV_SS" }
|
||||
|
||||
enum vmx_l1d_flush_state {
|
||||
VMENTER_L1D_FLUSH_AUTO,
|
||||
VMENTER_L1D_FLUSH_NEVER,
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#define EXIT_REASON_EXCEPTION_NMI 0
|
||||
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
|
||||
#define EXIT_REASON_TRIPLE_FAULT 2
|
||||
#define EXIT_REASON_INIT_SIGNAL 3
|
||||
|
||||
#define EXIT_REASON_PENDING_INTERRUPT 7
|
||||
#define EXIT_REASON_NMI_WINDOW 8
|
||||
|
@ -90,6 +91,7 @@
|
|||
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
|
||||
{ EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
|
||||
{ EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
|
||||
{ EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \
|
||||
{ EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
|
||||
{ EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
|
||||
{ EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
|
||||
|
|
|
@ -502,16 +502,6 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
|
|||
__send_ipi_mask(local_mask, vector);
|
||||
}
|
||||
|
||||
static void kvm_send_ipi_allbutself(int vector)
|
||||
{
|
||||
kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
|
||||
}
|
||||
|
||||
static void kvm_send_ipi_all(int vector)
|
||||
{
|
||||
__send_ipi_mask(cpu_online_mask, vector);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the IPI entry points
|
||||
*/
|
||||
|
@ -519,8 +509,6 @@ static void kvm_setup_pv_ipi(void)
|
|||
{
|
||||
apic->send_IPI_mask = kvm_send_ipi_mask;
|
||||
apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
|
||||
apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
|
||||
apic->send_IPI_all = kvm_send_ipi_all;
|
||||
pr_info("KVM setup pv IPIs\n");
|
||||
}
|
||||
|
||||
|
|
|
@ -392,6 +392,12 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
|
|||
|
||||
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
|
||||
cpuid_mask(&entry->edx, CPUID_7_EDX);
|
||||
if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
|
||||
entry->edx |= F(SPEC_CTRL);
|
||||
if (boot_cpu_has(X86_FEATURE_STIBP))
|
||||
entry->edx |= F(INTEL_STIBP);
|
||||
if (boot_cpu_has(X86_FEATURE_SSBD))
|
||||
entry->edx |= F(SPEC_CTRL_SSBD);
|
||||
/*
|
||||
* We emulate ARCH_CAPABILITIES in software even
|
||||
* if the host doesn't support it.
|
||||
|
@ -729,18 +735,23 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
|
|||
g_phys_as = phys_as;
|
||||
entry->eax = g_phys_as | (virt_as << 8);
|
||||
entry->edx = 0;
|
||||
/*
|
||||
* IBRS, IBPB and VIRT_SSBD aren't necessarily present in
|
||||
* hardware cpuid
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
|
||||
entry->ebx |= F(AMD_IBPB);
|
||||
if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
|
||||
entry->ebx |= F(AMD_IBRS);
|
||||
if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
|
||||
entry->ebx |= F(VIRT_SSBD);
|
||||
entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
|
||||
cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
|
||||
/*
|
||||
* AMD has separate bits for each SPEC_CTRL bit.
|
||||
* arch/x86/kernel/cpu/bugs.c is kind enough to
|
||||
* record that in cpufeatures so use them.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_IBPB))
|
||||
entry->ebx |= F(AMD_IBPB);
|
||||
if (boot_cpu_has(X86_FEATURE_IBRS))
|
||||
entry->ebx |= F(AMD_IBRS);
|
||||
if (boot_cpu_has(X86_FEATURE_STIBP))
|
||||
entry->ebx |= F(AMD_STIBP);
|
||||
if (boot_cpu_has(X86_FEATURE_SSBD))
|
||||
entry->ebx |= F(AMD_SSBD);
|
||||
if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
|
||||
entry->ebx |= F(AMD_SSB_NO);
|
||||
/*
|
||||
* The preference is to use SPEC CTRL MSR instead of the
|
||||
* VIRT_SPEC MSR.
|
||||
|
|
|
@ -4156,6 +4156,20 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
|
|||
return rc;
|
||||
}
|
||||
|
||||
static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
u32 eax, ecx, edx;
|
||||
|
||||
eax = reg_read(ctxt, VCPU_REGS_RAX);
|
||||
edx = reg_read(ctxt, VCPU_REGS_RDX);
|
||||
ecx = reg_read(ctxt, VCPU_REGS_RCX);
|
||||
|
||||
if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
|
||||
return emulate_gp(ctxt, 0);
|
||||
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static bool valid_cr(int nr)
|
||||
{
|
||||
switch (nr) {
|
||||
|
@ -4409,6 +4423,12 @@ static const struct opcode group7_rm1[] = {
|
|||
N, N, N, N, N, N,
|
||||
};
|
||||
|
||||
static const struct opcode group7_rm2[] = {
|
||||
N,
|
||||
II(ImplicitOps | Priv, em_xsetbv, xsetbv),
|
||||
N, N, N, N, N, N,
|
||||
};
|
||||
|
||||
static const struct opcode group7_rm3[] = {
|
||||
DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
|
||||
II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
|
||||
|
@ -4498,7 +4518,8 @@ static const struct group_dual group7 = { {
|
|||
}, {
|
||||
EXT(0, group7_rm0),
|
||||
EXT(0, group7_rm1),
|
||||
N, EXT(0, group7_rm3),
|
||||
EXT(0, group7_rm2),
|
||||
EXT(0, group7_rm3),
|
||||
II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
|
||||
II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
|
||||
EXT(0, group7_rm7),
|
||||
|
@ -5144,7 +5165,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
|
|||
else {
|
||||
rc = __do_insn_fetch_bytes(ctxt, 1);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
goto done;
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
|
@ -5395,6 +5416,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
|
|||
ctxt->memopp->addr.mem.ea + ctxt->_eip);
|
||||
|
||||
done:
|
||||
if (rc == X86EMUL_PROPAGATE_FAULT)
|
||||
ctxt->have_exception = true;
|
||||
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -1198,10 +1198,8 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
|
||||
|
||||
static void apic_send_ipi(struct kvm_lapic *apic)
|
||||
static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
|
||||
{
|
||||
u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR);
|
||||
u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2);
|
||||
struct kvm_lapic_irq irq;
|
||||
|
||||
irq.vector = icr_low & APIC_VECTOR_MASK;
|
||||
|
@ -1914,8 +1912,9 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
|
|||
}
|
||||
case APIC_ICR:
|
||||
/* No delay here, so we always clear the pending bit */
|
||||
kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
|
||||
apic_send_ipi(apic);
|
||||
val &= ~(1 << 12);
|
||||
apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
|
||||
kvm_lapic_set_reg(apic, APIC_ICR, val);
|
||||
break;
|
||||
|
||||
case APIC_ICR2:
|
||||
|
@ -2707,11 +2706,14 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
|
|||
return;
|
||||
|
||||
/*
|
||||
* INITs are latched while in SMM. Because an SMM CPU cannot
|
||||
* be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
|
||||
* and delay processing of INIT until the next RSM.
|
||||
* INITs are latched while CPU is in specific states
|
||||
* (SMM, VMX non-root mode, SVM with GIF=0).
|
||||
* Because a CPU cannot be in these states immediately
|
||||
* after it has processed an INIT signal (and thus in
|
||||
* KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
|
||||
* and leave the INIT pending.
|
||||
*/
|
||||
if (is_smm(vcpu)) {
|
||||
if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) {
|
||||
WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
|
||||
if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
|
||||
clear_bit(KVM_APIC_SIPI, &apic->pending_events);
|
||||
|
|
|
@ -214,6 +214,7 @@ static u64 __read_mostly shadow_accessed_mask;
|
|||
static u64 __read_mostly shadow_dirty_mask;
|
||||
static u64 __read_mostly shadow_mmio_mask;
|
||||
static u64 __read_mostly shadow_mmio_value;
|
||||
static u64 __read_mostly shadow_mmio_access_mask;
|
||||
static u64 __read_mostly shadow_present_mask;
|
||||
static u64 __read_mostly shadow_me_mask;
|
||||
|
||||
|
@ -299,14 +300,21 @@ static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
|
|||
kvm_flush_remote_tlbs_with_range(kvm, &range);
|
||||
}
|
||||
|
||||
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
|
||||
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask)
|
||||
{
|
||||
BUG_ON((u64)(unsigned)access_mask != access_mask);
|
||||
BUG_ON((mmio_mask & mmio_value) != mmio_value);
|
||||
shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK;
|
||||
shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
|
||||
shadow_mmio_access_mask = access_mask;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
|
||||
|
||||
static bool is_mmio_spte(u64 spte)
|
||||
{
|
||||
return (spte & shadow_mmio_mask) == shadow_mmio_value;
|
||||
}
|
||||
|
||||
static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
|
||||
{
|
||||
return sp->role.ad_disabled;
|
||||
|
@ -314,19 +322,19 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
|
|||
|
||||
static inline bool spte_ad_enabled(u64 spte)
|
||||
{
|
||||
MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
|
||||
MMU_WARN_ON(is_mmio_spte(spte));
|
||||
return !(spte & shadow_acc_track_value);
|
||||
}
|
||||
|
||||
static inline u64 spte_shadow_accessed_mask(u64 spte)
|
||||
{
|
||||
MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
|
||||
MMU_WARN_ON(is_mmio_spte(spte));
|
||||
return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
|
||||
}
|
||||
|
||||
static inline u64 spte_shadow_dirty_mask(u64 spte)
|
||||
{
|
||||
MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
|
||||
MMU_WARN_ON(is_mmio_spte(spte));
|
||||
return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
|
||||
}
|
||||
|
||||
|
@ -389,7 +397,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
|
|||
u64 mask = generation_mmio_spte_mask(gen);
|
||||
u64 gpa = gfn << PAGE_SHIFT;
|
||||
|
||||
access &= ACC_WRITE_MASK | ACC_USER_MASK;
|
||||
access &= shadow_mmio_access_mask;
|
||||
mask |= shadow_mmio_value | access;
|
||||
mask |= gpa | shadow_nonpresent_or_rsvd_mask;
|
||||
mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
|
||||
|
@ -401,11 +409,6 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
|
|||
mmu_spte_set(sptep, mask);
|
||||
}
|
||||
|
||||
static bool is_mmio_spte(u64 spte)
|
||||
{
|
||||
return (spte & shadow_mmio_mask) == shadow_mmio_value;
|
||||
}
|
||||
|
||||
static gfn_t get_mmio_spte_gfn(u64 spte)
|
||||
{
|
||||
u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask;
|
||||
|
@ -418,8 +421,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte)
|
|||
|
||||
static unsigned get_mmio_spte_access(u64 spte)
|
||||
{
|
||||
u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask;
|
||||
return (spte & ~mask) & ~PAGE_MASK;
|
||||
return spte & shadow_mmio_access_mask;
|
||||
}
|
||||
|
||||
static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
|
||||
|
@ -3302,7 +3304,8 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
|
|||
}
|
||||
|
||||
if (unlikely(is_noslot_pfn(pfn)))
|
||||
vcpu_cache_mmio_info(vcpu, gva, gfn, access);
|
||||
vcpu_cache_mmio_info(vcpu, gva, gfn,
|
||||
access & shadow_mmio_access_mask);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -5611,13 +5614,13 @@ slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|||
PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
|
||||
}
|
||||
|
||||
static void free_mmu_pages(struct kvm_vcpu *vcpu)
|
||||
static void free_mmu_pages(struct kvm_mmu *mmu)
|
||||
{
|
||||
free_page((unsigned long)vcpu->arch.mmu->pae_root);
|
||||
free_page((unsigned long)vcpu->arch.mmu->lm_root);
|
||||
free_page((unsigned long)mmu->pae_root);
|
||||
free_page((unsigned long)mmu->lm_root);
|
||||
}
|
||||
|
||||
static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
|
||||
static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
|
||||
{
|
||||
struct page *page;
|
||||
int i;
|
||||
|
@ -5638,9 +5641,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
|
|||
if (!page)
|
||||
return -ENOMEM;
|
||||
|
||||
vcpu->arch.mmu->pae_root = page_address(page);
|
||||
mmu->pae_root = page_address(page);
|
||||
for (i = 0; i < 4; ++i)
|
||||
vcpu->arch.mmu->pae_root[i] = INVALID_PAGE;
|
||||
mmu->pae_root[i] = INVALID_PAGE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -5648,6 +5651,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
|
|||
int kvm_mmu_create(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
uint i;
|
||||
int ret;
|
||||
|
||||
vcpu->arch.mmu = &vcpu->arch.root_mmu;
|
||||
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
|
||||
|
@ -5665,7 +5669,19 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
|
|||
vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
|
||||
|
||||
vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
|
||||
return alloc_mmu_pages(vcpu);
|
||||
|
||||
ret = alloc_mmu_pages(vcpu, &vcpu->arch.guest_mmu);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = alloc_mmu_pages(vcpu, &vcpu->arch.root_mmu);
|
||||
if (ret)
|
||||
goto fail_allocate_root;
|
||||
|
||||
return ret;
|
||||
fail_allocate_root:
|
||||
free_mmu_pages(&vcpu->arch.guest_mmu);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
@ -6094,7 +6110,7 @@ static void kvm_set_mmio_spte_mask(void)
|
|||
if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
|
||||
mask &= ~1ull;
|
||||
|
||||
kvm_mmu_set_mmio_spte_mask(mask, mask);
|
||||
kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
|
||||
}
|
||||
|
||||
int kvm_mmu_module_init(void)
|
||||
|
@ -6168,7 +6184,8 @@ unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
|
|||
void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_mmu_unload(vcpu);
|
||||
free_mmu_pages(vcpu);
|
||||
free_mmu_pages(&vcpu->arch.root_mmu);
|
||||
free_mmu_pages(&vcpu->arch.guest_mmu);
|
||||
mmu_free_memory_caches(vcpu);
|
||||
}
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ static inline u64 rsvd_bits(int s, int e)
|
|||
return ((1ULL << (e - s + 1)) - 1) << s;
|
||||
}
|
||||
|
||||
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
|
||||
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask);
|
||||
|
||||
void
|
||||
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
|
||||
|
|
|
@ -68,10 +68,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
|
|||
#define SEG_TYPE_LDT 2
|
||||
#define SEG_TYPE_BUSY_TSS16 3
|
||||
|
||||
#define SVM_FEATURE_NPT (1 << 0)
|
||||
#define SVM_FEATURE_LBRV (1 << 1)
|
||||
#define SVM_FEATURE_SVML (1 << 2)
|
||||
#define SVM_FEATURE_NRIP (1 << 3)
|
||||
#define SVM_FEATURE_TSC_RATE (1 << 4)
|
||||
#define SVM_FEATURE_VMCB_CLEAN (1 << 5)
|
||||
#define SVM_FEATURE_FLUSH_ASID (1 << 6)
|
||||
|
@ -770,7 +768,7 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
|
|||
|
||||
}
|
||||
|
||||
static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||
static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
|
@ -779,18 +777,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
|||
svm->next_rip = svm->vmcb->control.next_rip;
|
||||
}
|
||||
|
||||
if (!svm->next_rip) {
|
||||
if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
|
||||
EMULATE_DONE)
|
||||
printk(KERN_DEBUG "%s: NOP\n", __func__);
|
||||
return;
|
||||
}
|
||||
if (!svm->next_rip)
|
||||
return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
|
||||
|
||||
if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
|
||||
printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
|
||||
__func__, kvm_rip_read(vcpu), svm->next_rip);
|
||||
|
||||
kvm_rip_write(vcpu, svm->next_rip);
|
||||
svm_set_interrupt_shadow(vcpu, 0);
|
||||
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
||||
static void svm_queue_exception(struct kvm_vcpu *vcpu)
|
||||
|
@ -821,7 +818,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
|
|||
* raises a fault that is not intercepted. Still better than
|
||||
* failing in all cases.
|
||||
*/
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
(void)skip_emulated_instruction(&svm->vcpu);
|
||||
rip = kvm_rip_read(&svm->vcpu);
|
||||
svm->int3_rip = rip + svm->vmcb->save.cs.base;
|
||||
svm->int3_injected = rip - old_rip;
|
||||
|
@ -1269,11 +1266,11 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
|
|||
pause_filter_count_grow,
|
||||
pause_filter_count_max);
|
||||
|
||||
if (control->pause_filter_count != old)
|
||||
if (control->pause_filter_count != old) {
|
||||
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
|
||||
|
||||
trace_kvm_ple_window_grow(vcpu->vcpu_id,
|
||||
control->pause_filter_count, old);
|
||||
trace_kvm_ple_window_update(vcpu->vcpu_id,
|
||||
control->pause_filter_count, old);
|
||||
}
|
||||
}
|
||||
|
||||
static void shrink_ple_window(struct kvm_vcpu *vcpu)
|
||||
|
@ -1287,11 +1284,11 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
|
|||
pause_filter_count,
|
||||
pause_filter_count_shrink,
|
||||
pause_filter_count);
|
||||
if (control->pause_filter_count != old)
|
||||
if (control->pause_filter_count != old) {
|
||||
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
|
||||
|
||||
trace_kvm_ple_window_shrink(vcpu->vcpu_id,
|
||||
control->pause_filter_count, old);
|
||||
trace_kvm_ple_window_update(vcpu->vcpu_id,
|
||||
control->pause_filter_count, old);
|
||||
}
|
||||
}
|
||||
|
||||
static __init int svm_hardware_setup(void)
|
||||
|
@ -2136,6 +2133,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
|||
struct page *nested_msrpm_pages;
|
||||
int err;
|
||||
|
||||
BUILD_BUG_ON_MSG(offsetof(struct vcpu_svm, vcpu) != 0,
|
||||
"struct kvm_vcpu must be at offset 0 for arch usercopy region");
|
||||
|
||||
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
|
||||
if (!svm) {
|
||||
err = -ENOMEM;
|
||||
|
@ -2903,13 +2903,11 @@ static int nop_on_interception(struct vcpu_svm *svm)
|
|||
|
||||
static int halt_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
|
||||
return kvm_emulate_halt(&svm->vcpu);
|
||||
}
|
||||
|
||||
static int vmmcall_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
return kvm_emulate_hypercall(&svm->vcpu);
|
||||
}
|
||||
|
||||
|
@ -3588,9 +3586,9 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
|
|||
mark_all_dirty(svm->vmcb);
|
||||
}
|
||||
|
||||
static bool nested_svm_vmrun(struct vcpu_svm *svm)
|
||||
static int nested_svm_vmrun(struct vcpu_svm *svm)
|
||||
{
|
||||
int rc;
|
||||
int ret;
|
||||
struct vmcb *nested_vmcb;
|
||||
struct vmcb *hsave = svm->nested.hsave;
|
||||
struct vmcb *vmcb = svm->vmcb;
|
||||
|
@ -3599,13 +3597,16 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
|
|||
|
||||
vmcb_gpa = svm->vmcb->save.rax;
|
||||
|
||||
rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
|
||||
if (rc) {
|
||||
if (rc == -EINVAL)
|
||||
kvm_inject_gp(&svm->vcpu, 0);
|
||||
return false;
|
||||
ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
|
||||
if (ret == -EINVAL) {
|
||||
kvm_inject_gp(&svm->vcpu, 0);
|
||||
return 1;
|
||||
} else if (ret) {
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
|
||||
ret = kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
|
||||
nested_vmcb = map.hva;
|
||||
|
||||
if (!nested_vmcb_checks(nested_vmcb)) {
|
||||
|
@ -3616,7 +3617,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
|
|||
|
||||
kvm_vcpu_unmap(&svm->vcpu, &map, true);
|
||||
|
||||
return false;
|
||||
return ret;
|
||||
}
|
||||
|
||||
trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
|
||||
|
@ -3660,7 +3661,16 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
|
|||
|
||||
enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
|
||||
|
||||
return true;
|
||||
if (!nested_svm_vmrun_msrpm(svm)) {
|
||||
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
|
||||
svm->vmcb->control.exit_code_hi = 0;
|
||||
svm->vmcb->control.exit_info_1 = 0;
|
||||
svm->vmcb->control.exit_info_2 = 0;
|
||||
|
||||
nested_svm_vmexit(svm);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
|
||||
|
@ -3697,7 +3707,6 @@ static int vmload_interception(struct vcpu_svm *svm)
|
|||
|
||||
nested_vmcb = map.hva;
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
ret = kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
|
||||
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
|
||||
|
@ -3724,7 +3733,6 @@ static int vmsave_interception(struct vcpu_svm *svm)
|
|||
|
||||
nested_vmcb = map.hva;
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
ret = kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
|
||||
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
|
||||
|
@ -3738,27 +3746,7 @@ static int vmrun_interception(struct vcpu_svm *svm)
|
|||
if (nested_svm_check_permissions(svm))
|
||||
return 1;
|
||||
|
||||
/* Save rip after vmrun instruction */
|
||||
kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
|
||||
|
||||
if (!nested_svm_vmrun(svm))
|
||||
return 1;
|
||||
|
||||
if (!nested_svm_vmrun_msrpm(svm))
|
||||
goto failed;
|
||||
|
||||
return 1;
|
||||
|
||||
failed:
|
||||
|
||||
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
|
||||
svm->vmcb->control.exit_code_hi = 0;
|
||||
svm->vmcb->control.exit_info_1 = 0;
|
||||
svm->vmcb->control.exit_info_2 = 0;
|
||||
|
||||
nested_svm_vmexit(svm);
|
||||
|
||||
return 1;
|
||||
return nested_svm_vmrun(svm);
|
||||
}
|
||||
|
||||
static int stgi_interception(struct vcpu_svm *svm)
|
||||
|
@ -3775,7 +3763,6 @@ static int stgi_interception(struct vcpu_svm *svm)
|
|||
if (vgif_enabled(svm))
|
||||
clr_intercept(svm, INTERCEPT_STGI);
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
ret = kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
|
||||
|
||||
|
@ -3791,7 +3778,6 @@ static int clgi_interception(struct vcpu_svm *svm)
|
|||
if (nested_svm_check_permissions(svm))
|
||||
return 1;
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
ret = kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
|
||||
disable_gif(svm);
|
||||
|
@ -3816,7 +3802,6 @@ static int invlpga_interception(struct vcpu_svm *svm)
|
|||
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
|
||||
kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
|
||||
|
@ -3839,7 +3824,6 @@ static int xsetbv_interception(struct vcpu_svm *svm)
|
|||
u32 index = kvm_rcx_read(&svm->vcpu);
|
||||
|
||||
if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
|
||||
|
@ -3898,25 +3882,29 @@ static int task_switch_interception(struct vcpu_svm *svm)
|
|||
if (reason != TASK_SWITCH_GATE ||
|
||||
int_type == SVM_EXITINTINFO_TYPE_SOFT ||
|
||||
(int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
|
||||
(int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
(int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
|
||||
if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
|
||||
int_vec = -1;
|
||||
|
||||
if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
|
||||
has_error_code, error_code) == EMULATE_FAIL) {
|
||||
svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
||||
svm->vcpu.run->internal.ndata = 0;
|
||||
return 0;
|
||||
}
|
||||
has_error_code, error_code) == EMULATE_FAIL)
|
||||
goto fail;
|
||||
|
||||
return 1;
|
||||
|
||||
fail:
|
||||
svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
||||
svm->vcpu.run->internal.ndata = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cpuid_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
|
||||
return kvm_emulate_cpuid(&svm->vcpu);
|
||||
}
|
||||
|
||||
|
@ -4232,23 +4220,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
|
||||
static int rdmsr_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
u32 ecx = kvm_rcx_read(&svm->vcpu);
|
||||
struct msr_data msr_info;
|
||||
|
||||
msr_info.index = ecx;
|
||||
msr_info.host_initiated = false;
|
||||
if (svm_get_msr(&svm->vcpu, &msr_info)) {
|
||||
trace_kvm_msr_read_ex(ecx);
|
||||
kvm_inject_gp(&svm->vcpu, 0);
|
||||
return 1;
|
||||
} else {
|
||||
trace_kvm_msr_read(ecx, msr_info.data);
|
||||
|
||||
kvm_rax_write(&svm->vcpu, msr_info.data & 0xffffffff);
|
||||
kvm_rdx_write(&svm->vcpu, msr_info.data >> 32);
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
return kvm_emulate_rdmsr(&svm->vcpu);
|
||||
}
|
||||
|
||||
static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
|
||||
|
@ -4438,23 +4410,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
|||
|
||||
static int wrmsr_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct msr_data msr;
|
||||
u32 ecx = kvm_rcx_read(&svm->vcpu);
|
||||
u64 data = kvm_read_edx_eax(&svm->vcpu);
|
||||
|
||||
msr.data = data;
|
||||
msr.index = ecx;
|
||||
msr.host_initiated = false;
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
|
||||
if (kvm_set_msr(&svm->vcpu, &msr)) {
|
||||
trace_kvm_msr_write_ex(ecx, data);
|
||||
kvm_inject_gp(&svm->vcpu, 0);
|
||||
return 1;
|
||||
} else {
|
||||
trace_kvm_msr_write(ecx, data);
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
return kvm_emulate_wrmsr(&svm->vcpu);
|
||||
}
|
||||
|
||||
static int msr_interception(struct vcpu_svm *svm)
|
||||
|
@@ -5025,9 +4981,14 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || !svm_exit_handlers[exit_code]) {
-		WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 1;
+		vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
+		dump_vmcb(vcpu);
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror =
+			KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+		vcpu->run->internal.ndata = 1;
+		vcpu->run->internal.data[0] = exit_code;
+		return 0;
 	}
 
 	return svm_exit_handlers[exit_code](svm);
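With this change an unexpected hardware exit code is reported to userspace instead of injecting #UD into the guest, so a VMM sees KVM_EXIT_INTERNAL_ERROR on the vcpu's kvm_run page. A hedged sketch of how a userspace run loop might surface that, assuming the suberror constant is exported through <linux/kvm.h> (the helper name and message wording are illustrative, and this is a fragment meant to be called after ioctl(KVM_RUN)):

#include <stdio.h>
#include <linux/kvm.h>

/* Returns 0 to keep running, -1 to stop the vCPU loop. */
static int handle_internal_error(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_INTERNAL_ERROR)
		return 0;

	fprintf(stderr, "KVM internal error, suberror %u\n",
		run->internal.suberror);

	/* The new SVM/VMX paths stash the raw exit code in data[0]. */
	if (run->internal.suberror == KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON &&
	    run->internal.ndata >= 1)
		fprintf(stderr, "unexpected hardware exit reason: 0x%llx\n",
			(unsigned long long)run->internal.data[0]);

	return -1;
}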
@ -5274,7 +5235,8 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
|
|||
|
||||
kvm_set_msi_irq(kvm, e, &irq);
|
||||
|
||||
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
|
||||
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
|
||||
!kvm_irq_is_postable(&irq)) {
|
||||
pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
|
||||
__func__, irq.vector);
|
||||
return -1;
|
||||
|
@ -5328,6 +5290,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
|
|||
* 1. When cannot target interrupt to a specific vcpu.
|
||||
* 2. Unsetting posted interrupt.
|
||||
* 3. APIC virtialization is disabled for the vcpu.
|
||||
* 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
|
||||
*/
|
||||
if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
|
||||
kvm_vcpu_apicv_active(&svm->vcpu)) {
|
||||
|
@ -5933,6 +5896,8 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
|
|||
guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
|
||||
}
|
||||
|
||||
#define F(x) bit(X86_FEATURE_##x)
|
||||
|
||||
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
|
||||
{
|
||||
switch (func) {
|
||||
|
@ -5944,6 +5909,11 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
|
|||
if (nested)
|
||||
entry->ecx |= (1 << 2); /* Set SVM bit */
|
||||
break;
|
||||
case 0x80000008:
|
||||
if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
|
||||
boot_cpu_has(X86_FEATURE_AMD_SSBD))
|
||||
entry->ebx |= F(VIRT_SSBD);
|
||||
break;
|
||||
case 0x8000000A:
|
||||
entry->eax = 1; /* SVM revision 1 */
|
||||
entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
|
||||
|
@ -5954,11 +5924,11 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
|
|||
|
||||
/* Support next_rip if host supports it */
|
||||
if (boot_cpu_has(X86_FEATURE_NRIPS))
|
||||
entry->edx |= SVM_FEATURE_NRIP;
|
||||
entry->edx |= F(NRIPS);
|
||||
|
||||
/* Support NPT for the guest if enabled */
|
||||
if (npt_enabled)
|
||||
entry->edx |= SVM_FEATURE_NPT;
|
||||
entry->edx |= F(NPT);
|
||||
|
||||
break;
|
||||
case 0x8000001F:
|
||||
|
@ -6067,6 +6037,7 @@ static const struct __x86_intercept {
|
|||
[x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
|
||||
[x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
|
||||
[x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
|
||||
[x86_intercept_xsetbv] = PRE_EX(SVM_EXIT_XSETBV),
|
||||
};
|
||||
|
||||
#undef PRE_EX
|
||||
|
@ -7193,6 +7164,21 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
/*
|
||||
* TODO: Last condition latch INIT signals on vCPU when
|
||||
* vCPU is in guest-mode and vmcb12 defines intercept on INIT.
|
||||
* To properly emulate the INIT intercept, SVM should implement
|
||||
* kvm_x86_ops->check_nested_events() and call nested_svm_vmexit()
|
||||
* there if an INIT signal is pending.
|
||||
*/
|
||||
return !gif_set(svm) ||
|
||||
(svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
|
||||
}
|
||||
|
||||
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
|
||||
.cpu_has_kvm_support = has_svm,
|
||||
.disabled_by_bios = is_disabled,
|
||||
|
@ -7329,6 +7315,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
|
|||
.nested_get_evmcs_version = NULL,
|
||||
|
||||
.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
|
||||
|
||||
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
|
||||
};
|
||||
|
||||
static int __init svm_init(void)
|
@@ -232,17 +232,20 @@ TRACE_EVENT(kvm_exit,
 		__field(	u32,		isa		)
 		__field(	u64,		info1		)
 		__field(	u64,		info2		)
+		__field(	unsigned int,	vcpu_id		)
 	),
 
 	TP_fast_assign(
 		__entry->exit_reason	= exit_reason;
 		__entry->guest_rip	= kvm_rip_read(vcpu);
 		__entry->isa		= isa;
+		__entry->vcpu_id	= vcpu->vcpu_id;
 		kvm_x86_ops->get_exit_info(vcpu, &__entry->info1,
 					   &__entry->info2);
 	),
 
-	TP_printk("reason %s rip 0x%lx info %llx %llx",
+	TP_printk("vcpu %u reason %s rip 0x%lx info %llx %llx",
+		  __entry->vcpu_id,
 		  (__entry->isa == KVM_ISA_VMX) ?
 		  __print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) :
 		  __print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS),
@@ -887,36 +890,27 @@ TRACE_EVENT(kvm_pml_full,
 	TP_printk("vcpu %d: PML full", __entry->vcpu_id)
 );
 
-TRACE_EVENT(kvm_ple_window,
-	TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
-	TP_ARGS(grow, vcpu_id, new, old),
+TRACE_EVENT(kvm_ple_window_update,
+	TP_PROTO(unsigned int vcpu_id, unsigned int new, unsigned int old),
+	TP_ARGS(vcpu_id, new, old),
 
 	TP_STRUCT__entry(
-		__field(	bool,		grow	)
 		__field(	unsigned int,	vcpu_id	)
-		__field(	int,		new	)
-		__field(	int,		old	)
+		__field(	unsigned int,	new	)
+		__field(	unsigned int,	old	)
 	),
 
 	TP_fast_assign(
-		__entry->grow		= grow;
 		__entry->vcpu_id	= vcpu_id;
 		__entry->new		= new;
 		__entry->old		= old;
 	),
 
-	TP_printk("vcpu %u: ple_window %d (%s %d)",
-		  __entry->vcpu_id,
-		  __entry->new,
-		  __entry->grow ? "grow" : "shrink",
-		  __entry->old)
+	TP_printk("vcpu %u old %u new %u (%s)",
+		  __entry->vcpu_id, __entry->old, __entry->new,
+		  __entry->old < __entry->new ? "growed" : "shrinked")
 );
 
-#define trace_kvm_ple_window_grow(vcpu_id, new, old) \
-	trace_kvm_ple_window(true, vcpu_id, new, old)
-#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
-	trace_kvm_ple_window(false, vcpu_id, new, old)
-
TRACE_EVENT(kvm_pvclock_update,
|
||||
TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock),
|
||||
TP_ARGS(vcpu_id, pvclock),
|
||||
|
@ -1320,7 +1314,7 @@ TRACE_EVENT(kvm_avic_incomplete_ipi,
|
|||
__entry->index = index;
|
||||
),
|
||||
|
||||
TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u\n",
|
||||
TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u",
|
||||
__entry->vcpu, __entry->icrh, __entry->icrl,
|
||||
__entry->id, __entry->index)
|
||||
);
|
||||
|
@ -1345,7 +1339,7 @@ TRACE_EVENT(kvm_avic_unaccelerated_access,
|
|||
__entry->vec = vec;
|
||||
),
|
||||
|
||||
TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x\n",
|
||||
TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x",
|
||||
__entry->vcpu,
|
||||
__entry->offset,
|
||||
__print_symbolic(__entry->offset, kvm_trace_symbol_apic),
|
||||
|
@ -1462,6 +1456,46 @@ TRACE_EVENT(kvm_hv_send_ipi_ex,
|
|||
__entry->vector, __entry->format,
|
||||
__entry->valid_bank_mask)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_pv_tlb_flush,
|
||||
TP_PROTO(unsigned int vcpu_id, bool need_flush_tlb),
|
||||
TP_ARGS(vcpu_id, need_flush_tlb),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, vcpu_id )
|
||||
__field( bool, need_flush_tlb )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_id = vcpu_id;
|
||||
__entry->need_flush_tlb = need_flush_tlb;
|
||||
),
|
||||
|
||||
TP_printk("vcpu %u need_flush_tlb %s", __entry->vcpu_id,
|
||||
__entry->need_flush_tlb ? "true" : "false")
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for failed nested VMX VM-Enter.
|
||||
*/
|
||||
TRACE_EVENT(kvm_nested_vmenter_failed,
|
||||
TP_PROTO(const char *msg, u32 err),
|
||||
TP_ARGS(msg, err),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(const char *, msg)
|
||||
__field(u32, err)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->msg = msg;
|
||||
__entry->err = err;
|
||||
),
|
||||
|
||||
TP_printk("%s%s", __entry->msg, !__entry->err ? "" :
|
||||
__print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
|
||||
);
|
||||
|
||||
#endif /* _TRACE_KVM_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
|
|
@@ -19,6 +19,14 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 static bool __read_mostly nested_early_check = 0;
 module_param(nested_early_check, bool, S_IRUGO);
 
+#define CC(consistency_check)						\
+({									\
+	bool failed = (consistency_check);				\
+	if (failed)							\
+		trace_kvm_nested_vmenter_failed(#consistency_check, 0);	\
+	failed;								\
+})
+
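The CC() wrapper added above turns every nested VM-Enter consistency check into a self-describing trace event by stringizing the checked expression. The same GNU statement-expression trick can be modelled in plain userspace C; here the trace call is replaced by a fprintf and the names are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for trace_kvm_nested_vmenter_failed(): just log which check fired. */
#define CHECK_FAILED(consistency_check)					\
({									\
	bool failed = (consistency_check);				\
	if (failed)							\
		fprintf(stderr, "consistency check failed: %s\n",	\
			#consistency_check);				\
	failed;								\
})

static int validate(unsigned int count, unsigned int max)
{
	/* Reads like the nested VMX checks: fail fast, but say which check fired. */
	if (CHECK_FAILED(count > max))
		return -1;
	return 0;
}

int main(void)
{
	return validate(8, 4) ? 1 : 0;
}

The payoff, as in the nested.c changes that follow, is that a long chain of ORed checks can stay compact while still reporting exactly which condition rejected the VM-Enter.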
||||
/*
|
||||
* Hyper-V requires all of these, so mark them as supported even though
|
||||
* they are just treated the same as all-context.
|
||||
|
@ -430,8 +438,8 @@ static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
|
|||
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
|
||||
return 0;
|
||||
|
||||
if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
|
||||
!page_address_valid(vcpu, vmcs12->io_bitmap_b))
|
||||
if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) ||
|
||||
CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b)))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -443,7 +451,7 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
|
|||
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
|
||||
return 0;
|
||||
|
||||
if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
|
||||
if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap)))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -455,7 +463,7 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
|
|||
if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
|
||||
return 0;
|
||||
|
||||
if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))
|
||||
if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -688,7 +696,7 @@ static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
|
|||
struct vmcs12 *vmcs12)
|
||||
{
|
||||
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
|
||||
!page_address_valid(vcpu, vmcs12->apic_access_addr))
|
||||
CC(!page_address_valid(vcpu, vmcs12->apic_access_addr)))
|
||||
return -EINVAL;
|
||||
else
|
||||
return 0;
|
||||
|
@ -707,16 +715,15 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
|
|||
* If virtualize x2apic mode is enabled,
|
||||
* virtualize apic access must be disabled.
|
||||
*/
|
||||
if (nested_cpu_has_virt_x2apic_mode(vmcs12) &&
|
||||
nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
|
||||
if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) &&
|
||||
nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If virtual interrupt delivery is enabled,
|
||||
* we must exit on external interrupts.
|
||||
*/
|
||||
if (nested_cpu_has_vid(vmcs12) &&
|
||||
!nested_exit_on_intr(vcpu))
|
||||
if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu)))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
|
@ -727,15 +734,15 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
|
|||
* bits 5:0 of posted_intr_desc_addr should be zero.
|
||||
*/
|
||||
if (nested_cpu_has_posted_intr(vmcs12) &&
|
||||
(!nested_cpu_has_vid(vmcs12) ||
|
||||
!nested_exit_intr_ack_set(vcpu) ||
|
||||
(vmcs12->posted_intr_nv & 0xff00) ||
|
||||
(vmcs12->posted_intr_desc_addr & 0x3f) ||
|
||||
(vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu))))
|
||||
(CC(!nested_cpu_has_vid(vmcs12)) ||
|
||||
CC(!nested_exit_intr_ack_set(vcpu)) ||
|
||||
CC((vmcs12->posted_intr_nv & 0xff00)) ||
|
||||
CC((vmcs12->posted_intr_desc_addr & 0x3f)) ||
|
||||
CC((vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))))
|
||||
return -EINVAL;
|
||||
|
||||
/* tpr shadow is needed by all apicv features. */
|
||||
if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
|
||||
if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -759,10 +766,12 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
|
|||
static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
|
||||
struct vmcs12 *vmcs12)
|
||||
{
|
||||
if (nested_vmx_check_msr_switch(vcpu, vmcs12->vm_exit_msr_load_count,
|
||||
vmcs12->vm_exit_msr_load_addr) ||
|
||||
nested_vmx_check_msr_switch(vcpu, vmcs12->vm_exit_msr_store_count,
|
||||
vmcs12->vm_exit_msr_store_addr))
|
||||
if (CC(nested_vmx_check_msr_switch(vcpu,
|
||||
vmcs12->vm_exit_msr_load_count,
|
||||
vmcs12->vm_exit_msr_load_addr)) ||
|
||||
CC(nested_vmx_check_msr_switch(vcpu,
|
||||
vmcs12->vm_exit_msr_store_count,
|
||||
vmcs12->vm_exit_msr_store_addr)))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -771,8 +780,9 @@ static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
|
|||
static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
|
||||
struct vmcs12 *vmcs12)
|
||||
{
|
||||
if (nested_vmx_check_msr_switch(vcpu, vmcs12->vm_entry_msr_load_count,
|
||||
vmcs12->vm_entry_msr_load_addr))
|
||||
if (CC(nested_vmx_check_msr_switch(vcpu,
|
||||
vmcs12->vm_entry_msr_load_count,
|
||||
vmcs12->vm_entry_msr_load_addr)))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -784,8 +794,8 @@ static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
|
|||
if (!nested_cpu_has_pml(vmcs12))
|
||||
return 0;
|
||||
|
||||
if (!nested_cpu_has_ept(vmcs12) ||
|
||||
!page_address_valid(vcpu, vmcs12->pml_address))
|
||||
if (CC(!nested_cpu_has_ept(vmcs12)) ||
|
||||
CC(!page_address_valid(vcpu, vmcs12->pml_address)))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -794,8 +804,8 @@ static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
|
|||
static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
|
||||
struct vmcs12 *vmcs12)
|
||||
{
|
||||
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
|
||||
!nested_cpu_has_ept(vmcs12))
|
||||
if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
|
||||
!nested_cpu_has_ept(vmcs12)))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
@ -803,8 +813,8 @@ static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
|
|||
static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
|
||||
struct vmcs12 *vmcs12)
|
||||
{
|
||||
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
|
||||
!nested_cpu_has_ept(vmcs12))
|
||||
if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
|
||||
!nested_cpu_has_ept(vmcs12)))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
@ -815,8 +825,8 @@ static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
|
|||
if (!nested_cpu_has_shadow_vmcs(vmcs12))
|
||||
return 0;
|
||||
|
||||
if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) ||
|
||||
!page_address_valid(vcpu, vmcs12->vmwrite_bitmap))
|
||||
if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) ||
|
||||
CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap)))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -826,12 +836,12 @@ static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
|
|||
struct vmx_msr_entry *e)
|
||||
{
|
||||
/* x2APIC MSR accesses are not allowed */
|
||||
if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)
|
||||
if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8))
|
||||
return -EINVAL;
|
||||
if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
|
||||
e->index == MSR_IA32_UCODE_REV)
|
||||
if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */
|
||||
CC(e->index == MSR_IA32_UCODE_REV))
|
||||
return -EINVAL;
|
||||
if (e->reserved != 0)
|
||||
if (CC(e->reserved != 0))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
@ -839,9 +849,9 @@ static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
|
|||
static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
|
||||
struct vmx_msr_entry *e)
|
||||
{
|
||||
if (e->index == MSR_FS_BASE ||
|
||||
e->index == MSR_GS_BASE ||
|
||||
e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */
|
||||
if (CC(e->index == MSR_FS_BASE) ||
|
||||
CC(e->index == MSR_GS_BASE) ||
|
||||
CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */
|
||||
nested_vmx_msr_check_common(vcpu, e))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
|
@ -850,7 +860,7 @@ static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
|
|||
static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
|
||||
struct vmx_msr_entry *e)
|
||||
{
|
||||
if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */
|
||||
if (CC(e->index == MSR_IA32_SMBASE) || /* SMM is not supported */
|
||||
nested_vmx_msr_check_common(vcpu, e))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
|
@ -864,9 +874,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
|
|||
{
|
||||
u32 i;
|
||||
struct vmx_msr_entry e;
|
||||
struct msr_data msr;
|
||||
|
||||
msr.host_initiated = false;
|
||||
for (i = 0; i < count; i++) {
|
||||
if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
|
||||
&e, sizeof(e))) {
|
||||
|
@ -881,9 +889,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
|
|||
__func__, i, e.index, e.reserved);
|
||||
goto fail;
|
||||
}
|
||||
msr.index = e.index;
|
||||
msr.data = e.value;
|
||||
if (kvm_set_msr(vcpu, &msr)) {
|
||||
if (kvm_set_msr(vcpu, e.index, e.value)) {
|
||||
pr_debug_ratelimited(
|
||||
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
|
||||
__func__, i, e.index, e.value);
|
||||
|
@ -897,11 +903,11 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
|
|||
|
||||
static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
|
||||
{
|
||||
u64 data;
|
||||
u32 i;
|
||||
struct vmx_msr_entry e;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
struct msr_data msr_info;
|
||||
if (kvm_vcpu_read_guest(vcpu,
|
||||
gpa + i * sizeof(e),
|
||||
&e, 2 * sizeof(u32))) {
|
||||
|
@ -916,9 +922,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
|
|||
__func__, i, e.index, e.reserved);
|
||||
return -EINVAL;
|
||||
}
|
||||
msr_info.host_initiated = false;
|
||||
msr_info.index = e.index;
|
||||
if (kvm_get_msr(vcpu, &msr_info)) {
|
||||
if (kvm_get_msr(vcpu, e.index, &data)) {
|
||||
pr_debug_ratelimited(
|
||||
"%s cannot read MSR (%u, 0x%x)\n",
|
||||
__func__, i, e.index);
|
||||
|
@ -927,10 +931,10 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
|
|||
if (kvm_vcpu_write_guest(vcpu,
|
||||
gpa + i * sizeof(e) +
|
||||
offsetof(struct vmx_msr_entry, value),
|
||||
&msr_info.data, sizeof(msr_info.data))) {
|
||||
&data, sizeof(data))) {
|
||||
pr_debug_ratelimited(
|
||||
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
|
||||
__func__, i, e.index, msr_info.data);
|
||||
__func__, i, e.index, data);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
@ -955,7 +959,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
|
|||
u32 *entry_failure_code)
|
||||
{
|
||||
if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
|
||||
if (!nested_cr3_valid(vcpu, cr3)) {
|
||||
if (CC(!nested_cr3_valid(vcpu, cr3))) {
|
||||
*entry_failure_code = ENTRY_FAIL_DEFAULT;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -965,7 +969,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
|
|||
* must not be dereferenced.
|
||||
*/
|
||||
if (is_pae_paging(vcpu) && !nested_ept) {
|
||||
if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
|
||||
if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
|
||||
*entry_failure_code = ENTRY_FAIL_PDPTE;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -2411,12 +2415,12 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|||
|
||||
static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
|
||||
{
|
||||
if (!nested_cpu_has_nmi_exiting(vmcs12) &&
|
||||
nested_cpu_has_virtual_nmis(vmcs12))
|
||||
if (CC(!nested_cpu_has_nmi_exiting(vmcs12) &&
|
||||
nested_cpu_has_virtual_nmis(vmcs12)))
|
||||
return -EINVAL;
|
||||
|
||||
if (!nested_cpu_has_virtual_nmis(vmcs12) &&
|
||||
nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))
|
||||
if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
|
||||
nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING)))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -2430,11 +2434,11 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
|
|||
/* Check for memory type validity */
|
||||
switch (address & VMX_EPTP_MT_MASK) {
|
||||
case VMX_EPTP_MT_UC:
|
||||
if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))
|
||||
if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
|
||||
return false;
|
||||
break;
|
||||
case VMX_EPTP_MT_WB:
|
||||
if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))
|
||||
if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)))
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
|
@ -2442,16 +2446,16 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
|
|||
}
|
||||
|
||||
/* only 4 levels page-walk length are valid */
|
||||
if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)
|
||||
if (CC((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4))
|
||||
return false;
|
||||
|
||||
/* Reserved bits should not be set */
|
||||
if (address >> maxphyaddr || ((address >> 7) & 0x1f))
|
||||
if (CC(address >> maxphyaddr || ((address >> 7) & 0x1f)))
|
||||
return false;
|
||||
|
||||
/* AD, if set, should be supported */
|
||||
if (address & VMX_EPTP_AD_ENABLE_BIT) {
|
||||
if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))
|
||||
if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -2466,21 +2470,21 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
|
|||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
||||
if (!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
|
||||
vmx->nested.msrs.pinbased_ctls_low,
|
||||
vmx->nested.msrs.pinbased_ctls_high) ||
|
||||
!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
|
||||
vmx->nested.msrs.procbased_ctls_low,
|
||||
vmx->nested.msrs.procbased_ctls_high))
|
||||
if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
|
||||
vmx->nested.msrs.pinbased_ctls_low,
|
||||
vmx->nested.msrs.pinbased_ctls_high)) ||
|
||||
CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
|
||||
vmx->nested.msrs.procbased_ctls_low,
|
||||
vmx->nested.msrs.procbased_ctls_high)))
|
||||
return -EINVAL;
|
||||
|
||||
if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
|
||||
!vmx_control_verify(vmcs12->secondary_vm_exec_control,
|
||||
vmx->nested.msrs.secondary_ctls_low,
|
||||
vmx->nested.msrs.secondary_ctls_high))
|
||||
CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control,
|
||||
vmx->nested.msrs.secondary_ctls_low,
|
||||
vmx->nested.msrs.secondary_ctls_high)))
|
||||
return -EINVAL;
|
||||
|
||||
if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu) ||
|
||||
if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ||
|
||||
nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) ||
|
||||
nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) ||
|
||||
nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) ||
|
||||
|
@ -2491,7 +2495,7 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
|
|||
nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) ||
|
||||
nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) ||
|
||||
nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) ||
|
||||
(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
|
||||
CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
|
||||
return -EINVAL;
|
||||
|
||||
if (!nested_cpu_has_preemption_timer(vmcs12) &&
|
||||
|
@ -2499,17 +2503,17 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
|
|||
return -EINVAL;
|
||||
|
||||
if (nested_cpu_has_ept(vmcs12) &&
|
||||
!valid_ept_address(vcpu, vmcs12->ept_pointer))
|
||||
CC(!valid_ept_address(vcpu, vmcs12->ept_pointer)))
|
||||
return -EINVAL;
|
||||
|
||||
if (nested_cpu_has_vmfunc(vmcs12)) {
|
||||
if (vmcs12->vm_function_control &
|
||||
~vmx->nested.msrs.vmfunc_controls)
|
||||
if (CC(vmcs12->vm_function_control &
|
||||
~vmx->nested.msrs.vmfunc_controls))
|
||||
return -EINVAL;
|
||||
|
||||
if (nested_cpu_has_eptp_switching(vmcs12)) {
|
||||
if (!nested_cpu_has_ept(vmcs12) ||
|
||||
!page_address_valid(vcpu, vmcs12->eptp_list_address))
|
||||
if (CC(!nested_cpu_has_ept(vmcs12)) ||
|
||||
CC(!page_address_valid(vcpu, vmcs12->eptp_list_address)))
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
@ -2525,10 +2529,10 @@ static int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu,
|
|||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
||||
if (!vmx_control_verify(vmcs12->vm_exit_controls,
|
||||
vmx->nested.msrs.exit_ctls_low,
|
||||
vmx->nested.msrs.exit_ctls_high) ||
|
||||
nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12))
|
||||
if (CC(!vmx_control_verify(vmcs12->vm_exit_controls,
|
||||
vmx->nested.msrs.exit_ctls_low,
|
||||
vmx->nested.msrs.exit_ctls_high)) ||
|
||||
CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -2542,9 +2546,9 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
|
|||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
||||
if (!vmx_control_verify(vmcs12->vm_entry_controls,
|
||||
vmx->nested.msrs.entry_ctls_low,
|
||||
vmx->nested.msrs.entry_ctls_high))
|
||||
if (CC(!vmx_control_verify(vmcs12->vm_entry_controls,
|
||||
vmx->nested.msrs.entry_ctls_low,
|
||||
vmx->nested.msrs.entry_ctls_high)))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
|
@ -2564,31 +2568,31 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
|
|||
bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
|
||||
|
||||
/* VM-entry interruption-info field: interruption type */
|
||||
if (intr_type == INTR_TYPE_RESERVED ||
|
||||
(intr_type == INTR_TYPE_OTHER_EVENT &&
|
||||
!nested_cpu_supports_monitor_trap_flag(vcpu)))
|
||||
if (CC(intr_type == INTR_TYPE_RESERVED) ||
|
||||
CC(intr_type == INTR_TYPE_OTHER_EVENT &&
|
||||
!nested_cpu_supports_monitor_trap_flag(vcpu)))
|
||||
return -EINVAL;
|
||||
|
||||
/* VM-entry interruption-info field: vector */
|
||||
if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
|
||||
(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
|
||||
(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
|
||||
if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
|
||||
CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
|
||||
CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
|
||||
return -EINVAL;
|
||||
|
||||
/* VM-entry interruption-info field: deliver error code */
|
||||
should_have_error_code =
|
||||
intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
|
||||
x86_exception_has_error_code(vector);
|
||||
if (has_error_code != should_have_error_code)
|
||||
if (CC(has_error_code != should_have_error_code))
|
||||
return -EINVAL;
|
||||
|
||||
/* VM-entry exception error code */
|
||||
if (has_error_code &&
|
||||
vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
|
||||
if (CC(has_error_code &&
|
||||
vmcs12->vm_entry_exception_error_code & GENMASK(31, 15)))
|
||||
return -EINVAL;
|
||||
|
||||
/* VM-entry interruption-info field: reserved bits */
|
||||
if (intr_info & INTR_INFO_RESVD_BITS_MASK)
|
||||
if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK))
|
||||
return -EINVAL;
|
||||
|
||||
/* VM-entry instruction length */
|
||||
|
@ -2596,9 +2600,9 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
|
|||
case INTR_TYPE_SOFT_EXCEPTION:
|
||||
case INTR_TYPE_SOFT_INTR:
|
||||
case INTR_TYPE_PRIV_SW_EXCEPTION:
|
||||
if ((vmcs12->vm_entry_instruction_len > 15) ||
|
||||
(vmcs12->vm_entry_instruction_len == 0 &&
|
||||
!nested_cpu_has_zero_length_injection(vcpu)))
|
||||
if (CC(vmcs12->vm_entry_instruction_len > 15) ||
|
||||
CC(vmcs12->vm_entry_instruction_len == 0 &&
|
||||
CC(!nested_cpu_has_zero_length_injection(vcpu))))
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
@ -2625,40 +2629,40 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
|
|||
{
|
||||
bool ia32e;
|
||||
|
||||
if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
|
||||
!nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
|
||||
!nested_cr3_valid(vcpu, vmcs12->host_cr3))
|
||||
if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
|
||||
CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
|
||||
CC(!nested_cr3_valid(vcpu, vmcs12->host_cr3)))
|
||||
return -EINVAL;
|
||||
|
||||
if (is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu) ||
|
||||
is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))
|
||||
if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
|
||||
CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
|
||||
return -EINVAL;
|
||||
|
||||
if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
|
||||
!kvm_pat_valid(vmcs12->host_ia32_pat))
|
||||
CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
|
||||
return -EINVAL;
|
||||
|
||||
ia32e = (vmcs12->vm_exit_controls &
|
||||
VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
|
||||
|
||||
if (vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
|
||||
vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
|
||||
vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
|
||||
vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
|
||||
vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
|
||||
vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
|
||||
vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
|
||||
vmcs12->host_cs_selector == 0 ||
|
||||
vmcs12->host_tr_selector == 0 ||
|
||||
(vmcs12->host_ss_selector == 0 && !ia32e))
|
||||
if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
||||
CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
||||
CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
||||
CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
||||
CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
||||
CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
||||
CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
||||
CC(vmcs12->host_cs_selector == 0) ||
|
||||
CC(vmcs12->host_tr_selector == 0) ||
|
||||
CC(vmcs12->host_ss_selector == 0 && !ia32e))
|
||||
return -EINVAL;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (is_noncanonical_address(vmcs12->host_fs_base, vcpu) ||
|
||||
is_noncanonical_address(vmcs12->host_gs_base, vcpu) ||
|
||||
is_noncanonical_address(vmcs12->host_gdtr_base, vcpu) ||
|
||||
is_noncanonical_address(vmcs12->host_idtr_base, vcpu) ||
|
||||
is_noncanonical_address(vmcs12->host_tr_base, vcpu))
|
||||
if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
|
||||
CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
|
||||
CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
|
||||
CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
|
||||
CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)))
|
||||
return -EINVAL;
|
||||
#endif
|
||||
|
||||
|
@ -2669,9 +2673,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
|
|||
* the host address-space size VM-exit control.
|
||||
*/
|
||||
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
|
||||
if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
|
||||
ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
|
||||
ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
|
||||
if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) ||
|
||||
CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) ||
|
||||
CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -2688,16 +2692,16 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
|
|||
if (vmcs12->vmcs_link_pointer == -1ull)
|
||||
return 0;
|
||||
|
||||
if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))
|
||||
if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
|
||||
return -EINVAL;
|
||||
|
||||
if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
|
||||
if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
|
||||
return -EINVAL;
|
||||
|
||||
shadow = map.hva;
|
||||
|
||||
if (shadow->hdr.revision_id != VMCS12_REVISION ||
|
||||
shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))
|
||||
if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
|
||||
CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
|
||||
r = -EINVAL;
|
||||
|
||||
kvm_vcpu_unmap(vcpu, &map, false);
|
||||
|
@ -2709,8 +2713,8 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
|
|||
*/
|
||||
static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
|
||||
{
|
||||
if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
|
||||
vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
|
||||
if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
|
||||
vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -2724,12 +2728,12 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
|
|||
|
||||
*exit_qual = ENTRY_FAIL_DEFAULT;
|
||||
|
||||
if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
|
||||
!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
|
||||
if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
|
||||
CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
|
||||
return -EINVAL;
|
||||
|
||||
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
|
||||
!kvm_pat_valid(vmcs12->guest_ia32_pat))
|
||||
CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
|
||||
return -EINVAL;
|
||||
|
||||
if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
|
||||
|
@ -2749,16 +2753,16 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
|
|||
if (to_vmx(vcpu)->nested.nested_run_pending &&
|
||||
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
|
||||
ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
|
||||
if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
|
||||
ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
|
||||
((vmcs12->guest_cr0 & X86_CR0_PG) &&
|
||||
ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))
|
||||
if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
|
||||
CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
|
||||
CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
|
||||
ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
|
||||
(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
|
||||
(vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
|
||||
(CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
|
||||
CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
|
||||
return -EINVAL;
|
||||
|
||||
if (nested_check_guest_non_reg_state(vmcs12))
|
||||
|
@ -2841,9 +2845,13 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
|
|||
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
|
||||
|
||||
if (vm_fail) {
|
||||
u32 error = vmcs_read32(VM_INSTRUCTION_ERROR);
|
||||
|
||||
preempt_enable();
|
||||
WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
|
||||
VMXERR_ENTRY_INVALID_CONTROL_FIELD);
|
||||
|
||||
trace_kvm_nested_vmenter_failed(
|
||||
"early hardware check VM-instruction error: ", error);
|
||||
WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@@ -3401,6 +3409,15 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 	unsigned long exit_qual;
 	bool block_nested_events =
 		vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (lapic_in_kernel(vcpu) &&
+		test_bit(KVM_APIC_INIT, &apic->pending_events)) {
+		if (block_nested_events)
+			return -EBUSY;
+		nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
+		return 0;
+	}
||||
if (vcpu->arch.exception.pending &&
|
||||
nested_vmx_check_exception(vcpu, &exit_qual)) {
|
||||
|
@ -3889,7 +3906,6 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
|
|||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
struct vmx_msr_entry g, h;
|
||||
struct msr_data msr;
|
||||
gpa_t gpa;
|
||||
u32 i, j;
|
||||
|
||||
|
@ -3949,7 +3965,6 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
|
|||
* from the guest value. The intent is to stuff host state as
|
||||
* silently as possible, not to fully process the exit load list.
|
||||
*/
|
||||
msr.host_initiated = false;
|
||||
for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
|
||||
gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
|
||||
if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
|
||||
|
@ -3979,9 +3994,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
|
|||
goto vmabort;
|
||||
}
|
||||
|
||||
msr.index = h.index;
|
||||
msr.data = h.value;
|
||||
if (kvm_set_msr(vcpu, &msr)) {
|
||||
if (kvm_set_msr(vcpu, h.index, h.value)) {
|
||||
pr_debug_ratelimited(
|
||||
"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
|
||||
__func__, j, h.index, h.value);
|
||||
|
@ -4466,7 +4479,12 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
if (!nested_vmx_check_permission(vcpu))
|
||||
return 1;
|
||||
|
||||
free_nested(vcpu);
|
||||
|
||||
/* Process a latched INIT during time CPU was in VMX operation */
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
|
||||
return nested_vmx_succeed(vcpu);
|
||||
}
|
||||
|
||||
|
@ -5261,8 +5279,9 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
|
|||
return false;
|
||||
|
||||
if (unlikely(vmx->fail)) {
|
||||
pr_info_ratelimited("%s failed vm entry %x\n", __func__,
|
||||
vmcs_read32(VM_INSTRUCTION_ERROR));
|
||||
trace_kvm_nested_vmenter_failed(
|
||||
"hardware VM-instruction error: ",
|
||||
vmcs_read32(VM_INSTRUCTION_ERROR));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -94,7 +94,7 @@ ENDPROC(vmx_vmexit)
|
|||
|
||||
/**
|
||||
* __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
|
||||
* @vmx: struct vcpu_vmx *
|
||||
* @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
|
||||
* @regs: unsigned long * (to guest registers)
|
||||
* @launched: %true if the VMCS has been launched
|
||||
*
|
||||
|
@ -151,7 +151,7 @@ ENTRY(__vmx_vcpu_run)
|
|||
mov VCPU_R14(%_ASM_AX), %r14
|
||||
mov VCPU_R15(%_ASM_AX), %r15
|
||||
#endif
|
||||
/* Load guest RAX. This kills the vmx_vcpu pointer! */
|
||||
/* Load guest RAX. This kills the @regs pointer! */
|
||||
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
|
||||
|
||||
/* Enter guest mode */
|
||||
|
|
|
@@ -1472,8 +1472,11 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
 	return 0;
 }
 
-
-static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+/*
+ * Returns an int to be compatible with SVM implementation (which can fail).
+ * Do not use directly, use skip_emulated_instruction() instead.
+ */
+static int __skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
 	unsigned long rip;
 
@@ -1483,6 +1486,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	/* skipping an emulated instruction also counts */
 	vmx_set_interrupt_shadow(vcpu, 0);
+
+	return EMULATE_DONE;
+}
+
+static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+	(void)__skip_emulated_instruction(vcpu);
 }
 
 static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
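The VMX change above keeps one int-returning worker and a thin void wrapper so existing callers that never check the result stay unchanged. A minimal sketch of that pattern in isolation (generic names, not the kernel's):

#include <stdio.h>

/* Worker: reports success/failure so new callers can propagate it. */
static int __advance_cursor(int *cursor, int limit)
{
	if (*cursor >= limit)
		return -1;
	(*cursor)++;
	return 0;
}

/* Legacy-style wrapper: callers that cannot fail simply discard the status. */
static void advance_cursor(int *cursor, int limit)
{
	(void)__advance_cursor(cursor, limit);
}

int main(void)
{
	int cursor = 0;

	advance_cursor(&cursor, 3);
	printf("cursor = %d\n", cursor);
	return 0;
}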
@ -4026,7 +4036,7 @@ static void ept_set_mmio_spte_mask(void)
|
|||
* of an EPT paging-structure entry is 110b (write/execute).
|
||||
*/
|
||||
kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
|
||||
VMX_EPT_MISCONFIG_WX_VALUE);
|
||||
VMX_EPT_MISCONFIG_WX_VALUE, 0);
|
||||
}
|
||||
|
||||
#define VMX_XSS_EXIT_BITMAP 0
|
||||
|
@ -4152,6 +4162,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
|
||||
vcpu->arch.microcode_version = 0x100000000ULL;
|
||||
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
|
||||
vmx->hv_deadline_tsc = -1;
|
||||
kvm_set_cr8(vcpu, 0);
|
||||
|
||||
if (!init_event) {
|
||||
|
@ -4856,41 +4867,12 @@ static int handle_cpuid(struct kvm_vcpu *vcpu)
|
|||
|
||||
static int handle_rdmsr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 ecx = kvm_rcx_read(vcpu);
|
||||
struct msr_data msr_info;
|
||||
|
||||
msr_info.index = ecx;
|
||||
msr_info.host_initiated = false;
|
||||
if (vmx_get_msr(vcpu, &msr_info)) {
|
||||
trace_kvm_msr_read_ex(ecx);
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
return 1;
|
||||
}
|
||||
|
||||
trace_kvm_msr_read(ecx, msr_info.data);
|
||||
|
||||
kvm_rax_write(vcpu, msr_info.data & -1u);
|
||||
kvm_rdx_write(vcpu, (msr_info.data >> 32) & -1u);
|
||||
return kvm_skip_emulated_instruction(vcpu);
|
||||
return kvm_emulate_rdmsr(vcpu);
|
||||
}
|
||||
|
||||
static int handle_wrmsr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct msr_data msr;
|
||||
u32 ecx = kvm_rcx_read(vcpu);
|
||||
u64 data = kvm_read_edx_eax(vcpu);
|
||||
|
||||
msr.data = data;
|
||||
msr.index = ecx;
|
||||
msr.host_initiated = false;
|
||||
if (kvm_set_msr(vcpu, &msr) != 0) {
|
||||
trace_kvm_msr_write_ex(ecx, data);
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
return 1;
|
||||
}
|
||||
|
||||
trace_kvm_msr_write(ecx, data);
|
||||
return kvm_skip_emulated_instruction(vcpu);
|
||||
return kvm_emulate_wrmsr(vcpu);
|
||||
}
|
||||
|
||||
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
|
||||
|
@ -5227,31 +5209,33 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
|
|||
static void grow_ple_window(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
int old = vmx->ple_window;
|
||||
unsigned int old = vmx->ple_window;
|
||||
|
||||
vmx->ple_window = __grow_ple_window(old, ple_window,
|
||||
ple_window_grow,
|
||||
ple_window_max);
|
||||
|
||||
if (vmx->ple_window != old)
|
||||
if (vmx->ple_window != old) {
|
||||
vmx->ple_window_dirty = true;
|
||||
|
||||
trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
|
||||
trace_kvm_ple_window_update(vcpu->vcpu_id,
|
||||
vmx->ple_window, old);
|
||||
}
|
||||
}
|
||||
|
||||
static void shrink_ple_window(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
int old = vmx->ple_window;
|
||||
unsigned int old = vmx->ple_window;
|
||||
|
||||
vmx->ple_window = __shrink_ple_window(old, ple_window,
|
||||
ple_window_shrink,
|
||||
ple_window);
|
||||
|
||||
if (vmx->ple_window != old)
|
||||
if (vmx->ple_window != old) {
|
||||
vmx->ple_window_dirty = true;
|
||||
|
||||
trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
|
||||
trace_kvm_ple_window_update(vcpu->vcpu_id,
|
||||
vmx->ple_window, old);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -5887,8 +5871,13 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
|
|||
else {
|
||||
vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
|
||||
exit_reason);
|
||||
kvm_queue_exception(vcpu, UD_VECTOR);
|
||||
return 1;
|
||||
dump_vmcs();
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
vcpu->run->internal.suberror =
|
||||
KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
|
||||
vcpu->run->internal.ndata = 1;
|
||||
vcpu->run->internal.data[0] = exit_reason;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6615,6 +6604,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
|||
unsigned long *msr_bitmap;
|
||||
int cpu;
|
||||
|
||||
BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
|
||||
"struct kvm_vcpu must be at offset 0 for arch usercopy region");
|
||||
|
||||
vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
|
||||
if (!vmx)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
@ -7369,10 +7361,14 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
|
|||
* irqbalance to make the interrupts single-CPU.
|
||||
*
|
||||
* We will support full lowest-priority interrupt later.
|
||||
*
|
||||
* In addition, we can only inject generic interrupts using
|
||||
* the PI mechanism, refuse to route others through it.
|
||||
*/
|
||||
|
||||
kvm_set_msi_irq(kvm, e, &irq);
|
||||
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
|
||||
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
|
||||
!kvm_irq_is_postable(&irq)) {
|
||||
/*
|
||||
* Make sure the IRTE is in remapped mode if
|
||||
* we don't handle it in posted mode.
|
||||
|
@ -7474,6 +7470,11 @@ static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_vmx(vcpu)->nested.vmxon;
|
||||
}
|
||||
|
||||
static __init int hardware_setup(void)
|
||||
{
|
||||
unsigned long host_bndcfgs;
|
||||
|
@ -7705,7 +7706,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
|
|||
|
||||
.run = vmx_vcpu_run,
|
||||
.handle_exit = vmx_handle_exit,
|
||||
.skip_emulated_instruction = skip_emulated_instruction,
|
||||
.skip_emulated_instruction = __skip_emulated_instruction,
|
||||
.set_interrupt_shadow = vmx_set_interrupt_shadow,
|
||||
.get_interrupt_shadow = vmx_get_interrupt_shadow,
|
||||
.patch_hypercall = vmx_patch_hypercall,
|
||||
|
@ -7799,6 +7800,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
|
|||
.nested_enable_evmcs = NULL,
|
||||
.nested_get_evmcs_version = NULL,
|
||||
.need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
|
||||
.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
|
||||
};
|
||||
|
||||
static void vmx_cleanup_l1d_flush(void)
|
||||
|
|
|
@ -253,7 +253,7 @@ struct vcpu_vmx {
|
|||
struct nested_vmx nested;
|
||||
|
||||
/* Dynamic PLE window. */
|
||||
int ple_window;
|
||||
unsigned int ple_window;
|
||||
bool ple_window_dirty;
|
||||
|
||||
bool req_immediate_exit;
|
||||
|
|
|
@ -674,8 +674,14 @@ static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
|
|||
data, offset, len, access);
|
||||
}
|
||||
|
||||
static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return rsvd_bits(cpuid_maxphyaddr(vcpu), 63) | rsvd_bits(5, 8) |
|
||||
rsvd_bits(1, 2);
|
||||
}
|
||||
|
||||
/*
|
||||
* Load the pae pdptrs. Return true is they are all valid.
|
||||
* Load the pae pdptrs. Return 1 if they are all valid, 0 otherwise.
|
||||
*/
|
||||
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
|
||||
{
|
||||
|
@ -694,8 +700,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
|
|||
}
|
||||
for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
|
||||
if ((pdpte[i] & PT_PRESENT_MASK) &&
|
||||
(pdpte[i] &
|
||||
vcpu->arch.mmu->guest_rsvd_check.rsvd_bits_mask[0][2])) {
|
||||
(pdpte[i] & pdptr_rsvd_bits(vcpu))) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
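pdptr_rsvd_bits() above simply ORs together three bit ranges: everything above the CPU's MAXPHYADDR, bits 8:5, and bits 2:1 of a PAE PDPTE. A small standalone sketch of that mask construction; rsvd_bits() here is a local helper mirroring the kernel's, and the MAXPHYADDR value is only an example:

#include <stdint.h>
#include <stdio.h>

/* Build a mask with bits s..e (inclusive) set, like the kernel's rsvd_bits(). */
static uint64_t rsvd_bits(unsigned int s, unsigned int e)
{
	return ((~0ULL) >> (63 - e)) & ~((1ULL << s) - 1);
}

static uint64_t pdptr_rsvd_bits(unsigned int maxphyaddr)
{
	/* Reserved in a PAE PDPTE: bits above MAXPHYADDR, bits 8:5 and bits 2:1. */
	return rsvd_bits(maxphyaddr, 63) | rsvd_bits(5, 8) | rsvd_bits(1, 2);
}

int main(void)
{
	printf("PDPTE reserved mask (MAXPHYADDR=48): 0x%016llx\n",
	       (unsigned long long)pdptr_rsvd_bits(48));
	return 0;
}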
@ -1254,6 +1259,13 @@ static u64 kvm_get_arch_capabilities(void)
|
|||
if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
|
||||
data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
|
||||
|
||||
if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
|
||||
data |= ARCH_CAP_RDCL_NO;
|
||||
if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
|
||||
data |= ARCH_CAP_SSB_NO;
|
||||
if (!boot_cpu_has_bug(X86_BUG_MDS))
|
||||
data |= ARCH_CAP_MDS_NO;
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
|
@@ -1351,19 +1363,23 @@ void kvm_enable_efer_bits(u64 mask)
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

/*
* Writes msr value into into the appropriate "register".
* Write @data into the MSR specified by @index. Select MSR specific fault
* checks are bypassed if @host_initiated is %true.
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
bool host_initiated)
{
switch (msr->index) {
struct msr_data msr;

switch (index) {
case MSR_FS_BASE:
case MSR_GS_BASE:
case MSR_KERNEL_GS_BASE:
case MSR_CSTAR:
case MSR_LSTAR:
if (is_noncanonical_address(msr->data, vcpu))
if (is_noncanonical_address(data, vcpu))
return 1;
break;
case MSR_IA32_SYSENTER_EIP:

@@ -1380,38 +1396,95 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
* value, and that something deterministic happens if the guest
* invokes 64-bit SYSENTER.
*/
msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu));
data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
}
return kvm_x86_ops->set_msr(vcpu, msr);

msr.data = data;
msr.index = index;
msr.host_initiated = host_initiated;

return kvm_x86_ops->set_msr(vcpu, &msr);
}

/*
* Read the MSR specified by @index into @data. Select MSR specific fault
* checks are bypassed if @host_initiated is %true.
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
bool host_initiated)
{
struct msr_data msr;
int ret;

msr.index = index;
msr.host_initiated = host_initiated;

ret = kvm_x86_ops->get_msr(vcpu, &msr);
if (!ret)
*data = msr.data;
return ret;
}

int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
return __kvm_get_msr(vcpu, index, data, false);
}
EXPORT_SYMBOL_GPL(kvm_get_msr);

int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
return __kvm_set_msr(vcpu, index, data, false);
}
EXPORT_SYMBOL_GPL(kvm_set_msr);

int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
{
u32 ecx = kvm_rcx_read(vcpu);
u64 data;

if (kvm_get_msr(vcpu, ecx, &data)) {
trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(vcpu, 0);
return 1;
}

trace_kvm_msr_read(ecx, data);

kvm_rax_write(vcpu, data & -1u);
kvm_rdx_write(vcpu, (data >> 32) & -1u);
return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);

int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
{
u32 ecx = kvm_rcx_read(vcpu);
u64 data = kvm_read_edx_eax(vcpu);

if (kvm_set_msr(vcpu, ecx, data)) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(vcpu, 0);
return 1;
}

trace_kvm_msr_write(ecx, data);
return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);

/*
* Adapt set_msr() to msr_io()'s calling convention
*/
static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
struct msr_data msr;
int r;

msr.index = index;
msr.host_initiated = true;
r = kvm_get_msr(vcpu, &msr);
if (r)
return r;

*data = msr.data;
return 0;
return __kvm_get_msr(vcpu, index, data, true);
}

static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
struct msr_data msr;

msr.data = *data;
msr.index = index;
msr.host_initiated = true;
return kvm_set_msr(vcpu, &msr);
return __kvm_set_msr(vcpu, index, *data, true);
}

#ifdef CONFIG_X86_64
@ -2452,6 +2525,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
|
|||
* Doing a TLB flush here, on the guest's behalf, can avoid
|
||||
* expensive IPIs.
|
||||
*/
|
||||
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
|
||||
vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB);
|
||||
if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
|
||||
kvm_vcpu_flush_tlb(vcpu, false);
|
||||
|
||||
|
@ -2748,18 +2823,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_set_msr_common);
|
||||
|
||||
|
||||
/*
|
||||
* Reads an msr value (of 'msr_index') into 'pdata'.
|
||||
* Returns 0 on success, non-0 otherwise.
|
||||
* Assumes vcpu_load() was already called.
|
||||
*/
|
||||
int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
{
|
||||
return kvm_x86_ops->get_msr(vcpu, msr);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_get_msr);
|
||||
|
||||
static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
|
||||
{
|
||||
u64 data;
|
||||
|
@ -3506,8 +3569,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
|
|||
for (bank = 0; bank < bank_num; bank++)
|
||||
vcpu->arch.mce_banks[bank*4] = ~(u64)0;
|
||||
|
||||
if (kvm_x86_ops->setup_mce)
|
||||
kvm_x86_ops->setup_mce(vcpu);
|
||||
kvm_x86_ops->setup_mce(vcpu);
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
@ -5377,7 +5439,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
|
|||
*/
|
||||
if (vcpu_match_mmio_gva(vcpu, gva)
|
||||
&& !permission_fault(vcpu, vcpu->arch.walk_mmu,
|
||||
vcpu->arch.access, 0, access)) {
|
||||
vcpu->arch.mmio_access, 0, access)) {
|
||||
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
|
||||
(gva & (PAGE_SIZE - 1));
|
||||
trace_vcpu_match_mmio(gva, *gpa, write, false);
|
||||
|
@ -5971,28 +6033,13 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
|
|||
static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
|
||||
u32 msr_index, u64 *pdata)
|
||||
{
|
||||
struct msr_data msr;
|
||||
int r;
|
||||
|
||||
msr.index = msr_index;
|
||||
msr.host_initiated = false;
|
||||
r = kvm_get_msr(emul_to_vcpu(ctxt), &msr);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
*pdata = msr.data;
|
||||
return 0;
|
||||
return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
|
||||
}
|
||||
|
||||
static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
|
||||
u32 msr_index, u64 data)
|
||||
{
|
||||
struct msr_data msr;
|
||||
|
||||
msr.data = data;
|
||||
msr.index = msr_index;
|
||||
msr.host_initiated = false;
|
||||
return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
|
||||
return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
|
||||
}
|
||||
|
||||
static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
|
||||
|
@ -6075,6 +6122,11 @@ static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
|
|||
kvm_smm_changed(emul_to_vcpu(ctxt));
|
||||
}
|
||||
|
||||
static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
|
||||
{
|
||||
return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
|
||||
}
|
||||
|
||||
static const struct x86_emulate_ops emulate_ops = {
|
||||
.read_gpr = emulator_read_gpr,
|
||||
.write_gpr = emulator_write_gpr,
|
||||
|
@ -6116,6 +6168,7 @@ static const struct x86_emulate_ops emulate_ops = {
|
|||
.set_hflags = emulator_set_hflags,
|
||||
.pre_leave_smm = emulator_pre_leave_smm,
|
||||
.post_leave_smm = emulator_post_leave_smm,
|
||||
.set_xcr = emulator_set_xcr,
|
||||
};
|
||||
|
||||
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
|
||||
|
@ -6390,9 +6443,11 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
|
|||
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
|
||||
int r = EMULATE_DONE;
|
||||
int r;
|
||||
|
||||
kvm_x86_ops->skip_emulated_instruction(vcpu);
|
||||
r = kvm_x86_ops->skip_emulated_instruction(vcpu);
|
||||
if (unlikely(r != EMULATE_DONE))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* rflags is the old, "raw" value of the flags. The new value has
|
||||
|
@ -6528,8 +6583,16 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
|||
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
|
||||
emulation_type))
|
||||
return EMULATE_DONE;
|
||||
if (ctxt->have_exception && inject_emulated_exception(vcpu))
|
||||
if (ctxt->have_exception) {
|
||||
/*
|
||||
* #UD should result in just EMULATION_FAILED, and trap-like
|
||||
* exception should not be encountered during decode.
|
||||
*/
|
||||
WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
|
||||
exception_type(ctxt->exception.vector) == EXCPT_TRAP);
|
||||
inject_emulated_exception(vcpu);
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
if (emulation_type & EMULTYPE_SKIP)
|
||||
return EMULATE_FAIL;
|
||||
return handle_emulation_failure(vcpu, emulation_type);
|
||||
|
@ -6544,6 +6607,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
|||
kvm_rip_write(vcpu, ctxt->_eip);
|
||||
if (ctxt->eflags & X86_EFLAGS_RF)
|
||||
kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
|
||||
kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
||||
|
@ -9322,10 +9386,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||
kvm_page_track_init(kvm);
|
||||
kvm_mmu_init_vm(kvm);
|
||||
|
||||
if (kvm_x86_ops->vm_init)
|
||||
return kvm_x86_ops->vm_init(kvm);
|
||||
|
||||
return 0;
|
||||
return kvm_x86_ops->vm_init(kvm);
|
||||
}
|
||||
|
||||
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
|
||||
|
@ -10017,7 +10078,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
|
|||
|
||||
bool kvm_arch_has_irq_bypass(void)
|
||||
{
|
||||
return kvm_x86_ops->update_pi_irte != NULL;
|
||||
return true;
|
||||
}
|
||||
|
||||
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
|
||||
|
@ -10057,9 +10118,6 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
|
|||
int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
|
||||
uint32_t guest_irq, bool set)
|
||||
{
|
||||
if (!kvm_x86_ops->update_pi_irte)
|
||||
return -EINVAL;
|
||||
|
||||
return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
|
||||
}
|
||||
|
||||
|
@ -10086,11 +10144,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
|
|||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
|
||||
|
|
|
@@ -196,7 +196,7 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
* actually a nGPA.
*/
vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK;
vcpu->arch.access = access;
vcpu->arch.mmio_access = access;
vcpu->arch.mmio_gfn = gfn;
vcpu->arch.mmio_gen = gen;
}
@ -249,6 +249,9 @@ struct vgic_dist {
|
|||
struct list_head lpi_list_head;
|
||||
int lpi_list_count;
|
||||
|
||||
/* LPI translation cache */
|
||||
struct list_head lpi_translation_cache;
|
||||
|
||||
/* used by vgic-debug */
|
||||
struct vgic_state_iter *iter;
|
||||
|
||||
|
@ -311,7 +314,6 @@ struct vgic_cpu {
|
|||
* parts of the redistributor.
|
||||
*/
|
||||
struct vgic_io_device rd_iodev;
|
||||
struct vgic_io_device sgi_iodev;
|
||||
struct vgic_redist_region *rdreg;
|
||||
|
||||
/* Contains the attributes and gpa of the LPI pending tables. */
|
||||
@@ -243,6 +243,8 @@ struct kvm_hyperv_exit {
#define KVM_INTERNAL_ERROR_SIMUL_EX 2
/* Encounter unexpected vm-exit due to delivery event. */
#define KVM_INTERNAL_ERROR_DELIVERY_EV 3
/* Encounter unexpected vm-exit reason */
#define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4

/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {

@@ -996,6 +998,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_PTRAUTH_ADDRESS 171
#define KVM_CAP_ARM_PTRAUTH_GENERIC 172
#define KVM_CAP_PMU_EVENT_FILTER 173
#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174

#ifdef KVM_CAP_IRQ_ROUTING
@@ -7,10 +7,10 @@ top_srcdir = ../../../..
KSFT_KHDR_INSTALL := 1
UNAME_M := $(shell uname -m)

LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c
LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c
LIBKVM_aarch64 = lib/aarch64/processor.c
LIBKVM_s390x = lib/s390x/processor.c
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/ucall.c
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c

TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test

@@ -32,7 +32,9 @@ TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus

TEST_GEN_PROGS_s390x = s390x/memop
TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus

TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
@ -26,8 +26,8 @@
|
|||
/* The memory slot index to track dirty pages */
|
||||
#define TEST_MEM_SLOT_INDEX 1
|
||||
|
||||
/* Default guest test memory offset, 1G */
|
||||
#define DEFAULT_GUEST_TEST_MEM 0x40000000
|
||||
/* Default guest test virtual memory offset */
|
||||
#define DEFAULT_GUEST_TEST_MEM 0xc0000000
|
||||
|
||||
/* How many pages to dirty for each guest loop */
|
||||
#define TEST_PAGES_PER_LOOP 1024
|
||||
|
@ -38,6 +38,27 @@
|
|||
/* Interval for each host loop (ms) */
|
||||
#define TEST_HOST_LOOP_INTERVAL 10UL
|
||||
|
||||
/* Dirty bitmaps are always little endian, so we need to swap on big endian */
|
||||
#if defined(__s390x__)
|
||||
# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
|
||||
# define test_bit_le(nr, addr) \
|
||||
test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define set_bit_le(nr, addr) \
|
||||
set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define clear_bit_le(nr, addr) \
|
||||
clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define test_and_set_bit_le(nr, addr) \
|
||||
test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define test_and_clear_bit_le(nr, addr) \
|
||||
test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
#else
|
||||
# define test_bit_le test_bit
|
||||
# define set_bit_le set_bit
|
||||
# define clear_bit_le clear_bit
|
||||
# define test_and_set_bit_le test_and_set_bit
|
||||
# define test_and_clear_bit_le test_and_clear_bit
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Guest/Host shared variables. Ensure addr_gva2hva() and/or
|
||||
* sync_global_to/from_guest() are used when accessing from
|
||||
|
@ -69,11 +90,23 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
|
|||
*/
|
||||
static void guest_code(void)
|
||||
{
|
||||
uint64_t addr;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* On s390x, all pages of a 1M segment are initially marked as dirty
|
||||
* when a page of the segment is written to for the very first time.
|
||||
* To compensate this specialty in this test, we need to touch all
|
||||
* pages during the first iteration.
|
||||
*/
|
||||
for (i = 0; i < guest_num_pages; i++) {
|
||||
addr = guest_test_virt_mem + i * guest_page_size;
|
||||
*(uint64_t *)addr = READ_ONCE(iteration);
|
||||
}
|
||||
|
||||
while (true) {
|
||||
for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
|
||||
uint64_t addr = guest_test_virt_mem;
|
||||
addr = guest_test_virt_mem;
|
||||
addr += (READ_ONCE(random_array[i]) % guest_num_pages)
|
||||
* guest_page_size;
|
||||
addr &= ~(host_page_size - 1);
|
||||
|
@ -158,15 +191,15 @@ static void vm_dirty_log_verify(unsigned long *bmap)
|
|||
value_ptr = host_test_mem + page * host_page_size;
|
||||
|
||||
/* If this is a special page that we were tracking... */
|
||||
if (test_and_clear_bit(page, host_bmap_track)) {
|
||||
if (test_and_clear_bit_le(page, host_bmap_track)) {
|
||||
host_track_next_count++;
|
||||
TEST_ASSERT(test_bit(page, bmap),
|
||||
TEST_ASSERT(test_bit_le(page, bmap),
|
||||
"Page %"PRIu64" should have its dirty bit "
|
||||
"set in this iteration but it is missing",
|
||||
page);
|
||||
}
|
||||
|
||||
if (test_bit(page, bmap)) {
|
||||
if (test_bit_le(page, bmap)) {
|
||||
host_dirty_count++;
|
||||
/*
|
||||
* If the bit is set, the value written onto
|
||||
|
@ -209,7 +242,7 @@ static void vm_dirty_log_verify(unsigned long *bmap)
|
|||
* should report its dirtyness in the
|
||||
* next run
|
||||
*/
|
||||
set_bit(page, host_bmap_track);
|
||||
set_bit_le(page, host_bmap_track);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -293,6 +326,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
|
|||
* case where the size is not aligned to 64 pages.
|
||||
*/
|
||||
guest_num_pages = (1ul << (30 - guest_page_shift)) + 16;
|
||||
#ifdef __s390x__
|
||||
/* Round up to multiple of 1M (segment size) */
|
||||
guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
|
||||
#endif
|
||||
host_page_size = getpagesize();
|
||||
host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
|
||||
!!((guest_num_pages * guest_page_size) % host_page_size);
|
||||
|
@ -304,6 +341,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
|
|||
guest_test_phys_mem = phys_offset;
|
||||
}
|
||||
|
||||
#ifdef __s390x__
|
||||
/* Align to 1M (segment size) */
|
||||
guest_test_phys_mem &= ~((1 << 20) - 1);
|
||||
#endif
|
||||
|
||||
DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
|
||||
|
||||
bmap = bitmap_alloc(host_num_pages);
|
||||
|
@ -337,7 +379,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
|
|||
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
|
||||
#endif
|
||||
#ifdef __aarch64__
|
||||
ucall_init(vm, UCALL_MMIO, NULL);
|
||||
ucall_init(vm, NULL);
|
||||
#endif
|
||||
|
||||
/* Export the shared variables to the guest */
|
||||
|
@ -454,6 +496,9 @@ int main(int argc, char *argv[])
|
|||
vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true);
|
||||
}
|
||||
#endif
|
||||
#ifdef __s390x__
|
||||
vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
|
||||
#endif
|
||||
|
||||
while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) {
|
||||
switch (opt) {
|
||||
|
|
|
@ -165,12 +165,6 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
|
|||
memcpy(&(g), _p, sizeof(g)); \
|
||||
})
|
||||
|
||||
/* ucall implementation types */
|
||||
typedef enum {
|
||||
UCALL_PIO,
|
||||
UCALL_MMIO,
|
||||
} ucall_type_t;
|
||||
|
||||
/* Common ucalls */
|
||||
enum {
|
||||
UCALL_NONE,
|
||||
|
@ -186,7 +180,7 @@ struct ucall {
|
|||
uint64_t args[UCALL_MAX_ARGS];
|
||||
};
|
||||
|
||||
void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg);
|
||||
void ucall_init(struct kvm_vm *vm, void *arg);
|
||||
void ucall_uninit(struct kvm_vm *vm);
|
||||
void ucall(uint64_t cmd, int nargs, ...);
|
||||
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* ucall support. A ucall is a "hypercall to userspace".
|
||||
*
|
||||
* Copyright (C) 2018, Red Hat, Inc.
|
||||
*/
|
||||
#include "kvm_util.h"
|
||||
#include "../kvm_util_internal.h"
|
||||
|
||||
static vm_vaddr_t *ucall_exit_mmio_addr;
|
||||
|
||||
static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
|
||||
{
|
||||
if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
|
||||
return false;
|
||||
|
||||
virt_pg_map(vm, gpa, gpa, 0);
|
||||
|
||||
ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
|
||||
sync_global_to_guest(vm, ucall_exit_mmio_addr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void ucall_init(struct kvm_vm *vm, void *arg)
|
||||
{
|
||||
vm_paddr_t gpa, start, end, step, offset;
|
||||
unsigned int bits;
|
||||
bool ret;
|
||||
|
||||
if (arg) {
|
||||
gpa = (vm_paddr_t)arg;
|
||||
ret = ucall_mmio_init(vm, gpa);
|
||||
TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find an address within the allowed physical and virtual address
|
||||
* spaces, that does _not_ have a KVM memory region associated with
|
||||
* it. Identity mapping an address like this allows the guest to
|
||||
* access it, but as KVM doesn't know what to do with it, it
|
||||
* will assume it's something userspace handles and exit with
|
||||
* KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
|
||||
* Here we start with a guess that the addresses around 5/8th
|
||||
* of the allowed space are unmapped and then work both down and
|
||||
* up from there in 1/16th allowed space sized steps.
|
||||
*
|
||||
* Note, we need to use VA-bits - 1 when calculating the allowed
|
||||
* virtual address space for an identity mapping because the upper
|
||||
* half of the virtual address space is the two's complement of the
|
||||
* lower and won't match physical addresses.
|
||||
*/
|
||||
bits = vm->va_bits - 1;
|
||||
bits = vm->pa_bits < bits ? vm->pa_bits : bits;
|
||||
end = 1ul << bits;
|
||||
start = end * 5 / 8;
|
||||
step = end / 16;
|
||||
for (offset = 0; offset < end - start; offset += step) {
|
||||
if (ucall_mmio_init(vm, start - offset))
|
||||
return;
|
||||
if (ucall_mmio_init(vm, start + offset))
|
||||
return;
|
||||
}
|
||||
TEST_ASSERT(false, "Can't find a ucall mmio address");
|
||||
}
|
||||
|
||||
void ucall_uninit(struct kvm_vm *vm)
|
||||
{
|
||||
ucall_exit_mmio_addr = 0;
|
||||
sync_global_to_guest(vm, ucall_exit_mmio_addr);
|
||||
}
|
||||
|
||||
void ucall(uint64_t cmd, int nargs, ...)
|
||||
{
|
||||
struct ucall uc = {
|
||||
.cmd = cmd,
|
||||
};
|
||||
va_list va;
|
||||
int i;
|
||||
|
||||
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
|
||||
|
||||
va_start(va, nargs);
|
||||
for (i = 0; i < nargs; ++i)
|
||||
uc.args[i] = va_arg(va, uint64_t);
|
||||
va_end(va);
|
||||
|
||||
*ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
|
||||
}
|
||||
|
||||
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
|
||||
{
|
||||
struct kvm_run *run = vcpu_state(vm, vcpu_id);
|
||||
struct ucall ucall = {};
|
||||
|
||||
if (run->exit_reason == KVM_EXIT_MMIO &&
|
||||
run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
|
||||
vm_vaddr_t gva;
|
||||
|
||||
TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
|
||||
"Unexpected ucall exit mmio address access");
|
||||
memcpy(&gva, run->mmio.data, sizeof(gva));
|
||||
memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall));
|
||||
|
||||
vcpu_run_complete_io(vm, vcpu_id);
|
||||
if (uc)
|
||||
memcpy(uc, &ucall, sizeof(ucall));
|
||||
}
|
||||
|
||||
return ucall.cmd;
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* ucall support. A ucall is a "hypercall to userspace".
|
||||
*
|
||||
* Copyright (C) 2019 Red Hat, Inc.
|
||||
*/
|
||||
#include "kvm_util.h"
|
||||
|
||||
void ucall_init(struct kvm_vm *vm, void *arg)
|
||||
{
|
||||
}
|
||||
|
||||
void ucall_uninit(struct kvm_vm *vm)
|
||||
{
|
||||
}
|
||||
|
||||
void ucall(uint64_t cmd, int nargs, ...)
|
||||
{
|
||||
struct ucall uc = {
|
||||
.cmd = cmd,
|
||||
};
|
||||
va_list va;
|
||||
int i;
|
||||
|
||||
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
|
||||
|
||||
va_start(va, nargs);
|
||||
for (i = 0; i < nargs; ++i)
|
||||
uc.args[i] = va_arg(va, uint64_t);
|
||||
va_end(va);
|
||||
|
||||
/* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
|
||||
asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory");
|
||||
}
|
||||
|
||||
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
|
||||
{
|
||||
struct kvm_run *run = vcpu_state(vm, vcpu_id);
|
||||
struct ucall ucall = {};
|
||||
|
||||
if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
|
||||
run->s390_sieic.icptcode == 4 &&
|
||||
(run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */
|
||||
(run->s390_sieic.ipb >> 16) == 0x501) {
|
||||
int reg = run->s390_sieic.ipa & 0xf;
|
||||
|
||||
memcpy(&ucall, addr_gva2hva(vm, run->s.regs.gprs[reg]),
|
||||
sizeof(ucall));
|
||||
|
||||
vcpu_run_complete_io(vm, vcpu_id);
|
||||
if (uc)
|
||||
memcpy(uc, &ucall, sizeof(ucall));
|
||||
}
|
||||
|
||||
return ucall.cmd;
|
||||
}
|
|
@ -1,157 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* ucall support. A ucall is a "hypercall to userspace".
|
||||
*
|
||||
* Copyright (C) 2018, Red Hat, Inc.
|
||||
*/
|
||||
#include "kvm_util.h"
|
||||
#include "kvm_util_internal.h"
|
||||
|
||||
#define UCALL_PIO_PORT ((uint16_t)0x1000)
|
||||
|
||||
static ucall_type_t ucall_type;
|
||||
static vm_vaddr_t *ucall_exit_mmio_addr;
|
||||
|
||||
static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
|
||||
{
|
||||
if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
|
||||
return false;
|
||||
|
||||
virt_pg_map(vm, gpa, gpa, 0);
|
||||
|
||||
ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
|
||||
sync_global_to_guest(vm, ucall_exit_mmio_addr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg)
|
||||
{
|
||||
ucall_type = type;
|
||||
sync_global_to_guest(vm, ucall_type);
|
||||
|
||||
if (type == UCALL_PIO)
|
||||
return;
|
||||
|
||||
if (type == UCALL_MMIO) {
|
||||
vm_paddr_t gpa, start, end, step, offset;
|
||||
unsigned bits;
|
||||
bool ret;
|
||||
|
||||
if (arg) {
|
||||
gpa = (vm_paddr_t)arg;
|
||||
ret = ucall_mmio_init(vm, gpa);
|
||||
TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find an address within the allowed physical and virtual address
|
||||
* spaces, that does _not_ have a KVM memory region associated with
|
||||
* it. Identity mapping an address like this allows the guest to
|
||||
* access it, but as KVM doesn't know what to do with it, it
|
||||
* will assume it's something userspace handles and exit with
|
||||
* KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
|
||||
* Here we start with a guess that the addresses around 5/8th
|
||||
* of the allowed space are unmapped and then work both down and
|
||||
* up from there in 1/16th allowed space sized steps.
|
||||
*
|
||||
* Note, we need to use VA-bits - 1 when calculating the allowed
|
||||
* virtual address space for an identity mapping because the upper
|
||||
* half of the virtual address space is the two's complement of the
|
||||
* lower and won't match physical addresses.
|
||||
*/
|
||||
bits = vm->va_bits - 1;
|
||||
bits = vm->pa_bits < bits ? vm->pa_bits : bits;
|
||||
end = 1ul << bits;
|
||||
start = end * 5 / 8;
|
||||
step = end / 16;
|
||||
for (offset = 0; offset < end - start; offset += step) {
|
||||
if (ucall_mmio_init(vm, start - offset))
|
||||
return;
|
||||
if (ucall_mmio_init(vm, start + offset))
|
||||
return;
|
||||
}
|
||||
TEST_ASSERT(false, "Can't find a ucall mmio address");
|
||||
}
|
||||
}
|
||||
|
||||
void ucall_uninit(struct kvm_vm *vm)
|
||||
{
|
||||
ucall_type = 0;
|
||||
sync_global_to_guest(vm, ucall_type);
|
||||
ucall_exit_mmio_addr = 0;
|
||||
sync_global_to_guest(vm, ucall_exit_mmio_addr);
|
||||
}
|
||||
|
||||
static void ucall_pio_exit(struct ucall *uc)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
asm volatile("in %[port], %%al"
|
||||
: : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax");
|
||||
#endif
|
||||
}
|
||||
|
||||
static void ucall_mmio_exit(struct ucall *uc)
|
||||
{
|
||||
*ucall_exit_mmio_addr = (vm_vaddr_t)uc;
|
||||
}
|
||||
|
||||
void ucall(uint64_t cmd, int nargs, ...)
|
||||
{
|
||||
struct ucall uc = {
|
||||
.cmd = cmd,
|
||||
};
|
||||
va_list va;
|
||||
int i;
|
||||
|
||||
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
|
||||
|
||||
va_start(va, nargs);
|
||||
for (i = 0; i < nargs; ++i)
|
||||
uc.args[i] = va_arg(va, uint64_t);
|
||||
va_end(va);
|
||||
|
||||
switch (ucall_type) {
|
||||
case UCALL_PIO:
|
||||
ucall_pio_exit(&uc);
|
||||
break;
|
||||
case UCALL_MMIO:
|
||||
ucall_mmio_exit(&uc);
|
||||
break;
|
||||
};
|
||||
}
|
||||
|
||||
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
|
||||
{
|
||||
struct kvm_run *run = vcpu_state(vm, vcpu_id);
|
||||
struct ucall ucall = {};
|
||||
bool got_ucall = false;
|
||||
|
||||
#ifdef __x86_64__
|
||||
if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO &&
|
||||
run->io.port == UCALL_PIO_PORT) {
|
||||
struct kvm_regs regs;
|
||||
vcpu_regs_get(vm, vcpu_id, ®s);
|
||||
memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(ucall));
|
||||
got_ucall = true;
|
||||
}
|
||||
#endif
|
||||
if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO &&
|
||||
run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
|
||||
vm_vaddr_t gva;
|
||||
TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
|
||||
"Unexpected ucall exit mmio address access");
|
||||
memcpy(&gva, run->mmio.data, sizeof(gva));
|
||||
memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall));
|
||||
got_ucall = true;
|
||||
}
|
||||
|
||||
if (got_ucall) {
|
||||
vcpu_run_complete_io(vm, vcpu_id);
|
||||
if (uc)
|
||||
memcpy(uc, &ucall, sizeof(ucall));
|
||||
}
|
||||
|
||||
return ucall.cmd;
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* ucall support. A ucall is a "hypercall to userspace".
|
||||
*
|
||||
* Copyright (C) 2018, Red Hat, Inc.
|
||||
*/
|
||||
#include "kvm_util.h"
|
||||
|
||||
#define UCALL_PIO_PORT ((uint16_t)0x1000)
|
||||
|
||||
void ucall_init(struct kvm_vm *vm, void *arg)
|
||||
{
|
||||
}
|
||||
|
||||
void ucall_uninit(struct kvm_vm *vm)
|
||||
{
|
||||
}
|
||||
|
||||
void ucall(uint64_t cmd, int nargs, ...)
|
||||
{
|
||||
struct ucall uc = {
|
||||
.cmd = cmd,
|
||||
};
|
||||
va_list va;
|
||||
int i;
|
||||
|
||||
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
|
||||
|
||||
va_start(va, nargs);
|
||||
for (i = 0; i < nargs; ++i)
|
||||
uc.args[i] = va_arg(va, uint64_t);
|
||||
va_end(va);
|
||||
|
||||
asm volatile("in %[port], %%al"
|
||||
: : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax");
|
||||
}
|
||||
|
||||
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
|
||||
{
|
||||
struct kvm_run *run = vcpu_state(vm, vcpu_id);
|
||||
struct ucall ucall = {};
|
||||
|
||||
if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
|
||||
struct kvm_regs regs;
|
||||
|
||||
vcpu_regs_get(vm, vcpu_id, ®s);
|
||||
memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi),
|
||||
sizeof(ucall));
|
||||
|
||||
vcpu_run_complete_io(vm, vcpu_id);
|
||||
if (uc)
|
||||
memcpy(uc, &ucall, sizeof(ucall));
|
||||
}
|
||||
|
||||
return ucall.cmd;
|
||||
}
|
|
@ -0,0 +1,166 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* Test for s390x KVM_S390_MEM_OP
|
||||
*
|
||||
* Copyright (C) 2019, Red Hat, Inc.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
|
||||
#define VCPU_ID 1
|
||||
|
||||
static uint8_t mem1[65536];
|
||||
static uint8_t mem2[65536];
|
||||
|
||||
static void guest_code(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (;;) {
|
||||
for (i = 0; i < sizeof(mem2); i++)
|
||||
mem2[i] = mem1[i];
|
||||
GUEST_SYNC(0);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
struct kvm_run *run;
|
||||
struct kvm_s390_mem_op ksmo;
|
||||
int rv, i, maxsize;
|
||||
|
||||
setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
|
||||
|
||||
maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP);
|
||||
if (!maxsize) {
|
||||
fprintf(stderr, "CAP_S390_MEM_OP not supported -> skip test\n");
|
||||
exit(KSFT_SKIP);
|
||||
}
|
||||
if (maxsize > sizeof(mem1))
|
||||
maxsize = sizeof(mem1);
|
||||
|
||||
/* Create VM */
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code);
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
for (i = 0; i < sizeof(mem1); i++)
|
||||
mem1[i] = i * i + i;
|
||||
|
||||
/* Set the first array */
|
||||
ksmo.gaddr = addr_gva2gpa(vm, (uintptr_t)mem1);
|
||||
ksmo.flags = 0;
|
||||
ksmo.size = maxsize;
|
||||
ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
|
||||
ksmo.buf = (uintptr_t)mem1;
|
||||
ksmo.ar = 0;
|
||||
vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
|
||||
|
||||
/* Let the guest code copy the first array to the second */
|
||||
vcpu_run(vm, VCPU_ID);
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
|
||||
"Unexpected exit reason: %u (%s)\n",
|
||||
run->exit_reason,
|
||||
exit_reason_str(run->exit_reason));
|
||||
|
||||
memset(mem2, 0xaa, sizeof(mem2));
|
||||
|
||||
/* Get the second array */
|
||||
ksmo.gaddr = (uintptr_t)mem2;
|
||||
ksmo.flags = 0;
|
||||
ksmo.size = maxsize;
|
||||
ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
|
||||
ksmo.buf = (uintptr_t)mem2;
|
||||
ksmo.ar = 0;
|
||||
vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
|
||||
|
||||
TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
|
||||
"Memory contents do not match!");
|
||||
|
||||
/* Check error conditions - first bad size: */
|
||||
ksmo.gaddr = (uintptr_t)mem1;
|
||||
ksmo.flags = 0;
|
||||
ksmo.size = -1;
|
||||
ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
|
||||
ksmo.buf = (uintptr_t)mem1;
|
||||
ksmo.ar = 0;
|
||||
rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
|
||||
TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
|
||||
|
||||
/* Zero size: */
|
||||
ksmo.gaddr = (uintptr_t)mem1;
|
||||
ksmo.flags = 0;
|
||||
ksmo.size = 0;
|
||||
ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
|
||||
ksmo.buf = (uintptr_t)mem1;
|
||||
ksmo.ar = 0;
|
||||
rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
|
||||
TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
|
||||
"ioctl allows 0 as size");
|
||||
|
||||
/* Bad flags: */
|
||||
ksmo.gaddr = (uintptr_t)mem1;
|
||||
ksmo.flags = -1;
|
||||
ksmo.size = maxsize;
|
||||
ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
|
||||
ksmo.buf = (uintptr_t)mem1;
|
||||
ksmo.ar = 0;
|
||||
rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
|
||||
TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
|
||||
|
||||
/* Bad operation: */
|
||||
ksmo.gaddr = (uintptr_t)mem1;
|
||||
ksmo.flags = 0;
|
||||
ksmo.size = maxsize;
|
||||
ksmo.op = -1;
|
||||
ksmo.buf = (uintptr_t)mem1;
|
||||
ksmo.ar = 0;
|
||||
rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
|
||||
TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
|
||||
|
||||
/* Bad guest address: */
|
||||
ksmo.gaddr = ~0xfffUL;
|
||||
ksmo.flags = KVM_S390_MEMOP_F_CHECK_ONLY;
|
||||
ksmo.size = maxsize;
|
||||
ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
|
||||
ksmo.buf = (uintptr_t)mem1;
|
||||
ksmo.ar = 0;
|
||||
rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
|
||||
TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access");
|
||||
|
||||
/* Bad host address: */
|
||||
ksmo.gaddr = (uintptr_t)mem1;
|
||||
ksmo.flags = 0;
|
||||
ksmo.size = maxsize;
|
||||
ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
|
||||
ksmo.buf = 0;
|
||||
ksmo.ar = 0;
|
||||
rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
|
||||
TEST_ASSERT(rv == -1 && errno == EFAULT,
|
||||
"ioctl does not report bad host memory address");
|
||||
|
||||
/* Bad access register: */
|
||||
run->psw_mask &= ~(3UL << (63 - 17));
|
||||
run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
|
||||
vcpu_run(vm, VCPU_ID); /* To sync new state to SIE block */
|
||||
ksmo.gaddr = (uintptr_t)mem1;
|
||||
ksmo.flags = 0;
|
||||
ksmo.size = maxsize;
|
||||
ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
|
||||
ksmo.buf = (uintptr_t)mem1;
|
||||
ksmo.ar = 17;
|
||||
rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
|
||||
TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
|
||||
run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */
|
||||
vcpu_run(vm, VCPU_ID); /* Run to sync new state */
|
||||
|
||||
kvm_vm_free(vm);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -25,9 +25,11 @@
|
|||
|
||||
static void guest_code(void)
|
||||
{
|
||||
register u64 stage asm("11") = 0;
|
||||
|
||||
for (;;) {
|
||||
asm volatile ("diag 0,0,0x501");
|
||||
asm volatile ("ahi 11,1");
|
||||
GUEST_SYNC(0);
|
||||
asm volatile ("ahi %0,1" : : "r"(stage));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -83,6 +85,36 @@ int main(int argc, char *argv[])
|
|||
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
/* Request reading invalid register set from VCPU. */
|
||||
run->kvm_valid_regs = INVALID_SYNC_FIELD;
|
||||
rv = _vcpu_run(vm, VCPU_ID);
|
||||
TEST_ASSERT(rv < 0 && errno == EINVAL,
|
||||
"Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
|
||||
rv);
|
||||
vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
|
||||
|
||||
run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
|
||||
rv = _vcpu_run(vm, VCPU_ID);
|
||||
TEST_ASSERT(rv < 0 && errno == EINVAL,
|
||||
"Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
|
||||
rv);
|
||||
vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
|
||||
|
||||
/* Request setting invalid register set into VCPU. */
|
||||
run->kvm_dirty_regs = INVALID_SYNC_FIELD;
|
||||
rv = _vcpu_run(vm, VCPU_ID);
|
||||
TEST_ASSERT(rv < 0 && errno == EINVAL,
|
||||
"Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
|
||||
rv);
|
||||
vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
|
||||
|
||||
run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
|
||||
rv = _vcpu_run(vm, VCPU_ID);
|
||||
TEST_ASSERT(rv < 0 && errno == EINVAL,
|
||||
"Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
|
||||
rv);
|
||||
vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
|
||||
|
||||
/* Request and verify all valid register sets. */
|
||||
run->kvm_valid_regs = TEST_SYNC_FIELDS;
|
||||
rv = _vcpu_run(vm, VCPU_ID);
|
||||
|
|
|
@ -196,6 +196,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_MP_STATE:
|
||||
case KVM_CAP_IMMEDIATE_EXIT:
|
||||
case KVM_CAP_VCPU_EVENTS:
|
||||
case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_ARM_SET_DEVICE_ADDR:
|
||||
|
@ -888,6 +889,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
|
|||
|
||||
irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
|
||||
vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
|
||||
vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
|
||||
irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
|
||||
|
||||
trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
|
||||
|
|
|
@ -54,6 +54,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
|
|||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
|
||||
INIT_LIST_HEAD(&dist->lpi_list_head);
|
||||
INIT_LIST_HEAD(&dist->lpi_translation_cache);
|
||||
raw_spin_lock_init(&dist->lpi_list_lock);
|
||||
}
|
||||
|
||||
|
@ -199,7 +200,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
|
|||
int i;
|
||||
|
||||
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
|
||||
vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF;
|
||||
|
||||
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
|
||||
raw_spin_lock_init(&vgic_cpu->ap_list_lock);
|
||||
|
@ -304,6 +304,7 @@ int vgic_init(struct kvm *kvm)
|
|||
}
|
||||
|
||||
if (vgic_has_its(kvm)) {
|
||||
vgic_lpi_translation_cache_init(kvm);
|
||||
ret = vgic_v4_init(kvm);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -345,6 +346,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
|
|||
INIT_LIST_HEAD(&dist->rd_regions);
|
||||
}
|
||||
|
||||
if (vgic_has_its(kvm))
|
||||
vgic_lpi_translation_cache_destroy(kvm);
|
||||
|
||||
if (vgic_supports_direct_msis(kvm))
|
||||
vgic_v4_teardown(kvm);
|
||||
}
|
||||
|
@ -515,7 +519,7 @@ int kvm_vgic_hyp_init(void)
|
|||
break;
|
||||
default:
|
||||
ret = -ENODEV;
|
||||
};
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
|
|
@ -66,6 +66,15 @@ int kvm_set_routing_entry(struct kvm *kvm,
|
|||
return r;
|
||||
}
|
||||
|
||||
static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm_msi *msi)
|
||||
{
|
||||
msi->address_lo = e->msi.address_lo;
|
||||
msi->address_hi = e->msi.address_hi;
|
||||
msi->data = e->msi.data;
|
||||
msi->flags = e->msi.flags;
|
||||
msi->devid = e->msi.devid;
|
||||
}
|
||||
/**
|
||||
* kvm_set_msi: inject the MSI corresponding to the
|
||||
* MSI routing entry
|
||||
|
@ -79,21 +88,36 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
|
|||
{
|
||||
struct kvm_msi msi;
|
||||
|
||||
msi.address_lo = e->msi.address_lo;
|
||||
msi.address_hi = e->msi.address_hi;
|
||||
msi.data = e->msi.data;
|
||||
msi.flags = e->msi.flags;
|
||||
msi.devid = e->msi.devid;
|
||||
|
||||
if (!vgic_has_its(kvm))
|
||||
return -ENODEV;
|
||||
|
||||
if (!level)
|
||||
return -1;
|
||||
|
||||
kvm_populate_msi(e, &msi);
|
||||
return vgic_its_inject_msi(kvm, &msi);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_arch_set_irq_inatomic: fast-path for irqfd injection
|
||||
*
|
||||
* Currently only direct MSI injection is supported.
|
||||
*/
|
||||
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm, int irq_source_id, int level,
|
||||
bool line_status)
|
||||
{
|
||||
if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) {
|
||||
struct kvm_msi msi;
|
||||
|
||||
kvm_populate_msi(e, &msi);
|
||||
if (!vgic_its_inject_cached_translation(kvm, &msi))
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EWOULDBLOCK;
|
||||
}
|
||||
|
||||
int kvm_vgic_setup_default_irq_routing(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_irq_routing_entry *entries;
|
||||
|
|
|
@ -138,6 +138,14 @@ struct its_ite {
|
|||
u32 event_id;
|
||||
};
|
||||
|
||||
struct vgic_translation_cache_entry {
|
||||
struct list_head entry;
|
||||
phys_addr_t db;
|
||||
u32 devid;
|
||||
u32 eventid;
|
||||
struct vgic_irq *irq;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vgic_its_abi - ITS abi ops and settings
|
||||
* @cte_esz: collection table entry size
|
||||
|
@ -527,6 +535,127 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist,
|
||||
phys_addr_t db,
|
||||
u32 devid, u32 eventid)
|
||||
{
|
||||
struct vgic_translation_cache_entry *cte;
|
||||
|
||||
list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
|
||||
/*
|
||||
* If we hit a NULL entry, there is nothing after this
|
||||
* point.
|
||||
*/
|
||||
if (!cte->irq)
|
||||
break;
|
||||
|
||||
if (cte->db != db || cte->devid != devid ||
|
||||
cte->eventid != eventid)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Move this entry to the head, as it is the most
|
||||
* recently used.
|
||||
*/
|
||||
if (!list_is_first(&cte->entry, &dist->lpi_translation_cache))
|
||||
list_move(&cte->entry, &dist->lpi_translation_cache);
|
||||
|
||||
return cte->irq;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
|
||||
u32 devid, u32 eventid)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct vgic_irq *irq;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
|
||||
irq = __vgic_its_check_cache(dist, db, devid, eventid);
|
||||
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
|
||||
|
||||
return irq;
|
||||
}
|
||||
|
||||
static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
|
||||
u32 devid, u32 eventid,
|
||||
struct vgic_irq *irq)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct vgic_translation_cache_entry *cte;
|
||||
unsigned long flags;
|
||||
phys_addr_t db;
|
||||
|
||||
/* Do not cache a directly injected interrupt */
|
||||
if (irq->hw)
|
||||
return;
|
||||
|
||||
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
|
||||
|
||||
if (unlikely(list_empty(&dist->lpi_translation_cache)))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* We could have raced with another CPU caching the same
|
||||
* translation behind our back, so let's check it is not in
|
||||
* already
|
||||
*/
|
||||
db = its->vgic_its_base + GITS_TRANSLATER;
|
||||
if (__vgic_its_check_cache(dist, db, devid, eventid))
|
||||
goto out;
|
||||
|
||||
/* Always reuse the last entry (LRU policy) */
|
||||
cte = list_last_entry(&dist->lpi_translation_cache,
|
||||
typeof(*cte), entry);
|
||||
|
||||
/*
|
||||
* Caching the translation implies having an extra reference
|
||||
* to the interrupt, so drop the potential reference on what
|
||||
* was in the cache, and increment it on the new interrupt.
|
||||
*/
|
||||
if (cte->irq)
|
||||
__vgic_put_lpi_locked(kvm, cte->irq);
|
||||
|
||||
vgic_get_irq_kref(irq);
|
||||
|
||||
cte->db = db;
|
||||
cte->devid = devid;
|
||||
cte->eventid = eventid;
|
||||
cte->irq = irq;
|
||||
|
||||
/* Move the new translation to the head of the list */
|
||||
list_move(&cte->entry, &dist->lpi_translation_cache);
|
||||
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
|
||||
}
|
||||
|
||||
void vgic_its_invalidate_cache(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct vgic_translation_cache_entry *cte;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
|
||||
|
||||
list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
|
||||
/*
|
||||
* If we hit a NULL entry, there is nothing after this
|
||||
* point.
|
||||
*/
|
||||
if (!cte->irq)
|
||||
break;
|
||||
|
||||
__vgic_put_lpi_locked(kvm, cte->irq);
|
||||
cte->irq = NULL;
|
||||
}
|
||||
|
||||
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
|
||||
}
|
||||
|
||||
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
|
||||
u32 devid, u32 eventid, struct vgic_irq **irq)
|
||||
{
|
||||
|
@ -547,6 +676,8 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
|
|||
if (!vcpu->arch.vgic_cpu.lpis_enabled)
|
||||
return -EBUSY;
|
||||
|
||||
vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq);
|
||||
|
||||
*irq = ite->irq;
|
||||
return 0;
|
||||
}
|
||||
|
@ -608,6 +739,25 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
|
||||
{
|
||||
struct vgic_irq *irq;
|
||||
unsigned long flags;
|
||||
phys_addr_t db;
|
||||
|
||||
db = (u64)msi->address_hi << 32 | msi->address_lo;
|
||||
irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data);
|
||||
|
||||
if (!irq)
|
||||
return -1;
|
||||
|
||||
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
irq->pending_latch = true;
|
||||
vgic_queue_irq_unlock(kvm, irq, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Queries the KVM IO bus framework to get the ITS pointer from the given
|
||||
* doorbell address.
|
||||
|
@ -619,6 +769,9 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
|
|||
struct vgic_its *its;
|
||||
int ret;
|
||||
|
||||
if (!vgic_its_inject_cached_translation(kvm, msi))
|
||||
return 1;
|
||||
|
||||
its = vgic_msi_to_its(kvm, msi);
|
||||
if (IS_ERR(its))
|
||||
return PTR_ERR(its);
|
||||
|
@ -691,6 +844,8 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
|
|||
* don't bother here since we clear the ITTE anyway and the
|
||||
* pending state is a property of the ITTE struct.
|
||||
*/
|
||||
vgic_its_invalidate_cache(kvm);
|
||||
|
||||
its_free_ite(kvm, ite);
|
||||
return 0;
|
||||
}
|
||||
|
@ -726,6 +881,8 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
|
|||
ite->collection = collection;
|
||||
vcpu = kvm_get_vcpu(kvm, collection->target_addr);
|
||||
|
||||
vgic_its_invalidate_cache(kvm);
|
||||
|
||||
return update_affinity(ite->irq, vcpu);
|
||||
}
|
||||
|
||||
|
@ -954,6 +1111,8 @@ static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
|
|||
list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list)
|
||||
its_free_ite(kvm, ite);
|
||||
|
||||
vgic_its_invalidate_cache(kvm);
|
||||
|
||||
list_del(&device->dev_list);
|
||||
kfree(device);
|
||||
}
|
||||
|
@ -1059,6 +1218,7 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
|
|||
|
||||
if (!valid) {
|
||||
vgic_its_free_collection(its, coll_id);
|
||||
vgic_its_invalidate_cache(kvm);
|
||||
} else {
|
||||
collection = find_collection(its, coll_id);
|
||||
|
||||
|
@ -1207,6 +1367,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
|
|||
vgic_put_irq(kvm, irq);
|
||||
}
|
||||
|
||||
vgic_its_invalidate_cache(kvm);
|
||||
|
||||
kfree(intids);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1557,6 +1719,8 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
|
|||
goto out;
|
||||
|
||||
its->enabled = !!(val & GITS_CTLR_ENABLE);
|
||||
if (!its->enabled)
|
||||
vgic_its_invalidate_cache(kvm);
|
||||
|
||||
/*
|
||||
* Try to process any pending commands. This function bails out early
|
||||
|
@ -1657,6 +1821,47 @@ static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/* Default is 16 cached LPIs per vcpu */
|
||||
#define LPI_DEFAULT_PCPU_CACHE_SIZE 16
|
||||
|
||||
void vgic_lpi_translation_cache_init(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
unsigned int sz;
|
||||
int i;
|
||||
|
||||
if (!list_empty(&dist->lpi_translation_cache))
|
||||
return;
|
||||
|
||||
sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE;
|
||||
|
||||
for (i = 0; i < sz; i++) {
|
||||
struct vgic_translation_cache_entry *cte;
|
||||
|
||||
/* An allocation failure is not fatal */
|
||||
cte = kzalloc(sizeof(*cte), GFP_KERNEL);
|
||||
if (WARN_ON(!cte))
|
||||
break;
|
||||
|
||||
INIT_LIST_HEAD(&cte->entry);
|
||||
list_add(&cte->entry, &dist->lpi_translation_cache);
|
||||
}
|
||||
}
|
||||
|
||||
void vgic_lpi_translation_cache_destroy(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct vgic_translation_cache_entry *cte, *tmp;
|
||||
|
||||
vgic_its_invalidate_cache(kvm);
|
||||
|
||||
list_for_each_entry_safe(cte, tmp,
|
||||
&dist->lpi_translation_cache, entry) {
|
||||
list_del(&cte->entry);
|
||||
kfree(cte);
|
||||
}
|
||||
}
|
||||
|
||||
#define INITIAL_BASER_VALUE \
|
||||
(GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \
|
||||
GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \
|
||||
|
@ -1685,6 +1890,8 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
|
|||
kfree(its);
|
||||
return ret;
|
||||
}
|
||||
|
||||
vgic_lpi_translation_cache_init(dev->kvm);
|
||||
}
|
||||
|
||||
mutex_init(&its->its_lock);
|
||||
|
|
|
@ -192,8 +192,10 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
|
|||
|
||||
vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS;
|
||||
|
||||
if (was_enabled && !vgic_cpu->lpis_enabled)
|
||||
if (was_enabled && !vgic_cpu->lpis_enabled) {
|
||||
vgic_flush_pending_lpis(vcpu);
|
||||
vgic_its_invalidate_cache(vcpu->kvm);
|
||||
}
|
||||
|
||||
if (!was_enabled && vgic_cpu->lpis_enabled)
|
||||
vgic_enable_lpis(vcpu);
|
||||
|
@ -515,7 +517,8 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
|
|||
VGIC_ACCESS_32bit),
|
||||
};
|
||||
|
||||
static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
|
||||
static const struct vgic_register_region vgic_v3_rd_registers[] = {
|
||||
/* RD_base registers */
|
||||
REGISTER_DESC_WITH_LENGTH(GICR_CTLR,
|
||||
vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
|
@ -540,44 +543,42 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
|
|||
REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
|
||||
vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
|
||||
VGIC_ACCESS_32bit),
|
||||
};
|
||||
|
||||
static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
|
||||
REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
|
||||
/* SGI_base registers */
|
||||
REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0,
|
||||
vgic_mmio_read_group, vgic_mmio_write_group, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
|
||||
REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ISENABLER0,
|
||||
vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0,
|
||||
REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICENABLER0,
|
||||
vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISPENDR0,
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_spending,
|
||||
vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0,
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_cpending,
|
||||
vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0,
|
||||
vgic_mmio_read_active, vgic_mmio_write_sactive,
|
||||
NULL, vgic_mmio_uaccess_write_sactive,
|
||||
4, VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0,
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0,
|
||||
vgic_mmio_read_active, vgic_mmio_write_cactive,
|
||||
NULL, vgic_mmio_uaccess_write_cactive,
|
||||
4, VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0,
|
||||
REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0,
|
||||
vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
|
||||
VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
|
||||
REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0,
|
||||
REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICFGR0,
|
||||
vgic_mmio_read_config, vgic_mmio_write_config, 8,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0,
|
||||
REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGRPMODR0,
|
||||
vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH(GICR_NSACR,
|
||||
REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR,
|
||||
vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
};
|
||||
|
@ -607,9 +608,8 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
|
|||
struct vgic_dist *vgic = &kvm->arch.vgic;
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
|
||||
struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
|
||||
struct vgic_redist_region *rdreg;
|
||||
gpa_t rd_base, sgi_base;
|
||||
gpa_t rd_base;
|
||||
int ret;
|
||||
|
||||
if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr))
|
||||
|
@@ -631,52 +631,31 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 	vgic_cpu->rdreg = rdreg;

 	rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
-	sgi_base = rd_base + SZ_64K;

 	kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
 	rd_dev->base_addr = rd_base;
 	rd_dev->iodev_type = IODEV_REDIST;
-	rd_dev->regions = vgic_v3_rdbase_registers;
-	rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+	rd_dev->regions = vgic_v3_rd_registers;
+	rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
 	rd_dev->redist_vcpu = vcpu;

 	mutex_lock(&kvm->slots_lock);
 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
-				      SZ_64K, &rd_dev->dev);
+				      2 * SZ_64K, &rd_dev->dev);
 	mutex_unlock(&kvm->slots_lock);

 	if (ret)
 		return ret;

-	kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
-	sgi_dev->base_addr = sgi_base;
-	sgi_dev->iodev_type = IODEV_REDIST;
-	sgi_dev->regions = vgic_v3_sgibase_registers;
-	sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
-	sgi_dev->redist_vcpu = vcpu;
-
-	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
-				      SZ_64K, &sgi_dev->dev);
-	if (ret) {
-		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
-					  &rd_dev->dev);
-		goto out;
-	}
-
 	rdreg->free_index++;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return ret;
+	return 0;
 }

 static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
 {
 	struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
-	struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;

 	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev);
-	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &sgi_dev->dev);
 }

 static int vgic_register_all_redist_iodevs(struct kvm *kvm)
@@ -826,8 +805,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
 		iodev.base_addr = 0;
 		break;
 	case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{
-		iodev.regions = vgic_v3_rdbase_registers;
-		iodev.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+		iodev.regions = vgic_v3_rd_registers;
+		iodev.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
 		iodev.base_addr = 0;
 		break;
 	}
@@ -985,21 +964,11 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
 			   int offset, u32 *val)
 {
 	struct vgic_io_device rd_dev = {
-		.regions = vgic_v3_rdbase_registers,
-		.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers),
+		.regions = vgic_v3_rd_registers,
+		.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers),
 	};

-	struct vgic_io_device sgi_dev = {
-		.regions = vgic_v3_sgibase_registers,
-		.nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers),
-	};
-
-	/* SGI_base is the next 64K frame after RD_base */
-	if (offset >= SZ_64K)
-		return vgic_uaccess(vcpu, &sgi_dev, is_write, offset - SZ_64K,
-				    val);
-	else
-		return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
+	return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
 }

 int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
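
Taken together, the redistributor hunks above collapse the two 64K frames (RD_base and its SGI_base companion) into a single 128K KVM I/O device backed by one register table, vgic_v3_rd_registers: SGI-frame registers are now declared at SZ_64K plus their architectural offset, kvm_io_bus_register_dev() claims 2 * SZ_64K in one call, and the uaccess path no longer dispatches between two devices. The fragment below is an illustrative sketch only, not part of the patch; sgi_frame_reg_addr() is a hypothetical helper, and the 0x0400/0x10400 example offsets are assumptions taken from the GICv3 register layout.

#include <linux/sizes.h>
#include <linux/types.h>

/*
 * Sketch of the offset arithmetic the single-table layout relies on: an
 * SGI-frame register handled at "SZ_64K + reg" in the combined 128K device
 * sits at the same guest address it had when the SGI frame was a separate
 * 64K device starting at rd_base + SZ_64K.
 */
static inline u64 sgi_frame_reg_addr(u64 rd_base, u32 reg)
{
	/* e.g. GICR_IPRIORITYR0 (0x0400 in the SGI frame) -> rd_base + 0x10400 */
	return rd_base + SZ_64K + reg;
}

A side effect visible in vgic_register_redist_iodev() is that the error path no longer has to unwind a half-registered RD/SGI pair, which is why the out: label and the second registration call disappear.
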
@@ -357,10 +357,11 @@ int vgic_v2_map_resources(struct kvm *kvm)
 DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap);

 /**
- * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
- * @node: pointer to the DT node
+ * vgic_v2_probe - probe for a VGICv2 compatible interrupt controller
+ * @info: pointer to the GIC description
  *
- * Returns 0 if a GICv2 has been found, returns an error code otherwise
+ * Returns 0 if the VGICv2 has been probed successfully, returns an error code
+ * otherwise
  */
 int vgic_v2_probe(const struct gic_kvm_info *info)
 {

@@ -573,10 +573,11 @@ static int __init early_gicv4_enable(char *buf)
 early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);

 /**
- * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
- * @node: pointer to the DT node
+ * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
+ * @info: pointer to the GIC description
  *
- * Returns 0 if a GICv3 has been found, returns an error code otherwise
+ * Returns 0 if the VGICv3 has been probed successfully, returns an error code
+ * otherwise
  */
 int vgic_v3_probe(const struct gic_kvm_info *info)
 {

@@ -119,6 +119,22 @@ static void vgic_irq_release(struct kref *ref)
 {
 }

+/*
+ * Drop the refcount on the LPI. Must be called with lpi_list_lock held.
+ */
+void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
+	if (!kref_put(&irq->refcount, vgic_irq_release))
+		return;
+
+	list_del(&irq->lpi_list);
+	dist->lpi_list_count--;
+
+	kfree(irq);
+}
+
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
@@ -128,16 +144,8 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 		return;

 	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-	if (!kref_put(&irq->refcount, vgic_irq_release)) {
-		raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
-		return;
-	};
-
-	list_del(&irq->lpi_list);
-	dist->lpi_list_count--;
+	__vgic_put_lpi_locked(kvm, irq);
 	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
-
-	kfree(irq);
 }

 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
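
The two vgic core hunks above split the locked part of vgic_put_irq() into a new helper, __vgic_put_lpi_locked(), so a caller that already holds dist->lpi_list_lock (presumably the ITS translation-cache code whose declarations appear in the header hunk further down) can drop an LPI reference without retaking the lock. A minimal caller sketch, assuming the usual vgic headers; drop_lpi_ref_locked() is a hypothetical name, not a function from this series:

/*
 * Hypothetical caller, for illustration only: drop an LPI reference from a
 * context that already holds dist->lpi_list_lock, mirroring what the new
 * vgic_put_irq() body does above.
 */
static void drop_lpi_ref_locked(struct kvm *kvm, struct vgic_irq *irq)
{
	struct vgic_dist *dist = &kvm->arch.vgic;

	lockdep_assert_held(&dist->lpi_list_lock);

	/* Unlinks the IRQ from the LPI list and frees it on the final put. */
	__vgic_put_lpi_locked(kvm, irq);
}
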
@@ -161,6 +161,7 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
 		     gpa_t addr, int len);
 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
 			      u32 intid);
+void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq);
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
 bool vgic_get_phys_line_level(struct vgic_irq *irq);
 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
@@ -307,6 +308,10 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
 int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
 			 u32 devid, u32 eventid, struct vgic_irq **irq);
 struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
+int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi);
+void vgic_lpi_translation_cache_init(struct kvm *kvm);
+void vgic_lpi_translation_cache_destroy(struct kvm *kvm);
+void vgic_its_invalidate_cache(struct kvm *kvm);

 bool vgic_supports_direct_msis(struct kvm *kvm);
 int vgic_v4_init(struct kvm *kvm);

@@ -2321,6 +2321,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	bool waited = false;
 	u64 block_ns;

+	kvm_arch_vcpu_blocking(vcpu);
+
 	start = cur = ktime_get();
 	if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
 		ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
@@ -2341,8 +2343,6 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		} while (single_task_running() && ktime_before(cur, stop));
 	}

-	kvm_arch_vcpu_blocking(vcpu);
-
 	for (;;) {
 		prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);

@@ -2355,9 +2355,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)

 	finish_swait(&vcpu->wq, &wait);
 	cur = ktime_get();
-
-	kvm_arch_vcpu_unblocking(vcpu);
 out:
+	kvm_arch_vcpu_unblocking(vcpu);
 	block_ns = ktime_to_ns(cur) - ktime_to_ns(start);

 	if (!vcpu_valid_wakeup(vcpu))
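
The three kvm_vcpu_block() hunks above move kvm_arch_vcpu_blocking() ahead of the halt-polling loop and kvm_arch_vcpu_unblocking() below the out: label, so the architecture callbacks now bracket the whole blocking sequence, including the polling window and the early exit taken when polling succeeds. A condensed sketch of the resulting control flow; halt_poll_succeeded() and wait_for_wakeup() are hypothetical stand-ins for the polling and swait loops, and the statistics/tracepoint code is elided:

/* Hypothetical stand-ins for the halt-polling loop and the swait loop. */
static bool halt_poll_succeeded(struct kvm_vcpu *vcpu) { return false; }
static void wait_for_wakeup(struct kvm_vcpu *vcpu) { }

/* Condensed, illustrative control flow of kvm_vcpu_block() after the change. */
static void kvm_vcpu_block_sketch(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_blocking(vcpu);	/* now precedes halt polling */

	if (halt_poll_succeeded(vcpu))
		goto out;

	wait_for_wakeup(vcpu);
out:
	kvm_arch_vcpu_unblocking(vcpu);	/* now runs on the early-exit path too */
}

The likely motivation is that arch-level blocking work (for example arming a wake-up doorbell) is in place before the vCPU starts polling, so events that arrive during the polling window are not lost.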