Second batch of KVM updates for v4.13
Common: - add uevents for VM creation/destruction - annotate and properly access RCU-protected objects s390: - rename IOCTL added in the first v4.13 merge x86: - emulate VMLOAD VMSAVE feature in SVM - support paravirtual asynchronous page fault while nested - add Hyper-V userspace interfaces for better migration - improve master clock corner cases - extend internal error reporting after EPT misconfig - correct single-stepping of emulated instructions in SVM - handle MCE during VM entry - fix nVMX VM entry checks and nVMX VMCS shadowing -----BEGIN PGP SIGNATURE----- iQEcBAABCAAGBQJZaOm6AAoJEED/6hsPKofoqO8H/3breVIyVv9mwg7A5+o+6LTq GzV/YXHSC8NtfxZn8ViS/TCziYiBSFv7XiPSodkXbOgYSz8Yya5x9D+dbEH+xgG7 l+LsZEqdSFbHCkvKrMiwSsoXtsT5WygA56+KZiBmu8cvlwqSyXWHFn3ZJ1wKzGq/ zivlkfCoh2m6bGdNmrG9pHUSgxvDh94pXesaVBKy4hgeovY1qjzby3Lo+HuIUzai exuEU1EKRlUIfLK1B2Anp5IIv5Q1lFnMSvD6YSiWYywZb95dN/adsX1bv+MKeOdt TIAgotsWjaAuT9JolAJjfVPHG0+uMBMsWg4Zh9Ra/gPPaSh3KEC2h1++zEYKjvw= =1zII -----END PGP SIGNATURE----- Merge tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull more KVM updates from Radim Krčmář: "Second batch of KVM updates for v4.13 Common: - add uevents for VM creation/destruction - annotate and properly access RCU-protected objects s390: - rename IOCTL added in the first v4.13 merge x86: - emulate VMLOAD VMSAVE feature in SVM - support paravirtual asynchronous page fault while nested - add Hyper-V userspace interfaces for better migration - improve master clock corner cases - extend internal error reporting after EPT misconfig - correct single-stepping of emulated instructions in SVM - handle MCE during VM entry - fix nVMX VM entry checks and nVMX VMCS shadowing" * tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits) kvm: x86: hyperv: make VP_INDEX managed by userspace KVM: async_pf: Let guest support delivery of async_pf from guest mode KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf KVM: async_pf: Add L1 guest async_pf #PF 
vmexit handler KVM: x86: Simplify kvm_x86_ops->queue_exception parameter list kvm: x86: hyperv: add KVM_CAP_HYPERV_SYNIC2 KVM: x86: make backwards_tsc_observed a per-VM variable KVM: trigger uevents when creating or destroying a VM KVM: SVM: Enable Virtual VMLOAD VMSAVE feature KVM: SVM: Add Virtual VMLOAD VMSAVE feature definition KVM: SVM: Rename lbr_ctl field in the vmcb control area KVM: SVM: Prepare for new bit definition in lbr_ctl KVM: SVM: handle singlestep exception when skipping emulated instructions KVM: x86: take slots_lock in kvm_free_pit KVM: s390: Fix KVM_S390_GET_CMMA_BITS ioctl definition kvm: vmx: Properly handle machine check during VM-entry KVM: x86: update master clock before computing kvmclock_offset kvm: nVMX: Shadow "high" parts of shadowed 64-bit VMCS fields kvm: nVMX: Fix nested_vmx_check_msr_bitmap_controls kvm: nVMX: Validate the I/O bitmaps on nested VM-entry ...
This commit is contained in:
commit
e37a07e0c2
|
@ -4329,3 +4329,21 @@ Querying this capability returns a bitmap indicating the possible
|
|||
virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N
|
||||
(counting from the right) is set, then a virtual SMT mode of 2^N is
|
||||
available.
|
||||
|
||||
8.11 KVM_CAP_HYPERV_SYNIC2
|
||||
|
||||
Architectures: x86
|
||||
|
||||
This capability enables a newer version of Hyper-V Synthetic interrupt
|
||||
controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
|
||||
doesn't clear SynIC message and event flags pages when they are enabled by
|
||||
writing to the respective MSRs.
|
||||
|
||||
8.12 KVM_CAP_HYPERV_VP_INDEX
|
||||
|
||||
Architectures: x86
|
||||
|
||||
This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr. Its
|
||||
value is used to denote the target vcpu for a SynIC interrupt. For
|
||||
compatibility, KVM initializes this msr to KVM's internal vcpu index. When this
|
||||
capability is absent, userspace can still query this msr's value.
|
||||
|
|
|
@ -166,10 +166,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
|
|||
MSR_KVM_ASYNC_PF_EN: 0x4b564d02
|
||||
data: Bits 63-6 hold 64-byte aligned physical address of a
|
||||
64 byte memory area which must be in guest RAM and must be
|
||||
zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
|
||||
zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
|
||||
when asynchronous page faults are enabled on the vcpu, 0 when
|
||||
disabled. Bit 1 is 1 if asynchronous page faults can be injected
|
||||
when vcpu is in cpl == 0.
|
||||
when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
|
||||
are delivered to L1 as #PF vmexits.
|
||||
|
||||
The first 4 bytes of the 64-byte memory location will be written to by
|
||||
the hypervisor at the time of asynchronous page fault (APF)
|
||||
|
|
|
@ -286,6 +286,7 @@
|
|||
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
|
||||
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
|
||||
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
|
||||
#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
|
||||
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
|
||||
|
|
|
@ -23,6 +23,7 @@ struct x86_exception {
|
|||
u16 error_code;
|
||||
bool nested_page_fault;
|
||||
u64 address; /* cr2 or nested page fault gpa */
|
||||
u8 async_page_fault;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
@ -462,10 +462,12 @@ struct kvm_vcpu_hv_synic {
|
|||
DECLARE_BITMAP(auto_eoi_bitmap, 256);
|
||||
DECLARE_BITMAP(vec_bitmap, 256);
|
||||
bool active;
|
||||
bool dont_zero_synic_pages;
|
||||
};
|
||||
|
||||
/* Hyper-V per vcpu emulation context */
|
||||
struct kvm_vcpu_hv {
|
||||
u32 vp_index;
|
||||
u64 hv_vapic;
|
||||
s64 runtime_offset;
|
||||
struct kvm_vcpu_hv_synic synic;
|
||||
|
@ -549,6 +551,7 @@ struct kvm_vcpu_arch {
|
|||
bool reinject;
|
||||
u8 nr;
|
||||
u32 error_code;
|
||||
u8 nested_apf;
|
||||
} exception;
|
||||
|
||||
struct kvm_queued_interrupt {
|
||||
|
@ -649,6 +652,9 @@ struct kvm_vcpu_arch {
|
|||
u64 msr_val;
|
||||
u32 id;
|
||||
bool send_user_only;
|
||||
u32 host_apf_reason;
|
||||
unsigned long nested_apf_token;
|
||||
bool delivery_as_pf_vmexit;
|
||||
} apf;
|
||||
|
||||
/* OSVW MSRs (AMD only) */
|
||||
|
@ -803,6 +809,7 @@ struct kvm_arch {
|
|||
int audit_point;
|
||||
#endif
|
||||
|
||||
bool backwards_tsc_observed;
|
||||
bool boot_vcpu_runs_old_kvmclock;
|
||||
u32 bsp_vcpu_id;
|
||||
|
||||
|
@ -952,9 +959,7 @@ struct kvm_x86_ops {
|
|||
unsigned char *hypercall_addr);
|
||||
void (*set_irq)(struct kvm_vcpu *vcpu);
|
||||
void (*set_nmi)(struct kvm_vcpu *vcpu);
|
||||
void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
|
||||
bool has_error_code, u32 error_code,
|
||||
bool reinject);
|
||||
void (*queue_exception)(struct kvm_vcpu *vcpu);
|
||||
void (*cancel_injection)(struct kvm_vcpu *vcpu);
|
||||
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
|
||||
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
|
||||
|
|
|
@ -83,7 +83,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
|
|||
u32 event_inj;
|
||||
u32 event_inj_err;
|
||||
u64 nested_cr3;
|
||||
u64 lbr_ctl;
|
||||
u64 virt_ext;
|
||||
u32 clean;
|
||||
u32 reserved_5;
|
||||
u64 next_rip;
|
||||
|
@ -119,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
|
|||
#define AVIC_ENABLE_SHIFT 31
|
||||
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
|
||||
|
||||
#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
|
||||
#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
|
||||
|
||||
#define SVM_INTERRUPT_SHADOW_MASK 1
|
||||
|
||||
#define SVM_IOIO_STR_SHIFT 2
|
||||
|
|
|
@ -67,6 +67,7 @@ struct kvm_clock_pairing {
|
|||
|
||||
#define KVM_ASYNC_PF_ENABLED (1 << 0)
|
||||
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
|
||||
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
|
||||
|
||||
/* Operations for KVM_HC_MMU_OP */
|
||||
#define KVM_MMU_OP_WRITE_PTE 1
|
||||
|
|
|
@ -330,7 +330,12 @@ static void kvm_guest_cpu_init(void)
|
|||
#ifdef CONFIG_PREEMPT
|
||||
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
|
||||
#endif
|
||||
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
|
||||
pa |= KVM_ASYNC_PF_ENABLED;
|
||||
|
||||
/* Async page fault support for L1 hypervisor is optional */
|
||||
if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
|
||||
(pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
|
||||
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
|
||||
__this_cpu_write(apf_reason.enabled, 1);
|
||||
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
|
||||
smp_processor_id());
|
||||
|
|
|
@ -106,14 +106,27 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
|
||||
static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = NULL;
|
||||
int i;
|
||||
|
||||
if (vpidx < KVM_MAX_VCPUS)
|
||||
vcpu = kvm_get_vcpu(kvm, vpidx);
|
||||
if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
|
||||
return vcpu;
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
|
||||
return vcpu;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_vcpu_hv_synic *synic;
|
||||
|
||||
if (vcpu_id >= atomic_read(&kvm->online_vcpus))
|
||||
return NULL;
|
||||
vcpu = kvm_get_vcpu(kvm, vcpu_id);
|
||||
vcpu = get_vcpu_by_vpidx(kvm, vpidx);
|
||||
if (!vcpu)
|
||||
return NULL;
|
||||
synic = vcpu_to_synic(vcpu);
|
||||
|
@ -221,7 +234,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
|
|||
synic->version = data;
|
||||
break;
|
||||
case HV_X64_MSR_SIEFP:
|
||||
if (data & HV_SYNIC_SIEFP_ENABLE)
|
||||
if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
|
||||
!synic->dont_zero_synic_pages)
|
||||
if (kvm_clear_guest(vcpu->kvm,
|
||||
data & PAGE_MASK, PAGE_SIZE)) {
|
||||
ret = 1;
|
||||
|
@ -232,7 +246,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
|
|||
synic_exit(synic, msr);
|
||||
break;
|
||||
case HV_X64_MSR_SIMP:
|
||||
if (data & HV_SYNIC_SIMP_ENABLE)
|
||||
if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
|
||||
!synic->dont_zero_synic_pages)
|
||||
if (kvm_clear_guest(vcpu->kvm,
|
||||
data & PAGE_MASK, PAGE_SIZE)) {
|
||||
ret = 1;
|
||||
|
@ -318,11 +333,11 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
|
|||
return ret;
|
||||
}
|
||||
|
||||
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
|
||||
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
|
||||
{
|
||||
struct kvm_vcpu_hv_synic *synic;
|
||||
|
||||
synic = synic_get(kvm, vcpu_id);
|
||||
synic = synic_get(kvm, vpidx);
|
||||
if (!synic)
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -341,11 +356,11 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
|
|||
kvm_hv_notify_acked_sint(vcpu, i);
|
||||
}
|
||||
|
||||
static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
|
||||
static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
|
||||
{
|
||||
struct kvm_vcpu_hv_synic *synic;
|
||||
|
||||
synic = synic_get(kvm, vcpu_id);
|
||||
synic = synic_get(kvm, vpidx);
|
||||
if (!synic)
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -687,14 +702,24 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
|
|||
stimer_init(&hv_vcpu->stimer[i], i);
|
||||
}
|
||||
|
||||
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
|
||||
void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
|
||||
|
||||
hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
|
||||
}
|
||||
|
||||
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
|
||||
{
|
||||
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
|
||||
|
||||
/*
|
||||
* Hyper-V SynIC auto EOI SINT's are
|
||||
* not compatible with APICV, so deactivate APICV
|
||||
*/
|
||||
kvm_vcpu_deactivate_apicv(vcpu);
|
||||
vcpu_to_synic(vcpu)->active = true;
|
||||
synic->active = true;
|
||||
synic->dont_zero_synic_pages = dont_zero_synic_pages;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -978,6 +1003,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
|||
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
|
||||
|
||||
switch (msr) {
|
||||
case HV_X64_MSR_VP_INDEX:
|
||||
if (!host)
|
||||
return 1;
|
||||
hv->vp_index = (u32)data;
|
||||
break;
|
||||
case HV_X64_MSR_APIC_ASSIST_PAGE: {
|
||||
u64 gfn;
|
||||
unsigned long addr;
|
||||
|
@ -1089,18 +1119,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|||
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
|
||||
|
||||
switch (msr) {
|
||||
case HV_X64_MSR_VP_INDEX: {
|
||||
int r;
|
||||
struct kvm_vcpu *v;
|
||||
|
||||
kvm_for_each_vcpu(r, v, vcpu->kvm) {
|
||||
if (v == vcpu) {
|
||||
data = r;
|
||||
break;
|
||||
}
|
||||
}
|
||||
case HV_X64_MSR_VP_INDEX:
|
||||
data = hv->vp_index;
|
||||
break;
|
||||
}
|
||||
case HV_X64_MSR_EOI:
|
||||
return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
|
||||
case HV_X64_MSR_ICR:
|
||||
|
|
|
@ -56,9 +56,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
|
|||
void kvm_hv_irq_routing_update(struct kvm *kvm);
|
||||
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
|
||||
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
|
||||
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu);
|
||||
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
|
||||
|
||||
void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
|
||||
void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
|
||||
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
|
||||
|
|
|
@ -724,8 +724,10 @@ void kvm_free_pit(struct kvm *kvm)
|
|||
struct kvm_pit *pit = kvm->arch.vpit;
|
||||
|
||||
if (pit) {
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
kvm_pit_set_reinject(pit, false);
|
||||
hrtimer_cancel(&pit->pit_state.timer);
|
||||
kthread_destroy_worker(pit->worker);
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include <asm/io.h>
|
||||
#include <asm/vmx.h>
|
||||
#include <asm/kvm_page_track.h>
|
||||
#include "trace.h"
|
||||
|
||||
/*
|
||||
* When setting this variable to true it enables Two-Dimensional-Paging
|
||||
|
@ -3748,7 +3749,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
|
|||
kvm_event_needs_reinjection(vcpu)))
|
||||
return false;
|
||||
|
||||
if (is_guest_mode(vcpu))
|
||||
if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
|
||||
return false;
|
||||
|
||||
return kvm_x86_ops->interrupt_allowed(vcpu);
|
||||
|
@ -3780,6 +3781,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
|
|||
return false;
|
||||
}
|
||||
|
||||
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
|
||||
u64 fault_address, char *insn, int insn_len,
|
||||
bool need_unprotect)
|
||||
{
|
||||
int r = 1;
|
||||
|
||||
switch (vcpu->arch.apf.host_apf_reason) {
|
||||
default:
|
||||
trace_kvm_page_fault(fault_address, error_code);
|
||||
|
||||
if (need_unprotect && kvm_event_needs_reinjection(vcpu))
|
||||
kvm_mmu_unprotect_page_virt(vcpu, fault_address);
|
||||
r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
|
||||
insn_len);
|
||||
break;
|
||||
case KVM_PV_REASON_PAGE_NOT_PRESENT:
|
||||
vcpu->arch.apf.host_apf_reason = 0;
|
||||
local_irq_disable();
|
||||
kvm_async_pf_task_wait(fault_address);
|
||||
local_irq_enable();
|
||||
break;
|
||||
case KVM_PV_REASON_PAGE_READY:
|
||||
vcpu->arch.apf.host_apf_reason = 0;
|
||||
local_irq_disable();
|
||||
kvm_async_pf_task_wake(fault_address);
|
||||
local_irq_enable();
|
||||
break;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
|
||||
|
||||
static bool
|
||||
check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
|
||||
{
|
||||
|
|
|
@ -77,6 +77,9 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
|
|||
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
|
||||
bool accessed_dirty);
|
||||
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
|
||||
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
|
||||
u64 fault_address, char *insn, int insn_len,
|
||||
bool need_unprotect);
|
||||
|
||||
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
|
||||
{
|
||||
|
|
|
@ -194,7 +194,6 @@ struct vcpu_svm {
|
|||
|
||||
unsigned int3_injected;
|
||||
unsigned long int3_rip;
|
||||
u32 apf_reason;
|
||||
|
||||
/* cached guest cpuid flags for faster access */
|
||||
bool nrips_enabled : 1;
|
||||
|
@ -277,6 +276,10 @@ static int avic;
|
|||
module_param(avic, int, S_IRUGO);
|
||||
#endif
|
||||
|
||||
/* enable/disable Virtual VMLOAD VMSAVE */
|
||||
static int vls = true;
|
||||
module_param(vls, int, 0444);
|
||||
|
||||
/* AVIC VM ID bit masks and lock */
|
||||
static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
|
||||
static DEFINE_SPINLOCK(avic_vm_id_lock);
|
||||
|
@ -633,11 +636,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
|||
svm_set_interrupt_shadow(vcpu, 0);
|
||||
}
|
||||
|
||||
static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
|
||||
bool has_error_code, u32 error_code,
|
||||
bool reinject)
|
||||
static void svm_queue_exception(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
unsigned nr = vcpu->arch.exception.nr;
|
||||
bool has_error_code = vcpu->arch.exception.has_error_code;
|
||||
bool reinject = vcpu->arch.exception.reinject;
|
||||
u32 error_code = vcpu->arch.exception.error_code;
|
||||
|
||||
/*
|
||||
* If we are within a nested VM we'd better #VMEXIT and let the guest
|
||||
|
@ -947,7 +952,7 @@ static void svm_enable_lbrv(struct vcpu_svm *svm)
|
|||
{
|
||||
u32 *msrpm = svm->msrpm;
|
||||
|
||||
svm->vmcb->control.lbr_ctl = 1;
|
||||
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
|
||||
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
|
||||
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
|
||||
set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
|
||||
|
@ -958,7 +963,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
|
|||
{
|
||||
u32 *msrpm = svm->msrpm;
|
||||
|
||||
svm->vmcb->control.lbr_ctl = 0;
|
||||
svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
|
||||
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
|
||||
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
|
||||
set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
|
||||
|
@ -1093,6 +1098,16 @@ static __init int svm_hardware_setup(void)
|
|||
}
|
||||
}
|
||||
|
||||
if (vls) {
|
||||
if (!npt_enabled ||
|
||||
!boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) ||
|
||||
!IS_ENABLED(CONFIG_X86_64)) {
|
||||
vls = false;
|
||||
} else {
|
||||
pr_info("Virtual VMLOAD VMSAVE supported\n");
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
|
@ -1280,6 +1295,16 @@ static void init_vmcb(struct vcpu_svm *svm)
|
|||
if (avic)
|
||||
avic_init_vmcb(svm);
|
||||
|
||||
/*
|
||||
* If hardware supports Virtual VMLOAD VMSAVE then enable it
|
||||
* in VMCB and clear intercepts to avoid #VMEXIT.
|
||||
*/
|
||||
if (vls) {
|
||||
clr_intercept(svm, INTERCEPT_VMLOAD);
|
||||
clr_intercept(svm, INTERCEPT_VMSAVE);
|
||||
svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
|
||||
}
|
||||
|
||||
mark_all_dirty(svm->vmcb);
|
||||
|
||||
enable_gif(svm);
|
||||
|
@ -2096,34 +2121,11 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
|
|||
static int pf_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
u64 fault_address = svm->vmcb->control.exit_info_2;
|
||||
u64 error_code;
|
||||
int r = 1;
|
||||
u64 error_code = svm->vmcb->control.exit_info_1;
|
||||
|
||||
switch (svm->apf_reason) {
|
||||
default:
|
||||
error_code = svm->vmcb->control.exit_info_1;
|
||||
|
||||
trace_kvm_page_fault(fault_address, error_code);
|
||||
if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
|
||||
kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
|
||||
r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
|
||||
return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
|
||||
svm->vmcb->control.insn_bytes,
|
||||
svm->vmcb->control.insn_len);
|
||||
break;
|
||||
case KVM_PV_REASON_PAGE_NOT_PRESENT:
|
||||
svm->apf_reason = 0;
|
||||
local_irq_disable();
|
||||
kvm_async_pf_task_wait(fault_address);
|
||||
local_irq_enable();
|
||||
break;
|
||||
case KVM_PV_REASON_PAGE_READY:
|
||||
svm->apf_reason = 0;
|
||||
local_irq_disable();
|
||||
kvm_async_pf_task_wake(fault_address);
|
||||
local_irq_enable();
|
||||
break;
|
||||
}
|
||||
return r;
|
||||
svm->vmcb->control.insn_len, !npt_enabled);
|
||||
}
|
||||
|
||||
static int db_interception(struct vcpu_svm *svm)
|
||||
|
@ -2267,7 +2269,7 @@ static int io_interception(struct vcpu_svm *svm)
|
|||
{
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
|
||||
int size, in, string;
|
||||
int size, in, string, ret;
|
||||
unsigned port;
|
||||
|
||||
++svm->vcpu.stat.io_exits;
|
||||
|
@ -2279,10 +2281,16 @@ static int io_interception(struct vcpu_svm *svm)
|
|||
port = io_info >> 16;
|
||||
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
|
||||
svm->next_rip = svm->vmcb->control.exit_info_2;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
ret = kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
|
||||
return in ? kvm_fast_pio_in(vcpu, size, port)
|
||||
: kvm_fast_pio_out(vcpu, size, port);
|
||||
/*
|
||||
* TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
|
||||
* KVM_EXIT_DEBUG here.
|
||||
*/
|
||||
if (in)
|
||||
return kvm_fast_pio_in(vcpu, size, port) && ret;
|
||||
else
|
||||
return kvm_fast_pio_out(vcpu, size, port) && ret;
|
||||
}
|
||||
|
||||
static int nmi_interception(struct vcpu_svm *svm)
|
||||
|
@ -2415,15 +2423,19 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
|
|||
if (!is_guest_mode(&svm->vcpu))
|
||||
return 0;
|
||||
|
||||
vmexit = nested_svm_intercept(svm);
|
||||
if (vmexit != NESTED_EXIT_DONE)
|
||||
return 0;
|
||||
|
||||
svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
|
||||
svm->vmcb->control.exit_code_hi = 0;
|
||||
svm->vmcb->control.exit_info_1 = error_code;
|
||||
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
|
||||
|
||||
vmexit = nested_svm_intercept(svm);
|
||||
if (vmexit == NESTED_EXIT_DONE)
|
||||
svm->nested.exit_required = true;
|
||||
if (svm->vcpu.arch.exception.nested_apf)
|
||||
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
|
||||
else
|
||||
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
|
||||
|
||||
svm->nested.exit_required = true;
|
||||
return vmexit;
|
||||
}
|
||||
|
||||
|
@ -2598,7 +2610,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
|
|||
break;
|
||||
case SVM_EXIT_EXCP_BASE + PF_VECTOR:
|
||||
/* When we're shadowing, trap PFs, but not async PF */
|
||||
if (!npt_enabled && svm->apf_reason == 0)
|
||||
if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
|
||||
return NESTED_EXIT_HOST;
|
||||
break;
|
||||
default:
|
||||
|
@ -2645,7 +2657,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
|
|||
}
|
||||
/* async page fault always cause vmexit */
|
||||
else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
|
||||
svm->apf_reason != 0)
|
||||
svm->vcpu.arch.exception.nested_apf != 0)
|
||||
vmexit = NESTED_EXIT_DONE;
|
||||
break;
|
||||
}
|
||||
|
@ -2702,7 +2714,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
|
|||
dst->event_inj = from->event_inj;
|
||||
dst->event_inj_err = from->event_inj_err;
|
||||
dst->nested_cr3 = from->nested_cr3;
|
||||
dst->lbr_ctl = from->lbr_ctl;
|
||||
dst->virt_ext = from->virt_ext;
|
||||
}
|
||||
|
||||
static int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
|
@ -3008,7 +3020,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
|
|||
/* We don't want to see VMMCALLs from a nested guest */
|
||||
clr_intercept(svm, INTERCEPT_VMMCALL);
|
||||
|
||||
svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
|
||||
svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
|
||||
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
|
||||
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
|
||||
svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
|
||||
|
@ -3055,6 +3067,7 @@ static int vmload_interception(struct vcpu_svm *svm)
|
|||
{
|
||||
struct vmcb *nested_vmcb;
|
||||
struct page *page;
|
||||
int ret;
|
||||
|
||||
if (nested_svm_check_permissions(svm))
|
||||
return 1;
|
||||
|
@ -3064,18 +3077,19 @@ static int vmload_interception(struct vcpu_svm *svm)
|
|||
return 1;
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
ret = kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
|
||||
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
|
||||
nested_svm_unmap(page);
|
||||
|
||||
return 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vmsave_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb *nested_vmcb;
|
||||
struct page *page;
|
||||
int ret;
|
||||
|
||||
if (nested_svm_check_permissions(svm))
|
||||
return 1;
|
||||
|
@ -3085,12 +3099,12 @@ static int vmsave_interception(struct vcpu_svm *svm)
|
|||
return 1;
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
ret = kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
|
||||
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
|
||||
nested_svm_unmap(page);
|
||||
|
||||
return 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vmrun_interception(struct vcpu_svm *svm)
|
||||
|
@ -3123,25 +3137,29 @@ static int vmrun_interception(struct vcpu_svm *svm)
|
|||
|
||||
static int stgi_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (nested_svm_check_permissions(svm))
|
||||
return 1;
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
ret = kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
|
||||
|
||||
enable_gif(svm);
|
||||
|
||||
return 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int clgi_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (nested_svm_check_permissions(svm))
|
||||
return 1;
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
ret = kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
|
||||
disable_gif(svm);
|
||||
|
||||
|
@ -3152,7 +3170,7 @@ static int clgi_interception(struct vcpu_svm *svm)
|
|||
mark_dirty(svm->vmcb, VMCB_INTR);
|
||||
}
|
||||
|
||||
return 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int invlpga_interception(struct vcpu_svm *svm)
|
||||
|
@ -3166,8 +3184,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
|
|||
kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
return 1;
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
|
||||
static int skinit_interception(struct vcpu_svm *svm)
|
||||
|
@ -3190,7 +3207,7 @@ static int xsetbv_interception(struct vcpu_svm *svm)
|
|||
|
||||
if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
@ -3286,8 +3303,7 @@ static int invlpg_interception(struct vcpu_svm *svm)
|
|||
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
|
||||
|
||||
kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
return 1;
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
|
||||
static int emulate_on_interception(struct vcpu_svm *svm)
|
||||
|
@ -3437,9 +3453,7 @@ static int dr_interception(struct vcpu_svm *svm)
|
|||
kvm_register_write(&svm->vcpu, reg, val);
|
||||
}
|
||||
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
|
||||
return 1;
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
|
||||
static int cr8_write_interception(struct vcpu_svm *svm)
|
||||
|
@ -3562,6 +3576,7 @@ static int rdmsr_interception(struct vcpu_svm *svm)
|
|||
if (svm_get_msr(&svm->vcpu, &msr_info)) {
|
||||
trace_kvm_msr_read_ex(ecx);
|
||||
kvm_inject_gp(&svm->vcpu, 0);
|
||||
return 1;
|
||||
} else {
|
||||
trace_kvm_msr_read(ecx, msr_info.data);
|
||||
|
||||
|
@ -3570,9 +3585,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
|
|||
kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
|
||||
msr_info.data >> 32);
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
|
||||
|
@ -3698,11 +3712,11 @@ static int wrmsr_interception(struct vcpu_svm *svm)
|
|||
if (kvm_set_msr(&svm->vcpu, &msr)) {
|
||||
trace_kvm_msr_write_ex(ecx, data);
|
||||
kvm_inject_gp(&svm->vcpu, 0);
|
||||
return 1;
|
||||
} else {
|
||||
trace_kvm_msr_write(ecx, data);
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int msr_interception(struct vcpu_svm *svm)
|
||||
|
@ -3731,8 +3745,7 @@ static int pause_interception(struct vcpu_svm *svm)
|
|||
|
||||
static int nop_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
skip_emulated_instruction(&(svm->vcpu));
|
||||
return 1;
|
||||
return kvm_skip_emulated_instruction(&(svm->vcpu));
|
||||
}
|
||||
|
||||
static int monitor_interception(struct vcpu_svm *svm)
|
||||
|
@ -4117,7 +4130,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
|
|||
pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
|
||||
pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
|
||||
pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
|
||||
pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
|
||||
pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
|
||||
pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
|
||||
pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
|
||||
pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
|
||||
|
@ -4965,7 +4978,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
|
||||
/* if exit due to PF check for async PF */
|
||||
if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
|
||||
svm->apf_reason = kvm_read_and_reset_pf_reason();
|
||||
svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
|
||||
|
||||
if (npt_enabled) {
|
||||
vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
|
||||
|
|
|
@ -2422,28 +2422,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
|||
* KVM wants to inject page-faults which it got to the guest. This function
|
||||
* checks whether in a nested guest, we need to inject them to L1 or L2.
|
||||
*/
|
||||
static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
|
||||
static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
unsigned int nr = vcpu->arch.exception.nr;
|
||||
|
||||
if (!(vmcs12->exception_bitmap & (1u << nr)))
|
||||
if (!((vmcs12->exception_bitmap & (1u << nr)) ||
|
||||
(nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
|
||||
return 0;
|
||||
|
||||
if (vcpu->arch.exception.nested_apf) {
|
||||
vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
|
||||
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
|
||||
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
|
||||
INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
|
||||
vcpu->arch.apf.nested_apf_token);
|
||||
return 1;
|
||||
}
|
||||
|
||||
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
|
||||
vmcs_read32(VM_EXIT_INTR_INFO),
|
||||
vmcs_readl(EXIT_QUALIFICATION));
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
|
||||
bool has_error_code, u32 error_code,
|
||||
bool reinject)
|
||||
static void vmx_queue_exception(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
unsigned nr = vcpu->arch.exception.nr;
|
||||
bool has_error_code = vcpu->arch.exception.has_error_code;
|
||||
bool reinject = vcpu->arch.exception.reinject;
|
||||
u32 error_code = vcpu->arch.exception.error_code;
|
||||
u32 intr_info = nr | INTR_INFO_VALID_MASK;
|
||||
|
||||
if (!reinject && is_guest_mode(vcpu) &&
|
||||
nested_vmx_check_exception(vcpu, nr))
|
||||
nested_vmx_check_exception(vcpu))
|
||||
return;
|
||||
|
||||
if (has_error_code) {
|
||||
|
@ -3764,6 +3777,25 @@ static void free_kvm_area(void)
|
|||
}
|
||||
}
|
||||
|
||||
enum vmcs_field_type {
|
||||
VMCS_FIELD_TYPE_U16 = 0,
|
||||
VMCS_FIELD_TYPE_U64 = 1,
|
||||
VMCS_FIELD_TYPE_U32 = 2,
|
||||
VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
|
||||
};
|
||||
|
||||
static inline int vmcs_field_type(unsigned long field)
|
||||
{
|
||||
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
|
||||
return VMCS_FIELD_TYPE_U32;
|
||||
return (field >> 13) & 0x3 ;
|
||||
}
|
||||
|
||||
static inline int vmcs_field_readonly(unsigned long field)
|
||||
{
|
||||
return (((field >> 10) & 0x3) == 1);
|
||||
}
|
||||
|
||||
static void init_vmcs_shadow_fields(void)
|
||||
{
|
||||
int i, j;
|
||||
|
@ -3789,14 +3821,22 @@ static void init_vmcs_shadow_fields(void)
|
|||
|
||||
/* shadowed fields guest access without vmexit */
|
||||
for (i = 0; i < max_shadow_read_write_fields; i++) {
|
||||
clear_bit(shadow_read_write_fields[i],
|
||||
vmx_vmwrite_bitmap);
|
||||
clear_bit(shadow_read_write_fields[i],
|
||||
vmx_vmread_bitmap);
|
||||
unsigned long field = shadow_read_write_fields[i];
|
||||
|
||||
clear_bit(field, vmx_vmwrite_bitmap);
|
||||
clear_bit(field, vmx_vmread_bitmap);
|
||||
if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
|
||||
clear_bit(field + 1, vmx_vmwrite_bitmap);
|
||||
clear_bit(field + 1, vmx_vmread_bitmap);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < max_shadow_read_only_fields; i++) {
|
||||
unsigned long field = shadow_read_only_fields[i];
|
||||
|
||||
clear_bit(field, vmx_vmread_bitmap);
|
||||
if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
|
||||
clear_bit(field + 1, vmx_vmread_bitmap);
|
||||
}
|
||||
for (i = 0; i < max_shadow_read_only_fields; i++)
|
||||
clear_bit(shadow_read_only_fields[i],
|
||||
vmx_vmread_bitmap);
|
||||
}
|
||||
|
||||
static __init int alloc_kvm_area(void)
|
||||
|
@ -4634,6 +4674,11 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
|
||||
{
|
||||
return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
|
||||
}
|
||||
|
||||
static int init_rmode_tss(struct kvm *kvm)
|
||||
{
|
||||
gfn_t fn;
|
||||
|
@ -5664,14 +5709,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
if (is_page_fault(intr_info)) {
|
||||
/* EPT won't cause page fault directly */
|
||||
BUG_ON(enable_ept);
|
||||
cr2 = vmcs_readl(EXIT_QUALIFICATION);
|
||||
trace_kvm_page_fault(cr2, error_code);
|
||||
|
||||
if (kvm_event_needs_reinjection(vcpu))
|
||||
kvm_mmu_unprotect_page_virt(vcpu, cr2);
|
||||
return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
|
||||
/* EPT won't cause page fault directly */
|
||||
WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
|
||||
return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0,
|
||||
true);
|
||||
}
|
||||
|
||||
ex_no = intr_info & INTR_INFO_VECTOR_MASK;
|
||||
|
@ -7214,25 +7256,6 @@ static int handle_vmresume(struct kvm_vcpu *vcpu)
|
|||
return nested_vmx_run(vcpu, false);
|
||||
}
|
||||
|
||||
enum vmcs_field_type {
|
||||
VMCS_FIELD_TYPE_U16 = 0,
|
||||
VMCS_FIELD_TYPE_U64 = 1,
|
||||
VMCS_FIELD_TYPE_U32 = 2,
|
||||
VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
|
||||
};
|
||||
|
||||
static inline int vmcs_field_type(unsigned long field)
|
||||
{
|
||||
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
|
||||
return VMCS_FIELD_TYPE_U32;
|
||||
return (field >> 13) & 0x3 ;
|
||||
}
|
||||
|
||||
static inline int vmcs_field_readonly(unsigned long field)
|
||||
{
|
||||
return (((field >> 10) & 0x3) == 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read a vmcs12 field. Since these can have varying lengths and we return
|
||||
* one type, we chose the biggest type (u64) and zero-extend the return value
|
||||
|
@ -8014,7 +8037,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
|
|||
if (is_nmi(intr_info))
|
||||
return false;
|
||||
else if (is_page_fault(intr_info))
|
||||
return enable_ept;
|
||||
return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
|
||||
else if (is_no_device(intr_info) &&
|
||||
!(vmcs12->guest_cr0 & X86_CR0_TS))
|
||||
return false;
|
||||
|
@ -8418,9 +8441,15 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
|
|||
exit_reason != EXIT_REASON_TASK_SWITCH)) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
|
||||
vcpu->run->internal.ndata = 2;
|
||||
vcpu->run->internal.ndata = 3;
|
||||
vcpu->run->internal.data[0] = vectoring_info;
|
||||
vcpu->run->internal.data[1] = exit_reason;
|
||||
vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
|
||||
if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
|
||||
vcpu->run->internal.ndata++;
|
||||
vcpu->run->internal.data[3] =
|
||||
vmcs_read64(GUEST_PHYSICAL_ADDRESS);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -8611,17 +8640,24 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
|
|||
|
||||
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
|
||||
{
|
||||
u32 exit_intr_info;
|
||||
u32 exit_intr_info = 0;
|
||||
u16 basic_exit_reason = (u16)vmx->exit_reason;
|
||||
|
||||
if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
|
||||
|| vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
|
||||
if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
|
||||
|| basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
|
||||
return;
|
||||
|
||||
vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
||||
exit_intr_info = vmx->exit_intr_info;
|
||||
if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
|
||||
exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
||||
vmx->exit_intr_info = exit_intr_info;
|
||||
|
||||
/* if exit due to PF check for async PF */
|
||||
if (is_page_fault(exit_intr_info))
|
||||
vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
|
||||
|
||||
/* Handle machine checks before interrupts are enabled */
|
||||
if (is_machine_check(exit_intr_info))
|
||||
if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
|
||||
is_machine_check(exit_intr_info))
|
||||
kvm_machine_check();
|
||||
|
||||
/* We need to handle NMIs before interrupts are enabled */
|
||||
|
@ -9589,23 +9625,26 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
|
|||
ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
|
||||
}
|
||||
|
||||
static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
|
||||
struct vmcs12 *vmcs12)
|
||||
{
|
||||
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
|
||||
return 0;
|
||||
|
||||
if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
|
||||
!page_address_valid(vcpu, vmcs12->io_bitmap_b))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
|
||||
struct vmcs12 *vmcs12)
|
||||
{
|
||||
int maxphyaddr;
|
||||
u64 addr;
|
||||
|
||||
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
|
||||
return 0;
|
||||
|
||||
if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) {
|
||||
WARN_ON(1);
|
||||
return -EINVAL;
|
||||
}
|
||||
maxphyaddr = cpuid_maxphyaddr(vcpu);
|
||||
|
||||
if (!PAGE_ALIGNED(vmcs12->msr_bitmap) ||
|
||||
((addr + PAGE_SIZE) >> maxphyaddr))
|
||||
if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@ -10293,6 +10332,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
|
|||
vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
|
||||
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
|
||||
|
||||
if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
|
||||
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
|
||||
|
||||
if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
|
||||
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
|
||||
|
||||
|
@ -10429,8 +10471,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
|
|||
return 1;
|
||||
}
|
||||
|
||||
vmcs12->launch_state = 1;
|
||||
|
||||
/*
|
||||
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
|
||||
* we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
|
||||
|
@ -10804,6 +10844,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|||
vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
||||
|
||||
if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
|
||||
vmcs12->launch_state = 1;
|
||||
|
||||
/* vm_entry_intr_info_field is cleared on exit. Emulate this
|
||||
* instead of reading the real value. */
|
||||
vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
|
||||
|
|
|
@ -134,8 +134,6 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
|
|||
static bool __read_mostly vector_hashing = true;
|
||||
module_param(vector_hashing, bool, S_IRUGO);
|
||||
|
||||
static bool __read_mostly backwards_tsc_observed = false;
|
||||
|
||||
#define KVM_NR_SHARED_MSRS 16
|
||||
|
||||
struct kvm_shared_msrs_global {
|
||||
|
@ -452,7 +450,12 @@ EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
|
|||
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
|
||||
{
|
||||
++vcpu->stat.pf_guest;
|
||||
vcpu->arch.cr2 = fault->address;
|
||||
vcpu->arch.exception.nested_apf =
|
||||
is_guest_mode(vcpu) && fault->async_page_fault;
|
||||
if (vcpu->arch.exception.nested_apf)
|
||||
vcpu->arch.apf.nested_apf_token = fault->address;
|
||||
else
|
||||
vcpu->arch.cr2 = fault->address;
|
||||
kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
|
||||
|
@ -1719,7 +1722,7 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
|
|||
&ka->master_cycle_now);
|
||||
|
||||
ka->use_master_clock = host_tsc_clocksource && vcpus_matched
|
||||
&& !backwards_tsc_observed
|
||||
&& !ka->backwards_tsc_observed
|
||||
&& !ka->boot_vcpu_runs_old_kvmclock;
|
||||
|
||||
if (ka->use_master_clock)
|
||||
|
@ -2060,8 +2063,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
|
|||
{
|
||||
gpa_t gpa = data & ~0x3f;
|
||||
|
||||
/* Bits 2:5 are reserved, Should be zero */
|
||||
if (data & 0x3c)
|
||||
/* Bits 3:5 are reserved, Should be zero */
|
||||
if (data & 0x38)
|
||||
return 1;
|
||||
|
||||
vcpu->arch.apf.msr_val = data;
|
||||
|
@ -2077,6 +2080,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
|
|||
return 1;
|
||||
|
||||
vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
|
||||
vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
|
||||
kvm_async_pf_wakeup_all(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
@ -2661,6 +2665,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_HYPERV_VAPIC:
|
||||
case KVM_CAP_HYPERV_SPIN:
|
||||
case KVM_CAP_HYPERV_SYNIC:
|
||||
case KVM_CAP_HYPERV_SYNIC2:
|
||||
case KVM_CAP_HYPERV_VP_INDEX:
|
||||
case KVM_CAP_PCI_SEGMENT:
|
||||
case KVM_CAP_DEBUGREGS:
|
||||
case KVM_CAP_X86_ROBUST_SINGLESTEP:
|
||||
|
@ -3384,10 +3390,14 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
|
|||
return -EINVAL;
|
||||
|
||||
switch (cap->cap) {
|
||||
case KVM_CAP_HYPERV_SYNIC2:
|
||||
if (cap->args[0])
|
||||
return -EINVAL;
|
||||
case KVM_CAP_HYPERV_SYNIC:
|
||||
if (!irqchip_in_kernel(vcpu->kvm))
|
||||
return -EINVAL;
|
||||
return kvm_hv_activate_synic(vcpu);
|
||||
return kvm_hv_activate_synic(vcpu, cap->cap ==
|
||||
KVM_CAP_HYPERV_SYNIC2);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -4188,9 +4198,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
goto out;
|
||||
|
||||
r = 0;
|
||||
/*
|
||||
* TODO: userspace has to take care of races with VCPU_RUN, so
|
||||
* kvm_gen_update_masterclock() can be cut down to locked
|
||||
* pvclock_update_vm_gtod_copy().
|
||||
*/
|
||||
kvm_gen_update_masterclock(kvm);
|
||||
now_ns = get_kvmclock_ns(kvm);
|
||||
kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
|
||||
kvm_gen_update_masterclock(kvm);
|
||||
kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
|
||||
break;
|
||||
}
|
||||
case KVM_GET_CLOCK: {
|
||||
|
@ -6347,10 +6363,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
|
|||
kvm_update_dr7(vcpu);
|
||||
}
|
||||
|
||||
kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
|
||||
vcpu->arch.exception.has_error_code,
|
||||
vcpu->arch.exception.error_code,
|
||||
vcpu->arch.exception.reinject);
|
||||
kvm_x86_ops->queue_exception(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -7676,6 +7689,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
|||
struct msr_data msr;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
||||
kvm_hv_vcpu_postcreate(vcpu);
|
||||
|
||||
if (vcpu_load(vcpu))
|
||||
return;
|
||||
msr.data = 0x0;
|
||||
|
@ -7829,8 +7844,8 @@ int kvm_arch_hardware_enable(void)
|
|||
*/
|
||||
if (backwards_tsc) {
|
||||
u64 delta_cyc = max_tsc - local_tsc;
|
||||
backwards_tsc_observed = true;
|
||||
list_for_each_entry(kvm, &vm_list, vm_list) {
|
||||
kvm->arch.backwards_tsc_observed = true;
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
vcpu->arch.tsc_offset_adjustment += delta_cyc;
|
||||
vcpu->arch.last_host_tsc = local_tsc;
|
||||
|
@ -8576,6 +8591,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
|
|||
fault.error_code = 0;
|
||||
fault.nested_page_fault = false;
|
||||
fault.address = work->arch.token;
|
||||
fault.async_page_fault = true;
|
||||
kvm_inject_page_fault(vcpu, &fault);
|
||||
}
|
||||
}
|
||||
|
@ -8598,6 +8614,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
|
|||
fault.error_code = 0;
|
||||
fault.nested_page_fault = false;
|
||||
fault.address = work->arch.token;
|
||||
fault.async_page_fault = true;
|
||||
kvm_inject_page_fault(vcpu, &fault);
|
||||
}
|
||||
vcpu->arch.apf.halted = false;
|
||||
|
|
|
@ -234,7 +234,7 @@ struct kvm_vcpu {
|
|||
|
||||
int guest_fpu_loaded, guest_xcr0_loaded;
|
||||
struct swait_queue_head wq;
|
||||
struct pid *pid;
|
||||
struct pid __rcu *pid;
|
||||
int sigset_active;
|
||||
sigset_t sigset;
|
||||
struct kvm_vcpu_stat stat;
|
||||
|
@ -390,7 +390,7 @@ struct kvm {
|
|||
spinlock_t mmu_lock;
|
||||
struct mutex slots_lock;
|
||||
struct mm_struct *mm; /* userspace tied to this vm */
|
||||
struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
|
||||
struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
|
||||
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
|
||||
|
||||
/*
|
||||
|
@ -404,7 +404,7 @@ struct kvm {
|
|||
int last_boosted_vcpu;
|
||||
struct list_head vm_list;
|
||||
struct mutex lock;
|
||||
struct kvm_io_bus *buses[KVM_NR_BUSES];
|
||||
struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];
|
||||
#ifdef CONFIG_HAVE_KVM_EVENTFD
|
||||
struct {
|
||||
spinlock_t lock;
|
||||
|
@ -473,6 +473,12 @@ struct kvm {
|
|||
#define vcpu_err(vcpu, fmt, ...) \
|
||||
kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
|
||||
|
||||
static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
|
||||
{
|
||||
return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
|
||||
lockdep_is_held(&kvm->slots_lock));
|
||||
}
|
||||
|
||||
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
|
||||
{
|
||||
/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
|
||||
|
@ -562,9 +568,8 @@ void kvm_put_kvm(struct kvm *kvm);
|
|||
|
||||
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
|
||||
{
|
||||
return rcu_dereference_check(kvm->memslots[as_id],
|
||||
srcu_read_lock_held(&kvm->srcu)
|
||||
|| lockdep_is_held(&kvm->slots_lock));
|
||||
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
|
||||
lockdep_is_held(&kvm->slots_lock));
|
||||
}
|
||||
|
||||
static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
|
||||
|
|
|
@ -927,6 +927,8 @@ struct kvm_ppc_resize_hpt {
|
|||
#define KVM_CAP_S390_CMMA_MIGRATION 145
|
||||
#define KVM_CAP_PPC_FWNMI 146
|
||||
#define KVM_CAP_PPC_SMT_POSSIBLE 147
|
||||
#define KVM_CAP_HYPERV_SYNIC2 148
|
||||
#define KVM_CAP_HYPERV_VP_INDEX 149
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
@ -1351,7 +1353,7 @@ struct kvm_s390_ucas_mapping {
|
|||
/* Available with KVM_CAP_X86_SMM */
|
||||
#define KVM_SMI _IO(KVMIO, 0xb7)
|
||||
/* Available with KVM_CAP_S390_CMMA_MIGRATION */
|
||||
#define KVM_S390_GET_CMMA_BITS _IOW(KVMIO, 0xb8, struct kvm_s390_cmma_log)
|
||||
#define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
|
||||
#define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
|
||||
|
||||
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
|
||||
|
|
|
@ -825,7 +825,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
|
|||
if (ret < 0)
|
||||
goto unlock_fail;
|
||||
|
||||
kvm->buses[bus_idx]->ioeventfd_count++;
|
||||
kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
|
||||
list_add_tail(&p->list, &kvm->ioeventfds);
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
@ -848,6 +848,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
|
|||
{
|
||||
struct _ioeventfd *p, *tmp;
|
||||
struct eventfd_ctx *eventfd;
|
||||
struct kvm_io_bus *bus;
|
||||
int ret = -ENOENT;
|
||||
|
||||
eventfd = eventfd_ctx_fdget(args->fd);
|
||||
|
@ -870,8 +871,9 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
|
|||
continue;
|
||||
|
||||
kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
|
||||
if (kvm->buses[bus_idx])
|
||||
kvm->buses[bus_idx]->ioeventfd_count--;
|
||||
bus = kvm_get_bus(kvm, bus_idx);
|
||||
if (bus)
|
||||
bus->ioeventfd_count--;
|
||||
ioeventfd_release(p);
|
||||
ret = 0;
|
||||
break;
|
||||
|
|
|
@ -230,7 +230,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
|
|||
}
|
||||
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
old = kvm->irq_routing;
|
||||
old = rcu_dereference_protected(kvm->irq_routing, 1);
|
||||
rcu_assign_pointer(kvm->irq_routing, new);
|
||||
kvm_irq_routing_update(kvm);
|
||||
kvm_arch_irq_routing_update(kvm);
|
||||
|
|
|
@ -130,6 +130,12 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
|
|||
|
||||
static bool largepages_enabled = true;
|
||||
|
||||
#define KVM_EVENT_CREATE_VM 0
|
||||
#define KVM_EVENT_DESTROY_VM 1
|
||||
static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
|
||||
static unsigned long long kvm_createvm_count;
|
||||
static unsigned long long kvm_active_vms;
|
||||
|
||||
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
|
||||
{
|
||||
if (pfn_valid(pfn))
|
||||
|
@ -187,12 +193,23 @@ static void ack_flush(void *_completed)
|
|||
{
|
||||
}
|
||||
|
||||
static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
|
||||
{
|
||||
if (unlikely(!cpus))
|
||||
cpus = cpu_online_mask;
|
||||
|
||||
if (cpumask_empty(cpus))
|
||||
return false;
|
||||
|
||||
smp_call_function_many(cpus, ack_flush, NULL, wait);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
|
||||
{
|
||||
int i, cpu, me;
|
||||
cpumask_var_t cpus;
|
||||
bool called = true;
|
||||
bool wait = req & KVM_REQUEST_WAIT;
|
||||
bool called;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
zalloc_cpumask_var(&cpus, GFP_ATOMIC);
|
||||
|
@ -207,14 +224,9 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
|
|||
|
||||
if (cpus != NULL && cpu != -1 && cpu != me &&
|
||||
kvm_request_needs_ipi(vcpu, req))
|
||||
cpumask_set_cpu(cpu, cpus);
|
||||
__cpumask_set_cpu(cpu, cpus);
|
||||
}
|
||||
if (unlikely(cpus == NULL))
|
||||
smp_call_function_many(cpu_online_mask, ack_flush, NULL, wait);
|
||||
else if (!cpumask_empty(cpus))
|
||||
smp_call_function_many(cpus, ack_flush, NULL, wait);
|
||||
else
|
||||
called = false;
|
||||
called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
|
||||
put_cpu();
|
||||
free_cpumask_var(cpus);
|
||||
return called;
|
||||
|
@ -293,7 +305,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init);
|
|||
|
||||
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
put_pid(vcpu->pid);
|
||||
/*
|
||||
* no need for rcu_read_lock as VCPU_RUN is the only place that
|
||||
* will change the vcpu->pid pointer and on uninit all file
|
||||
* descriptors are already gone.
|
||||
*/
|
||||
put_pid(rcu_dereference_protected(vcpu->pid, 1));
|
||||
kvm_arch_vcpu_uninit(vcpu);
|
||||
free_page((unsigned long)vcpu->run);
|
||||
}
|
||||
|
@ -674,8 +691,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
|
|||
if (init_srcu_struct(&kvm->irq_srcu))
|
||||
goto out_err_no_irq_srcu;
|
||||
for (i = 0; i < KVM_NR_BUSES; i++) {
|
||||
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
|
||||
GFP_KERNEL);
|
||||
rcu_assign_pointer(kvm->buses[i],
|
||||
kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL));
|
||||
if (!kvm->buses[i])
|
||||
goto out_err;
|
||||
}
|
||||
|
@ -700,9 +717,10 @@ static struct kvm *kvm_create_vm(unsigned long type)
|
|||
hardware_disable_all();
|
||||
out_err_no_disable:
|
||||
for (i = 0; i < KVM_NR_BUSES; i++)
|
||||
kfree(kvm->buses[i]);
|
||||
kfree(rcu_access_pointer(kvm->buses[i]));
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
|
||||
kvm_free_memslots(kvm, kvm->memslots[i]);
|
||||
kvm_free_memslots(kvm,
|
||||
rcu_dereference_protected(kvm->memslots[i], 1));
|
||||
kvm_arch_free_vm(kvm);
|
||||
mmdrop(current->mm);
|
||||
return ERR_PTR(r);
|
||||
|
@ -728,6 +746,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
|||
int i;
|
||||
struct mm_struct *mm = kvm->mm;
|
||||
|
||||
kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
|
||||
kvm_destroy_vm_debugfs(kvm);
|
||||
kvm_arch_sync_events(kvm);
|
||||
spin_lock(&kvm_lock);
|
||||
|
@ -735,8 +754,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
|||
spin_unlock(&kvm_lock);
|
||||
kvm_free_irq_routing(kvm);
|
||||
for (i = 0; i < KVM_NR_BUSES; i++) {
|
||||
if (kvm->buses[i])
|
||||
kvm_io_bus_destroy(kvm->buses[i]);
|
||||
struct kvm_io_bus *bus;
|
||||
|
||||
bus = rcu_dereference_protected(kvm->buses[i], 1);
|
||||
if (bus)
|
||||
kvm_io_bus_destroy(bus);
|
||||
kvm->buses[i] = NULL;
|
||||
}
|
||||
kvm_coalesced_mmio_free(kvm);
|
||||
|
@ -748,7 +770,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
|||
kvm_arch_destroy_vm(kvm);
|
||||
kvm_destroy_devices(kvm);
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
|
||||
kvm_free_memslots(kvm, kvm->memslots[i]);
|
||||
kvm_free_memslots(kvm,
|
||||
rcu_dereference_protected(kvm->memslots[i], 1));
|
||||
cleanup_srcu_struct(&kvm->irq_srcu);
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
kvm_arch_free_vm(kvm);
|
||||
|
@ -2551,13 +2574,14 @@ static long kvm_vcpu_ioctl(struct file *filp,
|
|||
if (r)
|
||||
return r;
|
||||
switch (ioctl) {
|
||||
case KVM_RUN:
|
||||
case KVM_RUN: {
|
||||
struct pid *oldpid;
|
||||
r = -EINVAL;
|
||||
if (arg)
|
||||
goto out;
|
||||
if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
|
||||
oldpid = rcu_access_pointer(vcpu->pid);
|
||||
if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
|
||||
/* The thread running this VCPU changed. */
|
||||
struct pid *oldpid = vcpu->pid;
|
||||
struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
|
||||
|
||||
rcu_assign_pointer(vcpu->pid, newpid);
|
||||
|
@ -2568,6 +2592,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
|
|||
r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
|
||||
trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
|
||||
break;
|
||||
}
|
||||
case KVM_GET_REGS: {
|
||||
struct kvm_regs *kvm_regs;
|
||||
|
||||
|
@ -3202,6 +3227,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
|
|||
fput(file);
|
||||
return -ENOMEM;
|
||||
}
|
||||
kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
|
||||
|
||||
fd_install(r, file);
|
||||
return r;
|
||||
|
@ -3563,7 +3589,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
|||
{
|
||||
struct kvm_io_bus *new_bus, *bus;
|
||||
|
||||
bus = kvm->buses[bus_idx];
|
||||
bus = kvm_get_bus(kvm, bus_idx);
|
||||
if (!bus)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -3592,7 +3618,7 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
|||
int i;
|
||||
struct kvm_io_bus *new_bus, *bus;
|
||||
|
||||
bus = kvm->buses[bus_idx];
|
||||
bus = kvm_get_bus(kvm, bus_idx);
|
||||
if (!bus)
|
||||
return;
|
||||
|
||||
|
@ -3854,6 +3880,67 @@ static const struct file_operations *stat_fops[] = {
|
|||
[KVM_STAT_VM] = &vm_stat_fops,
|
||||
};
|
||||
|
||||
static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
|
||||
{
|
||||
struct kobj_uevent_env *env;
|
||||
char *tmp, *pathbuf = NULL;
|
||||
unsigned long long created, active;
|
||||
|
||||
if (!kvm_dev.this_device || !kvm)
|
||||
return;
|
||||
|
||||
spin_lock(&kvm_lock);
|
||||
if (type == KVM_EVENT_CREATE_VM) {
|
||||
kvm_createvm_count++;
|
||||
kvm_active_vms++;
|
||||
} else if (type == KVM_EVENT_DESTROY_VM) {
|
||||
kvm_active_vms--;
|
||||
}
|
||||
created = kvm_createvm_count;
|
||||
active = kvm_active_vms;
|
||||
spin_unlock(&kvm_lock);
|
||||
|
||||
env = kzalloc(sizeof(*env), GFP_KERNEL);
|
||||
if (!env)
|
||||
return;
|
||||
|
||||
add_uevent_var(env, "CREATED=%llu", created);
|
||||
add_uevent_var(env, "COUNT=%llu", active);
|
||||
|
||||
if (type == KVM_EVENT_CREATE_VM)
|
||||
add_uevent_var(env, "EVENT=create");
|
||||
else if (type == KVM_EVENT_DESTROY_VM)
|
||||
add_uevent_var(env, "EVENT=destroy");
|
||||
|
||||
if (kvm->debugfs_dentry) {
|
||||
char p[ITOA_MAX_LEN];
|
||||
|
||||
snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name);
|
||||
tmp = strchrnul(p + 1, '-');
|
||||
*tmp = '\0';
|
||||
add_uevent_var(env, "PID=%s", p);
|
||||
pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
|
||||
if (pathbuf) {
|
||||
/* sizeof counts the final '\0' */
|
||||
int len = sizeof("STATS_PATH=") - 1;
|
||||
const char *pvar = "STATS_PATH=";
|
||||
|
||||
tmp = dentry_path_raw(kvm->debugfs_dentry,
|
||||
pathbuf + len,
|
||||
PATH_MAX - len);
|
||||
if (!IS_ERR(tmp)) {
|
||||
memcpy(tmp - len, pvar, len);
|
||||
env->envp[env->envp_idx++] = tmp - len;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* no need for checks, since we are adding at most only 5 keys */
|
||||
env->envp[env->envp_idx++] = NULL;
|
||||
kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp);
|
||||
kfree(env);
|
||||
kfree(pathbuf);
|
||||
}
|
||||
|
||||
static int kvm_init_debug(void)
|
||||
{
|
||||
int r = -EEXIST;
|
||||
|
|
Loading…
Reference in New Issue