mirror of https://gitee.com/openkylin/linux.git
commit 0aa099a312

* Lots of bug fixes.
* Fix virtualization of RDPID
* Virtualization of DR6_BUS_LOCK, which on bare metal is new in the 5.13 merge window
* More nested virtualization migration fixes (nSVM and eVMCS)
* Fix for KVM guest hibernation
* Fix for warning in SEV-ES SRCU usage
* Block KVM from loading on AMD machines with 5-level page tables, due to the APM not mentioning how host CR4.LA57 exactly impacts the guest.
-----BEGIN PGP SIGNATURE-----

iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmCZWwgUHHBib256aW5p
QHJlZGhhdC5jb20ACgkQv/vSX3jHroOE9wgAk7Io8cuvnhC9ogVqzZWrPweWqFg8
fJcPMB584JRnMqYHBVYbkTPGe8SsCHKR2MKsNdc4cEP111cyr3suWsxOdmjJn58i
7ahy6PcKx7wWeWwEt7O599l6CeoX5XB9ExvA6eiXAv7iZeOJHFa+Ny2GlWgauy6Y
DELryEomx1r4IUkZaSR+2fYjzvOWTXQixwU/jwx8NcTJz0DrzknzLE7XOciPBfn0
t0Q2rCXdL2nF1uPksZbntx8Qoa6t6GDVIyrH/ZCPQYJtAX6cjxNAh3zwCe+hMnOd
fW8ntBH1nZRiNnberA4IICAzqnUokgPWdKBrZT2ntWHBK+aqxXHznrlPJA==
=e+gD
-----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:

 - Lots of bug fixes.
 - Fix virtualization of RDPID
 - Virtualization of DR6_BUS_LOCK, which on bare metal is new to this release
 - More nested virtualization migration fixes (nSVM and eVMCS)
 - Fix for KVM guest hibernation
 - Fix for warning in SEV-ES SRCU usage
 - Block KVM from loading on AMD machines with 5-level page tables, due to the APM not mentioning how host CR4.LA57 exactly impacts the guest.

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (48 commits)
  KVM: SVM: Move GHCB unmapping to fix RCU warning
  KVM: SVM: Invert user pointer casting in SEV {en,de}crypt helpers
  kvm: Cap halt polling at kvm->max_halt_poll_ns
  tools/kvm_stat: Fix documentation typo
  KVM: x86: Prevent deadlock against tk_core.seq
  KVM: x86: Cancel pvclock_gtod_work on module removal
  KVM: x86: Prevent KVM SVM from loading on kernels with 5-level paging
  KVM: X86: Expose bus lock debug exception to guest
  KVM: X86: Add support for the emulation of DR6_BUS_LOCK bit
  KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks
  KVM: x86: Hide RDTSCP and RDPID if MSR_TSC_AUX probing failed
  KVM: x86: Tie Intel and AMD behavior for MSR_TSC_AUX to guest CPU model
  KVM: x86: Move uret MSR slot management to common x86
  KVM: x86: Export the number of uret MSRs to vendor modules
  KVM: VMX: Disable loading of TSX_CTRL MSR the more conventional way
  KVM: VMX: Use common x86's uret MSR list as the one true list
  KVM: VMX: Use flag to indicate "active" uret MSRs instead of sorting list
  KVM: VMX: Configure list of user return MSRs at module init
  KVM: x86: Add support for RDPID without RDTSCP
  KVM: SVM: Probe and load MSR_TSC_AUX regardless of RDTSCP support in host
  ...
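A recurring theme in the diff below is moving "user return" MSR slot management from the VMX and SVM modules into common x86 code: vendor code now registers an MSR once with kvm_add_user_return_msr(), looks its slot up with kvm_find_user_return_msr(), and writes it with kvm_set_user_return_msr(). The following standalone C sketch only models that register/find/set pattern in user space; the struct, helper names, table size and error handling here are invented for illustration and are not KVM code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative model only: a tiny "user return MSR" slot table. */
#define MAX_URET_MSRS 16

struct uret_msr {
	uint32_t msr;      /* MSR index registered at setup time */
	uint64_t value;    /* last value recorded for the current CPU */
	bool dirty;        /* needs restoring before returning to userspace */
};

static struct uret_msr uret_msrs[MAX_URET_MSRS];
static int nr_uret_msrs;

/* Register an MSR once at setup time; returns its slot or -1 if full. */
static int add_user_return_msr(uint32_t msr)
{
	if (nr_uret_msrs >= MAX_URET_MSRS)
		return -1;
	uret_msrs[nr_uret_msrs].msr = msr;
	return nr_uret_msrs++;
}

/* Look up the slot for a registered MSR; returns -1 if not registered. */
static int find_user_return_msr(uint32_t msr)
{
	for (int i = 0; i < nr_uret_msrs; i++)
		if (uret_msrs[i].msr == msr)
			return i;
	return -1;
}

/* Record the guest value for a slot; a real hypervisor would WRMSR here. */
static int set_user_return_msr(int slot, uint64_t value, uint64_t mask)
{
	if (slot < 0 || slot >= nr_uret_msrs)
		return -1;
	uret_msrs[slot].value = value & mask;
	uret_msrs[slot].dirty = true;
	return 0;
}

int main(void)
{
	const uint32_t MSR_TSC_AUX = 0xc0000103;  /* architectural index */
	int slot = add_user_return_msr(MSR_TSC_AUX);

	set_user_return_msr(slot, 0x1234, ~0ull);
	printf("slot %d (found %d) value %#llx\n", slot,
	       find_user_return_msr(MSR_TSC_AUX),
	       (unsigned long long)uret_msrs[slot].value);
	return 0;
}

This mirrors the usage visible in the SVM and VMX hunks further down, where a slot returned at module init is later passed to kvm_set_user_return_msr() on the guest-switch path.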
@@ -4803,7 +4803,7 @@ KVM_PV_VM_VERIFY
 4.126 KVM_X86_SET_MSR_FILTER
 ----------------------------
 
-:Capability: KVM_X86_SET_MSR_FILTER
+:Capability: KVM_CAP_X86_MSR_FILTER
 :Architectures: x86
 :Type: vm ioctl
 :Parameters: struct kvm_msr_filter
@@ -6715,7 +6715,7 @@ accesses that would usually trigger a #GP by KVM into the guest will
 instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
 KVM_EXIT_X86_WRMSR exit notifications.
 
-8.27 KVM_X86_SET_MSR_FILTER
+8.27 KVM_CAP_X86_MSR_FILTER
 ---------------------------
 
 :Architectures: x86
@@ -113,6 +113,7 @@
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
 
 #define UNMAPPED_GVA (~(gpa_t)0)
+#define INVALID_GPA (~(gpa_t)0)
 
 /* KVM Hugepage definitions for x86 */
 #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
@@ -199,6 +200,7 @@ enum x86_intercept_stage;
 
 #define KVM_NR_DB_REGS 4
 
+#define DR6_BUS_LOCK (1 << 11)
 #define DR6_BD (1 << 13)
 #define DR6_BS (1 << 14)
 #define DR6_BT (1 << 15)
@@ -212,7 +214,7 @@ enum x86_intercept_stage;
 * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
 */
 #define DR6_ACTIVE_LOW 0xffff0ff0
-#define DR6_VOLATILE 0x0001e00f
+#define DR6_VOLATILE 0x0001e80f
 #define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE)
 
 #define DR7_BP_EN_MASK 0x000000ff
@@ -407,7 +409,7 @@ struct kvm_mmu {
 u32 pkru_mask;
 
 u64 *pae_root;
-u64 *lm_root;
+u64 *pml4_root;
 
 /*
 * check zero bits on shadow page table entries, these
@@ -1417,6 +1419,7 @@ struct kvm_arch_async_pf {
 bool direct_map;
 };
 
+extern u32 __read_mostly kvm_nr_uret_msrs;
 extern u64 __read_mostly host_efer;
 extern bool __read_mostly allow_smaller_maxphyaddr;
 extern struct kvm_x86_ops kvm_x86_ops;
@@ -1775,9 +1778,15 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 unsigned long ipi_bitmap_high, u32 min,
 unsigned long icr, int op_64_bit);
 
-void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_add_user_return_msr(u32 msr);
+int kvm_find_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
+static inline bool kvm_is_supported_user_return_msr(u32 msr)
+{
+return kvm_find_user_return_msr(msr) >= 0;
+}
+
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
 
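The DR6 change above widens DR6_VOLATILE from 0x0001e00f to 0x0001e80f, which is exactly the old mask with the new DR6_BUS_LOCK bit (bit 11) folded in, so the bus-lock bit is treated as volatile rather than fixed-to-1. A quick standalone check of that arithmetic; the constants are copied from the hunk, while the program itself is only an illustration, not kernel code.

#include <assert.h>
#include <stdio.h>

/* Constants copied from the kvm_host.h hunk above. */
#define DR6_BUS_LOCK     (1u << 11)
#define DR6_ACTIVE_LOW   0xffff0ff0u
#define DR6_VOLATILE_OLD 0x0001e00fu
#define DR6_VOLATILE_NEW 0x0001e80fu
#define DR6_FIXED_1      (DR6_ACTIVE_LOW & ~DR6_VOLATILE_NEW)

int main(void)
{
	/* New volatile mask is the old one plus the bus-lock bit. */
	assert((DR6_VOLATILE_OLD | DR6_BUS_LOCK) == DR6_VOLATILE_NEW);

	/* Marking the bit volatile clears it from the fixed-1 init value. */
	assert((DR6_FIXED_1 & DR6_BUS_LOCK) == 0);

	printf("DR6_VOLATILE %#x, DR6_FIXED_1 %#x\n",
	       DR6_VOLATILE_NEW, DR6_FIXED_1);
	return 0;
}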
@@ -7,8 +7,6 @@
 #include <linux/interrupt.h>
 #include <uapi/asm/kvm_para.h>
 
-extern void kvmclock_init(void);
-
 #ifdef CONFIG_KVM_GUEST
 bool kvm_check_and_clear_guest_paused(void);
 #else
@@ -86,13 +84,14 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 }
 
 #ifdef CONFIG_KVM_GUEST
+void kvmclock_init(void);
+void kvmclock_disable(void);
 bool kvm_para_available(void);
 unsigned int kvm_arch_para_features(void);
 unsigned int kvm_arch_para_hints(void);
 void kvm_async_pf_task_wait_schedule(u32 token);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_apf_flags(void);
-void kvm_disable_steal_time(void);
 bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
 
 DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
 return 0;
 }
 
-static inline void kvm_disable_steal_time(void)
-{
-return;
-}
-
 static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
 return false;
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
 __u16 flags;
 } smm;
 
+__u16 pad;
+
 __u32 flags;
 __u64 preemption_timer_deadline;
 };
@@ -26,6 +26,7 @@
 #include <linux/kprobes.h>
 #include <linux/nmi.h>
 #include <linux/swait.h>
+#include <linux/syscore_ops.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -37,6 +38,7 @@
 #include <asm/tlb.h>
 #include <asm/cpuidle_haltpoll.h>
 #include <asm/ptrace.h>
+#include <asm/reboot.h>
 #include <asm/svm.h>
 
 DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -345,7 +347,7 @@ static void kvm_guest_cpu_init(void)
 
 wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
 __this_cpu_write(apf_reason.enabled, 1);
-pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
+pr_info("setup async PF for cpu %d\n", smp_processor_id());
 }
 
 if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
@@ -371,34 +373,17 @@ static void kvm_pv_disable_apf(void)
 wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
 __this_cpu_write(apf_reason.enabled, 0);
 
-pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+pr_info("disable async PF for cpu %d\n", smp_processor_id());
 }
 
-static void kvm_pv_guest_cpu_reboot(void *unused)
+static void kvm_disable_steal_time(void)
 {
-/*
- * We disable PV EOI before we load a new kernel by kexec,
- * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
- * New kernel can re-enable when it boots.
- */
-if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-kvm_pv_disable_apf();
-kvm_disable_steal_time();
-}
+if (!has_steal_clock)
+return;
 
-static int kvm_pv_reboot_notify(struct notifier_block *nb,
-unsigned long code, void *unused)
-{
-if (code == SYS_RESTART)
-on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
-return NOTIFY_DONE;
+wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
 }
 
-static struct notifier_block kvm_pv_reboot_nb = {
-.notifier_call = kvm_pv_reboot_notify,
-};
-
 static u64 kvm_steal_clock(int cpu)
 {
 u64 steal;
@@ -416,14 +401,6 @@ static u64 kvm_steal_clock(int cpu)
 return steal;
 }
 
-void kvm_disable_steal_time(void)
-{
-if (!has_steal_clock)
-return;
-
-wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
 static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
 {
 early_set_memory_decrypted((unsigned long) ptr, size);
@@ -451,6 +428,27 @@ static void __init sev_map_percpu_data(void)
 }
 }
 
+static void kvm_guest_cpu_offline(bool shutdown)
+{
+kvm_disable_steal_time();
+if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+kvm_pv_disable_apf();
+if (!shutdown)
+apf_task_wake_all();
+kvmclock_disable();
+}
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+unsigned long flags;
+
+local_irq_save(flags);
+kvm_guest_cpu_init();
+local_irq_restore(flags);
+return 0;
+}
+
 #ifdef CONFIG_SMP
 
 static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
@@ -635,33 +633,66 @@ static void __init kvm_smp_prepare_boot_cpu(void)
 kvm_spinlock_init();
 }
 
-static void kvm_guest_cpu_offline(void)
-{
-kvm_disable_steal_time();
-if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-kvm_pv_disable_apf();
-apf_task_wake_all();
-}
-
-static int kvm_cpu_online(unsigned int cpu)
-{
-local_irq_disable();
-kvm_guest_cpu_init();
-local_irq_enable();
-return 0;
-}
-
 static int kvm_cpu_down_prepare(unsigned int cpu)
 {
-local_irq_disable();
-kvm_guest_cpu_offline();
-local_irq_enable();
+unsigned long flags;
+
+local_irq_save(flags);
+kvm_guest_cpu_offline(false);
+local_irq_restore(flags);
 return 0;
 }
 
 #endif
 
+static int kvm_suspend(void)
+{
+kvm_guest_cpu_offline(false);
+
+return 0;
+}
+
+static void kvm_resume(void)
+{
+kvm_cpu_online(raw_smp_processor_id());
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+.suspend = kvm_suspend,
+.resume = kvm_resume,
+};
+
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+kvm_guest_cpu_offline(true);
+}
+
+static int kvm_pv_reboot_notify(struct notifier_block *nb,
+unsigned long code, void *unused)
+{
+if (code == SYS_RESTART)
+on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
+return NOTIFY_DONE;
+}
+
+static struct notifier_block kvm_pv_reboot_nb = {
+.notifier_call = kvm_pv_reboot_notify,
+};
+
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+kvm_guest_cpu_offline(true);
+native_machine_crash_shutdown(regs);
+}
+#endif
+
 static void __init kvm_guest_init(void)
 {
 int i;
@@ -704,6 +735,12 @@ static void __init kvm_guest_init(void)
 kvm_guest_cpu_init();
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
+register_syscore_ops(&kvm_syscore_ops);
+
 /*
 * Hard lockup detection is enabled by default. Disable it, as guests
 * can get false positives too easily, for example if the host is
@@ -20,7 +20,6 @@
 #include <asm/hypervisor.h>
 #include <asm/mem_encrypt.h>
 #include <asm/x86_init.h>
-#include <asm/reboot.h>
 #include <asm/kvmclock.h>
 
 static int kvmclock __initdata = 1;
@@ -203,28 +202,9 @@ static void kvm_setup_secondary_clock(void)
 }
 #endif
 
-/*
- * After the clock is registered, the host will keep writing to the
- * registered memory location. If the guest happens to shutdown, this memory
- * won't be valid. In cases like kexec, in which you install a new kernel, this
- * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writing anything
- * that does not have the 'enable' bit set in the msr
- */
-#ifdef CONFIG_KEXEC_CORE
-static void kvm_crash_shutdown(struct pt_regs *regs)
+void kvmclock_disable(void)
 {
 native_write_msr(msr_kvm_system_time, 0, 0);
-kvm_disable_steal_time();
-native_machine_crash_shutdown(regs);
-}
-#endif
-
-static void kvm_shutdown(void)
-{
-native_write_msr(msr_kvm_system_time, 0, 0);
-kvm_disable_steal_time();
-native_machine_shutdown();
 }
 
 static void __init kvmclock_init_mem(void)
@@ -351,10 +331,6 @@ void __init kvmclock_init(void)
 #endif
 x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
 x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
-machine_ops.shutdown = kvm_shutdown;
-#ifdef CONFIG_KEXEC_CORE
-machine_ops.crash_shutdown = kvm_crash_shutdown;
-#endif
 kvm_get_preset_lpj();
 
 /*
@@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void)
 F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
 F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
 F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
-F(SGX_LC)
+F(SGX_LC) | F(BUS_LOCK_DETECT)
 );
 /* Set LA57 based on hardware capability. */
 if (cpuid_ecx(7) & F(LA57))
@@ -567,6 +567,21 @@ void kvm_set_cpu_caps(void)
 F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
 F(PMM) | F(PMM_EN)
 );
+
+/*
+ * Hide RDTSCP and RDPID if either feature is reported as supported but
+ * probing MSR_TSC_AUX failed. This is purely a sanity check and
+ * should never happen, but the guest will likely crash if RDTSCP or
+ * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in
+ * the past. For example, the sanity check may fire if this instance of
+ * KVM is running as L1 on top of an older, broken KVM.
+ */
+if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) ||
+kvm_cpu_cap_has(X86_FEATURE_RDPID)) &&
+!kvm_is_supported_user_return_msr(MSR_TSC_AUX))) {
+kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+}
 }
 EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
 
@@ -637,7 +652,8 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
 case 7:
 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 entry->eax = 0;
-entry->ecx = F(RDPID);
+if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+entry->ecx = F(RDPID);
 ++array->nent;
 default:
 break;
@@ -4502,7 +4502,7 @@ static const struct opcode group8[] = {
 * from the register case of group9.
 */
 static const struct gprefix pfx_0f_c7_7 = {
-N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
+N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
 };
 
 
@@ -468,6 +468,7 @@ enum x86_intercept {
 x86_intercept_clgi,
 x86_intercept_skinit,
 x86_intercept_rdtscp,
+x86_intercept_rdpid,
 x86_intercept_icebp,
 x86_intercept_wbinvd,
 x86_intercept_monitor,
@@ -1913,8 +1913,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
 if (!apic->lapic_timer.hv_timer_in_use)
 goto out;
 WARN_ON(rcuwait_active(&vcpu->wait));
-cancel_hv_timer(apic);
 apic_timer_expired(apic, false);
+cancel_hv_timer(apic);
 
 if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
 advance_periodic_target_expiration(apic);
@@ -3310,12 +3310,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
 pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 
-if (WARN_ON_ONCE(!mmu->lm_root)) {
+if (WARN_ON_ONCE(!mmu->pml4_root)) {
 r = -EIO;
 goto out_unlock;
 }
 
-mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask;
+mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask;
 }
 
 for (i = 0; i < 4; ++i) {
@@ -3335,7 +3335,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 }
 
 if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
-mmu->root_hpa = __pa(mmu->lm_root);
+mmu->root_hpa = __pa(mmu->pml4_root);
 else
 mmu->root_hpa = __pa(mmu->pae_root);
 
@@ -3350,7 +3350,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 {
 struct kvm_mmu *mmu = vcpu->arch.mmu;
-u64 *lm_root, *pae_root;
+u64 *pml4_root, *pae_root;
 
 /*
 * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP
@@ -3369,14 +3369,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL))
 return -EIO;
 
-if (mmu->pae_root && mmu->lm_root)
+if (mmu->pae_root && mmu->pml4_root)
 return 0;
 
 /*
 * The special roots should always be allocated in concert. Yell and
 * bail if KVM ends up in a state where only one of the roots is valid.
 */
-if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->lm_root))
+if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root))
 return -EIO;
 
 /*
@@ -3387,14 +3387,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 if (!pae_root)
 return -ENOMEM;
 
-lm_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
-if (!lm_root) {
+pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+if (!pml4_root) {
 free_page((unsigned long)pae_root);
 return -ENOMEM;
 }
 
 mmu->pae_root = pae_root;
-mmu->lm_root = lm_root;
+mmu->pml4_root = pml4_root;
 
 return 0;
 }
@@ -5261,7 +5261,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
 if (!tdp_enabled && mmu->pae_root)
 set_memory_encrypted((unsigned long)mmu->pae_root, 1);
 free_page((unsigned long)mmu->pae_root);
-free_page((unsigned long)mmu->lm_root);
+free_page((unsigned long)mmu->pml4_root);
 }
 
 static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
@@ -388,7 +388,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
 }
 
 /**
- * handle_changed_spte - handle bookkeeping associated with an SPTE change
+ * __handle_changed_spte - handle bookkeeping associated with an SPTE change
 * @kvm: kvm instance
 * @as_id: the address space of the paging structure the SPTE was a part of
 * @gfn: the base GFN that was mapped by the SPTE
@@ -444,6 +444,13 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 
 trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
 
+if (is_large_pte(old_spte) != is_large_pte(new_spte)) {
+if (is_large_pte(old_spte))
+atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);
+else
+atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);
+}
+
 /*
 * The only times a SPTE should be changed from a non-present to
 * non-present state is when an MMIO entry is installed/modified/
@@ -1009,6 +1016,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 }
 
 if (!is_shadow_present_pte(iter.old_spte)) {
+/*
+ * If SPTE has been forzen by another thread, just
+ * give up and retry, avoiding unnecessary page table
+ * allocation and free.
+ */
+if (is_removed_spte(iter.old_spte))
+break;
+
 sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
 child_pt = sp->spt;
 
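The first TDP MMU hunk above adjusts kvm->stat.lpages with atomic64 add/sub whenever an SPTE change flips a mapping between large and non-large. A minimal standalone model of that bookkeeping follows; the predicate and helper names here are invented for illustration and only mirror the shape of the logic in the hunk.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for kvm->stat.lpages. */
static atomic_llong lpages;

/* Toy predicate standing in for is_large_pte(). */
static bool is_large(long long spte) { return spte & 1; }

/* Mirror of the hunk's logic: only count transitions in mapping size. */
static void account_spte_change(long long old_spte, long long new_spte)
{
	if (is_large(old_spte) == is_large(new_spte))
		return;
	if (is_large(old_spte))
		atomic_fetch_sub(&lpages, 1);
	else
		atomic_fetch_add(&lpages, 1);
}

int main(void)
{
	account_spte_change(0, 1);  /* 4K -> large: +1 */
	account_spte_change(1, 1);  /* large -> large: no change */
	account_spte_change(1, 0);  /* large -> 4K: -1 */
	printf("lpages = %lld\n", (long long)atomic_load(&lpages));
	return 0;
}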
@@ -764,7 +764,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
 
 svm_switch_vmcb(svm, &svm->vmcb01);
-WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN);
 
 /*
 * On vmexit the GIF is set to false and
@@ -872,6 +871,15 @@ void svm_free_nested(struct vcpu_svm *svm)
 __free_page(virt_to_page(svm->nested.vmcb02.ptr));
 svm->nested.vmcb02.ptr = NULL;
 
+/*
+ * When last_vmcb12_gpa matches the current vmcb12 gpa,
+ * some vmcb12 fields are not loaded if they are marked clean
+ * in the vmcb12, since in this case they are up to date already.
+ *
+ * When the vmcb02 is freed, this optimization becomes invalid.
+ */
+svm->nested.last_vmcb12_gpa = INVALID_GPA;
+
 svm->nested.initialized = false;
 }
 
@@ -884,9 +892,11 @@ void svm_leave_nested(struct vcpu_svm *svm)
 
 if (is_guest_mode(vcpu)) {
 svm->nested.nested_run_pending = 0;
+svm->nested.vmcb12_gpa = INVALID_GPA;
+
 leave_guest_mode(vcpu);
 
-svm_switch_vmcb(svm, &svm->nested.vmcb02);
+svm_switch_vmcb(svm, &svm->vmcb01);
 
 nested_svm_uninit_mmu_context(vcpu);
 vmcb_mark_all_dirty(svm->vmcb);
@@ -1298,12 +1308,17 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 * L2 registers if needed are moved from the current VMCB to VMCB02.
 */
 
+if (is_guest_mode(vcpu))
+svm_leave_nested(svm);
+else
+svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+
+svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+
 svm->nested.nested_run_pending =
 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
 
 svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
-if (svm->current_vmcb == &svm->vmcb01)
-svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
 
 svm->vmcb01.ptr->save.es = save->es;
 svm->vmcb01.ptr->save.cs = save->cs;
@@ -763,7 +763,7 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
 }
 
 static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
-unsigned long __user dst_uaddr,
+void __user *dst_uaddr,
 unsigned long dst_paddr,
 int size, int *err)
 {
@@ -787,8 +787,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
 
 if (tpage) {
 offset = paddr & 15;
-if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
-page_address(tpage) + offset, size))
+if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
 ret = -EFAULT;
 }
 
@@ -800,9 +799,9 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
 }
 
 static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
-unsigned long __user vaddr,
+void __user *vaddr,
 unsigned long dst_paddr,
-unsigned long __user dst_vaddr,
+void __user *dst_vaddr,
 int size, int *error)
 {
 struct page *src_tpage = NULL;
@@ -810,13 +809,12 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 int ret, len = size;
 
 /* If source buffer is not aligned then use an intermediate buffer */
-if (!IS_ALIGNED(vaddr, 16)) {
+if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
 src_tpage = alloc_page(GFP_KERNEL);
 if (!src_tpage)
 return -ENOMEM;
 
-if (copy_from_user(page_address(src_tpage),
-(void __user *)(uintptr_t)vaddr, size)) {
+if (copy_from_user(page_address(src_tpage), vaddr, size)) {
 __free_page(src_tpage);
 return -EFAULT;
 }
@@ -830,7 +828,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 * - copy the source buffer in an intermediate buffer
 * - use the intermediate buffer as source buffer
 */
-if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
 int dst_offset;
 
 dst_tpage = alloc_page(GFP_KERNEL);
@@ -855,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 page_address(src_tpage), size);
 else {
 if (copy_from_user(page_address(dst_tpage) + dst_offset,
-(void __user *)(uintptr_t)vaddr, size)) {
+vaddr, size)) {
 ret = -EFAULT;
 goto e_free;
 }
@@ -935,15 +933,15 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
 if (dec)
 ret = __sev_dbg_decrypt_user(kvm,
 __sme_page_pa(src_p[0]) + s_off,
-dst_vaddr,
+(void __user *)dst_vaddr,
 __sme_page_pa(dst_p[0]) + d_off,
 len, &argp->error);
 else
 ret = __sev_dbg_encrypt_user(kvm,
 __sme_page_pa(src_p[0]) + s_off,
-vaddr,
+(void __user *)vaddr,
 __sme_page_pa(dst_p[0]) + d_off,
-dst_vaddr,
+(void __user *)dst_vaddr,
 len, &argp->error);
 
 sev_unpin_memory(kvm, src_p, n);
@@ -1764,7 +1762,8 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
 e_source_unlock:
 mutex_unlock(&source_kvm->lock);
 e_source_put:
-fput(source_kvm_file);
+if (source_kvm_file)
+fput(source_kvm_file);
 return ret;
 }
 
@@ -2198,7 +2197,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 return -EINVAL;
 }
 
-static void pre_sev_es_run(struct vcpu_svm *svm)
+void sev_es_unmap_ghcb(struct vcpu_svm *svm)
 {
 if (!svm->ghcb)
 return;
@@ -2234,9 +2233,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 int asid = sev_get_asid(svm->vcpu.kvm);
 
-/* Perform any SEV-ES pre-run actions */
-pre_sev_es_run(svm);
-
 /* Assign the asid allocated with this SEV guest */
 svm->asid = asid;
 
@@ -212,7 +212,7 @@ DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
 * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
 * defer the restoration of TSC_AUX until the CPU returns to userspace.
 */
-#define TSC_AUX_URET_SLOT 0
+static int tsc_aux_uret_slot __read_mostly = -1;
 
 static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
 
@@ -447,6 +447,11 @@ static int has_svm(void)
 return 0;
 }
 
+if (pgtable_l5_enabled()) {
+pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
+return 0;
+}
+
 return 1;
 }
 
@@ -959,8 +964,7 @@ static __init int svm_hardware_setup(void)
 kvm_tsc_scaling_ratio_frac_bits = 32;
 }
 
-if (boot_cpu_has(X86_FEATURE_RDTSCP))
-kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX);
+tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
 
 /* Check for pause filtering support */
 if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
@@ -1100,7 +1104,9 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 return svm->vmcb->control.tsc_offset;
 }
 
-static void svm_check_invpcid(struct vcpu_svm *svm)
+/* Evaluate instruction intercepts that depend on guest CPUID features. */
+static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
+struct vcpu_svm *svm)
 {
 /*
 * Intercept INVPCID if shadow paging is enabled to sync/free shadow
@@ -1113,6 +1119,13 @@
 else
 svm_clr_intercept(svm, INTERCEPT_INVPCID);
 }
+
+if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
+if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+svm_clr_intercept(svm, INTERCEPT_RDTSCP);
+else
+svm_set_intercept(svm, INTERCEPT_RDTSCP);
+}
 }
 
 static void init_vmcb(struct kvm_vcpu *vcpu)
@@ -1235,8 +1248,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 svm->current_vmcb->asid_generation = 0;
 svm->asid = 0;
 
-svm->nested.vmcb12_gpa = 0;
-svm->nested.last_vmcb12_gpa = 0;
+svm->nested.vmcb12_gpa = INVALID_GPA;
+svm->nested.last_vmcb12_gpa = INVALID_GPA;
 vcpu->arch.hflags = 0;
 
 if (!kvm_pause_in_guest(vcpu->kvm)) {
@@ -1248,7 +1261,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 svm_clr_intercept(svm, INTERCEPT_PAUSE);
 }
 
-svm_check_invpcid(svm);
+svm_recalc_instruction_intercepts(vcpu, svm);
 
 /*
 * If the host supports V_SPEC_CTRL then disable the interception
@@ -1424,6 +1437,9 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 struct vcpu_svm *svm = to_svm(vcpu);
 struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
 
+if (sev_es_guest(vcpu->kvm))
+sev_es_unmap_ghcb(svm);
+
 if (svm->guest_state_loaded)
 return;
 
@@ -1445,8 +1461,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 }
 }
 
-if (static_cpu_has(X86_FEATURE_RDTSCP))
-kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull);
+if (likely(tsc_aux_uret_slot >= 0))
+kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
 
 svm->guest_state_loaded = true;
 }
@@ -2655,11 +2671,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
 break;
 case MSR_TSC_AUX:
-if (!boot_cpu_has(X86_FEATURE_RDTSCP))
-return 1;
-if (!msr_info->host_initiated &&
-!guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-return 1;
 msr_info->data = svm->tsc_aux;
 break;
 /*
@@ -2876,30 +2887,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
 break;
 case MSR_TSC_AUX:
-if (!boot_cpu_has(X86_FEATURE_RDTSCP))
-return 1;
-
-if (!msr->host_initiated &&
-!guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-return 1;
-
-/*
- * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
- * incomplete and conflicting architectural behavior. Current
- * AMD CPUs completely ignore bits 63:32, i.e. they aren't
- * reserved and always read as zeros. Emulate AMD CPU behavior
- * to avoid explosions if the vCPU is migrated from an AMD host
- * to an Intel host.
- */
-data = (u32)data;
-
 /*
 * TSC_AUX is usually changed only during boot and never read
 * directly. Intercept TSC_AUX instead of exposing it to the
 * guest via direct_access_msrs, and switch it via user return.
 */
 preempt_disable();
-r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull);
+r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
 preempt_enable();
 if (r)
 return 1;
@@ -3084,6 +3078,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 [SVM_EXIT_STGI] = stgi_interception,
 [SVM_EXIT_CLGI] = clgi_interception,
 [SVM_EXIT_SKINIT] = skinit_interception,
+[SVM_EXIT_RDTSCP] = kvm_handle_invalid_op,
 [SVM_EXIT_WBINVD] = kvm_emulate_wbinvd,
 [SVM_EXIT_MONITOR] = kvm_emulate_monitor,
 [SVM_EXIT_MWAIT] = kvm_emulate_mwait,
@@ -3972,8 +3967,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
 guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
 
-/* Check again if INVPCID interception if required */
-svm_check_invpcid(svm);
+svm_recalc_instruction_intercepts(vcpu, svm);
 
 /* For sev guests, the memory encryption bit is not reserved in CR3. */
 if (sev_guest(vcpu->kvm)) {
@@ -581,6 +581,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm);
 void sev_es_create_vcpu(struct vcpu_svm *svm);
 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
+void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 
 /* vmenter.S */
 
@@ -398,6 +398,9 @@ static inline u64 vmx_supported_debugctl(void)
 {
 u64 debugctl = 0;
 
+if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
+
 if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
 debugctl |= DEBUGCTLMSR_LBR_MASK;
 
@@ -3098,15 +3098,8 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
 nested_vmx_handle_enlightened_vmptrld(vcpu, false);
 
 if (evmptrld_status == EVMPTRLD_VMFAIL ||
-evmptrld_status == EVMPTRLD_ERROR) {
-pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
-__func__);
-vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-vcpu->run->internal.suberror =
-KVM_INTERNAL_ERROR_EMULATION;
-vcpu->run->internal.ndata = 0;
+evmptrld_status == EVMPTRLD_ERROR)
 return false;
-}
 }
 
 return true;
@@ -3194,8 +3187,16 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 
 static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
 {
-if (!nested_get_evmcs_page(vcpu))
+if (!nested_get_evmcs_page(vcpu)) {
+pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
+__func__);
+vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+vcpu->run->internal.suberror =
+KVM_INTERNAL_ERROR_EMULATION;
+vcpu->run->internal.ndata = 0;
+
 return false;
+}
 
 if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
 return false;
@@ -4435,7 +4436,15 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 /* Similarly, triple faults in L2 should never escape. */
 WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
 
-kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
+/*
+ * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
+ * Enlightened VMCS after migration and we still need to
+ * do that when something is forcing L2->L1 exit prior to
+ * the first L2 run.
+ */
+(void)nested_get_evmcs_page(vcpu);
+}
 
 /* Service the TLB flush request for L2 before switching to L1. */
 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
@ -455,21 +455,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
|
||||||
|
|
||||||
static unsigned long host_idt_base;
|
static unsigned long host_idt_base;
|
||||||
|
|
||||||
/*
|
|
||||||
* Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
|
|
||||||
* will emulate SYSCALL in legacy mode if the vendor string in guest
|
|
||||||
* CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
|
|
||||||
* support this emulation, IA32_STAR must always be included in
|
|
||||||
* vmx_uret_msrs_list[], even in i386 builds.
|
|
||||||
*/
|
|
||||||
static const u32 vmx_uret_msrs_list[] = {
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
|
|
||||||
#endif
|
|
||||||
MSR_EFER, MSR_TSC_AUX, MSR_STAR,
|
|
||||||
MSR_IA32_TSX_CTRL,
|
|
||||||
};
|
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_HYPERV)
|
#if IS_ENABLED(CONFIG_HYPERV)
|
||||||
static bool __read_mostly enlightened_vmcs = true;
|
static bool __read_mostly enlightened_vmcs = true;
|
||||||
module_param(enlightened_vmcs, bool, 0444);
|
module_param(enlightened_vmcs, bool, 0444);
|
||||||
|
@ -697,21 +682,11 @@ static bool is_valid_passthrough_msr(u32 msr)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < vmx->nr_uret_msrs; ++i)
|
|
||||||
if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
|
|
||||||
return i;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
|
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
i = __vmx_find_uret_msr(vmx, msr);
|
i = kvm_find_user_return_msr(msr);
|
||||||
if (i >= 0)
|
if (i >= 0)
|
||||||
return &vmx->guest_uret_msrs[i];
|
return &vmx->guest_uret_msrs[i];
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -720,13 +695,14 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
|
||||||
static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
|
static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
|
||||||
struct vmx_uret_msr *msr, u64 data)
|
struct vmx_uret_msr *msr, u64 data)
|
||||||
{
|
{
|
||||||
|
unsigned int slot = msr - vmx->guest_uret_msrs;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
u64 old_msr_data = msr->data;
|
u64 old_msr_data = msr->data;
|
||||||
msr->data = data;
|
msr->data = data;
|
||||||
-	if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) {
+	if (msr->load_into_hardware) {
 		preempt_disable();
-		ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask);
+		ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);
 		preempt_enable();
 		if (ret)
 			msr->data = old_msr_data;
@@ -1078,7 +1054,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
 		return false;
 	}
 
-	i = __vmx_find_uret_msr(vmx, MSR_EFER);
+	i = kvm_find_user_return_msr(MSR_EFER);
 	if (i < 0)
 		return false;
 
@@ -1240,11 +1216,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	 */
 	if (!vmx->guest_uret_msrs_loaded) {
 		vmx->guest_uret_msrs_loaded = true;
-		for (i = 0; i < vmx->nr_active_uret_msrs; ++i)
-			kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot,
+		for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+			if (!vmx->guest_uret_msrs[i].load_into_hardware)
+				continue;
+
+			kvm_set_user_return_msr(i,
 						vmx->guest_uret_msrs[i].data,
 						vmx->guest_uret_msrs[i].mask);
+		}
 	}
 
 	if (vmx->nested.need_vmcs12_to_shadow_sync)
@@ -1751,19 +1730,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 	vmx_clear_hlt(vcpu);
 }
 
-static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
+static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
+			       bool load_into_hardware)
 {
-	struct vmx_uret_msr tmp;
-	int from, to;
+	struct vmx_uret_msr *uret_msr;
 
-	from = __vmx_find_uret_msr(vmx, msr);
-	if (from < 0)
+	uret_msr = vmx_find_uret_msr(vmx, msr);
+	if (!uret_msr)
 		return;
-	to = vmx->nr_active_uret_msrs++;
 
-	tmp = vmx->guest_uret_msrs[to];
-	vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from];
-	vmx->guest_uret_msrs[from] = tmp;
+	uret_msr->load_into_hardware = load_into_hardware;
 }
 
 /*
@@ -1773,29 +1749,42 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
  */
 static void setup_msrs(struct vcpu_vmx *vmx)
 {
-	vmx->guest_uret_msrs_loaded = false;
-	vmx->nr_active_uret_msrs = 0;
 #ifdef CONFIG_X86_64
+	bool load_syscall_msrs;
+
 	/*
 	 * The SYSCALL MSRs are only needed on long mode guests, and only
 	 * when EFER.SCE is set.
 	 */
-	if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
-		vmx_setup_uret_msr(vmx, MSR_STAR);
-		vmx_setup_uret_msr(vmx, MSR_LSTAR);
-		vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK);
-	}
+	load_syscall_msrs = is_long_mode(&vmx->vcpu) &&
+			    (vmx->vcpu.arch.efer & EFER_SCE);
+
+	vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs);
+	vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs);
+	vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs);
 #endif
-	if (update_transition_efer(vmx))
-		vmx_setup_uret_msr(vmx, MSR_EFER);
+	vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
 
-	if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
-		vmx_setup_uret_msr(vmx, MSR_TSC_AUX);
+	vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
+			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
+			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
 
-	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+	/*
+	 * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new
+	 * kernel and old userspace. If those guests run on a tsx=off host, do
+	 * allow guests to use TSX_CTRL, but don't change the value in hardware
+	 * so that TSX remains always disabled.
+	 */
+	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM));
 
 	if (cpu_has_vmx_msr_bitmap())
 		vmx_update_msr_bitmap(&vmx->vcpu);
 
+	/*
+	 * The set of MSRs to load may have changed, reload MSRs before the
+	 * next VM-Enter.
+	 */
+	vmx->guest_uret_msrs_loaded = false;
 }
 
 static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
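The setup_msrs() hunk above folds each enabling condition into an argument of vmx_setup_uret_msr() instead of guarding the call, and vmx_prepare_switch_to_guest() now skips entries whose load_into_hardware flag is clear. A stand-alone sketch of that pattern (plain user-space C, not kernel code; the MSR indices and helper names here are only illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct uret_msr {
	uint32_t index;
	bool load_into_hardware;
	uint64_t data;
};

static struct uret_msr msrs[] = {
	{ .index = 0xc0000081 },	/* MSR_STAR */
	{ .index = 0xc0000103 },	/* MSR_TSC_AUX */
};

/* Mirrors vmx_setup_uret_msr(): the condition is an argument, the call is
 * unconditional, and the entry is flagged rather than re-sorted. */
static void setup_uret_msr(uint32_t index, bool load_into_hardware)
{
	for (size_t i = 0; i < sizeof(msrs) / sizeof(msrs[0]); i++)
		if (msrs[i].index == index)
			msrs[i].load_into_hardware = load_into_hardware;
}

int main(void)
{
	bool guest_is_64bit = true, guest_has_rdtscp = false;

	setup_uret_msr(0xc0000081, guest_is_64bit);
	setup_uret_msr(0xc0000103, guest_has_rdtscp);

	/* Mirrors the vmx_prepare_switch_to_guest() loop: skip inactive entries. */
	for (size_t i = 0; i < sizeof(msrs) / sizeof(msrs[0]); i++) {
		if (!msrs[i].load_into_hardware)
			continue;
		printf("would load MSR %#x into hardware\n", (unsigned)msrs[i].index);
	}
	return 0;
}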
@@ -1993,11 +1982,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
 		break;
-	case MSR_TSC_AUX:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-			return 1;
-		goto find_uret_msr;
 	case MSR_IA32_DEBUGCTLMSR:
 		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
 		break;
@@ -2031,6 +2015,9 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu)
 	if (!intel_pmu_lbr_is_enabled(vcpu))
 		debugctl &= ~DEBUGCTLMSR_LBR_MASK;
 
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+		debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
+
 	return debugctl;
 }
 
@@ -2313,14 +2300,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			vmx->pt_desc.guest.addr_a[index / 2] = data;
 		break;
-	case MSR_TSC_AUX:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-			return 1;
-		/* Check reserved bit, higher 32 bits should be zero */
-		if ((data >> 32) != 0)
-			return 1;
-		goto find_uret_msr;
 	case MSR_IA32_PERF_CAPABILITIES:
 		if (data && !vcpu_to_pmu(vcpu)->version)
 			return 1;
@@ -4369,7 +4348,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 						  xsaves_enabled, false);
 	}
 
-	vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);
+	/*
+	 * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
+	 * feature is exposed to the guest. This creates a virtualization hole
+	 * if both are supported in hardware but only one is exposed to the
+	 * guest, but letting the guest execute RDTSCP or RDPID when either one
+	 * is advertised is preferable to emulating the advertised instruction
+	 * in KVM on #UD, and obviously better than incorrectly injecting #UD.
+	 */
+	if (cpu_has_vmx_rdtscp()) {
+		bool rdpid_or_rdtscp_enabled =
+			guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
+			guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
+
+		vmx_adjust_secondary_exec_control(vmx, &exec_control,
+						  SECONDARY_EXEC_ENABLE_RDTSCP,
+						  rdpid_or_rdtscp_enabled, false);
+	}
 	vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
 
 	vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
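The comment in the hunk above describes the RDPID/RDTSCP virtualization hole: a single ENABLE_RDTSCP execution control gates both instructions, so it is enabled when either feature is exposed. A minimal user-space sketch of that gating decision, assuming nothing beyond what the hunk states:

#include <stdbool.h>
#include <stdio.h>

static bool want_enable_rdtscp_ctrl(bool host_has_rdtscp_ctrl,
				    bool guest_has_rdtscp, bool guest_has_rdpid)
{
	if (!host_has_rdtscp_ctrl)
		return false;
	/* Virtualization hole: enabling for one also unblocks the other. */
	return guest_has_rdtscp || guest_has_rdpid;
}

int main(void)
{
	printf("%d\n", want_enable_rdtscp_ctrl(true, false, true));  /* 1 */
	printf("%d\n", want_enable_rdtscp_ctrl(true, false, false)); /* 0 */
	return 0;
}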
@@ -6855,6 +6850,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 
 static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 {
+	struct vmx_uret_msr *tsx_ctrl;
 	struct vcpu_vmx *vmx;
 	int i, cpu, err;
 
@@ -6877,43 +6873,19 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 		goto free_vpid;
 	}
 
-	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
-
-	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
-		u32 index = vmx_uret_msrs_list[i];
-		u32 data_low, data_high;
-		int j = vmx->nr_uret_msrs;
-
-		if (rdmsr_safe(index, &data_low, &data_high) < 0)
-			continue;
-		if (wrmsr_safe(index, data_low, data_high) < 0)
-			continue;
-
-		vmx->guest_uret_msrs[j].slot = i;
-		vmx->guest_uret_msrs[j].data = 0;
-		switch (index) {
-		case MSR_IA32_TSX_CTRL:
-			/*
-			 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
-			 * interception. Keep the host value unchanged to avoid
-			 * changing CPUID bits under the host kernel's feet.
-			 *
-			 * hle=0, rtm=0, tsx_ctrl=1 can be found with some
-			 * combinations of new kernel and old userspace. If
-			 * those guests run on a tsx=off host, do allow guests
-			 * to use TSX_CTRL, but do not change the value on the
-			 * host so that TSX remains always disabled.
-			 */
-			if (boot_cpu_has(X86_FEATURE_RTM))
-				vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
-			else
-				vmx->guest_uret_msrs[j].mask = 0;
-			break;
-		default:
-			vmx->guest_uret_msrs[j].mask = -1ull;
-			break;
-		}
-		++vmx->nr_uret_msrs;
+	for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+		vmx->guest_uret_msrs[i].data = 0;
+		vmx->guest_uret_msrs[i].mask = -1ull;
+	}
+	if (boot_cpu_has(X86_FEATURE_RTM)) {
+		/*
+		 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception.
+		 * Keep the host value unchanged to avoid changing CPUID bits
+		 * under the host kernel's feet.
+		 */
+		tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+		if (tsx_ctrl)
+			vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
 	}
 
 	err = alloc_loaded_vmcs(&vmx->vmcs01);
@@ -7344,9 +7316,11 @@ static __init void vmx_set_cpu_caps(void)
 	if (!cpu_has_vmx_xsaves())
 		kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
 
-	/* CPUID 0x80000001 */
-	if (!cpu_has_vmx_rdtscp())
+	/* CPUID 0x80000001 and 0x7 (RDPID) */
+	if (!cpu_has_vmx_rdtscp()) {
 		kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+		kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+	}
 
 	if (cpu_has_vmx_waitpkg())
 		kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
@@ -7402,8 +7376,9 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 	/*
 	 * RDPID causes #UD if disabled through secondary execution controls.
 	 * Because it is marked as EmulateOnUD, we need to intercept it here.
+	 * Note, RDPID is hidden behind ENABLE_RDTSCP.
 	 */
-	case x86_intercept_rdtscp:
+	case x86_intercept_rdpid:
 		if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
 			exception->vector = UD_VECTOR;
 			exception->error_code_valid = false;
@@ -7769,17 +7744,42 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 };
 
+static __init void vmx_setup_user_return_msrs(void)
+{
+
+	/*
+	 * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
+	 * will emulate SYSCALL in legacy mode if the vendor string in guest
+	 * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
+	 * support this emulation, MSR_STAR is included in the list for i386,
+	 * but is never loaded into hardware. MSR_CSTAR is also never loaded
+	 * into hardware and is here purely for emulation purposes.
+	 */
+	const u32 vmx_uret_msrs_list[] = {
+	#ifdef CONFIG_X86_64
+		MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
+	#endif
+		MSR_EFER, MSR_TSC_AUX, MSR_STAR,
+		MSR_IA32_TSX_CTRL,
+	};
+	int i;
+
+	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
+
+	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
+		kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
+}
+
 static __init int hardware_setup(void)
 {
 	unsigned long host_bndcfgs;
 	struct desc_ptr dt;
-	int r, i, ept_lpage_level;
+	int r, ept_lpage_level;
 
 	store_idt(&dt);
 	host_idt_base = dt.address;
 
-	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
-		kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);
+	vmx_setup_user_return_msrs();
 
 	if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
 		return -EIO;
@@ -36,7 +36,7 @@ struct vmx_msrs {
 };
 
 struct vmx_uret_msr {
-	unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */
+	bool load_into_hardware;
 	u64 data;
 	u64 mask;
 };
@@ -245,8 +245,16 @@ struct vcpu_vmx {
 	u32 idt_vectoring_info;
 	ulong rflags;
 
+	/*
+	 * User return MSRs are always emulated when enabled in the guest, but
+	 * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
+	 * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
+	 * be loaded into hardware if those conditions aren't met.
+	 * nr_active_uret_msrs tracks the number of MSRs that need to be loaded
+	 * into hardware when running the guest. guest_uret_msrs[] is resorted
+	 * whenever the number of "active" uret MSRs is modified.
+	 */
 	struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
-	int nr_uret_msrs;
 	int nr_active_uret_msrs;
 	bool guest_uret_msrs_loaded;
 #ifdef CONFIG_X86_64
@@ -184,11 +184,6 @@ module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
  */
 #define KVM_MAX_NR_USER_RETURN_MSRS 16
 
-struct kvm_user_return_msrs_global {
-	int nr;
-	u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS];
-};
-
 struct kvm_user_return_msrs {
 	struct user_return_notifier urn;
 	bool registered;
@@ -198,7 +193,9 @@ struct kvm_user_return_msrs {
 	} values[KVM_MAX_NR_USER_RETURN_MSRS];
 };
 
-static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global;
+u32 __read_mostly kvm_nr_uret_msrs;
+EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
+static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
 static struct kvm_user_return_msrs __percpu *user_return_msrs;
 
 #define KVM_SUPPORTED_XCR0	(XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
@@ -330,23 +327,53 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 		user_return_notifier_unregister(urn);
 	}
 	local_irq_restore(flags);
-	for (slot = 0; slot < user_return_msrs_global.nr; ++slot) {
+	for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
 		values = &msrs->values[slot];
 		if (values->host != values->curr) {
-			wrmsrl(user_return_msrs_global.msrs[slot], values->host);
+			wrmsrl(kvm_uret_msrs_list[slot], values->host);
 			values->curr = values->host;
 		}
 	}
 }
 
-void kvm_define_user_return_msr(unsigned slot, u32 msr)
+static int kvm_probe_user_return_msr(u32 msr)
 {
-	BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
-	user_return_msrs_global.msrs[slot] = msr;
-	if (slot >= user_return_msrs_global.nr)
-		user_return_msrs_global.nr = slot + 1;
+	u64 val;
+	int ret;
+
+	preempt_disable();
+	ret = rdmsrl_safe(msr, &val);
+	if (ret)
+		goto out;
+	ret = wrmsrl_safe(msr, val);
+out:
+	preempt_enable();
+	return ret;
 }
-EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
+
+int kvm_add_user_return_msr(u32 msr)
+{
+	BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
+
+	if (kvm_probe_user_return_msr(msr))
+		return -1;
+
+	kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
+	return kvm_nr_uret_msrs++;
+}
+EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
+
+int kvm_find_user_return_msr(u32 msr)
+{
+	int i;
+
+	for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+		if (kvm_uret_msrs_list[i] == msr)
+			return i;
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
 
 static void kvm_user_return_msr_cpu_online(void)
 {
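The new common helpers above probe an MSR with rdmsr/wrmsr before adding it to the global list, so MSRs the host cannot access (e.g. MSR_TSC_AUX on a host without RDTSCP) never get a slot. A self-contained user-space model of that probe/add/find flow — the probe stub and MSR constants are illustrative; only the structure mirrors the hunk:

#include <stdint.h>
#include <stdio.h>

#define MAX_NR_USER_RETURN_MSRS 16

static uint32_t uret_msrs_list[MAX_NR_USER_RETURN_MSRS];
static uint32_t nr_uret_msrs;

/* Stand-in for rdmsrl_safe()/wrmsrl_safe(): pretend 0xc0000103 (TSC_AUX)
 * faults on this "host", as it would on CPUs without RDTSCP. */
static int probe_user_return_msr(uint32_t msr)
{
	return msr == 0xc0000103 ? -1 : 0;
}

static int add_user_return_msr(uint32_t msr)
{
	if (nr_uret_msrs >= MAX_NR_USER_RETURN_MSRS || probe_user_return_msr(msr))
		return -1;
	uret_msrs_list[nr_uret_msrs] = msr;
	return nr_uret_msrs++;
}

static int find_user_return_msr(uint32_t msr)
{
	for (uint32_t i = 0; i < nr_uret_msrs; i++)
		if (uret_msrs_list[i] == msr)
			return i;
	return -1;
}

int main(void)
{
	add_user_return_msr(0xc0000080);	/* MSR_EFER */
	add_user_return_msr(0xc0000103);	/* MSR_TSC_AUX, probe fails */

	printf("EFER slot:    %d\n", find_user_return_msr(0xc0000080)); /* 0 */
	printf("TSC_AUX slot: %d\n", find_user_return_msr(0xc0000103)); /* -1 */
	return 0;
}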
@@ -355,8 +382,8 @@ static void kvm_user_return_msr_cpu_online(void)
 	u64 value;
 	int i;
 
-	for (i = 0; i < user_return_msrs_global.nr; ++i) {
-		rdmsrl_safe(user_return_msrs_global.msrs[i], &value);
+	for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+		rdmsrl_safe(kvm_uret_msrs_list[i], &value);
 		msrs->values[i].host = value;
 		msrs->values[i].curr = value;
 	}
@@ -371,7 +398,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
 	value = (value & mask) | (msrs->values[slot].host & ~mask);
 	if (value == msrs->values[slot].curr)
 		return 0;
-	err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value);
+	err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
 	if (err)
 		return 1;
 
@@ -1149,6 +1176,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
 
 	if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
 		fixed |= DR6_RTM;
+
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+		fixed |= DR6_BUS_LOCK;
 	return fixed;
 }
 
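The kvm_dr6_fixed() change above forces DR6_BUS_LOCK to read as 1 when bus-lock detection is not exposed to the guest, mirroring how DR6_RTM is handled. A small sketch of that computation; the bit constants are the architectural values as generally documented (DR6_FIXED_1 = 0xfffe0ff0, RTM = bit 16, BUS_LOCK = bit 11), not values taken from the patch itself:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DR6_FIXED_1	0xfffe0ff0ull
#define DR6_RTM		(1ull << 16)
#define DR6_BUS_LOCK	(1ull << 11)

static uint64_t dr6_fixed(bool guest_has_rtm, bool guest_has_bus_lock_detect)
{
	uint64_t fixed = DR6_FIXED_1;

	/* Bits the guest cannot use read as 1, exactly as in the hunk. */
	if (!guest_has_rtm)
		fixed |= DR6_RTM;
	if (!guest_has_bus_lock_detect)
		fixed |= DR6_BUS_LOCK;
	return fixed;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)dr6_fixed(false, false));
	printf("%#llx\n", (unsigned long long)dr6_fixed(true, true));
	return 0;
}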
@@ -1615,6 +1645,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 		 * invokes 64-bit SYSENTER.
 		 */
 		data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
+		break;
+	case MSR_TSC_AUX:
+		if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+			return 1;
+
+		if (!host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+			return 1;
+
+		/*
+		 * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
+		 * incomplete and conflicting architectural behavior. Current
+		 * AMD CPUs completely ignore bits 63:32, i.e. they aren't
+		 * reserved and always read as zeros. Enforce Intel's reserved
+		 * bits check if and only if the guest CPU is Intel, and clear
+		 * the bits in all other cases. This ensures cross-vendor
+		 * migration will provide consistent behavior for the guest.
+		 */
+		if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
+			return 1;
+
+		data = (u32)data;
+		break;
 	}
 
 	msr.data = data;
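The new MSR_TSC_AUX case above enforces the reserved-bits check only for Intel guest CPU models and otherwise truncates the value, so cross-vendor migration behaves consistently. A minimal sketch of that rule:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static int set_tsc_aux(bool guest_is_intel, uint64_t data, uint32_t *tsc_aux)
{
	/* Intel guests: bits 63:32 are reserved, reject non-zero values. */
	if (guest_is_intel && (data >> 32) != 0)
		return 1;

	/* Everyone else: silently drop the upper bits, as the hunk does. */
	*tsc_aux = (uint32_t)data;
	return 0;
}

int main(void)
{
	uint32_t val = 0;

	printf("%d\n", set_tsc_aux(true, 0x100000001ull, &val));  /* 1: rejected */
	printf("%d\n", set_tsc_aux(false, 0x100000001ull, &val)); /* 0: truncated */
	printf("%#x\n", val);                                     /* 0x1 */
	return 0;
}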
@@ -1651,6 +1705,18 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
 	if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
 		return KVM_MSR_RET_FILTERED;
 
+	switch (index) {
+	case MSR_TSC_AUX:
+		if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+			return 1;
+
+		if (!host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+			return 1;
+		break;
+	}
+
 	msr.index = index;
 	msr.host_initiated = host_initiated;
 
@@ -5468,14 +5534,18 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
 static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
 			      struct kvm_msr_filter_range *user_range)
 {
-	struct msr_bitmap_range range;
 	unsigned long *bitmap = NULL;
 	size_t bitmap_size;
-	int r;
 
 	if (!user_range->nmsrs)
 		return 0;
 
+	if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))
+		return -EINVAL;
+
+	if (!user_range->flags)
+		return -EINVAL;
+
 	bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
 	if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
 		return -EINVAL;
@@ -5484,31 +5554,15 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
 	if (IS_ERR(bitmap))
 		return PTR_ERR(bitmap);
 
-	range = (struct msr_bitmap_range) {
+	msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) {
 		.flags = user_range->flags,
 		.base = user_range->base,
 		.nmsrs = user_range->nmsrs,
 		.bitmap = bitmap,
 	};
 
-	if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) {
-		r = -EINVAL;
-		goto err;
-	}
-
-	if (!range.flags) {
-		r = -EINVAL;
-		goto err;
-	}
-
-	/* Everything ok, add this range identifier. */
-	msr_filter->ranges[msr_filter->count] = range;
 	msr_filter->count++;
-
 	return 0;
-err:
-	kfree(bitmap);
-	return r;
 }
 
 static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
@@ -5937,7 +5991,8 @@ static void kvm_init_msr_list(void)
 				continue;
 			break;
 		case MSR_TSC_AUX:
-			if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+			if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
+			    !kvm_cpu_cap_has(X86_FEATURE_RDPID))
 				continue;
 			break;
 		case MSR_IA32_UMWAIT_CONTROL:
@@ -8039,6 +8094,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
 
+/*
+ * Indirection to move queue_work() out of the tk_core.seq write held
+ * region to prevent possible deadlocks against time accessors which
+ * are invoked with work related locks held.
+ */
+static void pvclock_irq_work_fn(struct irq_work *w)
+{
+	queue_work(system_long_wq, &pvclock_gtod_work);
+}
+
+static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
+
 /*
  * Notification about pvclock gtod data update.
  */
@@ -8050,13 +8117,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
 
 	update_pvclock_gtod(tk);
 
-	/* disable master clock if host does not trust, or does not
-	 * use, TSC based clocksource.
+	/*
+	 * Disable master clock if host does not trust, or does not use,
+	 * TSC based clocksource. Delegate queue_work() to irq_work as
+	 * this is invoked with tk_core.seq write held.
 	 */
 	if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
 	    atomic_read(&kvm_guest_has_master_clock) != 0)
-		queue_work(system_long_wq, &pvclock_gtod_work);
+		irq_work_queue(&pvclock_irq_work);
 
 	return 0;
 }
 
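The two pvclock hunks above move queue_work() out of the tk_core.seq write-held region by bouncing it through an irq_work. A rough user-space model of the shape of that fix — it only illustrates the deferral, not the kernel's irq_work machinery:

#include <stdbool.h>
#include <stdio.h>

static bool pvclock_irq_work_pending;

/* Stand-in for irq_work_queue(): only record that work is needed. */
static void irq_work_queue_model(void)
{
	pvclock_irq_work_pending = true;
}

/* Stand-in for the irq_work callback, run after the writer released the seqcount. */
static void pvclock_irq_work_fn_model(void)
{
	if (pvclock_irq_work_pending) {
		pvclock_irq_work_pending = false;
		printf("queue_work(pvclock_gtod_work) outside tk_core.seq\n");
	}
}

int main(void)
{
	/* --- notifier runs inside the tk_core.seq write side --- */
	irq_work_queue_model();		/* safe: takes no work-queue locks here */
	/* --- write side released --- */

	pvclock_irq_work_fn_model();	/* heavy lifting happens here */
	return 0;
}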
@@ -8118,6 +8186,7 @@ int kvm_arch_init(void *opaque)
 		printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
 		goto out_free_x86_emulator_cache;
 	}
+	kvm_nr_uret_msrs = 0;
 
 	r = kvm_mmu_module_init();
 	if (r)
@@ -8168,6 +8237,8 @@ void kvm_arch_exit(void)
 	cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
 #ifdef CONFIG_X86_64
 	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+	irq_work_sync(&pvclock_irq_work);
+	cancel_work_sync(&pvclock_gtod_work);
 #endif
 	kvm_x86_ops.hardware_enable = NULL;
 	kvm_mmu_module_exit();
@@ -111,7 +111,7 @@ OPTIONS
 --tracepoints::
 	retrieve statistics from tracepoints
 
-*z*::
+-z::
 --skip-zero-records::
 	omit records with all zeros in logging mode
 
@@ -54,9 +54,9 @@ idt_handlers:
 	.align 8
 
 	/* Fetch current address and append it to idt_handlers. */
-current_handler = .
+666 :
 .pushsection .rodata
-.quad current_handler
+.quad 666b
 .popsection
 
 	.if ! \has_error
@@ -18,6 +18,28 @@
 #include "vmx.h"
 
 #define VCPU_ID 5
+#define NMI_VECTOR 2
+
+static int ud_count;
+
+void enable_x2apic(void)
+{
+	uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4);
+
+	wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+	      MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+	wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED);
+}
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+	ud_count++;
+	regs->rip += 3; /* VMLAUNCH */
+}
+
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
+
 void l2_guest_code(void)
 {
@@ -25,15 +47,23 @@ void l2_guest_code(void)
 
 	GUEST_SYNC(8);
 
+	/* Forced exit to L1 upon restore */
+	GUEST_SYNC(9);
+
 	/* Done, exit to L1 and never come back. */
 	vmcall();
 }
 
-void l1_guest_code(struct vmx_pages *vmx_pages)
+void guest_code(struct vmx_pages *vmx_pages)
 {
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 
+	enable_x2apic();
+
+	GUEST_SYNC(1);
+	GUEST_SYNC(2);
+
 	enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
 
 	GUEST_ASSERT(vmx_pages->vmcs_gpa);
@@ -55,27 +85,40 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 	current_evmcs->revision_id = EVMCS_VERSION;
 	GUEST_SYNC(6);
 
+	current_evmcs->pin_based_vm_exec_control |=
+		PIN_BASED_NMI_EXITING;
 	GUEST_ASSERT(!vmlaunch());
 	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-	GUEST_SYNC(9);
+
+	/*
+	 * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is
+	 * up-to-date (RIP points where it should and not at the beginning
+	 * of l2_guest_code(). GUEST_SYNC(9) checkes that.
+	 */
 	GUEST_ASSERT(!vmresume());
-	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
 	GUEST_SYNC(10);
-}
 
-void guest_code(struct vmx_pages *vmx_pages)
-{
-	GUEST_SYNC(1);
-	GUEST_SYNC(2);
-
-	if (vmx_pages)
-		l1_guest_code(vmx_pages);
-
-	GUEST_DONE();
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	GUEST_SYNC(11);
 
 	/* Try enlightened vmptrld with an incorrect GPA */
 	evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
 	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(ud_count == 1);
+	GUEST_DONE();
+}
+
+void inject_nmi(struct kvm_vm *vm)
+{
+	struct kvm_vcpu_events events;
+
+	vcpu_events_get(vm, VCPU_ID, &events);
+
+	events.nmi.pending = 1;
+	events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
+
+	vcpu_events_set(vm, VCPU_ID, &events);
 }
 
 int main(int argc, char *argv[])
@@ -109,6 +152,13 @@ int main(int argc, char *argv[])
 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
 	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+	vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+	vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler);
+
+	pr_info("Running L1 which uses EVMCS to run L2\n");
+
 	for (stage = 1;; stage++) {
 		_vcpu_run(vm, VCPU_ID);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@@ -124,7 +174,7 @@ int main(int argc, char *argv[])
 		case UCALL_SYNC:
 			break;
 		case UCALL_DONE:
-			goto part1_done;
+			goto done;
 		default:
 			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
@@ -154,12 +204,14 @@ int main(int argc, char *argv[])
 		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
 			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
 			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+
+		/* Force immediate L2->L1 exit before resuming */
+		if (stage == 8) {
+			pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+			inject_nmi(vm);
+		}
 	}
 
-part1_done:
-	_vcpu_run(vm, VCPU_ID);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
-		    "Unexpected successful VMEnter with invalid eVMCS pointer!");
-
+done:
 	kvm_vm_free(vm);
 }
@@ -2893,8 +2893,8 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
 	if (val < grow_start)
 		val = grow_start;
 
-	if (val > halt_poll_ns)
-		val = halt_poll_ns;
+	if (val > vcpu->kvm->max_halt_poll_ns)
+		val = vcpu->kvm->max_halt_poll_ns;
 
 	vcpu->halt_poll_ns = val;
 out:
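The grow_halt_poll_ns() change above clamps the polling window to the per-VM max_halt_poll_ns (the value userspace sets via KVM_CAP_HALT_POLL) instead of the module-wide halt_poll_ns. A small sketch of the capped growth, with purely illustrative numbers:

#include <stdint.h>
#include <stdio.h>

static uint64_t grow_halt_poll_ns(uint64_t val, uint64_t grow, uint64_t grow_start,
				  uint64_t vm_max_halt_poll_ns)
{
	val *= grow;
	if (val < grow_start)
		val = grow_start;
	if (val > vm_max_halt_poll_ns)
		val = vm_max_halt_poll_ns;	/* per-VM cap, as in the hunk */
	return val;
}

int main(void)
{
	uint64_t val = 0;

	/* Grows 10000 -> 20000 -> 40000 -> 80000, then clamps at 100000. */
	for (int i = 0; i < 5; i++) {
		val = grow_halt_poll_ns(val, 2, 10000, 100000);
		printf("halt_poll_ns = %llu\n", (unsigned long long)val);
	}
	return 0;
}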
@@ -2973,7 +2973,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 				goto out;
 			}
 			poll_end = cur = ktime_get();
-		} while (single_task_running() && ktime_before(cur, stop));
+		} while (single_task_running() && !need_resched() &&
+			 ktime_before(cur, stop));
 	}
 
 	prepare_to_rcuwait(&vcpu->wait);