mirror of https://gitee.com/openkylin/linux.git
x86:
- missing TLB flush - nested virtualization fixes for SMM (secure boot on nested hypervisor) and other nested SVM fixes - syscall fuzzing fixes - live migration fix for AMD SEV - mirror VMs now work for SEV-ES too - fixes for reset - possible out-of-bounds access in IOAPIC emulation - fix enlightened VMCS on Windows 2022 ARM: - Add missing FORCE target when building the EL2 object - Fix a PMU probe regression on some platforms Generic: - KCSAN fixes selftests: - random fixes, mostly for clang compilation -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmFN0EwUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroNqaQf/Vx7ePFTqwWpo+8wKapnc6JN9SLjC hM4jipxfc1WyQWcfCt8ZuPhCnhF7o8mG/mrqTm+JB+oGqIsydHW19DiUT8ekv09F dQ+XYSiR4B547wUH5XLQc4xG9imwYlXGEOHqrE7eJvGH3LOqVFX2fLRBnFefZbO8 GKhRJrGXwG3/JSAP6A0c22iVU+pLbfV9gpKwrAj0V7o8nzT2b3Wmh74WBNb47BzE a4+AwKpWO4rqJGOwdYwy67pdFHh1YmrlZ59cFZc7fzlXE+o0D0bitaJyioZALpOl 4mRGdzoYkNB++ZjDzVFnAClCYQV/oNxCNGFaFF2mh/gzXG1TLmN7B8zGDg== =7oVh -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm fixes from Paolo Bonzini: "A bit late... I got sidetracked by back-from-vacation routines and conferences. But most of these patches are already a few weeks old and things look more calm on the mailing list than what this pull request would suggest. x86: - missing TLB flush - nested virtualization fixes for SMM (secure boot on nested hypervisor) and other nested SVM fixes - syscall fuzzing fixes - live migration fix for AMD SEV - mirror VMs now work for SEV-ES too - fixes for reset - possible out-of-bounds access in IOAPIC emulation - fix enlightened VMCS on Windows 2022 ARM: - Add missing FORCE target when building the EL2 object - Fix a PMU probe regression on some platforms Generic: - KCSAN fixes selftests: - random fixes, mostly for clang compilation" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (43 commits) selftests: KVM: Explicitly use movq to read xmm registers selftests: KVM: Call ucall_init when setting up in rseq_test KVM: Remove tlbs_dirty KVM: X86: Synchronize the shadow pagetable before link it KVM: X86: Fix missed remote tlb flush in rmap_write_protect() KVM: x86: nSVM: don't copy virt_ext from vmcb12 KVM: x86: nSVM: test eax for 4K alignment for GP errata workaround KVM: x86: selftests: test simultaneous uses of V_IRQ from L1 and L0 KVM: x86: nSVM: restore int_vector in svm_clear_vintr kvm: x86: Add AMD PMU MSRs to msrs_to_save_all[] KVM: x86: nVMX: re-evaluate emulation_required on nested VM exit KVM: x86: nVMX: don't fail nested VM entry on invalid guest state if !from_vmentry KVM: x86: VMX: synthesize invalid VM exit when emulating invalid guest state KVM: x86: nSVM: refactor svm_leave_smm and smm_enter_smm KVM: x86: SVM: call KVM_REQ_GET_NESTED_STATE_PAGES on exit from SMM mode KVM: x86: reset pdptrs_from_userspace when exiting smm KVM: x86: nSVM: restore the L1 host state prior to resuming nested guest on SMM exit KVM: nVMX: Filter out all unsupported controls when eVMCS was activated KVM: KVM: Use cpumask_available() to check for NULL cpumask when kicking vCPUs KVM: Clean up benign vcpu->cpu data races when kicking vCPUs ...
This commit is contained in:
commit
9cccec2bf3
|
@ -54,7 +54,7 @@ $(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
|
|||
# runtime. Because the hypervisor is part of the kernel binary, relocations
|
||||
# produce a kernel VA. We enumerate relocations targeting hyp at build time
|
||||
# and convert the kernel VAs at those positions to hyp VAs.
|
||||
$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel
|
||||
$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel FORCE
|
||||
$(call if_changed,hyprel)
|
||||
|
||||
# 5) Compile hyp-reloc.S and link it into the existing partially linked object.
|
||||
|
|
|
@ -50,9 +50,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
|
|||
|
||||
int kvm_perf_init(void)
|
||||
{
|
||||
if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled())
|
||||
static_branch_enable(&kvm_arm_pmu_available);
|
||||
|
||||
return perf_register_guest_info_callbacks(&kvm_guest_cbs);
|
||||
}
|
||||
|
||||
|
|
|
@ -740,7 +740,14 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
|
|||
kvm_pmu_create_perf_event(vcpu, select_idx);
|
||||
}
|
||||
|
||||
int kvm_pmu_probe_pmuver(void)
|
||||
void kvm_host_pmu_init(struct arm_pmu *pmu)
|
||||
{
|
||||
if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
|
||||
!kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
|
||||
static_branch_enable(&kvm_arm_pmu_available);
|
||||
}
|
||||
|
||||
static int kvm_pmu_probe_pmuver(void)
|
||||
{
|
||||
struct perf_event_attr attr = { };
|
||||
struct perf_event *event;
|
||||
|
|
|
@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
|
|||
static void __set_cpu_idle(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
|
||||
set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
|
||||
set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
|
||||
}
|
||||
|
||||
static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
|
||||
clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
|
||||
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
|
||||
}
|
||||
|
||||
static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
|
||||
|
|
|
@ -4066,7 +4066,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
|
|||
kvm_s390_patch_guest_per_regs(vcpu);
|
||||
}
|
||||
|
||||
clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
|
||||
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
|
||||
|
||||
vcpu->arch.sie_block->icptcode = 0;
|
||||
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
|
||||
|
|
|
@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
|
|||
|
||||
static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
|
||||
return test_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
|
||||
}
|
||||
|
||||
static inline int kvm_is_ucontrol(struct kvm *kvm)
|
||||
|
|
|
@ -46,7 +46,7 @@ struct kvm_page_track_notifier_node {
|
|||
struct kvm_page_track_notifier_node *node);
|
||||
};
|
||||
|
||||
void kvm_page_track_init(struct kvm *kvm);
|
||||
int kvm_page_track_init(struct kvm *kvm);
|
||||
void kvm_page_track_cleanup(struct kvm *kvm);
|
||||
|
||||
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
|
||||
|
|
|
@ -4206,7 +4206,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
|
|||
u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
|
||||
|
||||
if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
|
||||
return emulate_ud(ctxt);
|
||||
return emulate_gp(ctxt, 0);
|
||||
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
|
|
@ -939,7 +939,7 @@ static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
|
|||
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
|
||||
stimer_init(&hv_vcpu->stimer[i], i);
|
||||
|
||||
hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
|
||||
hv_vcpu->vp_index = vcpu->vcpu_idx;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1444,7 +1444,6 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
|||
switch (msr) {
|
||||
case HV_X64_MSR_VP_INDEX: {
|
||||
struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
|
||||
int vcpu_idx = kvm_vcpu_get_idx(vcpu);
|
||||
u32 new_vp_index = (u32)data;
|
||||
|
||||
if (!host || new_vp_index >= KVM_MAX_VCPUS)
|
||||
|
@ -1459,9 +1458,9 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
|||
* VP index is changing, adjust num_mismatched_vp_indexes if
|
||||
* it now matches or no longer matches vcpu_idx.
|
||||
*/
|
||||
if (hv_vcpu->vp_index == vcpu_idx)
|
||||
if (hv_vcpu->vp_index == vcpu->vcpu_idx)
|
||||
atomic_inc(&hv->num_mismatched_vp_indexes);
|
||||
else if (new_vp_index == vcpu_idx)
|
||||
else if (new_vp_index == vcpu->vcpu_idx)
|
||||
atomic_dec(&hv->num_mismatched_vp_indexes);
|
||||
|
||||
hv_vcpu->vp_index = new_vp_index;
|
||||
|
|
|
@ -83,7 +83,7 @@ static inline u32 kvm_hv_get_vpindex(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
|
||||
return hv_vcpu ? hv_vcpu->vp_index : kvm_vcpu_get_idx(vcpu);
|
||||
return hv_vcpu ? hv_vcpu->vp_index : vcpu->vcpu_idx;
|
||||
}
|
||||
|
||||
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
|
||||
|
|
|
@ -319,8 +319,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
|
|||
unsigned index;
|
||||
bool mask_before, mask_after;
|
||||
union kvm_ioapic_redirect_entry *e;
|
||||
unsigned long vcpu_bitmap;
|
||||
int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode;
|
||||
DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
|
||||
|
||||
switch (ioapic->ioregsel) {
|
||||
case IOAPIC_REG_VERSION:
|
||||
|
@ -384,9 +384,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
|
|||
irq.shorthand = APIC_DEST_NOSHORT;
|
||||
irq.dest_id = e->fields.dest_id;
|
||||
irq.msi_redir_hint = false;
|
||||
bitmap_zero(&vcpu_bitmap, 16);
|
||||
bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
|
||||
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
|
||||
&vcpu_bitmap);
|
||||
vcpu_bitmap);
|
||||
if (old_dest_mode != e->fields.dest_mode ||
|
||||
old_dest_id != e->fields.dest_id) {
|
||||
/*
|
||||
|
@ -399,10 +399,10 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
|
|||
kvm_lapic_irq_dest_mode(
|
||||
!!e->fields.dest_mode);
|
||||
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
|
||||
&vcpu_bitmap);
|
||||
vcpu_bitmap);
|
||||
}
|
||||
kvm_make_scan_ioapic_request_mask(ioapic->kvm,
|
||||
&vcpu_bitmap);
|
||||
vcpu_bitmap);
|
||||
} else {
|
||||
kvm_make_scan_ioapic_request(ioapic->kvm);
|
||||
}
|
||||
|
|
|
@ -2027,8 +2027,8 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
|
|||
} while (!sp->unsync_children);
|
||||
}
|
||||
|
||||
static void mmu_sync_children(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu_page *parent)
|
||||
static int mmu_sync_children(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu_page *parent, bool can_yield)
|
||||
{
|
||||
int i;
|
||||
struct kvm_mmu_page *sp;
|
||||
|
@ -2055,12 +2055,18 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
|
|||
}
|
||||
if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
|
||||
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
|
||||
if (!can_yield) {
|
||||
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
|
||||
flush = false;
|
||||
}
|
||||
}
|
||||
|
||||
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
|
||||
|
@ -2146,9 +2152,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
|
|||
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
|
||||
}
|
||||
|
||||
if (sp->unsync_children)
|
||||
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
|
||||
|
||||
__clear_sp_write_flooding_count(sp);
|
||||
|
||||
trace_get_page:
|
||||
|
@ -3684,7 +3687,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
|
|||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
|
||||
|
||||
mmu_sync_children(vcpu, sp);
|
||||
mmu_sync_children(vcpu, sp, true);
|
||||
|
||||
kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
|
@ -3700,7 +3703,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
|
|||
if (IS_VALID_PAE_ROOT(root)) {
|
||||
root &= PT64_BASE_ADDR_MASK;
|
||||
sp = to_shadow_page(root);
|
||||
mmu_sync_children(vcpu, sp);
|
||||
mmu_sync_children(vcpu, sp, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -164,13 +164,13 @@ void kvm_page_track_cleanup(struct kvm *kvm)
|
|||
cleanup_srcu_struct(&head->track_srcu);
|
||||
}
|
||||
|
||||
void kvm_page_track_init(struct kvm *kvm)
|
||||
int kvm_page_track_init(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
|
||||
head = &kvm->arch.track_notifier_head;
|
||||
init_srcu_struct(&head->track_srcu);
|
||||
INIT_HLIST_HEAD(&head->track_notifier_list);
|
||||
return init_srcu_struct(&head->track_srcu);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -707,8 +707,27 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
|
|||
if (!is_shadow_present_pte(*it.sptep)) {
|
||||
table_gfn = gw->table_gfn[it.level - 2];
|
||||
access = gw->pt_access[it.level - 2];
|
||||
sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
|
||||
false, access);
|
||||
sp = kvm_mmu_get_page(vcpu, table_gfn, addr,
|
||||
it.level-1, false, access);
|
||||
/*
|
||||
* We must synchronize the pagetable before linking it
|
||||
* because the guest doesn't need to flush tlb when
|
||||
* the gpte is changed from non-present to present.
|
||||
* Otherwise, the guest may use the wrong mapping.
|
||||
*
|
||||
* For PG_LEVEL_4K, kvm_mmu_get_page() has already
|
||||
* synchronized it transiently via kvm_sync_page().
|
||||
*
|
||||
* For higher level pagetable, we synchronize it via
|
||||
* the slower mmu_sync_children(). If it needs to
|
||||
* break, some progress has been made; return
|
||||
* RET_PF_RETRY and retry on the next #PF.
|
||||
* KVM_REQ_MMU_SYNC is not necessary but it
|
||||
* expedites the process.
|
||||
*/
|
||||
if (sp->unsync_children &&
|
||||
mmu_sync_children(vcpu, sp, false))
|
||||
return RET_PF_RETRY;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1047,14 +1066,6 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
|
|||
* Using the cached information from sp->gfns is safe because:
|
||||
* - The spte has a reference to the struct page, so the pfn for a given gfn
|
||||
* can't change unless all sptes pointing to it are nuked first.
|
||||
*
|
||||
* Note:
|
||||
* We should flush all tlbs if spte is dropped even though guest is
|
||||
* responsible for it. Since if we don't, kvm_mmu_notifier_invalidate_page
|
||||
* and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
|
||||
* used by guest then tlbs are not flushed, so guest is allowed to access the
|
||||
* freed pages.
|
||||
* And we increase kvm->tlbs_dirty to delay tlbs flush in this case.
|
||||
*/
|
||||
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
{
|
||||
|
@ -1107,13 +1118,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
|||
return 0;
|
||||
|
||||
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
|
||||
/*
|
||||
* Update spte before increasing tlbs_dirty to make
|
||||
* sure no tlb flush is lost after spte is zapped; see
|
||||
* the comments in kvm_flush_remote_tlbs().
|
||||
*/
|
||||
smp_wmb();
|
||||
vcpu->kvm->tlbs_dirty++;
|
||||
set_spte_ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1128,12 +1133,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
|||
|
||||
if (gfn != sp->gfns[i]) {
|
||||
drop_spte(vcpu->kvm, &sp->spt[i]);
|
||||
/*
|
||||
* The same as above where we are doing
|
||||
* prefetch_invalid_gpte().
|
||||
*/
|
||||
smp_wmb();
|
||||
vcpu->kvm->tlbs_dirty++;
|
||||
set_spte_ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -545,7 +545,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
|
|||
(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
|
||||
(svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits);
|
||||
|
||||
svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
|
||||
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
|
||||
svm->vmcb->control.int_state = svm->nested.ctl.int_state;
|
||||
svm->vmcb->control.event_inj = svm->nested.ctl.event_inj;
|
||||
|
@ -579,7 +578,7 @@ static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to
|
|||
}
|
||||
|
||||
int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
|
||||
struct vmcb *vmcb12)
|
||||
struct vmcb *vmcb12, bool from_vmrun)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
int ret;
|
||||
|
@ -609,13 +608,16 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
|
|||
nested_vmcb02_prepare_save(svm, vmcb12);
|
||||
|
||||
ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
|
||||
nested_npt_enabled(svm), true);
|
||||
nested_npt_enabled(svm), from_vmrun);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!npt_enabled)
|
||||
vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
|
||||
|
||||
if (!from_vmrun)
|
||||
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
|
||||
|
||||
svm_set_gif(svm, true);
|
||||
|
||||
return 0;
|
||||
|
@ -681,7 +683,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
|
|||
|
||||
svm->nested.nested_run_pending = 1;
|
||||
|
||||
if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12))
|
||||
if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
|
||||
goto out_exit_err;
|
||||
|
||||
if (nested_svm_vmrun_msrpm(svm))
|
||||
|
|
|
@ -595,43 +595,50 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
|
||||
int *error)
|
||||
{
|
||||
struct sev_data_launch_update_vmsa vmsa;
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
int ret;
|
||||
|
||||
/* Perform some pre-encryption checks against the VMSA */
|
||||
ret = sev_es_sync_vmsa(svm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* The LAUNCH_UPDATE_VMSA command will perform in-place encryption of
|
||||
* the VMSA memory content (i.e it will write the same memory region
|
||||
* with the guest's key), so invalidate it first.
|
||||
*/
|
||||
clflush_cache_range(svm->vmsa, PAGE_SIZE);
|
||||
|
||||
vmsa.reserved = 0;
|
||||
vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
|
||||
vmsa.address = __sme_pa(svm->vmsa);
|
||||
vmsa.len = PAGE_SIZE;
|
||||
return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
|
||||
}
|
||||
|
||||
static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
||||
{
|
||||
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
||||
struct sev_data_launch_update_vmsa vmsa;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i, ret;
|
||||
|
||||
if (!sev_es_guest(kvm))
|
||||
return -ENOTTY;
|
||||
|
||||
vmsa.reserved = 0;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
/* Perform some pre-encryption checks against the VMSA */
|
||||
ret = sev_es_sync_vmsa(svm);
|
||||
ret = mutex_lock_killable(&vcpu->mutex);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* The LAUNCH_UPDATE_VMSA command will perform in-place
|
||||
* encryption of the VMSA memory content (i.e it will write
|
||||
* the same memory region with the guest's key), so invalidate
|
||||
* it first.
|
||||
*/
|
||||
clflush_cache_range(svm->vmsa, PAGE_SIZE);
|
||||
ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);
|
||||
|
||||
vmsa.handle = sev->handle;
|
||||
vmsa.address = __sme_pa(svm->vmsa);
|
||||
vmsa.len = PAGE_SIZE;
|
||||
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa,
|
||||
&argp->error);
|
||||
mutex_unlock(&vcpu->mutex);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
svm->vcpu.arch.guest_state_protected = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -1397,8 +1404,10 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|||
|
||||
/* Bind ASID to this guest */
|
||||
ret = sev_bind_asid(kvm, start.handle, error);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
sev_decommission(start.handle);
|
||||
goto e_free_session;
|
||||
}
|
||||
|
||||
params.handle = start.handle;
|
||||
if (copy_to_user((void __user *)(uintptr_t)argp->data,
|
||||
|
@ -1464,7 +1473,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|||
|
||||
/* Pin guest memory */
|
||||
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
|
||||
PAGE_SIZE, &n, 0);
|
||||
PAGE_SIZE, &n, 1);
|
||||
if (IS_ERR(guest_page)) {
|
||||
ret = PTR_ERR(guest_page);
|
||||
goto e_free_trans;
|
||||
|
@ -1501,6 +1510,20 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|||
return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
|
||||
}
|
||||
|
||||
static bool cmd_allowed_from_miror(u32 cmd_id)
|
||||
{
|
||||
/*
|
||||
* Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
|
||||
* active mirror VMs. Also allow the debugging and status commands.
|
||||
*/
|
||||
if (cmd_id == KVM_SEV_LAUNCH_UPDATE_VMSA ||
|
||||
cmd_id == KVM_SEV_GUEST_STATUS || cmd_id == KVM_SEV_DBG_DECRYPT ||
|
||||
cmd_id == KVM_SEV_DBG_ENCRYPT)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
|
||||
{
|
||||
struct kvm_sev_cmd sev_cmd;
|
||||
|
@ -1517,8 +1540,9 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
|
|||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
/* enc_context_owner handles all memory enc operations */
|
||||
if (is_mirroring_enc_context(kvm)) {
|
||||
/* Only the enc_context_owner handles some memory enc operations. */
|
||||
if (is_mirroring_enc_context(kvm) &&
|
||||
!cmd_allowed_from_miror(sev_cmd.id)) {
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
@ -1715,8 +1739,7 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
|
|||
{
|
||||
struct file *source_kvm_file;
|
||||
struct kvm *source_kvm;
|
||||
struct kvm_sev_info *mirror_sev;
|
||||
unsigned int asid;
|
||||
struct kvm_sev_info source_sev, *mirror_sev;
|
||||
int ret;
|
||||
|
||||
source_kvm_file = fget(source_fd);
|
||||
|
@ -1739,7 +1762,8 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
|
|||
goto e_source_unlock;
|
||||
}
|
||||
|
||||
asid = to_kvm_svm(source_kvm)->sev_info.asid;
|
||||
memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info,
|
||||
sizeof(source_sev));
|
||||
|
||||
/*
|
||||
* The mirror kvm holds an enc_context_owner ref so its asid can't
|
||||
|
@ -1759,8 +1783,16 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
|
|||
/* Set enc_context_owner and copy its encryption context over */
|
||||
mirror_sev = &to_kvm_svm(kvm)->sev_info;
|
||||
mirror_sev->enc_context_owner = source_kvm;
|
||||
mirror_sev->asid = asid;
|
||||
mirror_sev->active = true;
|
||||
mirror_sev->asid = source_sev.asid;
|
||||
mirror_sev->fd = source_sev.fd;
|
||||
mirror_sev->es_active = source_sev.es_active;
|
||||
mirror_sev->handle = source_sev.handle;
|
||||
/*
|
||||
* Do not copy ap_jump_table. Since the mirror does not share the same
|
||||
* KVM contexts as the original, and they may have different
|
||||
* memory-views.
|
||||
*/
|
||||
|
||||
mutex_unlock(&kvm->lock);
|
||||
return 0;
|
||||
|
|
|
@ -1566,6 +1566,8 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
|
|||
|
||||
svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
|
||||
V_IRQ_INJECTION_BITS_MASK;
|
||||
|
||||
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
|
||||
}
|
||||
|
||||
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
|
||||
|
@ -2222,6 +2224,10 @@ static int gp_interception(struct kvm_vcpu *vcpu)
|
|||
if (error_code)
|
||||
goto reinject;
|
||||
|
||||
/* All SVM instructions expect page aligned RAX */
|
||||
if (svm->vmcb->save.rax & ~PAGE_MASK)
|
||||
goto reinject;
|
||||
|
||||
/* Decode the instruction for usage later */
|
||||
if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
|
||||
goto reinject;
|
||||
|
@ -4285,43 +4291,44 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
|
|||
struct kvm_host_map map_save;
|
||||
int ret;
|
||||
|
||||
if (is_guest_mode(vcpu)) {
|
||||
/* FED8h - SVM Guest */
|
||||
put_smstate(u64, smstate, 0x7ed8, 1);
|
||||
/* FEE0h - SVM Guest VMCB Physical Address */
|
||||
put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
|
||||
if (!is_guest_mode(vcpu))
|
||||
return 0;
|
||||
|
||||
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
|
||||
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
|
||||
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
|
||||
/* FED8h - SVM Guest */
|
||||
put_smstate(u64, smstate, 0x7ed8, 1);
|
||||
/* FEE0h - SVM Guest VMCB Physical Address */
|
||||
put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
|
||||
|
||||
ret = nested_svm_vmexit(svm);
|
||||
if (ret)
|
||||
return ret;
|
||||
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
|
||||
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
|
||||
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
|
||||
|
||||
/*
|
||||
* KVM uses VMCB01 to store L1 host state while L2 runs but
|
||||
* VMCB01 is going to be used during SMM and thus the state will
|
||||
* be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
|
||||
* area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
|
||||
* format of the area is identical to guest save area offsetted
|
||||
* by 0x400 (matches the offset of 'struct vmcb_save_area'
|
||||
* within 'struct vmcb'). Note: HSAVE area may also be used by
|
||||
* L1 hypervisor to save additional host context (e.g. KVM does
|
||||
* that, see svm_prepare_guest_switch()) which must be
|
||||
* preserved.
|
||||
*/
|
||||
if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
|
||||
&map_save) == -EINVAL)
|
||||
return 1;
|
||||
ret = nested_svm_vmexit(svm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
|
||||
/*
|
||||
* KVM uses VMCB01 to store L1 host state while L2 runs but
|
||||
* VMCB01 is going to be used during SMM and thus the state will
|
||||
* be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
|
||||
* area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
|
||||
* format of the area is identical to guest save area offsetted
|
||||
* by 0x400 (matches the offset of 'struct vmcb_save_area'
|
||||
* within 'struct vmcb'). Note: HSAVE area may also be used by
|
||||
* L1 hypervisor to save additional host context (e.g. KVM does
|
||||
* that, see svm_prepare_guest_switch()) which must be
|
||||
* preserved.
|
||||
*/
|
||||
if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
|
||||
&map_save) == -EINVAL)
|
||||
return 1;
|
||||
|
||||
svm_copy_vmrun_state(map_save.hva + 0x400,
|
||||
&svm->vmcb01.ptr->save);
|
||||
BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
|
||||
|
||||
kvm_vcpu_unmap(vcpu, &map_save, true);
|
||||
}
|
||||
svm_copy_vmrun_state(map_save.hva + 0x400,
|
||||
&svm->vmcb01.ptr->save);
|
||||
|
||||
kvm_vcpu_unmap(vcpu, &map_save, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -4329,50 +4336,54 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
|
|||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct kvm_host_map map, map_save;
|
||||
int ret = 0;
|
||||
u64 saved_efer, vmcb12_gpa;
|
||||
struct vmcb *vmcb12;
|
||||
int ret;
|
||||
|
||||
if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
|
||||
u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
|
||||
u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
|
||||
u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
|
||||
struct vmcb *vmcb12;
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
|
||||
return 0;
|
||||
|
||||
if (guest) {
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
|
||||
return 1;
|
||||
/* Non-zero if SMI arrived while vCPU was in guest mode. */
|
||||
if (!GET_SMSTATE(u64, smstate, 0x7ed8))
|
||||
return 0;
|
||||
|
||||
if (!(saved_efer & EFER_SVME))
|
||||
return 1;
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
|
||||
return 1;
|
||||
|
||||
if (kvm_vcpu_map(vcpu,
|
||||
gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
|
||||
return 1;
|
||||
saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
|
||||
if (!(saved_efer & EFER_SVME))
|
||||
return 1;
|
||||
|
||||
if (svm_allocate_nested(svm))
|
||||
return 1;
|
||||
vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
|
||||
if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
|
||||
return 1;
|
||||
|
||||
vmcb12 = map.hva;
|
||||
ret = 1;
|
||||
if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL)
|
||||
goto unmap_map;
|
||||
|
||||
nested_load_control_from_vmcb12(svm, &vmcb12->control);
|
||||
if (svm_allocate_nested(svm))
|
||||
goto unmap_save;
|
||||
|
||||
ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
|
||||
kvm_vcpu_unmap(vcpu, &map, true);
|
||||
/*
|
||||
* Restore L1 host state from L1 HSAVE area as VMCB01 was
|
||||
* used during SMM (see svm_enter_smm())
|
||||
*/
|
||||
|
||||
/*
|
||||
* Restore L1 host state from L1 HSAVE area as VMCB01 was
|
||||
* used during SMM (see svm_enter_smm())
|
||||
*/
|
||||
if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
|
||||
&map_save) == -EINVAL)
|
||||
return 1;
|
||||
svm_copy_vmrun_state(&svm->vmcb01.ptr->save, map_save.hva + 0x400);
|
||||
|
||||
svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
|
||||
map_save.hva + 0x400);
|
||||
/*
|
||||
* Enter the nested guest now
|
||||
*/
|
||||
|
||||
kvm_vcpu_unmap(vcpu, &map_save, true);
|
||||
}
|
||||
}
|
||||
vmcb12 = map.hva;
|
||||
nested_load_control_from_vmcb12(svm, &vmcb12->control);
|
||||
ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
|
||||
|
||||
unmap_save:
|
||||
kvm_vcpu_unmap(vcpu, &map_save, true);
|
||||
unmap_map:
|
||||
kvm_vcpu_unmap(vcpu, &map, true);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -459,7 +459,8 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
|
|||
return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
|
||||
}
|
||||
|
||||
int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12);
|
||||
int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
|
||||
u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
|
||||
void svm_leave_nested(struct vcpu_svm *svm);
|
||||
void svm_free_nested(struct vcpu_svm *svm);
|
||||
int svm_allocate_nested(struct vcpu_svm *svm);
|
||||
|
|
|
@ -353,14 +353,20 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
|
|||
switch (msr_index) {
|
||||
case MSR_IA32_VMX_EXIT_CTLS:
|
||||
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
|
||||
ctl_high &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
|
||||
ctl_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
|
||||
break;
|
||||
case MSR_IA32_VMX_ENTRY_CTLS:
|
||||
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
|
||||
ctl_high &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
|
||||
ctl_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
|
||||
break;
|
||||
case MSR_IA32_VMX_PROCBASED_CTLS2:
|
||||
ctl_high &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
|
||||
ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
|
||||
break;
|
||||
case MSR_IA32_VMX_PINBASED_CTLS:
|
||||
ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
|
||||
break;
|
||||
case MSR_IA32_VMX_VMFUNC:
|
||||
ctl_low &= ~EVMCS1_UNSUPPORTED_VMFUNC;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -2583,8 +2583,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|||
* Guest state is invalid and unrestricted guest is disabled,
|
||||
* which means L1 attempted VMEntry to L2 with invalid state.
|
||||
* Fail the VMEntry.
|
||||
*
|
||||
* However when force loading the guest state (SMM exit or
|
||||
* loading nested state after migration, it is possible to
|
||||
* have invalid guest state now, which will be later fixed by
|
||||
* restoring L2 register state
|
||||
*/
|
||||
if (CC(!vmx_guest_state_valid(vcpu))) {
|
||||
if (CC(from_vmentry && !vmx_guest_state_valid(vcpu))) {
|
||||
*entry_failure_code = ENTRY_FAIL_DEFAULT;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -4351,6 +4356,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
|
|||
if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
|
||||
vmcs12->vm_exit_msr_load_count))
|
||||
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
|
||||
|
||||
to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
|
||||
}
|
||||
|
||||
static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
|
||||
|
@ -4899,14 +4906,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Emulate the VMXON instruction.
|
||||
* Currently, we just remember that VMX is active, and do not save or even
|
||||
* inspect the argument to VMXON (the so-called "VMXON pointer") because we
|
||||
* do not currently need to store anything in that guest-allocated memory
|
||||
* region. Consequently, VMCLEAR and VMPTRLD also do not verify that the their
|
||||
* argument is different from the VMXON pointer (which the spec says they do).
|
||||
*/
|
||||
/* Emulate the VMXON instruction. */
|
||||
static int handle_vmon(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int ret;
|
||||
|
@ -5903,6 +5903,12 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
|
|||
case EXIT_REASON_VMFUNC:
|
||||
/* VM functions are emulated through L2->L0 vmexits. */
|
||||
return true;
|
||||
case EXIT_REASON_BUS_LOCK:
|
||||
/*
|
||||
* At present, bus lock VM exit is never exposed to L1.
|
||||
* Handle L2's bus locks in L0 directly.
|
||||
*/
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -1323,7 +1323,7 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
|
|||
vmx_prepare_switch_to_host(to_vmx(vcpu));
|
||||
}
|
||||
|
||||
static bool emulation_required(struct kvm_vcpu *vcpu)
|
||||
bool vmx_emulation_required(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu);
|
||||
}
|
||||
|
@ -1367,7 +1367,7 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
|
|||
vmcs_writel(GUEST_RFLAGS, rflags);
|
||||
|
||||
if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
|
||||
vmx->emulation_required = emulation_required(vcpu);
|
||||
vmx->emulation_required = vmx_emulation_required(vcpu);
|
||||
}
|
||||
|
||||
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
|
||||
|
@ -1837,10 +1837,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
&msr_info->data))
|
||||
return 1;
|
||||
/*
|
||||
* Enlightened VMCS v1 doesn't have certain fields, but buggy
|
||||
* Hyper-V versions are still trying to use corresponding
|
||||
* features when they are exposed. Filter out the essential
|
||||
* minimum.
|
||||
* Enlightened VMCS v1 doesn't have certain VMCS fields but
|
||||
* instead of just ignoring the features, different Hyper-V
|
||||
* versions are either trying to use them and fail or do some
|
||||
* sanity checking and refuse to boot. Filter all unsupported
|
||||
* features out.
|
||||
*/
|
||||
if (!msr_info->host_initiated &&
|
||||
vmx->nested.enlightened_vmcs_enabled)
|
||||
|
@ -3077,7 +3078,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
|||
}
|
||||
|
||||
/* depends on vcpu->arch.cr0 to be set to a new value */
|
||||
vmx->emulation_required = emulation_required(vcpu);
|
||||
vmx->emulation_required = vmx_emulation_required(vcpu);
|
||||
}
|
||||
|
||||
static int vmx_get_max_tdp_level(void)
|
||||
|
@ -3330,7 +3331,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int
|
|||
{
|
||||
__vmx_set_segment(vcpu, var, seg);
|
||||
|
||||
to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
|
||||
to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
|
||||
}
|
||||
|
||||
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
|
||||
|
@ -6621,10 +6622,24 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
vmx->loaded_vmcs->soft_vnmi_blocked))
|
||||
vmx->loaded_vmcs->entry_time = ktime_get();
|
||||
|
||||
/* Don't enter VMX if guest state is invalid, let the exit handler
|
||||
start emulation until we arrive back to a valid state */
|
||||
if (vmx->emulation_required)
|
||||
/*
|
||||
* Don't enter VMX if guest state is invalid, let the exit handler
|
||||
* start emulation until we arrive back to a valid state. Synthesize a
|
||||
* consistency check VM-Exit due to invalid guest state and bail.
|
||||
*/
|
||||
if (unlikely(vmx->emulation_required)) {
|
||||
|
||||
/* We don't emulate invalid state of a nested guest */
|
||||
vmx->fail = is_guest_mode(vcpu);
|
||||
|
||||
vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
|
||||
vmx->exit_reason.failed_vmentry = 1;
|
||||
kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1);
|
||||
vmx->exit_qualification = ENTRY_FAIL_DEFAULT;
|
||||
kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2);
|
||||
vmx->exit_intr_info = 0;
|
||||
return EXIT_FASTPATH_NONE;
|
||||
}
|
||||
|
||||
trace_kvm_entry(vcpu);
|
||||
|
||||
|
|
|
@ -248,12 +248,8 @@ struct vcpu_vmx {
|
|||
* only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
|
||||
* of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
|
||||
* be loaded into hardware if those conditions aren't met.
|
||||
* nr_active_uret_msrs tracks the number of MSRs that need to be loaded
|
||||
* into hardware when running the guest. guest_uret_msrs[] is resorted
|
||||
* whenever the number of "active" uret MSRs is modified.
|
||||
*/
|
||||
struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
|
||||
int nr_active_uret_msrs;
|
||||
bool guest_uret_msrs_loaded;
|
||||
#ifdef CONFIG_X86_64
|
||||
u64 msr_host_kernel_gs_base;
|
||||
|
@ -359,6 +355,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
|
|||
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
|
||||
unsigned long fs_base, unsigned long gs_base);
|
||||
int vmx_get_cpl(struct kvm_vcpu *vcpu);
|
||||
bool vmx_emulation_required(struct kvm_vcpu *vcpu);
|
||||
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
|
||||
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
|
||||
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
|
||||
|
|
|
@ -1332,6 +1332,13 @@ static const u32 msrs_to_save_all[] = {
|
|||
MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
|
||||
MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
|
||||
MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
|
||||
|
||||
MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
|
||||
MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
|
||||
MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
|
||||
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
|
||||
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
|
||||
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
|
||||
};
|
||||
|
||||
static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
|
||||
|
@ -2969,7 +2976,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
|||
offsetof(struct compat_vcpu_info, time));
|
||||
if (vcpu->xen.vcpu_time_info_set)
|
||||
kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
|
||||
if (v == kvm_get_vcpu(v->kvm, 0))
|
||||
if (!v->vcpu_idx)
|
||||
kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
|
||||
return 0;
|
||||
}
|
||||
|
@ -7658,6 +7665,13 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
|
|||
|
||||
/* Process a latched INIT or SMI, if any. */
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
|
||||
/*
|
||||
* Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
|
||||
* on SMM exit we still need to reload them from
|
||||
* guest memory
|
||||
*/
|
||||
vcpu->arch.pdptrs_from_userspace = false;
|
||||
}
|
||||
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
|
@ -10652,6 +10666,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
|||
int r;
|
||||
|
||||
vcpu->arch.last_vmentry_cpu = -1;
|
||||
vcpu->arch.regs_avail = ~0;
|
||||
vcpu->arch.regs_dirty = ~0;
|
||||
|
||||
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
|
||||
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
|
||||
|
@ -10893,6 +10909,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
|
||||
kvm_rip_write(vcpu, 0xfff0);
|
||||
|
||||
vcpu->arch.cr3 = 0;
|
||||
kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
|
||||
|
||||
/*
|
||||
* CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions
|
||||
* of Intel's SDM list CD/NW as being set on INIT, but they contradict
|
||||
|
@ -11139,9 +11158,15 @@ void kvm_arch_free_vm(struct kvm *kvm)
|
|||
|
||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (type)
|
||||
return -EINVAL;
|
||||
|
||||
ret = kvm_page_track_init(kvm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
|
||||
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
|
||||
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
|
||||
|
@ -11174,7 +11199,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||
|
||||
kvm_apicv_init(kvm);
|
||||
kvm_hv_init_vm(kvm);
|
||||
kvm_page_track_init(kvm);
|
||||
kvm_mmu_init_vm(kvm);
|
||||
kvm_xen_init_vm(kvm);
|
||||
|
||||
|
|
|
@ -952,6 +952,8 @@ int armpmu_register(struct arm_pmu *pmu)
|
|||
pmu->name, pmu->num_events,
|
||||
has_nmi ? ", using NMIs" : "");
|
||||
|
||||
kvm_host_pmu_init(pmu);
|
||||
|
||||
return 0;
|
||||
|
||||
out_destroy:
|
||||
|
|
|
@ -61,7 +61,6 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
|
|||
int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr);
|
||||
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
|
||||
int kvm_pmu_probe_pmuver(void);
|
||||
#else
|
||||
struct kvm_pmu {
|
||||
};
|
||||
|
@ -118,8 +117,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline int kvm_pmu_probe_pmuver(void) { return 0xf; }
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -608,7 +608,6 @@ struct kvm {
|
|||
unsigned long mmu_notifier_range_start;
|
||||
unsigned long mmu_notifier_range_end;
|
||||
#endif
|
||||
long tlbs_dirty;
|
||||
struct list_head devices;
|
||||
u64 manual_dirty_log_protect;
|
||||
struct dentry *debugfs_dentry;
|
||||
|
@ -721,11 +720,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->vcpu_idx;
|
||||
}
|
||||
|
||||
#define kvm_for_each_memslot(memslot, slots) \
|
||||
for (memslot = &slots->memslots[0]; \
|
||||
memslot < slots->memslots + slots->used_slots; memslot++) \
|
||||
|
|
|
@ -163,6 +163,12 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
|
|||
static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KVM
|
||||
void kvm_host_pmu_init(struct arm_pmu *pmu);
|
||||
#else
|
||||
#define kvm_host_pmu_init(x) do { } while(0)
|
||||
#endif
|
||||
|
||||
/* Internal functions only for core arm_pmu code */
|
||||
struct arm_pmu *armpmu_alloc(void);
|
||||
struct arm_pmu *armpmu_alloc_atomic(void);
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
/x86_64/smm_test
|
||||
/x86_64/state_test
|
||||
/x86_64/svm_vmcall_test
|
||||
/x86_64/svm_int_ctl_test
|
||||
/x86_64/sync_regs_test
|
||||
/x86_64/tsc_msrs_test
|
||||
/x86_64/userspace_msr_exit_test
|
||||
|
|
|
@ -56,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test
|
|||
TEST_GEN_PROGS_x86_64 += x86_64/state_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
|
||||
|
|
|
@ -371,9 +371,7 @@ static void help(char *name)
|
|||
printf(" -v: specify the number of vCPUs to run.\n");
|
||||
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
|
||||
" them into a separate region of memory for each vCPU.\n");
|
||||
printf(" -s: specify the type of memory that should be used to\n"
|
||||
" back the guest data region.\n\n");
|
||||
backing_src_help();
|
||||
backing_src_help("-s");
|
||||
puts("");
|
||||
exit(0);
|
||||
}
|
||||
|
@ -381,7 +379,7 @@ static void help(char *name)
|
|||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct test_params params = {
|
||||
.backing_src = VM_MEM_SRC_ANONYMOUS,
|
||||
.backing_src = DEFAULT_VM_MEM_SRC,
|
||||
.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
|
||||
.vcpus = 1,
|
||||
};
|
||||
|
|
|
@ -179,7 +179,7 @@ static void *uffd_handler_thread_fn(void *arg)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (!pollfd[0].revents & POLLIN)
|
||||
if (!(pollfd[0].revents & POLLIN))
|
||||
continue;
|
||||
|
||||
r = read(uffd, &msg, sizeof(msg));
|
||||
|
@ -416,7 +416,7 @@ static void help(char *name)
|
|||
{
|
||||
puts("");
|
||||
printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
|
||||
" [-b memory] [-t type] [-v vcpus] [-o]\n", name);
|
||||
" [-b memory] [-s type] [-v vcpus] [-o]\n", name);
|
||||
guest_modes_help();
|
||||
printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
|
||||
" UFFD registration mode: 'MISSING' or 'MINOR'.\n");
|
||||
|
@ -426,8 +426,7 @@ static void help(char *name)
|
|||
printf(" -b: specify the size of the memory region which should be\n"
|
||||
" demand paged by each vCPU. e.g. 10M or 3G.\n"
|
||||
" Default: 1G\n");
|
||||
printf(" -t: The type of backing memory to use. Default: anonymous\n");
|
||||
backing_src_help();
|
||||
backing_src_help("-s");
|
||||
printf(" -v: specify the number of vCPUs to run.\n");
|
||||
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
|
||||
" them into a separate region of memory for each vCPU.\n");
|
||||
|
@ -439,14 +438,14 @@ int main(int argc, char *argv[])
|
|||
{
|
||||
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
|
||||
struct test_params p = {
|
||||
.src_type = VM_MEM_SRC_ANONYMOUS,
|
||||
.src_type = DEFAULT_VM_MEM_SRC,
|
||||
.partition_vcpu_memory_access = true,
|
||||
};
|
||||
int opt;
|
||||
|
||||
guest_modes_append_default();
|
||||
|
||||
while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
|
||||
switch (opt) {
|
||||
case 'm':
|
||||
guest_modes_cmdline(optarg);
|
||||
|
@ -465,7 +464,7 @@ int main(int argc, char *argv[])
|
|||
case 'b':
|
||||
guest_percpu_mem_size = parse_size(optarg);
|
||||
break;
|
||||
case 't':
|
||||
case 's':
|
||||
p.src_type = parse_backing_src_type(optarg);
|
||||
break;
|
||||
case 'v':
|
||||
|
@ -485,7 +484,7 @@ int main(int argc, char *argv[])
|
|||
|
||||
if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
|
||||
!backing_src_is_shared(p.src_type)) {
|
||||
TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t");
|
||||
TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
|
||||
}
|
||||
|
||||
for_each_guest_mode(run_test, &p);
|
||||
|
|
|
@ -118,42 +118,64 @@ static inline void disable_dirty_logging(struct kvm_vm *vm, int slots)
|
|||
toggle_dirty_logging(vm, slots, false);
|
||||
}
|
||||
|
||||
static void get_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
|
||||
uint64_t nr_pages)
|
||||
static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
|
||||
{
|
||||
uint64_t slot_pages = nr_pages / slots;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < slots; i++) {
|
||||
int slot = PERF_TEST_MEM_SLOT_INDEX + i;
|
||||
unsigned long *slot_bitmap = bitmap + i * slot_pages;
|
||||
|
||||
kvm_vm_get_dirty_log(vm, slot, slot_bitmap);
|
||||
kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void clear_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
|
||||
uint64_t nr_pages)
|
||||
static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
|
||||
int slots, uint64_t pages_per_slot)
|
||||
{
|
||||
uint64_t slot_pages = nr_pages / slots;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < slots; i++) {
|
||||
int slot = PERF_TEST_MEM_SLOT_INDEX + i;
|
||||
unsigned long *slot_bitmap = bitmap + i * slot_pages;
|
||||
|
||||
kvm_vm_clear_dirty_log(vm, slot, slot_bitmap, 0, slot_pages);
|
||||
kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long **alloc_bitmaps(int slots, uint64_t pages_per_slot)
|
||||
{
|
||||
unsigned long **bitmaps;
|
||||
int i;
|
||||
|
||||
bitmaps = malloc(slots * sizeof(bitmaps[0]));
|
||||
TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
|
||||
|
||||
for (i = 0; i < slots; i++) {
|
||||
bitmaps[i] = bitmap_zalloc(pages_per_slot);
|
||||
TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
|
||||
}
|
||||
|
||||
return bitmaps;
|
||||
}
|
||||
|
||||
static void free_bitmaps(unsigned long *bitmaps[], int slots)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < slots; i++)
|
||||
free(bitmaps[i]);
|
||||
|
||||
free(bitmaps);
|
||||
}
|
||||
|
||||
static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
{
|
||||
struct test_params *p = arg;
|
||||
pthread_t *vcpu_threads;
|
||||
struct kvm_vm *vm;
|
||||
unsigned long *bmap;
|
||||
unsigned long **bitmaps;
|
||||
uint64_t guest_num_pages;
|
||||
uint64_t host_num_pages;
|
||||
uint64_t pages_per_slot;
|
||||
int vcpu_id;
|
||||
struct timespec start;
|
||||
struct timespec ts_diff;
|
||||
|
@ -171,7 +193,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
|||
guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
|
||||
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
|
||||
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
|
||||
bmap = bitmap_zalloc(host_num_pages);
|
||||
pages_per_slot = host_num_pages / p->slots;
|
||||
|
||||
bitmaps = alloc_bitmaps(p->slots, pages_per_slot);
|
||||
|
||||
if (dirty_log_manual_caps) {
|
||||
cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
|
||||
|
@ -239,7 +263,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
|||
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||||
get_dirty_log(vm, p->slots, bmap, host_num_pages);
|
||||
get_dirty_log(vm, bitmaps, p->slots);
|
||||
ts_diff = timespec_elapsed(start);
|
||||
get_dirty_log_total = timespec_add(get_dirty_log_total,
|
||||
ts_diff);
|
||||
|
@ -248,7 +272,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
|||
|
||||
if (dirty_log_manual_caps) {
|
||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||||
clear_dirty_log(vm, p->slots, bmap, host_num_pages);
|
||||
clear_dirty_log(vm, bitmaps, p->slots, pages_per_slot);
|
||||
ts_diff = timespec_elapsed(start);
|
||||
clear_dirty_log_total = timespec_add(clear_dirty_log_total,
|
||||
ts_diff);
|
||||
|
@ -281,7 +305,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
|||
clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
|
||||
}
|
||||
|
||||
free(bmap);
|
||||
free_bitmaps(bitmaps, p->slots);
|
||||
free(vcpu_threads);
|
||||
perf_test_destroy_vm(vm);
|
||||
}
|
||||
|
@ -308,11 +332,9 @@ static void help(char *name)
|
|||
printf(" -v: specify the number of vCPUs to run.\n");
|
||||
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
|
||||
" them into a separate region of memory for each vCPU.\n");
|
||||
printf(" -s: specify the type of memory that should be used to\n"
|
||||
" back the guest data region.\n\n");
|
||||
backing_src_help("-s");
|
||||
printf(" -x: Split the memory region into this number of memslots.\n"
|
||||
" (default: 1)");
|
||||
backing_src_help();
|
||||
" (default: 1)\n");
|
||||
puts("");
|
||||
exit(0);
|
||||
}
|
||||
|
@ -324,7 +346,7 @@ int main(int argc, char *argv[])
|
|||
.iterations = TEST_HOST_LOOP_N,
|
||||
.wr_fract = 1,
|
||||
.partition_vcpu_memory_access = true,
|
||||
.backing_src = VM_MEM_SRC_ANONYMOUS,
|
||||
.backing_src = DEFAULT_VM_MEM_SRC,
|
||||
.slots = 1,
|
||||
};
|
||||
int opt;
|
||||
|
|
|
@ -90,6 +90,8 @@ enum vm_mem_backing_src_type {
|
|||
NUM_SRC_TYPES,
|
||||
};
|
||||
|
||||
#define DEFAULT_VM_MEM_SRC VM_MEM_SRC_ANONYMOUS
|
||||
|
||||
struct vm_mem_backing_src_alias {
|
||||
const char *name;
|
||||
uint32_t flag;
|
||||
|
@ -102,7 +104,7 @@ size_t get_trans_hugepagesz(void);
|
|||
size_t get_def_hugetlb_pagesz(void);
|
||||
const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
|
||||
size_t get_backing_src_pagesz(uint32_t i);
|
||||
void backing_src_help(void);
|
||||
void backing_src_help(const char *flag);
|
||||
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
|
||||
long get_run_delay(void);
|
||||
|
||||
|
|
|
@ -312,37 +312,37 @@ static inline void set_xmm(int n, unsigned long val)
|
|||
}
|
||||
}
|
||||
|
||||
typedef unsigned long v1di __attribute__ ((vector_size (8)));
|
||||
#define GET_XMM(__xmm) \
|
||||
({ \
|
||||
unsigned long __val; \
|
||||
asm volatile("movq %%"#__xmm", %0" : "=r"(__val) : : #__xmm); \
|
||||
__val; \
|
||||
})
|
||||
|
||||
static inline unsigned long get_xmm(int n)
|
||||
{
|
||||
assert(n >= 0 && n <= 7);
|
||||
|
||||
register v1di xmm0 __asm__("%xmm0");
|
||||
register v1di xmm1 __asm__("%xmm1");
|
||||
register v1di xmm2 __asm__("%xmm2");
|
||||
register v1di xmm3 __asm__("%xmm3");
|
||||
register v1di xmm4 __asm__("%xmm4");
|
||||
register v1di xmm5 __asm__("%xmm5");
|
||||
register v1di xmm6 __asm__("%xmm6");
|
||||
register v1di xmm7 __asm__("%xmm7");
|
||||
switch (n) {
|
||||
case 0:
|
||||
return (unsigned long)xmm0;
|
||||
return GET_XMM(xmm0);
|
||||
case 1:
|
||||
return (unsigned long)xmm1;
|
||||
return GET_XMM(xmm1);
|
||||
case 2:
|
||||
return (unsigned long)xmm2;
|
||||
return GET_XMM(xmm2);
|
||||
case 3:
|
||||
return (unsigned long)xmm3;
|
||||
return GET_XMM(xmm3);
|
||||
case 4:
|
||||
return (unsigned long)xmm4;
|
||||
return GET_XMM(xmm4);
|
||||
case 5:
|
||||
return (unsigned long)xmm5;
|
||||
return GET_XMM(xmm5);
|
||||
case 6:
|
||||
return (unsigned long)xmm6;
|
||||
return GET_XMM(xmm6);
|
||||
case 7:
|
||||
return (unsigned long)xmm7;
|
||||
return GET_XMM(xmm7);
|
||||
}
|
||||
|
||||
/* never reached */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -456,10 +456,7 @@ static void help(char *name)
|
|||
" (default: 1G)\n");
|
||||
printf(" -v: specify the number of vCPUs to run\n"
|
||||
" (default: 1)\n");
|
||||
printf(" -s: specify the type of memory that should be used to\n"
|
||||
" back the guest data region.\n"
|
||||
" (default: anonymous)\n\n");
|
||||
backing_src_help();
|
||||
backing_src_help("-s");
|
||||
puts("");
|
||||
}
|
||||
|
||||
|
@ -468,7 +465,7 @@ int main(int argc, char *argv[])
|
|||
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
|
||||
struct test_params p = {
|
||||
.test_mem_size = DEFAULT_TEST_MEM_SIZE,
|
||||
.src_type = VM_MEM_SRC_ANONYMOUS,
|
||||
.src_type = DEFAULT_VM_MEM_SRC,
|
||||
};
|
||||
int opt;
|
||||
|
||||
|
|
|
@ -283,13 +283,22 @@ size_t get_backing_src_pagesz(uint32_t i)
|
|||
}
|
||||
}
|
||||
|
||||
void backing_src_help(void)
|
||||
static void print_available_backing_src_types(const char *prefix)
|
||||
{
|
||||
int i;
|
||||
|
||||
printf("Available backing src types:\n");
|
||||
printf("%sAvailable backing src types:\n", prefix);
|
||||
|
||||
for (i = 0; i < NUM_SRC_TYPES; i++)
|
||||
printf("\t%s\n", vm_mem_backing_src_alias(i)->name);
|
||||
printf("%s %s\n", prefix, vm_mem_backing_src_alias(i)->name);
|
||||
}
|
||||
|
||||
void backing_src_help(const char *flag)
|
||||
{
|
||||
printf(" %s: specify the type of memory that should be used to\n"
|
||||
" back the guest data region. (default: %s)\n",
|
||||
flag, vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC)->name);
|
||||
print_available_backing_src_types(" ");
|
||||
}
|
||||
|
||||
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
|
||||
|
@ -300,7 +309,7 @@ enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
|
|||
if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name))
|
||||
return i;
|
||||
|
||||
backing_src_help();
|
||||
print_available_backing_src_types("");
|
||||
TEST_FAIL("Unknown backing src type: %s", type_name);
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -180,6 +180,7 @@ int main(int argc, char *argv[])
|
|||
* CPU affinity.
|
||||
*/
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code);
|
||||
ucall_init(vm, NULL);
|
||||
|
||||
pthread_create(&migration_thread, NULL, migration_worker, 0);
|
||||
|
||||
|
|
|
@ -116,12 +116,12 @@ struct st_time {
|
|||
uint64_t st_time;
|
||||
};
|
||||
|
||||
static int64_t smccc(uint32_t func, uint32_t arg)
|
||||
static int64_t smccc(uint32_t func, uint64_t arg)
|
||||
{
|
||||
unsigned long ret;
|
||||
|
||||
asm volatile(
|
||||
"mov x0, %1\n"
|
||||
"mov w0, %w1\n"
|
||||
"mov x1, %2\n"
|
||||
"hvc #0\n"
|
||||
"mov %0, x0\n"
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* svm_int_ctl_test
|
||||
*
|
||||
* Copyright (C) 2021, Red Hat, Inc.
|
||||
*
|
||||
* Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
|
||||
*/
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include "svm_util.h"
|
||||
#include "apic.h"
|
||||
|
||||
#define VCPU_ID 0
|
||||
|
||||
static struct kvm_vm *vm;
|
||||
|
||||
bool vintr_irq_called;
|
||||
bool intr_irq_called;
|
||||
|
||||
#define VINTR_IRQ_NUMBER 0x20
|
||||
#define INTR_IRQ_NUMBER 0x30
|
||||
|
||||
static void vintr_irq_handler(struct ex_regs *regs)
|
||||
{
|
||||
vintr_irq_called = true;
|
||||
}
|
||||
|
||||
static void intr_irq_handler(struct ex_regs *regs)
|
||||
{
|
||||
x2apic_write_reg(APIC_EOI, 0x00);
|
||||
intr_irq_called = true;
|
||||
}
|
||||
|
||||
static void l2_guest_code(struct svm_test_data *svm)
|
||||
{
|
||||
/* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC,
|
||||
* and since L1 didn't enable virtual interrupt masking,
|
||||
* L2 should receive it and not L1.
|
||||
*
|
||||
* L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
|
||||
* so it should also receive it after the following 'sti'.
|
||||
*/
|
||||
x2apic_write_reg(APIC_ICR,
|
||||
APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
|
||||
|
||||
__asm__ __volatile__(
|
||||
"sti\n"
|
||||
"nop\n"
|
||||
);
|
||||
|
||||
GUEST_ASSERT(vintr_irq_called);
|
||||
GUEST_ASSERT(intr_irq_called);
|
||||
|
||||
__asm__ __volatile__(
|
||||
"vmcall\n"
|
||||
);
|
||||
}
|
||||
|
||||
static void l1_guest_code(struct svm_test_data *svm)
|
||||
{
|
||||
#define L2_GUEST_STACK_SIZE 64
|
||||
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
|
||||
struct vmcb *vmcb = svm->vmcb;
|
||||
|
||||
x2apic_enable();
|
||||
|
||||
/* Prepare for L2 execution. */
|
||||
generic_svm_setup(svm, l2_guest_code,
|
||||
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
|
||||
|
||||
/* No virtual interrupt masking */
|
||||
vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
|
||||
|
||||
/* No intercepts for real and virtual interrupts */
|
||||
vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR);
|
||||
|
||||
/* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
|
||||
vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
|
||||
vmcb->control.int_vector = VINTR_IRQ_NUMBER;
|
||||
|
||||
run_guest(vmcb, svm->vmcb_gpa);
|
||||
GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
vm_vaddr_t svm_gva;
|
||||
|
||||
nested_svm_check_supported();
|
||||
|
||||
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
|
||||
|
||||
vm_init_descriptor_tables(vm);
|
||||
vcpu_init_descriptor_tables(vm, VCPU_ID);
|
||||
|
||||
vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
|
||||
vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
|
||||
|
||||
vcpu_alloc_svm(vm, &svm_gva);
|
||||
vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
|
||||
|
||||
struct kvm_run *run = vcpu_state(vm, VCPU_ID);
|
||||
struct ucall uc;
|
||||
|
||||
vcpu_run(vm, VCPU_ID);
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
|
||||
"Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
|
||||
run->exit_reason,
|
||||
exit_reason_str(run->exit_reason));
|
||||
|
||||
switch (get_ucall(vm, VCPU_ID, &uc)) {
|
||||
case UCALL_ABORT:
|
||||
TEST_FAIL("%s", (const char *)uc.args[0]);
|
||||
break;
|
||||
/* NOT REACHED */
|
||||
case UCALL_DONE:
|
||||
goto done;
|
||||
default:
|
||||
TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
|
||||
}
|
||||
done:
|
||||
kvm_vm_free(vm);
|
||||
return 0;
|
||||
}
|
|
@ -235,9 +235,13 @@ static void ack_flush(void *_completed)
|
|||
{
|
||||
}
|
||||
|
||||
static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
|
||||
static inline bool kvm_kick_many_cpus(cpumask_var_t tmp, bool wait)
|
||||
{
|
||||
if (unlikely(!cpus))
|
||||
const struct cpumask *cpus;
|
||||
|
||||
if (likely(cpumask_available(tmp)))
|
||||
cpus = tmp;
|
||||
else
|
||||
cpus = cpu_online_mask;
|
||||
|
||||
if (cpumask_empty(cpus))
|
||||
|
@ -263,14 +267,34 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
|
|||
continue;
|
||||
|
||||
kvm_make_request(req, vcpu);
|
||||
cpu = vcpu->cpu;
|
||||
|
||||
if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
|
||||
continue;
|
||||
|
||||
if (tmp != NULL && cpu != -1 && cpu != me &&
|
||||
kvm_request_needs_ipi(vcpu, req))
|
||||
__cpumask_set_cpu(cpu, tmp);
|
||||
/*
|
||||
* tmp can be "unavailable" if cpumasks are allocated off stack
|
||||
* as allocation of the mask is deliberately not fatal and is
|
||||
* handled by falling back to kicking all online CPUs.
|
||||
*/
|
||||
if (!cpumask_available(tmp))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Note, the vCPU could get migrated to a different pCPU at any
|
||||
* point after kvm_request_needs_ipi(), which could result in
|
||||
* sending an IPI to the previous pCPU. But, that's ok because
|
||||
* the purpose of the IPI is to ensure the vCPU returns to
|
||||
* OUTSIDE_GUEST_MODE, which is satisfied if the vCPU migrates.
|
||||
* Entering READING_SHADOW_PAGE_TABLES after this point is also
|
||||
* ok, as the requirement is only that KVM wait for vCPUs that
|
||||
* were reading SPTEs _before_ any changes were finalized. See
|
||||
* kvm_vcpu_kick() for more details on handling requests.
|
||||
*/
|
||||
if (kvm_request_needs_ipi(vcpu, req)) {
|
||||
cpu = READ_ONCE(vcpu->cpu);
|
||||
if (cpu != -1 && cpu != me)
|
||||
__cpumask_set_cpu(cpu, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT));
|
||||
|
@ -302,13 +326,8 @@ EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
|
|||
#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
/*
|
||||
* Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
|
||||
* kvm_make_all_cpus_request.
|
||||
*/
|
||||
long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
|
||||
|
||||
++kvm->stat.generic.remote_tlb_flush_requests;
|
||||
|
||||
/*
|
||||
* We want to publish modifications to the page tables before reading
|
||||
* mode. Pairs with a memory barrier in arch-specific code.
|
||||
|
@ -323,7 +342,6 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
|
|||
if (!kvm_arch_flush_remote_tlb(kvm)
|
||||
|| kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
|
||||
++kvm->stat.generic.remote_tlb_flush;
|
||||
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
|
||||
#endif
|
||||
|
@ -528,7 +546,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
|
|||
}
|
||||
}
|
||||
|
||||
if (range->flush_on_ret && (ret || kvm->tlbs_dirty))
|
||||
if (range->flush_on_ret && ret)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
if (locked)
|
||||
|
@ -3134,15 +3152,19 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
|
|||
|
||||
static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned int old, val, shrink;
|
||||
unsigned int old, val, shrink, grow_start;
|
||||
|
||||
old = val = vcpu->halt_poll_ns;
|
||||
shrink = READ_ONCE(halt_poll_ns_shrink);
|
||||
grow_start = READ_ONCE(halt_poll_ns_grow_start);
|
||||
if (shrink == 0)
|
||||
val = 0;
|
||||
else
|
||||
val /= shrink;
|
||||
|
||||
if (val < grow_start)
|
||||
val = 0;
|
||||
|
||||
vcpu->halt_poll_ns = val;
|
||||
trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
|
||||
}
|
||||
|
@ -3290,16 +3312,24 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
|
|||
*/
|
||||
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int me;
|
||||
int cpu = vcpu->cpu;
|
||||
int me, cpu;
|
||||
|
||||
if (kvm_vcpu_wake_up(vcpu))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Note, the vCPU could get migrated to a different pCPU at any point
|
||||
* after kvm_arch_vcpu_should_kick(), which could result in sending an
|
||||
* IPI to the previous pCPU. But, that's ok because the purpose of the
|
||||
* IPI is to force the vCPU to leave IN_GUEST_MODE, and migrating the
|
||||
* vCPU also requires it to leave IN_GUEST_MODE.
|
||||
*/
|
||||
me = get_cpu();
|
||||
if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
|
||||
if (kvm_arch_vcpu_should_kick(vcpu))
|
||||
if (kvm_arch_vcpu_should_kick(vcpu)) {
|
||||
cpu = READ_ONCE(vcpu->cpu);
|
||||
if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
|
||||
smp_send_reschedule(cpu);
|
||||
}
|
||||
put_cpu();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
|
||||
|
|
Loading…
Reference in New Issue