KVM: nVMX: Disable PML in hardware when running L2

Unconditionally disable PML in vmcs02, KVM emulates PML purely in the
MMU, e.g. vmx_flush_pml_buffer() doesn't even try to copy the L2 GPAs
from vmcs02's buffer to vmcs12.  At best, enabling PML is a nop.  At
worst, it will cause vmx_flush_pml_buffer() to record bogus GFNs in the
dirty logs.

Initialize vmcs02.GUEST_PML_INDEX such that PML writes would trigger
VM-Exit if PML was somehow enabled, skip flushing the buffer for guest
mode since the index is bogus, and freak out if a PML full exit occurs
when L2 is active.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210213005015.1651772-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Sean Christopherson 2021-02-12 16:50:07 -08:00 committed by Paolo Bonzini
parent 9eba50f8d7
commit c3bb9a2083
2 changed files with 25 additions and 16 deletions

View File

@ -2167,15 +2167,13 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
/*
* The PML address never changes, so it is constant in vmcs02.
* Conceptually we want to copy the PML index from vmcs01 here,
* and then back to vmcs01 on nested vmexit. But since we flush
* the log and reset GUEST_PML_INDEX on each vmexit, the PML
* index is also effectively constant in vmcs02.
* PML is emulated for L2, but never enabled in hardware as the MMU
* handles A/D emulation. Disabling PML for L2 also avoids having to
* deal with filtering out L2 GPAs from the buffer.
*/
if (enable_pml) {
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
vmcs_write64(PML_ADDRESS, 0);
vmcs_write16(GUEST_PML_INDEX, -1);
}
if (cpu_has_vmx_encls_vmexit())
@ -2210,7 +2208,7 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
u32 exec_control, vmcs12_exec_ctrl;
u32 exec_control;
u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
@ -2284,11 +2282,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_ENABLE_VMFUNC);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
~SECONDARY_EXEC_ENABLE_PML;
exec_control |= vmcs12_exec_ctrl;
}
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
exec_control |= vmcs12->secondary_vm_exec_control;
/* PML is emulated and never enabled in hardware for L2. */
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
/* VMCS shadowing for L2 is emulated for now */
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
@ -5790,7 +5788,10 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
case EXIT_REASON_PREEMPTION_TIMER:
return true;
case EXIT_REASON_PML_FULL:
/* We emulate PML support to L1. */
/*
* PML is emulated for an L1 VMM and should never be enabled in
* vmcs02, always "handle" PML_FULL by exiting to userspace.
*/
return true;
case EXIT_REASON_VMFUNC:
/* VM functions are emulated through L2->L0 vmexits. */

View File

@ -5966,9 +5966,10 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
* updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
* querying dirty_bitmap, we only need to kick all vcpus out of guest
* mode as if vcpus is in root mode, the PML buffer must has been
* flushed already.
* flushed already. Note, PML is never enabled in hardware while
* running L2.
*/
if (enable_pml)
if (enable_pml && !is_guest_mode(vcpu))
vmx_flush_pml_buffer(vcpu);
/*
@ -5984,6 +5985,13 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return handle_invalid_guest_state(vcpu);
if (is_guest_mode(vcpu)) {
/*
* PML is never enabled when running L2, bail immediately if a
* PML full exit occurs as something is horribly wrong.
*/
if (exit_reason.basic == EXIT_REASON_PML_FULL)
goto unexpected_vmexit;
/*
* The host physical addresses of some pages of guest memory
* are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC