2007-12-14 09:35:10 +08:00
|
|
|
#ifndef __KVM_X86_MMU_H
|
|
|
|
#define __KVM_X86_MMU_H
|
|
|
|
|
2007-12-16 17:02:48 +08:00
|
|
|
#include <linux/kvm_host.h>
|
2009-12-07 18:16:48 +08:00
|
|
|
#include "kvm_cache_regs.h"
|
2007-12-14 09:35:10 +08:00
|
|
|
|
2008-04-25 10:17:08 +08:00
|
|
|
/*
 * Number of index bits consumed per paging level, and the resulting
 * number of entries held by one page-table page, for the 64-bit and
 * 32-bit entry formats.
 */
#define PT64_PT_BITS		9
#define PT64_ENT_PER_PAGE	(1 << PT64_PT_BITS)

#define PT32_PT_BITS		10
#define PT32_ENT_PER_PAGE	(1 << PT32_PT_BITS)
|
|
|
|
|
|
|
|
/*
 * Flag bits of a page-table entry.  Every mask is a 64-bit (ULL)
 * constant with exactly one bit set.
 */
#define PT_WRITABLE_SHIFT	1

#define PT_PRESENT_MASK		(1ULL << 0)
#define PT_WRITABLE_MASK	(1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK		(1ULL << 2)
#define PT_PWT_MASK		(1ULL << 3)
#define PT_PCD_MASK		(1ULL << 4)

#define PT_ACCESSED_SHIFT	5
#define PT_ACCESSED_MASK	(1ULL << PT_ACCESSED_SHIFT)

#define PT_DIRTY_SHIFT		6
#define PT_DIRTY_MASK		(1ULL << PT_DIRTY_SHIFT)

/* Note: the page-size flag and the PAT flag below both occupy bit 7. */
#define PT_PAGE_SIZE_SHIFT	7
#define PT_PAGE_SIZE_MASK	(1ULL << PT_PAGE_SIZE_SHIFT)

#define PT_PAT_MASK		(1ULL << 7)
#define PT_GLOBAL_MASK		(1ULL << 8)

#define PT64_NX_SHIFT		63
#define PT64_NX_MASK		(1ULL << PT64_NX_SHIFT)

#define PT_PAT_SHIFT		7

/* PAT flag position used by directory-level entries (bit 12). */
#define PT_DIR_PAT_SHIFT	12
#define PT_DIR_PAT_MASK		(1ULL << PT_DIR_PAT_SHIFT)
|
|
|
|
|
|
|
|
/*
 * PSE-36 field of a 32-bit directory entry: a PT32_DIR_PSE36_SIZE-bit
 * wide field whose lowest bit sits at PT32_DIR_PSE36_SHIFT.
 */
#define PT32_DIR_PSE36_SIZE	4
#define PT32_DIR_PSE36_SHIFT	13
#define PT32_DIR_PSE36_MASK \
	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
|
|
|
|
|
|
|
|
/* Level number of the root table for each paging format. */
#define PT64_ROOT_LEVEL		4
#define PT32_ROOT_LEVEL		2
#define PT32E_ROOT_LEVEL	3

/* Symbolic names for individual paging levels, counted from 1 upwards. */
#define PT_PDPE_LEVEL		3
#define PT_DIRECTORY_LEVEL	2
#define PT_PAGE_TABLE_LEVEL	1
|
|
|
|
|
2014-04-01 17:46:34 +08:00
|
|
|
/* Bit positions within a page-fault error code. */
#define PFERR_PRESENT_BIT	0
#define PFERR_WRITE_BIT		1
#define PFERR_USER_BIT		2
#define PFERR_RSVD_BIT		3
#define PFERR_FETCH_BIT		4

/* Matching single-bit masks; these are plain unsigned (1U) constants. */
#define PFERR_PRESENT_MASK	(1U << PFERR_PRESENT_BIT)
#define PFERR_WRITE_MASK	(1U << PFERR_WRITE_BIT)
#define PFERR_USER_MASK		(1U << PFERR_USER_BIT)
#define PFERR_RSVD_MASK		(1U << PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK	(1U << PFERR_FETCH_BIT)
|
2010-02-10 20:21:32 +08:00
|
|
|
|
2009-06-11 23:07:42 +08:00
|
|
|
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
|
2011-07-12 03:33:44 +08:00
|
|
|
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
|
2013-06-07 16:51:25 +08:00
|
|
|
|
|
|
|
/*
 * Result codes returned by handle_mmio_page_fault_common().  The
 * enumerators are listed in numeric order; their values are fixed
 * explicitly.
 */
enum {
	/* A bug was detected. */
	RET_MMIO_PF_BUG = -1,
	/* Let the CPU fault again on the address. */
	RET_MMIO_PF_RETRY = 0,
	/* A real mmio page fault: emulate the instruction directly. */
	RET_MMIO_PF_EMULATE = 1,
	/*
	 * An invalid spte was detected: let the real page fault path
	 * update the mmio spte.
	 */
	RET_MMIO_PF_INVALID = 2
};
|
|
|
|
|
2011-07-12 03:33:44 +08:00
|
|
|
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
|
2013-10-02 22:56:13 +08:00
|
|
|
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
|
|
|
|
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
|
2013-08-05 16:07:16 +08:00
|
|
|
bool execonly);
|
2014-04-01 17:46:34 +08:00
|
|
|
void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
|
|
|
bool ept);
|
2009-06-11 23:07:42 +08:00
|
|
|
|
2010-08-20 09:11:05 +08:00
|
|
|
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
|
|
|
|
{
|
2013-03-13 09:36:43 +08:00
|
|
|
if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
|
|
|
|
return kvm->arch.n_max_mmu_pages -
|
|
|
|
kvm->arch.n_used_mmu_pages;
|
|
|
|
|
|
|
|
return 0;
|
2010-08-20 09:11:05 +08:00
|
|
|
}
|
|
|
|
|
2007-12-14 09:35:10 +08:00
|
|
|
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return kvm_mmu_load(vcpu);
|
|
|
|
}
|
|
|
|
|
2009-06-10 19:12:05 +08:00
|
|
|
static inline int is_present_gpte(unsigned long pte)
|
2009-03-31 23:03:45 +08:00
|
|
|
{
|
|
|
|
return pte & PT_PRESENT_MASK;
|
|
|
|
}
|
|
|
|
|
2011-07-12 03:23:20 +08:00
|
|
|
static inline int is_writable_pte(unsigned long pte)
|
|
|
|
{
|
|
|
|
return pte & PT_WRITABLE_MASK;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool is_write_protection(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
|
|
|
|
}
|
|
|
|
|
KVM: MMU: Optimize pte permission checks
walk_addr_generic() permission checks are a maze of branchy code, which is
performed four times per lookup. It depends on the type of access, efer.nxe,
cr0.wp, cr4.smep, and in the near future, cr4.smap.
Optimize this away by precalculating all variants and storing them in a
bitmap. The bitmap is recalculated when rarely-changing variables change
(cr0, cr4) and is indexed by the often-changing variables (page fault error
code, pte access permissions).
The permission check is moved to the end of the loop, otherwise an SMEP
fault could be reported as a false positive, when PDE.U=1 but PTE.U=0.
Noted by Xiao Guangrong.
The result is short, branch-free code.
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2012-09-12 19:52:00 +08:00
|
|
|
/*
|
|
|
|
* Will a fault with a given page-fault error code (pfec) cause a permission
|
|
|
|
* fault with the given access (in ACC_* format)?
|
|
|
|
*/
|
2014-04-01 17:46:34 +08:00
|
|
|
static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
|
|
|
unsigned pte_access, unsigned pfec)
|
2011-07-12 03:23:20 +08:00
|
|
|
{
|
2014-04-01 17:46:34 +08:00
|
|
|
int cpl = kvm_x86_ops->get_cpl(vcpu);
|
|
|
|
unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
|
|
|
|
*
|
|
|
|
* If CPL = 3, SMAP applies to all supervisor-mode data accesses
|
|
|
|
* (these are implicit supervisor accesses) regardless of the value
|
|
|
|
* of EFLAGS.AC.
|
|
|
|
*
|
|
|
|
* This computes (cpl < 3) && (rflags & X86_EFLAGS_AC), leaving
|
|
|
|
* the result in X86_EFLAGS_AC. We then insert it in place of
|
|
|
|
* the PFERR_RSVD_MASK bit; this bit will always be zero in pfec,
|
|
|
|
* but it will be one in index if SMAP checks are being overridden.
|
|
|
|
* It is important to keep this branchless.
|
|
|
|
*/
|
|
|
|
unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
|
|
|
|
int index = (pfec >> 1) +
|
|
|
|
(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
|
|
|
|
|
|
|
|
return (mmu->permissions[index] >> pte_access) & 1;
|
2011-07-12 03:23:20 +08:00
|
|
|
}
|
KVM: MMU: Optimize pte permission checks
walk_addr_generic() permission checks are a maze of branchy code, which is
performed four times per lookup. It depends on the type of access, efer.nxe,
cr0.wp, cr4.smep, and in the near future, cr4.smap.
Optimize this away by precalculating all variants and storing them in a
bitmap. The bitmap is recalculated when rarely-changing variables change
(cr0, cr4) and is indexed by the often-changing variables (page fault error
code, pte access permissions).
The permission check is moved to the end of the loop, otherwise an SMEP
fault could be reported as a false positive, when PDE.U=1 but PTE.U=0.
Noted by Xiao Guangrong.
The result is short, branch-free code.
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2012-09-12 19:52:00 +08:00
|
|
|
|
2013-05-31 08:36:22 +08:00
|
|
|
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
|
2007-12-14 09:35:10 +08:00
|
|
|
#endif
|