mirror of https://gitee.com/openkylin/linux.git
KVM: MMU: Simplify page table walker
Simplify the walker level loop not to carry so much information from one loop to the next. In addition to being complex, this made kmap_atomic() critical sections difficult to manage. As a result of this change, kmap_atomic() sections are limited to actually touching the guest pte, which allows the other functions called from the walker to do sleepy operations. This will happen when we enable swapping. Signed-off-by: Avi Kivity <avi@qumranet.com>
This commit is contained in:
parent
d77a25074a
commit
42bf3f0a1f
|
@ -59,32 +59,12 @@
|
||||||
struct guest_walker {
|
struct guest_walker {
|
||||||
int level;
|
int level;
|
||||||
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
|
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
|
||||||
pt_element_t *table;
|
|
||||||
pt_element_t pte;
|
pt_element_t pte;
|
||||||
pt_element_t *ptep;
|
|
||||||
struct page *page;
|
|
||||||
int index;
|
|
||||||
pt_element_t inherited_ar;
|
pt_element_t inherited_ar;
|
||||||
gfn_t gfn;
|
gfn_t gfn;
|
||||||
u32 error_code;
|
u32 error_code;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void FNAME(update_dirty_bit)(struct kvm_vcpu *vcpu,
|
|
||||||
int write_fault,
|
|
||||||
pt_element_t *ptep,
|
|
||||||
gfn_t table_gfn)
|
|
||||||
{
|
|
||||||
gpa_t pte_gpa;
|
|
||||||
|
|
||||||
if (write_fault && !is_dirty_pte(*ptep)) {
|
|
||||||
mark_page_dirty(vcpu->kvm, table_gfn);
|
|
||||||
*ptep |= PT_DIRTY_MASK;
|
|
||||||
pte_gpa = ((gpa_t)table_gfn << PAGE_SHIFT);
|
|
||||||
pte_gpa += offset_in_page(ptep);
|
|
||||||
kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)ptep, sizeof(*ptep));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fetch a guest pte for a guest virtual address
|
* Fetch a guest pte for a guest virtual address
|
||||||
*/
|
*/
|
||||||
|
@ -94,105 +74,99 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
|
||||||
{
|
{
|
||||||
hpa_t hpa;
|
hpa_t hpa;
|
||||||
struct kvm_memory_slot *slot;
|
struct kvm_memory_slot *slot;
|
||||||
pt_element_t *ptep;
|
struct page *page;
|
||||||
pt_element_t root;
|
pt_element_t *table;
|
||||||
|
pt_element_t pte;
|
||||||
gfn_t table_gfn;
|
gfn_t table_gfn;
|
||||||
|
unsigned index;
|
||||||
|
gpa_t pte_gpa;
|
||||||
|
|
||||||
pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
|
pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
|
||||||
walker->level = vcpu->mmu.root_level;
|
walker->level = vcpu->mmu.root_level;
|
||||||
walker->table = NULL;
|
pte = vcpu->cr3;
|
||||||
walker->page = NULL;
|
|
||||||
walker->ptep = NULL;
|
|
||||||
root = vcpu->cr3;
|
|
||||||
#if PTTYPE == 64
|
#if PTTYPE == 64
|
||||||
if (!is_long_mode(vcpu)) {
|
if (!is_long_mode(vcpu)) {
|
||||||
walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
|
pte = vcpu->pdptrs[(addr >> 30) & 3];
|
||||||
root = *walker->ptep;
|
if (!is_present_pte(pte))
|
||||||
walker->pte = root;
|
|
||||||
if (!(root & PT_PRESENT_MASK))
|
|
||||||
goto not_present;
|
goto not_present;
|
||||||
--walker->level;
|
--walker->level;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
|
|
||||||
walker->table_gfn[walker->level - 1] = table_gfn;
|
|
||||||
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
|
|
||||||
walker->level - 1, table_gfn);
|
|
||||||
slot = gfn_to_memslot(vcpu->kvm, table_gfn);
|
|
||||||
hpa = safe_gpa_to_hpa(vcpu->kvm, root & PT64_BASE_ADDR_MASK);
|
|
||||||
walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
|
|
||||||
walker->table = kmap_atomic(walker->page, KM_USER0);
|
|
||||||
|
|
||||||
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
|
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
|
||||||
(vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
|
(vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
|
||||||
|
|
||||||
walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
|
walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
int index = PT_INDEX(addr, walker->level);
|
index = PT_INDEX(addr, walker->level);
|
||||||
hpa_t paddr;
|
|
||||||
|
|
||||||
ptep = &walker->table[index];
|
table_gfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
|
||||||
walker->index = index;
|
walker->table_gfn[walker->level - 1] = table_gfn;
|
||||||
ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
|
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
|
||||||
((unsigned long)ptep & PAGE_MASK));
|
walker->level - 1, table_gfn);
|
||||||
|
|
||||||
if (!is_present_pte(*ptep))
|
slot = gfn_to_memslot(vcpu->kvm, table_gfn);
|
||||||
|
hpa = safe_gpa_to_hpa(vcpu->kvm, pte & PT64_BASE_ADDR_MASK);
|
||||||
|
page = pfn_to_page(hpa >> PAGE_SHIFT);
|
||||||
|
|
||||||
|
table = kmap_atomic(page, KM_USER0);
|
||||||
|
pte = table[index];
|
||||||
|
kunmap_atomic(table, KM_USER0);
|
||||||
|
|
||||||
|
if (!is_present_pte(pte))
|
||||||
goto not_present;
|
goto not_present;
|
||||||
|
|
||||||
if (write_fault && !is_writeble_pte(*ptep))
|
if (write_fault && !is_writeble_pte(pte))
|
||||||
if (user_fault || is_write_protection(vcpu))
|
if (user_fault || is_write_protection(vcpu))
|
||||||
goto access_error;
|
goto access_error;
|
||||||
|
|
||||||
if (user_fault && !(*ptep & PT_USER_MASK))
|
if (user_fault && !(pte & PT_USER_MASK))
|
||||||
goto access_error;
|
goto access_error;
|
||||||
|
|
||||||
#if PTTYPE == 64
|
#if PTTYPE == 64
|
||||||
if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK))
|
if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
|
||||||
goto access_error;
|
goto access_error;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!(*ptep & PT_ACCESSED_MASK)) {
|
if (!(pte & PT_ACCESSED_MASK)) {
|
||||||
mark_page_dirty(vcpu->kvm, table_gfn);
|
mark_page_dirty(vcpu->kvm, table_gfn);
|
||||||
*ptep |= PT_ACCESSED_MASK;
|
pte |= PT_ACCESSED_MASK;
|
||||||
|
table = kmap_atomic(page, KM_USER0);
|
||||||
|
table[index] = pte;
|
||||||
|
kunmap_atomic(table, KM_USER0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (walker->level == PT_PAGE_TABLE_LEVEL) {
|
if (walker->level == PT_PAGE_TABLE_LEVEL) {
|
||||||
walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
|
walker->gfn = (pte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
|
||||||
>> PAGE_SHIFT;
|
|
||||||
FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
|
|
||||||
table_gfn);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (walker->level == PT_DIRECTORY_LEVEL
|
if (walker->level == PT_DIRECTORY_LEVEL
|
||||||
&& (*ptep & PT_PAGE_SIZE_MASK)
|
&& (pte & PT_PAGE_SIZE_MASK)
|
||||||
&& (PTTYPE == 64 || is_pse(vcpu))) {
|
&& (PTTYPE == 64 || is_pse(vcpu))) {
|
||||||
walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
|
walker->gfn = (pte & PT_DIR_BASE_ADDR_MASK)
|
||||||
>> PAGE_SHIFT;
|
>> PAGE_SHIFT;
|
||||||
walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
|
walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
|
||||||
FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
|
|
||||||
table_gfn);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
walker->inherited_ar &= walker->table[index];
|
walker->inherited_ar &= pte;
|
||||||
table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
|
|
||||||
kunmap_atomic(walker->table, KM_USER0);
|
|
||||||
paddr = safe_gpa_to_hpa(vcpu->kvm, table_gfn << PAGE_SHIFT);
|
|
||||||
walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
|
|
||||||
walker->table = kmap_atomic(walker->page, KM_USER0);
|
|
||||||
--walker->level;
|
--walker->level;
|
||||||
walker->table_gfn[walker->level - 1] = table_gfn;
|
|
||||||
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
|
|
||||||
walker->level - 1, table_gfn);
|
|
||||||
}
|
}
|
||||||
walker->pte = *ptep;
|
|
||||||
if (walker->page)
|
if (write_fault && !is_dirty_pte(pte)) {
|
||||||
walker->ptep = NULL;
|
mark_page_dirty(vcpu->kvm, table_gfn);
|
||||||
if (walker->table)
|
pte |= PT_DIRTY_MASK;
|
||||||
kunmap_atomic(walker->table, KM_USER0);
|
table = kmap_atomic(page, KM_USER0);
|
||||||
pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
|
table[index] = pte;
|
||||||
|
kunmap_atomic(table, KM_USER0);
|
||||||
|
pte_gpa = table_gfn << PAGE_SHIFT;
|
||||||
|
pte_gpa += index * sizeof(pt_element_t);
|
||||||
|
kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
|
||||||
|
}
|
||||||
|
|
||||||
|
walker->pte = pte;
|
||||||
|
pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)pte);
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
not_present:
|
not_present:
|
||||||
|
@ -209,8 +183,6 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
|
||||||
walker->error_code |= PFERR_USER_MASK;
|
walker->error_code |= PFERR_USER_MASK;
|
||||||
if (fetch_fault)
|
if (fetch_fault)
|
||||||
walker->error_code |= PFERR_FETCH_MASK;
|
walker->error_code |= PFERR_FETCH_MASK;
|
||||||
if (walker->table)
|
|
||||||
kunmap_atomic(walker->table, KM_USER0);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue