KVM: MMU: Simplify page table walker

Simplify the walker level loop so that it no longer carries so much state
from one iteration to the next.  In addition to being complex, carrying that
state made kmap_atomic() critical sections difficult to manage.

As a result of this change, kmap_atomic() sections are limited to the code
that actually touches the guest pte, which allows the other functions called
from the walker to perform operations that may sleep.  Such sleeping will
happen once we enable swapping.

Signed-off-by: Avi Kivity <avi@qumranet.com>
This commit is contained in:
Avi Kivity 2007-10-17 12:18:47 +02:00
parent d77a25074a
commit 42bf3f0a1f
1 changed file with 48 additions and 76 deletions

View File

@ -59,32 +59,12 @@
struct guest_walker { struct guest_walker {
int level; int level;
gfn_t table_gfn[PT_MAX_FULL_LEVELS]; gfn_t table_gfn[PT_MAX_FULL_LEVELS];
pt_element_t *table;
pt_element_t pte; pt_element_t pte;
pt_element_t *ptep;
struct page *page;
int index;
pt_element_t inherited_ar; pt_element_t inherited_ar;
gfn_t gfn; gfn_t gfn;
u32 error_code; u32 error_code;
}; };
static void FNAME(update_dirty_bit)(struct kvm_vcpu *vcpu,
int write_fault,
pt_element_t *ptep,
gfn_t table_gfn)
{
gpa_t pte_gpa;
if (write_fault && !is_dirty_pte(*ptep)) {
mark_page_dirty(vcpu->kvm, table_gfn);
*ptep |= PT_DIRTY_MASK;
pte_gpa = ((gpa_t)table_gfn << PAGE_SHIFT);
pte_gpa += offset_in_page(ptep);
kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)ptep, sizeof(*ptep));
}
}
/* /*
* Fetch a guest pte for a guest virtual address * Fetch a guest pte for a guest virtual address
*/ */
@ -94,105 +74,99 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
{ {
hpa_t hpa; hpa_t hpa;
struct kvm_memory_slot *slot; struct kvm_memory_slot *slot;
pt_element_t *ptep; struct page *page;
pt_element_t root; pt_element_t *table;
pt_element_t pte;
gfn_t table_gfn; gfn_t table_gfn;
unsigned index;
gpa_t pte_gpa;
pgprintk("%s: addr %lx\n", __FUNCTION__, addr); pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
walker->level = vcpu->mmu.root_level; walker->level = vcpu->mmu.root_level;
walker->table = NULL; pte = vcpu->cr3;
walker->page = NULL;
walker->ptep = NULL;
root = vcpu->cr3;
#if PTTYPE == 64 #if PTTYPE == 64
if (!is_long_mode(vcpu)) { if (!is_long_mode(vcpu)) {
walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; pte = vcpu->pdptrs[(addr >> 30) & 3];
root = *walker->ptep; if (!is_present_pte(pte))
walker->pte = root;
if (!(root & PT_PRESENT_MASK))
goto not_present; goto not_present;
--walker->level; --walker->level;
} }
#endif #endif
table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
walker->table_gfn[walker->level - 1] = table_gfn;
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
walker->level - 1, table_gfn);
slot = gfn_to_memslot(vcpu->kvm, table_gfn);
hpa = safe_gpa_to_hpa(vcpu->kvm, root & PT64_BASE_ADDR_MASK);
walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
walker->table = kmap_atomic(walker->page, KM_USER0);
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
(vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0); (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
for (;;) { for (;;) {
int index = PT_INDEX(addr, walker->level); index = PT_INDEX(addr, walker->level);
hpa_t paddr;
ptep = &walker->table[index]; table_gfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
walker->index = index; walker->table_gfn[walker->level - 1] = table_gfn;
ASSERT(((unsigned long)walker->table & PAGE_MASK) == pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
((unsigned long)ptep & PAGE_MASK)); walker->level - 1, table_gfn);
if (!is_present_pte(*ptep)) slot = gfn_to_memslot(vcpu->kvm, table_gfn);
hpa = safe_gpa_to_hpa(vcpu->kvm, pte & PT64_BASE_ADDR_MASK);
page = pfn_to_page(hpa >> PAGE_SHIFT);
table = kmap_atomic(page, KM_USER0);
pte = table[index];
kunmap_atomic(table, KM_USER0);
if (!is_present_pte(pte))
goto not_present; goto not_present;
if (write_fault && !is_writeble_pte(*ptep)) if (write_fault && !is_writeble_pte(pte))
if (user_fault || is_write_protection(vcpu)) if (user_fault || is_write_protection(vcpu))
goto access_error; goto access_error;
if (user_fault && !(*ptep & PT_USER_MASK)) if (user_fault && !(pte & PT_USER_MASK))
goto access_error; goto access_error;
#if PTTYPE == 64 #if PTTYPE == 64
if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK)) if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
goto access_error; goto access_error;
#endif #endif
if (!(*ptep & PT_ACCESSED_MASK)) { if (!(pte & PT_ACCESSED_MASK)) {
mark_page_dirty(vcpu->kvm, table_gfn); mark_page_dirty(vcpu->kvm, table_gfn);
*ptep |= PT_ACCESSED_MASK; pte |= PT_ACCESSED_MASK;
table = kmap_atomic(page, KM_USER0);
table[index] = pte;
kunmap_atomic(table, KM_USER0);
} }
if (walker->level == PT_PAGE_TABLE_LEVEL) { if (walker->level == PT_PAGE_TABLE_LEVEL) {
walker->gfn = (*ptep & PT_BASE_ADDR_MASK) walker->gfn = (pte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
>> PAGE_SHIFT;
FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
table_gfn);
break; break;
} }
if (walker->level == PT_DIRECTORY_LEVEL if (walker->level == PT_DIRECTORY_LEVEL
&& (*ptep & PT_PAGE_SIZE_MASK) && (pte & PT_PAGE_SIZE_MASK)
&& (PTTYPE == 64 || is_pse(vcpu))) { && (PTTYPE == 64 || is_pse(vcpu))) {
walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK) walker->gfn = (pte & PT_DIR_BASE_ADDR_MASK)
>> PAGE_SHIFT; >> PAGE_SHIFT;
walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL); walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
table_gfn);
break; break;
} }
walker->inherited_ar &= walker->table[index]; walker->inherited_ar &= pte;
table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
kunmap_atomic(walker->table, KM_USER0);
paddr = safe_gpa_to_hpa(vcpu->kvm, table_gfn << PAGE_SHIFT);
walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
walker->table = kmap_atomic(walker->page, KM_USER0);
--walker->level; --walker->level;
walker->table_gfn[walker->level - 1] = table_gfn;
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
walker->level - 1, table_gfn);
} }
walker->pte = *ptep;
if (walker->page) if (write_fault && !is_dirty_pte(pte)) {
walker->ptep = NULL; mark_page_dirty(vcpu->kvm, table_gfn);
if (walker->table) pte |= PT_DIRTY_MASK;
kunmap_atomic(walker->table, KM_USER0); table = kmap_atomic(page, KM_USER0);
pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); table[index] = pte;
kunmap_atomic(table, KM_USER0);
pte_gpa = table_gfn << PAGE_SHIFT;
pte_gpa += index * sizeof(pt_element_t);
kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
}
walker->pte = pte;
pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)pte);
return 1; return 1;
not_present: not_present:
@ -209,8 +183,6 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
walker->error_code |= PFERR_USER_MASK; walker->error_code |= PFERR_USER_MASK;
if (fetch_fault) if (fetch_fault)
walker->error_code |= PFERR_FETCH_MASK; walker->error_code |= PFERR_FETCH_MASK;
if (walker->table)
kunmap_atomic(walker->table, KM_USER0);
return 0; return 0;
} }