KVM: MMU: abstract the operation of rmap

Abstract the operation of rmap to spte_list, then we can use it for the
reverse mapping of parent pte in the later patch

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
This commit is contained in:
Xiao Guangrong 2011-05-15 23:26:20 +08:00 committed by Avi Kivity
parent 1249b96e72
commit 53c07b1878
2 changed files with 158 additions and 140 deletions

View File

@ -347,7 +347,7 @@ struct kvm_vcpu_arch {
struct kvm_pv_mmu_op_buffer mmu_op_buffer;
struct kvm_mmu_memory_cache mmu_pte_chain_cache;
struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;

View File

@ -148,7 +148,7 @@ module_param(oos_shadow, bool, 0644);
#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
| PT64_NX_MASK)
#define RMAP_EXT 4
#define PTE_LIST_EXT 4
#define ACC_EXEC_MASK 1
#define ACC_WRITE_MASK PT_WRITABLE_MASK
@ -164,9 +164,9 @@ module_param(oos_shadow, bool, 0644);
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
struct kvm_rmap_desc {
u64 *sptes[RMAP_EXT];
struct kvm_rmap_desc *more;
struct pte_list_desc {
u64 *sptes[PTE_LIST_EXT];
struct pte_list_desc *more;
};
struct kvm_shadow_walk_iterator {
@ -185,7 +185,7 @@ struct kvm_shadow_walk_iterator {
typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte);
static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
static struct kmem_cache *pte_list_desc_cache;
static struct kmem_cache *mmu_page_header_cache;
static struct percpu_counter kvm_total_used_mmu_pages;
@ -401,8 +401,8 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
pte_chain_cache, 4);
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
rmap_desc_cache, 4 + PTE_PREFETCH_NUM);
r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
pte_list_desc_cache, 4 + PTE_PREFETCH_NUM);
if (r)
goto out;
r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
@ -416,8 +416,10 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache);
mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache);
mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache,
pte_chain_cache);
mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
pte_list_desc_cache);
mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache,
mmu_page_header_cache);
@ -444,15 +446,15 @@ static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
kmem_cache_free(pte_chain_cache, pc);
}
static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu)
{
return mmu_memory_cache_alloc(&vcpu->arch.mmu_rmap_desc_cache,
sizeof(struct kvm_rmap_desc));
return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache,
sizeof(struct pte_list_desc));
}
static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
{
kmem_cache_free(rmap_desc_cache, rd);
kmem_cache_free(pte_list_desc_cache, pte_list_desc);
}
static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
@ -589,10 +591,139 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
return level - 1;
}
/*
* Pte mapping structures:
*
* If pte_list bit zero is zero, then pte_list point to the spte.
*
* If pte_list bit zero is one, (then pte_list & ~1) points to a struct
* pte_list_desc containing more mappings.
*
* Returns the number of pte entries before the spte was added or zero if
* the spte was not added.
*
*/
static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
unsigned long *pte_list)
{
struct pte_list_desc *desc;
int i, count = 0;
if (!*pte_list) {
rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte);
*pte_list = (unsigned long)spte;
} else if (!(*pte_list & 1)) {
rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte);
desc = mmu_alloc_pte_list_desc(vcpu);
desc->sptes[0] = (u64 *)*pte_list;
desc->sptes[1] = spte;
*pte_list = (unsigned long)desc | 1;
++count;
} else {
rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte);
desc = (struct pte_list_desc *)(*pte_list & ~1ul);
while (desc->sptes[PTE_LIST_EXT-1] && desc->more) {
desc = desc->more;
count += PTE_LIST_EXT;
}
if (desc->sptes[PTE_LIST_EXT-1]) {
desc->more = mmu_alloc_pte_list_desc(vcpu);
desc = desc->more;
}
for (i = 0; desc->sptes[i]; ++i)
++count;
desc->sptes[i] = spte;
}
return count;
}
static u64 *pte_list_next(unsigned long *pte_list, u64 *spte)
{
struct pte_list_desc *desc;
u64 *prev_spte;
int i;
if (!*pte_list)
return NULL;
else if (!(*pte_list & 1)) {
if (!spte)
return (u64 *)*pte_list;
return NULL;
}
desc = (struct pte_list_desc *)(*pte_list & ~1ul);
prev_spte = NULL;
while (desc) {
for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
if (prev_spte == spte)
return desc->sptes[i];
prev_spte = desc->sptes[i];
}
desc = desc->more;
}
return NULL;
}
static void
pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc,
int i, struct pte_list_desc *prev_desc)
{
int j;
for (j = PTE_LIST_EXT - 1; !desc->sptes[j] && j > i; --j)
;
desc->sptes[i] = desc->sptes[j];
desc->sptes[j] = NULL;
if (j != 0)
return;
if (!prev_desc && !desc->more)
*pte_list = (unsigned long)desc->sptes[0];
else
if (prev_desc)
prev_desc->more = desc->more;
else
*pte_list = (unsigned long)desc->more | 1;
mmu_free_pte_list_desc(desc);
}
static void pte_list_remove(u64 *spte, unsigned long *pte_list)
{
struct pte_list_desc *desc;
struct pte_list_desc *prev_desc;
int i;
if (!*pte_list) {
printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte);
BUG();
} else if (!(*pte_list & 1)) {
rmap_printk("pte_list_remove: %p 1->0\n", spte);
if ((u64 *)*pte_list != spte) {
printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte);
BUG();
}
*pte_list = 0;
} else {
rmap_printk("pte_list_remove: %p many->many\n", spte);
desc = (struct pte_list_desc *)(*pte_list & ~1ul);
prev_desc = NULL;
while (desc) {
for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
if (desc->sptes[i] == spte) {
pte_list_desc_remove_entry(pte_list,
desc, i,
prev_desc);
return;
}
prev_desc = desc;
desc = desc->more;
}
pr_err("pte_list_remove: %p many->many\n", spte);
BUG();
}
}
/*
* Take gfn and return the reverse mapping to it.
*/
static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
{
struct kvm_memory_slot *slot;
@ -607,122 +738,35 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
return &linfo->rmap_pde;
}
/*
* Reverse mapping data structures:
*
* If rmapp bit zero is zero, then rmapp point to the shadw page table entry
* that points to page_address(page).
*
* If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
* containing more mappings.
*
* Returns the number of rmap entries before the spte was added or zero if
* the spte was not added.
*
*/
static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
{
struct kvm_mmu_page *sp;
struct kvm_rmap_desc *desc;
unsigned long *rmapp;
int i, count = 0;
if (!is_rmap_spte(*spte))
return count;
return 0;
sp = page_header(__pa(spte));
kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
if (!*rmapp) {
rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
*rmapp = (unsigned long)spte;
} else if (!(*rmapp & 1)) {
rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
desc = mmu_alloc_rmap_desc(vcpu);
desc->sptes[0] = (u64 *)*rmapp;
desc->sptes[1] = spte;
*rmapp = (unsigned long)desc | 1;
++count;
} else {
rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
while (desc->sptes[RMAP_EXT-1] && desc->more) {
desc = desc->more;
count += RMAP_EXT;
}
if (desc->sptes[RMAP_EXT-1]) {
desc->more = mmu_alloc_rmap_desc(vcpu);
desc = desc->more;
}
for (i = 0; desc->sptes[i]; ++i)
++count;
desc->sptes[i] = spte;
}
return count;
return pte_list_add(vcpu, spte, rmapp);
}
static void rmap_desc_remove_entry(unsigned long *rmapp,
struct kvm_rmap_desc *desc,
int i,
struct kvm_rmap_desc *prev_desc)
static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
{
int j;
for (j = RMAP_EXT - 1; !desc->sptes[j] && j > i; --j)
;
desc->sptes[i] = desc->sptes[j];
desc->sptes[j] = NULL;
if (j != 0)
return;
if (!prev_desc && !desc->more)
*rmapp = (unsigned long)desc->sptes[0];
else
if (prev_desc)
prev_desc->more = desc->more;
else
*rmapp = (unsigned long)desc->more | 1;
mmu_free_rmap_desc(desc);
return pte_list_next(rmapp, spte);
}
static void rmap_remove(struct kvm *kvm, u64 *spte)
{
struct kvm_rmap_desc *desc;
struct kvm_rmap_desc *prev_desc;
struct kvm_mmu_page *sp;
gfn_t gfn;
unsigned long *rmapp;
int i;
sp = page_header(__pa(spte));
gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);
if (!*rmapp) {
printk(KERN_ERR "rmap_remove: %p 0->BUG\n", spte);
BUG();
} else if (!(*rmapp & 1)) {
rmap_printk("rmap_remove: %p 1->0\n", spte);
if ((u64 *)*rmapp != spte) {
printk(KERN_ERR "rmap_remove: %p 1->BUG\n", spte);
BUG();
}
*rmapp = 0;
} else {
rmap_printk("rmap_remove: %p many->many\n", spte);
desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
prev_desc = NULL;
while (desc) {
for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i)
if (desc->sptes[i] == spte) {
rmap_desc_remove_entry(rmapp,
desc, i,
prev_desc);
return;
}
prev_desc = desc;
desc = desc->more;
}
pr_err("rmap_remove: %p many->many\n", spte);
BUG();
}
pte_list_remove(spte, rmapp);
}
static int set_spte_track_bits(u64 *sptep, u64 new_spte)
@ -752,32 +796,6 @@ static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
rmap_remove(kvm, sptep);
}
static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
{
struct kvm_rmap_desc *desc;
u64 *prev_spte;
int i;
if (!*rmapp)
return NULL;
else if (!(*rmapp & 1)) {
if (!spte)
return (u64 *)*rmapp;
return NULL;
}
desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
prev_spte = NULL;
while (desc) {
for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) {
if (prev_spte == spte)
return desc->sptes[i];
prev_spte = desc->sptes[i];
}
desc = desc->more;
}
return NULL;
}
static int rmap_write_protect(struct kvm *kvm, u64 gfn)
{
unsigned long *rmapp;
@ -3601,8 +3619,8 @@ static void mmu_destroy_caches(void)
{
if (pte_chain_cache)
kmem_cache_destroy(pte_chain_cache);
if (rmap_desc_cache)
kmem_cache_destroy(rmap_desc_cache);
if (pte_list_desc_cache)
kmem_cache_destroy(pte_list_desc_cache);
if (mmu_page_header_cache)
kmem_cache_destroy(mmu_page_header_cache);
}
@ -3614,10 +3632,10 @@ int kvm_mmu_module_init(void)
0, 0, NULL);
if (!pte_chain_cache)
goto nomem;
rmap_desc_cache = kmem_cache_create("kvm_rmap_desc",
sizeof(struct kvm_rmap_desc),
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
sizeof(struct pte_list_desc),
0, 0, NULL);
if (!rmap_desc_cache)
if (!pte_list_desc_cache)
goto nomem;
mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",