KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot in HPT page fault handler

This makes the same changes in the page fault handler for HPT guests
that commits 31c8b0d069 ("KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot()
in page fault handler", 2018-03-01), 71d29f43b6 ("KVM: PPC: Book3S HV:
Don't use compound_order to determine host mapping size", 2018-09-11)
and 6579804c43 ("KVM: PPC: Book3S HV: Avoid crash from THP collapse
during radix page fault", 2018-10-04) made for the page fault handler
for radix guests.

In summary, where we used to call get_user_pages_fast() and then do
special handling for VM_PFNMAP vmas, we now call __get_user_pages_fast()
and then __gfn_to_pfn_memslot() if that fails, followed by reading the
Linux PTE to get the host PFN, host page size and mapping attributes.

This also brings in the change from SetPageDirty() to set_page_dirty_lock()
which was done for the radix page fault handler in commit c3856aeb29
("KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault
handler", 2018-02-23).

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
Paul Mackerras 2019-08-26 16:20:47 +10:00
parent 1c482452d5
commit cd758a9b57
1 changed files with 57 additions and 62 deletions

View File

@ -485,18 +485,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
__be64 *hptep;
unsigned long mmu_seq, psize, pte_size;
unsigned long gpa_base, gfn_base;
unsigned long gpa, gfn, hva, pfn;
unsigned long gpa, gfn, hva, pfn, hpa;
struct kvm_memory_slot *memslot;
unsigned long *rmap;
struct revmap_entry *rev;
struct page *page, *pages[1];
long index, ret, npages;
struct page *page;
long index, ret;
bool is_ci;
unsigned int writing, write_ok;
struct vm_area_struct *vma;
bool writing, write_ok;
unsigned int shift;
unsigned long rcbits;
long mmio_update;
struct mm_struct *mm;
pte_t pte, *ptep;
if (kvm_is_radix(kvm))
return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
@ -570,59 +570,62 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
smp_rmb();
ret = -EFAULT;
is_ci = false;
pfn = 0;
page = NULL;
mm = kvm->mm;
pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */
write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn);
npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
if (npages < 1) {
/* Check if it's an I/O mapping */
down_read(&mm->mmap_sem);
vma = find_vma(mm, hva);
if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
(vma->vm_flags & VM_PFNMAP)) {
pfn = vma->vm_pgoff +
((hva - vma->vm_start) >> PAGE_SHIFT);
pte_size = psize;
is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
write_ok = vma->vm_flags & VM_WRITE;
}
up_read(&mm->mmap_sem);
if (!pfn)
goto out_put;
/*
* Do a fast check first, since __gfn_to_pfn_memslot doesn't
* do it with !atomic && !async, which is how we call it.
* We always ask for write permission since the common case
* is that the page is writable.
*/
if (__get_user_pages_fast(hva, 1, 1, &page) == 1) {
write_ok = true;
} else {
page = pages[0];
pfn = page_to_pfn(page);
if (PageHuge(page)) {
page = compound_head(page);
pte_size <<= compound_order(page);
}
/* if the guest wants write access, see if that is OK */
if (!writing && hpte_is_writable(r)) {
pte_t *ptep, pte;
unsigned long flags;
/*
* We need to protect against page table destruction
* hugepage split and collapse.
*/
local_irq_save(flags);
ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL);
if (ptep) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (__pte_write(pte))
write_ok = 1;
}
local_irq_restore(flags);
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
writing, &write_ok);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
if (PageReserved(page))
page = NULL;
}
}
/*
* Read the PTE from the process' radix tree and use that
* so we get the shift and attribute bits.
*/
local_irq_disable();
ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
/*
* If the PTE disappeared temporarily due to a THP
* collapse, just return and let the guest try again.
*/
if (!ptep) {
local_irq_enable();
if (page)
put_page(page);
return RESUME_GUEST;
}
pte = *ptep;
local_irq_enable();
hpa = pte_pfn(pte) << PAGE_SHIFT;
pte_size = PAGE_SIZE;
if (shift)
pte_size = 1ul << shift;
is_ci = pte_ci(pte);
if (psize > pte_size)
goto out_put;
if (pte_size > psize)
hpa |= hva & (pte_size - psize);
/* Check WIMG vs. the actual page we're accessing */
if (!hpte_cache_flags_ok(r, is_ci)) {
@ -636,14 +639,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
/*
* Set the HPTE to point to pfn.
* Since the pfn is at PAGE_SIZE granularity, make sure we
* Set the HPTE to point to hpa.
* Since the hpa is at PAGE_SIZE granularity, make sure we
* don't mask out lower-order bits if psize < PAGE_SIZE.
*/
if (psize < PAGE_SIZE)
psize = PAGE_SIZE;
r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
((pfn << PAGE_SHIFT) & ~(psize - 1));
r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
if (hpte_is_writable(r) && !write_ok)
r = hpte_make_readonly(r);
ret = RESUME_GUEST;
@ -708,20 +710,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
asm volatile("ptesync" : : : "memory");
preempt_enable();
if (page && hpte_is_writable(r))
SetPageDirty(page);
set_page_dirty_lock(page);
out_put:
trace_kvm_page_fault_exit(vcpu, hpte, ret);
if (page) {
/*
* We drop pages[0] here, not page because page might
* have been set to the head page of a compound, but
* we have to drop the reference on the correct tail
* page to match the get inside gup()
*/
put_page(pages[0]);
}
if (page)
put_page(page);
return ret;
out_unlock: