s390/mm: extended gmap pte notifier
The current gmap pte notifier forces a pte into a read-write state. If the pte is invalidated, the gmap notifier is called to inform KVM that the mapping will go away. Extend this approach to allow read-write, read-only and no-access as possible target states and call the pte notifier for any change to the pte. This mechanism is used to temporarily set specific access rights for a pte without doing the heavy work of a true mprotect call. Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
This commit is contained in:
parent
8ecb1a59d6
commit
b2d73b2a0a
|
@ -59,8 +59,11 @@ void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
|
|||
void __gmap_zap(struct gmap *, unsigned long gaddr);
|
||||
void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
|
||||
|
||||
void gmap_register_ipte_notifier(struct gmap_notifier *);
|
||||
void gmap_unregister_ipte_notifier(struct gmap_notifier *);
|
||||
int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
|
||||
void gmap_register_pte_notifier(struct gmap_notifier *);
|
||||
void gmap_unregister_pte_notifier(struct gmap_notifier *);
|
||||
void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *);
|
||||
|
||||
int gmap_mprotect_notify(struct gmap *, unsigned long start,
|
||||
unsigned long len, int prot);
|
||||
|
||||
#endif /* _ASM_S390_GMAP_H */
|
||||
|
|
|
@ -886,6 +886,8 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
|
|||
pte_t *ptep, pte_t entry);
|
||||
void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
|
||||
void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
|
||||
int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr,
|
||||
pte_t *ptep, int prot);
|
||||
void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep , int reset);
|
||||
void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <linux/init.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/slab.h>
|
||||
|
@ -185,7 +186,7 @@ static struct notifier_block kvm_clock_notifier = {
|
|||
int kvm_arch_hardware_setup(void)
|
||||
{
|
||||
gmap_notifier.notifier_call = kvm_gmap_notifier;
|
||||
gmap_register_ipte_notifier(&gmap_notifier);
|
||||
gmap_register_pte_notifier(&gmap_notifier);
|
||||
atomic_notifier_chain_register(&s390_epoch_delta_notifier,
|
||||
&kvm_clock_notifier);
|
||||
return 0;
|
||||
|
@ -193,7 +194,7 @@ int kvm_arch_hardware_setup(void)
|
|||
|
||||
void kvm_arch_hardware_unsetup(void)
|
||||
{
|
||||
gmap_unregister_ipte_notifier(&gmap_notifier);
|
||||
gmap_unregister_pte_notifier(&gmap_notifier);
|
||||
atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
|
||||
&kvm_clock_notifier);
|
||||
}
|
||||
|
@ -2272,16 +2273,16 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
|
|||
return 0;
|
||||
/*
|
||||
* We use MMU_RELOAD just to re-arm the ipte notifier for the
|
||||
* guest prefix page. gmap_ipte_notify will wait on the ptl lock.
|
||||
* guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
|
||||
* This ensures that the ipte instruction for this request has
|
||||
* already finished. We might race against a second unmapper that
|
||||
* wants to set the blocking bit. Lets just retry the request loop.
|
||||
*/
|
||||
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
|
||||
int rc;
|
||||
rc = gmap_ipte_notify(vcpu->arch.gmap,
|
||||
kvm_s390_get_prefix(vcpu),
|
||||
PAGE_SIZE * 2);
|
||||
rc = gmap_mprotect_notify(vcpu->arch.gmap,
|
||||
kvm_s390_get_prefix(vcpu),
|
||||
PAGE_SIZE * 2, PROT_WRITE);
|
||||
if (rc)
|
||||
return rc;
|
||||
goto retry;
|
||||
|
|
|
@ -553,29 +553,29 @@ static LIST_HEAD(gmap_notifier_list);
|
|||
static DEFINE_SPINLOCK(gmap_notifier_lock);
|
||||
|
||||
/**
|
||||
* gmap_register_ipte_notifier - register a pte invalidation callback
|
||||
* gmap_register_pte_notifier - register a pte invalidation callback
|
||||
* @nb: pointer to the gmap notifier block
|
||||
*/
|
||||
void gmap_register_ipte_notifier(struct gmap_notifier *nb)
|
||||
void gmap_register_pte_notifier(struct gmap_notifier *nb)
|
||||
{
|
||||
spin_lock(&gmap_notifier_lock);
|
||||
list_add_rcu(&nb->list, &gmap_notifier_list);
|
||||
spin_unlock(&gmap_notifier_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
|
||||
EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);
|
||||
|
||||
/**
|
||||
* gmap_unregister_ipte_notifier - remove a pte invalidation callback
|
||||
* gmap_unregister_pte_notifier - remove a pte invalidation callback
|
||||
* @nb: pointer to the gmap notifier block
|
||||
*/
|
||||
void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
|
||||
void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
|
||||
{
|
||||
spin_lock(&gmap_notifier_lock);
|
||||
list_del_rcu(&nb->list);
|
||||
spin_unlock(&gmap_notifier_lock);
|
||||
synchronize_rcu();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
|
||||
EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);
|
||||
|
||||
/**
|
||||
* gmap_call_notifier - call all registered invalidation callbacks
|
||||
|
@ -593,62 +593,150 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
|
|||
}
|
||||
|
||||
/**
|
||||
* gmap_ipte_notify - mark a range of ptes for invalidation notification
|
||||
* gmap_table_walk - walk the gmap page tables
|
||||
* @gmap: pointer to guest mapping meta data structure
|
||||
* @gaddr: virtual address in the guest address space
|
||||
*
|
||||
* Returns a table pointer for the given guest address.
|
||||
*/
|
||||
static inline unsigned long *gmap_table_walk(struct gmap *gmap,
|
||||
unsigned long gaddr)
|
||||
{
|
||||
unsigned long *table;
|
||||
|
||||
table = gmap->table;
|
||||
switch (gmap->asce & _ASCE_TYPE_MASK) {
|
||||
case _ASCE_TYPE_REGION1:
|
||||
table += (gaddr >> 53) & 0x7ff;
|
||||
if (*table & _REGION_ENTRY_INVALID)
|
||||
return NULL;
|
||||
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
|
||||
/* Fallthrough */
|
||||
case _ASCE_TYPE_REGION2:
|
||||
table += (gaddr >> 42) & 0x7ff;
|
||||
if (*table & _REGION_ENTRY_INVALID)
|
||||
return NULL;
|
||||
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
|
||||
/* Fallthrough */
|
||||
case _ASCE_TYPE_REGION3:
|
||||
table += (gaddr >> 31) & 0x7ff;
|
||||
if (*table & _REGION_ENTRY_INVALID)
|
||||
return NULL;
|
||||
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
|
||||
/* Fallthrough */
|
||||
case _ASCE_TYPE_SEGMENT:
|
||||
table += (gaddr >> 20) & 0x7ff;
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
/**
|
||||
* gmap_pte_op_walk - walk the gmap page table, get the page table lock
|
||||
* and return the pte pointer
|
||||
* @gmap: pointer to guest mapping meta data structure
|
||||
* @gaddr: virtual address in the guest address space
|
||||
* @ptl: pointer to the spinlock pointer
|
||||
*
|
||||
* Returns a pointer to the locked pte for a guest address, or NULL
|
||||
*/
|
||||
static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
|
||||
spinlock_t **ptl)
|
||||
{
|
||||
unsigned long *table;
|
||||
|
||||
/* Walk the gmap page table, lock and get pte pointer */
|
||||
table = gmap_table_walk(gmap, gaddr);
|
||||
if (!table || *table & _SEGMENT_ENTRY_INVALID)
|
||||
return NULL;
|
||||
return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
|
||||
}
|
||||
|
||||
/**
|
||||
* gmap_pte_op_fixup - force a page in and connect the gmap page table
|
||||
* @gmap: pointer to guest mapping meta data structure
|
||||
* @gaddr: virtual address in the guest address space
|
||||
* @vmaddr: address in the host process address space
|
||||
*
|
||||
* Returns 0 if the caller can retry __gmap_translate (might fail again),
|
||||
* -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
|
||||
* up or connecting the gmap page table.
|
||||
*/
|
||||
static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
|
||||
unsigned long vmaddr)
|
||||
{
|
||||
struct mm_struct *mm = gmap->mm;
|
||||
bool unlocked = false;
|
||||
|
||||
if (fixup_user_fault(current, mm, vmaddr, FAULT_FLAG_WRITE, &unlocked))
|
||||
return -EFAULT;
|
||||
if (unlocked)
|
||||
/* lost mmap_sem, caller has to retry __gmap_translate */
|
||||
return 0;
|
||||
/* Connect the page tables */
|
||||
return __gmap_link(gmap, gaddr, vmaddr);
|
||||
}
|
||||
|
||||
/**
|
||||
* gmap_pte_op_end - release the page table lock
|
||||
* @ptl: pointer to the spinlock to release
|
||||
*/
|
||||
static void gmap_pte_op_end(spinlock_t *ptl)
|
||||
{
|
||||
spin_unlock(ptl);
|
||||
}
|
||||
|
||||
/**
|
||||
* gmap_mprotect_notify - change access rights for a range of ptes and
|
||||
* call the notifier if any pte changes again
|
||||
* @gmap: pointer to guest mapping meta data structure
|
||||
* @gaddr: virtual address in the guest address space
|
||||
* @len: size of area
|
||||
* @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
|
||||
*
|
||||
* Returns 0 if for each page in the given range a gmap mapping exists and
|
||||
* the invalidation notification could be set. If the gmap mapping is missing
|
||||
* for one or more pages -EFAULT is returned. If no memory could be allocated
|
||||
* -ENOMEM is returned. This function establishes missing page table entries.
|
||||
* Returns 0 if for each page in the given range a gmap mapping exists,
|
||||
* the new access rights could be set and the notifier could be armed.
|
||||
* If the gmap mapping is missing for one or more pages -EFAULT is
|
||||
* returned. If no memory could be allocated -ENOMEM is returned.
|
||||
* This function establishes missing page table entries.
|
||||
*/
|
||||
int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
|
||||
int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
|
||||
unsigned long len, int prot)
|
||||
{
|
||||
unsigned long addr;
|
||||
unsigned long vmaddr;
|
||||
spinlock_t *ptl;
|
||||
pte_t *ptep;
|
||||
bool unlocked;
|
||||
int rc = 0;
|
||||
|
||||
if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
|
||||
return -EINVAL;
|
||||
if (!MACHINE_HAS_ESOP && prot == PROT_READ)
|
||||
return -EINVAL;
|
||||
down_read(&gmap->mm->mmap_sem);
|
||||
while (len) {
|
||||
unlocked = false;
|
||||
/* Convert gmap address and connect the page tables */
|
||||
addr = __gmap_translate(gmap, gaddr);
|
||||
if (IS_ERR_VALUE(addr)) {
|
||||
rc = addr;
|
||||
break;
|
||||
rc = -EAGAIN;
|
||||
ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
|
||||
if (ptep) {
|
||||
rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot);
|
||||
gmap_pte_op_end(ptl);
|
||||
}
|
||||
/* Get the page mapped */
|
||||
if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
|
||||
&unlocked)) {
|
||||
rc = -EFAULT;
|
||||
break;
|
||||
}
|
||||
/* While trying to map mmap_sem got unlocked. Let us retry */
|
||||
if (unlocked)
|
||||
if (rc) {
|
||||
vmaddr = __gmap_translate(gmap, gaddr);
|
||||
if (IS_ERR_VALUE(vmaddr)) {
|
||||
rc = vmaddr;
|
||||
break;
|
||||
}
|
||||
rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr);
|
||||
if (rc)
|
||||
break;
|
||||
continue;
|
||||
rc = __gmap_link(gmap, gaddr, addr);
|
||||
if (rc)
|
||||
break;
|
||||
/* Walk the process page table, lock and get pte pointer */
|
||||
ptep = get_locked_pte(gmap->mm, addr, &ptl);
|
||||
VM_BUG_ON(!ptep);
|
||||
/* Set notification bit in the pgste of the pte */
|
||||
if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
|
||||
ptep_set_notify(gmap->mm, addr, ptep);
|
||||
gaddr += PAGE_SIZE;
|
||||
len -= PAGE_SIZE;
|
||||
}
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
gaddr += PAGE_SIZE;
|
||||
len -= PAGE_SIZE;
|
||||
}
|
||||
up_read(&gmap->mm->mmap_sem);
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gmap_ipte_notify);
|
||||
EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
|
||||
|
||||
/**
|
||||
* ptep_notify - call all invalidation callbacks for a specific pte.
|
||||
|
|
|
@ -179,9 +179,9 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
|
|||
return pgste;
|
||||
}
|
||||
|
||||
static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
|
||||
unsigned long addr,
|
||||
pte_t *ptep, pgste_t pgste)
|
||||
static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
|
||||
unsigned long addr,
|
||||
pte_t *ptep, pgste_t pgste)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
if (pgste_val(pgste) & PGSTE_IN_BIT) {
|
||||
|
@ -199,7 +199,7 @@ static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
|
|||
|
||||
if (mm_has_pgste(mm)) {
|
||||
pgste = pgste_get_lock(ptep);
|
||||
pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
|
||||
pgste = pgste_pte_notify(mm, addr, ptep, pgste);
|
||||
}
|
||||
return pgste;
|
||||
}
|
||||
|
@ -414,6 +414,50 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|||
pgste_set_unlock(ptep, pgste);
|
||||
}
|
||||
|
||||
/**
|
||||
* ptep_force_prot - change access rights of a locked pte
|
||||
* @mm: pointer to the process mm_struct
|
||||
* @addr: virtual address in the guest address space
|
||||
* @ptep: pointer to the page table entry
|
||||
* @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
|
||||
*
|
||||
* Returns 0 if the access rights were changed and -EAGAIN if the current
|
||||
* and requested access rights are incompatible.
|
||||
*/
|
||||
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, int prot)
|
||||
{
|
||||
pte_t entry;
|
||||
pgste_t pgste;
|
||||
int pte_i, pte_p;
|
||||
|
||||
pgste = pgste_get_lock(ptep);
|
||||
entry = *ptep;
|
||||
/* Check pte entry after all locks have been acquired */
|
||||
pte_i = pte_val(entry) & _PAGE_INVALID;
|
||||
pte_p = pte_val(entry) & _PAGE_PROTECT;
|
||||
if ((pte_i && (prot != PROT_NONE)) ||
|
||||
(pte_p && (prot & PROT_WRITE))) {
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
return -EAGAIN;
|
||||
}
|
||||
/* Change access rights and set the pgste notification bit */
|
||||
if (prot == PROT_NONE && !pte_i) {
|
||||
ptep_flush_direct(mm, addr, ptep);
|
||||
pgste = pgste_update_all(entry, pgste, mm);
|
||||
pte_val(entry) |= _PAGE_INVALID;
|
||||
}
|
||||
if (prot == PROT_READ && !pte_p) {
|
||||
ptep_flush_direct(mm, addr, ptep);
|
||||
pte_val(entry) &= ~_PAGE_INVALID;
|
||||
pte_val(entry) |= _PAGE_PROTECT;
|
||||
}
|
||||
pgste_val(pgste) |= PGSTE_IN_BIT;
|
||||
pgste = pgste_set_pte(ptep, pgste, entry);
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
|
||||
{
|
||||
if (!non_swap_entry(entry))
|
||||
|
@ -483,7 +527,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
|
|||
pgste_val(pgste) &= ~PGSTE_UC_BIT;
|
||||
pte = *ptep;
|
||||
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
|
||||
pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
|
||||
pgste = pgste_pte_notify(mm, addr, ptep, pgste);
|
||||
__ptep_ipte(addr, ptep);
|
||||
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
|
||||
pte_val(pte) |= _PAGE_PROTECT;
|
||||
|
|
Loading…
Reference in New Issue