2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* S390 version
|
2012-07-20 17:15:04 +08:00
|
|
|
* Copyright IBM Corp. 1999, 2000
|
2005-04-17 06:20:36 +08:00
|
|
|
* Author(s): Hartmut Penner (hp@de.ibm.com)
|
|
|
|
* Martin Schwidefsky (schwidefsky@de.ibm.com)
|
|
|
|
*
|
|
|
|
* Derived from "include/asm-i386/pgalloc.h"
|
|
|
|
* Copyright (C) 1994 Linus Torvalds
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _S390_PGALLOC_H
|
|
|
|
#define _S390_PGALLOC_H
|
|
|
|
|
|
|
|
#include <linux/threads.h>
|
|
|
|
#include <linux/gfp.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
|
2017-06-16 23:24:39 +08:00
|
|
|
/* NOTE(review): presumably the page allocation order used for region/segment
 * (CRST) tables; its only user is outside this header - confirm there. */
#define CRST_ALLOC_ORDER 2
|
|
|
|
|
2011-05-23 16:24:23 +08:00
|
|
|
/* Region/segment (CRST) table allocation; implemented outside this header. */
unsigned long *crst_table_alloc(struct mm_struct *mm);
void crst_table_free(struct mm_struct *mm, unsigned long *table);

/*
 * Page table allocation and release; implemented outside this header.
 * NOTE(review): the parameter names below are descriptive only - confirm
 * them (in particular "vmaddr") against the definitions.
 */
unsigned long *page_table_alloc(struct mm_struct *mm);
struct page *page_table_alloc_pgste(struct mm_struct *mm);
void page_table_free(struct mm_struct *mm, unsigned long *table);
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
			 unsigned long vmaddr);
void page_table_free_pgste(struct page *page);

/* Defined elsewhere; NOTE(review): meaning of the flag is not visible here. */
extern int page_table_allocate_pgste;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-10-22 18:52:47 +08:00
|
|
|
static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2016-11-29 20:52:01 +08:00
|
|
|
struct addrtype { char _[256]; };
|
|
|
|
int i;
|
2008-12-25 20:39:26 +08:00
|
|
|
|
2016-11-29 20:52:01 +08:00
|
|
|
for (i = 0; i < n; i += 256) {
|
|
|
|
*s = val;
|
|
|
|
asm volatile(
|
|
|
|
"mvc 8(248,%[s]),0(%[s])\n"
|
|
|
|
: "+m" (*(struct addrtype *) s)
|
|
|
|
: [s] "a" (s));
|
|
|
|
s += 256 / sizeof(long);
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2007-10-22 18:52:47 +08:00
|
|
|
/*
 * crst_table_init - set every slot of a region/segment table to @entry
 * @crst:  table to initialise (2048 unsigned long slots)
 * @entry: value written to each slot
 */
static inline void crst_table_init(unsigned long *crst, unsigned long entry)
{
	/* clear_table() takes the table size in bytes */
	clear_table(crst, entry, 2048 * sizeof(*crst));
}
|
|
|
|
|
2007-10-22 18:52:47 +08:00
|
|
|
static inline unsigned long pgd_entry_type(struct mm_struct *mm)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2008-02-10 01:24:37 +08:00
|
|
|
if (mm->context.asce_limit <= (1UL << 31))
|
|
|
|
return _SEGMENT_ENTRY_EMPTY;
|
|
|
|
if (mm->context.asce_limit <= (1UL << 42))
|
|
|
|
return _REGION3_ENTRY_EMPTY;
|
2017-04-25 00:19:10 +08:00
|
|
|
if (mm->context.asce_limit <= (1UL << 53))
|
|
|
|
return _REGION2_ENTRY_EMPTY;
|
|
|
|
return _REGION1_ENTRY_EMPTY;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2017-04-25 00:19:10 +08:00
|
|
|
/* Implemented outside this header. NOTE(review): meaning of @limit and of the
 * int return value is not visible here - confirm against the definition. */
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
|
s390/mm: fix asce_bits handling with dynamic pagetable levels
There is a race with multi-threaded applications between context switch and
pagetable upgrade. In switch_mm() a new user_asce is built from mm->pgd and
mm->context.asce_bits, without holding any locks. A concurrent mmap with a
pagetable upgrade on another thread in crst_table_upgrade() could already
have set new asce_bits, but not yet the new mm->pgd. This would result in a
corrupt user_asce in switch_mm(), and eventually in a kernel panic from a
translation exception.
Fix this by storing the complete asce instead of just the asce_bits, which
can then be read atomically from switch_mm(), so that it either sees the
old value or the new value, but no mixture. Both cases are OK. Having the
old value would result in a page fault on access to the higher level memory,
but the fault handler would see the new mm->pgd, if it was a valid access
after the mmap on the other thread has completed. So as worst-case scenario
we would have a page fault loop for the racing thread until the next time
slice.
Also remove dead code and simplify the upgrade/downgrade path: there are no
upgrades from 2 levels, and only downgrades from 3 levels for compat tasks.
There are also no concurrent upgrades, because the mmap_sem is held with
down_write() in do_mmap, so the flush and table checks during upgrade can
be removed.
Reported-by: Michael Munday <munday@ca.ibm.com>
Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-04-15 22:38:40 +08:00
|
|
|
/* Implemented outside this header; operates on the given address space. */
void crst_table_downgrade(struct mm_struct *mm);
|
2008-02-10 01:24:37 +08:00
|
|
|
|
2017-04-25 00:19:10 +08:00
|
|
|
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
|
|
|
|
{
|
|
|
|
unsigned long *table = crst_table_alloc(mm);
|
|
|
|
|
|
|
|
if (table)
|
|
|
|
crst_table_init(table, _REGION2_ENTRY_EMPTY);
|
|
|
|
return (p4d_t *) table;
|
|
|
|
}
|
|
|
|
/*
 * Release a p4d table through crst_table_free(). The argument is
 * parenthesised so the cast applies to the whole expression passed in,
 * not just to its first operand.
 */
#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) (p4d))
|
|
|
|
|
2008-02-10 01:24:36 +08:00
|
|
|
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
|
|
|
|
{
|
2011-05-23 16:24:23 +08:00
|
|
|
unsigned long *table = crst_table_alloc(mm);
|
2008-02-10 01:24:36 +08:00
|
|
|
if (table)
|
|
|
|
crst_table_init(table, _REGION3_ENTRY_EMPTY);
|
|
|
|
return (pud_t *) table;
|
|
|
|
}
|
|
|
|
/*
 * Release a pud table through crst_table_free(). The argument is
 * parenthesised so the cast applies to the whole expression passed in,
 * not just to its first operand.
 */
#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) (pud))
|
2007-10-22 18:52:48 +08:00
|
|
|
|
2007-10-22 18:52:47 +08:00
|
|
|
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2011-05-23 16:24:23 +08:00
|
|
|
unsigned long *table = crst_table_alloc(mm);
|
2014-02-12 21:16:18 +08:00
|
|
|
|
|
|
|
if (!table)
|
|
|
|
return NULL;
|
|
|
|
crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
|
|
|
|
if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
|
|
|
|
crst_table_free(mm, table);
|
|
|
|
return NULL;
|
|
|
|
}
|
2008-02-10 01:24:35 +08:00
|
|
|
return (pmd_t *) table;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2014-02-12 21:16:18 +08:00
|
|
|
|
|
|
|
/*
 * pmd_free - undo pmd_alloc_one()
 * @mm:  address space the table belongs to
 * @pmd: table to release
 *
 * Runs the pmd page destructor on the page backing the table, then frees
 * the table itself.
 */
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
	unsigned long *crst = (unsigned long *) pmd;

	pgtable_pmd_page_dtor(virt_to_page(pmd));
	crst_table_free(mm, crst);
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2017-04-25 00:19:10 +08:00
|
|
|
/*
 * pgd_populate - point a pgd entry at a p4d table
 * @mm:  unused here
 * @pgd: entry to write
 * @p4d: table the entry should reference
 *
 * The entry is the physical address of @p4d combined with _REGION1_ENTRY.
 */
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
	pgd_val(*pgd) = __pa(p4d) | _REGION1_ENTRY;
}
|
|
|
|
|
|
|
|
/*
 * p4d_populate - point a p4d entry at a pud table
 * @mm:  unused here
 * @p4d: entry to write
 * @pud: table the entry should reference
 *
 * The entry is the physical address of @pud combined with _REGION2_ENTRY.
 */
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
	p4d_val(*p4d) = __pa(pud) | _REGION2_ENTRY;
}
|
|
|
|
|
2007-10-22 18:52:48 +08:00
|
|
|
/*
 * pud_populate - point a pud entry at a pmd table
 * @mm:  unused here
 * @pud: entry to write
 * @pmd: table the entry should reference
 *
 * The entry is the physical address of @pmd combined with _REGION3_ENTRY.
 */
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
	pud_val(*pud) = __pa(pmd) | _REGION3_ENTRY;
}
|
|
|
|
|
2007-10-22 18:52:47 +08:00
|
|
|
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
|
|
|
|
{
|
2016-02-15 21:46:49 +08:00
|
|
|
unsigned long *table = crst_table_alloc(mm);
|
|
|
|
|
|
|
|
if (!table)
|
|
|
|
return NULL;
|
|
|
|
if (mm->context.asce_limit == (1UL << 31)) {
|
|
|
|
/* Forking a compat process with 2 page table levels */
|
|
|
|
if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
|
|
|
|
crst_table_free(mm, table);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (pgd_t *) table;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * pgd_free - undo pgd_alloc()
 * @mm:  address space the table belongs to
 * @pgd: table to release
 *
 * For an mm whose asce_limit is exactly 2^31 the pmd page destructor is
 * run first, mirroring the constructor call made in pgd_alloc().
 */
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	const int two_levels = mm->context.asce_limit == (1UL << 31);

	if (two_levels)
		pgtable_pmd_page_dtor(virt_to_page(pgd));
	crst_table_free(mm, (unsigned long *) pgd);
}
|
|
|
|
|
2008-02-10 01:24:35 +08:00
|
|
|
static inline void pmd_populate(struct mm_struct *mm,
|
|
|
|
pmd_t *pmd, pgtable_t pte)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2011-05-23 16:24:40 +08:00
|
|
|
pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2008-02-10 01:24:35 +08:00
|
|
|
|
2011-05-23 16:24:40 +08:00
|
|
|
/* The kernel variant forwards to pmd_populate() with the same arguments. */
#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
|
|
|
|
|
2008-02-10 01:24:35 +08:00
|
|
|
/*
 * Recover the page table referenced by a pmd entry: mask the entry value
 * down to a multiple of the page table size (sizeof(pte_t) * PTRS_PER_PTE)
 * and cast the result to pgtable_t.
 *
 * The whole expansion is parenthesised so that a postfix operator written
 * after the macro (e.g. pmd_pgtable(pmd)[i]) applies to the cast result
 * rather than to the uncast integer expression.
 */
#define pmd_pgtable(pmd) \
	((pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* page table entry allocation/free routines.
|
|
|
|
*/
|
2014-04-30 22:04:25 +08:00
|
|
|
/* Allocate a page table via page_table_alloc(); vmaddr is not used. */
#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
|
|
|
|
/* Allocate a page table via page_table_alloc(); vmaddr is not used. */
#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
|
2008-02-10 01:24:35 +08:00
|
|
|
|
|
|
|
/*
 * Release a page table through page_table_free(). The pte argument is
 * parenthesised so the cast applies to the whole expression passed in,
 * not just to its first operand.
 */
#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) (pte))
#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) (pte))
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2010-10-25 22:10:11 +08:00
|
|
|
/* Declared here, defined elsewhere. NOTE(review): neither a definition nor a
 * caller is visible in this header - confirm the declaration is still needed. */
extern void rcu_table_freelist_finish(void);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif /* _S390_PGALLOC_H */
|