2005-04-17 06:20:36 +08:00
|
|
|
#ifndef __MMU_H
|
|
|
|
#define __MMU_H
|
|
|
|
|
2014-04-03 19:55:01 +08:00
|
|
|
#include <linux/cpumask.h>
|
2012-03-30 15:40:55 +08:00
|
|
|
#include <linux/errno.h>
|
|
|
|
|
2008-02-10 01:24:35 +08:00
|
|
|
typedef struct {
|
2014-04-03 19:55:01 +08:00
|
|
|
cpumask_t cpu_attach_mask;
|
2016-05-25 15:45:26 +08:00
|
|
|
atomic_t flush_count;
|
2010-08-24 15:26:21 +08:00
|
|
|
unsigned int flush_mm;
|
2016-03-08 18:54:14 +08:00
|
|
|
spinlock_t pgtable_lock;
|
2008-02-10 01:24:35 +08:00
|
|
|
struct list_head pgtable_list;
|
2016-03-08 18:54:14 +08:00
|
|
|
spinlock_t gmap_lock;
|
2011-07-24 16:48:20 +08:00
|
|
|
struct list_head gmap_list;
|
2016-06-13 16:36:00 +08:00
|
|
|
unsigned long gmap_asce;
|
s390/mm: fix asce_bits handling with dynamic pagetable levels
There is a race with multi-threaded applications between context switch and
pagetable upgrade. In switch_mm() a new user_asce is built from mm->pgd and
mm->context.asce_bits, w/o holding any locks. A concurrent mmap with a
pagetable upgrade on another thread in crst_table_upgrade() could already
have set new asce_bits, but not yet the new mm->pgd. This would result in a
corrupt user_asce in switch_mm(), and eventually in a kernel panic from a
translation exception.
Fix this by storing the complete asce instead of just the asce_bits, which
can then be read atomically from switch_mm(), so that it either sees the
old value or the new value, but no mixture. Both cases are OK. Having the
old value would result in a page fault on access to the higher level memory,
but the fault handler would see the new mm->pgd, if it was a valid access
after the mmap on the other thread has completed. So as worst-case scenario
we would have a page fault loop for the racing thread until the next time
slice.
Also remove dead code and simplify the upgrade/downgrade path, there are no
upgrades from 2 levels, and only downgrades from 3 levels for compat tasks.
There are also no concurrent upgrades, because the mmap_sem is held with
down_write() in do_mmap, so the flush and table checks during upgrade can
be removed.
Reported-by: Michael Munday <munday@ca.ibm.com>
Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-04-15 22:38:40 +08:00
|
|
|
unsigned long asce;
|
2008-02-10 01:24:37 +08:00
|
|
|
unsigned long asce_limit;
|
2008-12-25 20:38:36 +08:00
|
|
|
unsigned long vdso_base;
|
2015-04-15 19:23:26 +08:00
|
|
|
/* The mmu context allocates 4K page tables. */
|
|
|
|
unsigned int alloc_pgste:1;
|
|
|
|
/* The mmu context uses extended page tables. */
|
2011-05-23 16:24:40 +08:00
|
|
|
unsigned int has_pgste:1;
|
2014-01-14 22:02:11 +08:00
|
|
|
/* The mmu context uses storage keys. */
|
|
|
|
unsigned int use_skey:1;
|
2008-02-10 01:24:35 +08:00
|
|
|
} mm_context_t;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-03-08 18:54:14 +08:00
|
|
|
/*
 * Static initializer fragment for the spinlocks and list heads inside the
 * ".context" member of the object called @name.
 *
 * Expands to a list of designated initializers that ends with a trailing
 * comma, so it is meant to be placed inside the initializer of @name.
 * @name is deliberately not parenthesized: it is used both as a member
 * access prefix and as the argument of the lock/list init helpers.
 * Members not listed here are left to default (zero) initialization.
 */
#define INIT_MM_CONTEXT(name)						   \
	.context.pgtable_lock =						   \
			__SPIN_LOCK_UNLOCKED(name.context.pgtable_lock),   \
	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
	.context.gmap_lock = __SPIN_LOCK_UNLOCKED(name.context.gmap_lock), \
	.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
|
2010-08-10 08:18:28 +08:00
|
|
|
|
2012-03-29 01:30:02 +08:00
|
|
|
static inline int tprot(unsigned long addr)
|
|
|
|
{
|
|
|
|
int rc = -EFAULT;
|
|
|
|
|
|
|
|
asm volatile(
|
|
|
|
" tprot 0(%1),0\n"
|
|
|
|
"0: ipm %0\n"
|
|
|
|
" srl %0,28\n"
|
|
|
|
"1:\n"
|
|
|
|
EX_TABLE(0b,1b)
|
|
|
|
: "+d" (rc) : "a" (addr) : "cc");
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|