s390/mm: add no-dat TLB flush optimization

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
Martin Schwidefsky 2016-07-26 16:53:09 +02:00
parent c9b5ad546e
commit 118bd31bea
6 changed files with 129 additions and 42 deletions

View File

@ -952,15 +952,27 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_GLOBAL 0
#define IPTE_LOCAL 1
static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local)
#define IPTE_NODAT 0x400
static inline void __ptep_ipte(unsigned long address, pte_t *ptep,
unsigned long opt, int local)
{
unsigned long pto = (unsigned long) ptep;
/* Invalidation + TLB flush for the pte */
if (__builtin_constant_p(opt) && opt == 0) {
/* Invalidation + TLB flush for the pte */
asm volatile(
" .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
[m4] "i" (local));
return;
}
/* Invalidate ptes with options + TLB flush of the ptes */
asm volatile(
" .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
[m4] "i" (local));
" .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
: [r2] "+a" (address), [r3] "+a" (opt)
: [r1] "a" (pto), [m4] "i" (local) : "memory");
}
static inline void __ptep_ipte_range(unsigned long address, int nr,
@ -1341,31 +1353,36 @@ static inline void __pmdp_csp(pmd_t *pmdp)
#define IDTE_GLOBAL 0
#define IDTE_LOCAL 1
static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local)
#define IDTE_PTOA 0x0800
#define IDTE_NODAT 0x1000
static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
unsigned long opt, int local)
{
unsigned long sto;
sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t);
asm volatile(
" .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
: "+m" (*pmdp)
: [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)),
: [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt),
[m4] "i" (local)
: "cc" );
}
static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local)
static inline void __pudp_idte(unsigned long addr, pud_t *pudp,
unsigned long opt, int local)
{
unsigned long r3o;
r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t);
r3o |= _ASCE_TYPE_REGION3;
asm volatile(
" .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
: "+m" (*pudp)
: [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)),
: [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt),
[m4] "i" (local)
: "cc");
: "cc" );
}
pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);

View File

@ -29,8 +29,9 @@
#define MACHINE_FLAG_TE _BITUL(11)
#define MACHINE_FLAG_TLB_LC _BITUL(12)
#define MACHINE_FLAG_VX _BITUL(13)
#define MACHINE_FLAG_NX _BITUL(14)
#define MACHINE_FLAG_GS _BITUL(15)
#define MACHINE_FLAG_TLB_GUEST _BITUL(14)
#define MACHINE_FLAG_NX _BITUL(15)
#define MACHINE_FLAG_GS _BITUL(16)
#define LPP_MAGIC _BITUL(31)
#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL)
@ -68,6 +69,7 @@ extern void detect_memory_memblock(void);
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)

View File

@ -20,10 +20,13 @@ static inline void __tlb_flush_local(void)
*/
static inline void __tlb_flush_idte(unsigned long asce)
{
unsigned long opt;
opt = IDTE_PTOA;
/* Global TLB flush for the mm */
asm volatile(
" .insn rrf,0xb98e0000,0,%0,%1,0"
: : "a" (2048), "a" (asce) : "cc");
: : "a" (opt), "a" (asce) : "cc");
}
#ifdef CONFIG_SMP

View File

@ -328,7 +328,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr)
return;
}
for (i = 0; i < nr; i++) {
__ptep_ipte(address, pte, IPTE_GLOBAL);
__ptep_ipte(address, pte, 0, IPTE_GLOBAL);
address += PAGE_SIZE;
pte++;
}

View File

@ -25,6 +25,38 @@
#include <asm/mmu_context.h>
#include <asm/page-states.h>
static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
unsigned long opt, asce;
if (MACHINE_HAS_TLB_GUEST) {
opt = 0;
asce = READ_ONCE(mm->context.gmap_asce);
if (asce == 0UL)
opt |= IPTE_NODAT;
__ptep_ipte(addr, ptep, opt, IPTE_LOCAL);
} else {
__ptep_ipte(addr, ptep, 0, IPTE_LOCAL);
}
}
static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
unsigned long opt, asce;
if (MACHINE_HAS_TLB_GUEST) {
opt = 0;
asce = READ_ONCE(mm->context.gmap_asce);
if (asce == 0UL)
opt |= IPTE_NODAT;
__ptep_ipte(addr, ptep, opt, IPTE_GLOBAL);
} else {
__ptep_ipte(addr, ptep, 0, IPTE_GLOBAL);
}
}
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
@ -36,9 +68,9 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm,
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
__ptep_ipte(addr, ptep, IPTE_LOCAL);
ptep_ipte_local(mm, addr, ptep);
else
__ptep_ipte(addr, ptep, IPTE_GLOBAL);
ptep_ipte_global(mm, addr, ptep);
atomic_dec(&mm->context.flush_count);
return old;
}
@ -57,7 +89,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
pte_val(*ptep) |= _PAGE_INVALID;
mm->context.flush_mm = 1;
} else
__ptep_ipte(addr, ptep, IPTE_GLOBAL);
ptep_ipte_global(mm, addr, ptep);
atomic_dec(&mm->context.flush_count);
return old;
}
@ -290,6 +322,26 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(ptep_modify_prot_commit);
static inline void pmdp_idte_local(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
if (MACHINE_HAS_TLB_GUEST)
__pmdp_idte(addr, pmdp, IDTE_NODAT, IDTE_LOCAL);
else
__pmdp_idte(addr, pmdp, 0, IDTE_LOCAL);
}
static inline void pmdp_idte_global(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
if (MACHINE_HAS_TLB_GUEST)
__pmdp_idte(addr, pmdp, IDTE_NODAT, IDTE_GLOBAL);
else if (MACHINE_HAS_IDTE)
__pmdp_idte(addr, pmdp, 0, IDTE_GLOBAL);
else
__pmdp_csp(pmdp);
}
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
@ -298,16 +350,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
old = *pmdp;
if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
return old;
if (!MACHINE_HAS_IDTE) {
__pmdp_csp(pmdp);
return old;
}
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
__pmdp_idte(addr, pmdp, IDTE_LOCAL);
pmdp_idte_local(mm, addr, pmdp);
else
__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
pmdp_idte_global(mm, addr, pmdp);
atomic_dec(&mm->context.flush_count);
return old;
}
@ -325,10 +373,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
cpumask_of(smp_processor_id()))) {
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
mm->context.flush_mm = 1;
} else if (MACHINE_HAS_IDTE)
__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
else
__pmdp_csp(pmdp);
} else {
pmdp_idte_global(mm, addr, pmdp);
}
atomic_dec(&mm->context.flush_count);
return old;
}
@ -359,6 +406,30 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(pmdp_xchg_lazy);
static inline void pudp_idte_local(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
if (MACHINE_HAS_TLB_GUEST)
__pudp_idte(addr, pudp, IDTE_NODAT, IDTE_LOCAL);
else
__pudp_idte(addr, pudp, 0, IDTE_LOCAL);
}
static inline void pudp_idte_global(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
if (MACHINE_HAS_TLB_GUEST)
__pudp_idte(addr, pudp, IDTE_NODAT, IDTE_GLOBAL);
else if (MACHINE_HAS_IDTE)
__pudp_idte(addr, pudp, 0, IDTE_GLOBAL);
else
/*
* Invalid bit position is the same for pmd and pud, so we can
* re-use _pmd_csp() here
*/
__pmdp_csp((pmd_t *) pudp);
}
static inline pud_t pudp_flush_direct(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
@ -367,20 +438,12 @@ static inline pud_t pudp_flush_direct(struct mm_struct *mm,
old = *pudp;
if (pud_val(old) & _REGION_ENTRY_INVALID)
return old;
if (!MACHINE_HAS_IDTE) {
/*
* Invalid bit position is the same for pmd and pud, so we can
* re-use _pmd_csp() here
*/
__pmdp_csp((pmd_t *) pudp);
return old;
}
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
__pudp_idte(addr, pudp, IDTE_LOCAL);
pudp_idte_local(mm, addr, pudp);
else
__pudp_idte(addr, pudp, IDTE_GLOBAL);
pudp_idte_global(mm, addr, pudp);
atomic_dec(&mm->context.flush_count);
return old;
}
@ -645,7 +708,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
pte = *ptep;
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
pgste = pgste_pte_notify(mm, addr, ptep, pgste);
__ptep_ipte(addr, ptep, IPTE_GLOBAL);
ptep_ipte_global(mm, addr, ptep);
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
pte_val(pte) |= _PAGE_PROTECT;
else

View File

@ -39,7 +39,7 @@ struct read_info_sccb {
u8 fac84; /* 84 */
u8 fac85; /* 85 */
u8 _pad_86[91 - 86]; /* 86-90 */
u8 flags; /* 91 */
u8 fac91; /* 91 */
u8 _pad_92[98 - 92]; /* 92-97 */
u8 fac98; /* 98 */
u8 hamaxpow; /* 99 */
@ -103,6 +103,8 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb)
sclp.has_kss = !!(sccb->fac98 & 0x01);
if (sccb->fac85 & 0x02)
S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
if (sccb->fac91 & 0x40)
S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST;
sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
sclp.rzm <<= 20;
@ -139,7 +141,7 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb)
/* Save IPL information */
sclp_ipl_info.is_valid = 1;
if (sccb->flags & 0x2)
if (sccb->fac91 & 0x2)
sclp_ipl_info.has_dump = 1;
memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN);