powerpc/64s/radix: optimise pte_update
Implementing pte_update with pte_xchg (which uses cmpxchg) is inefficient: a single larx/stcx. sequence works fine, so there is no need for the less efficient cmpxchg loop. The memory barriers are also removed from the operation. TLB flushing is required to load mm_cpumask after the store that reduces pte permissions; that ordering requirement is now enforced in the TLB flush code itself. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
parent
f1cb8f9beb
commit
85bcfaf69c
|
@ -131,20 +131,21 @@ extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep
|
|||
static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
					       unsigned long set)
{
	__be64 old_be, tmp_be;

	/*
	 * Atomically compute new = (old | set) & ~clr on the PTE with a
	 * single larx/stcx. sequence — no cmpxchg loop and no memory
	 * barriers.  Per the commit description, the ordering of this
	 * store against the load of mm_cpumask is provided by a barrier
	 * in the radix TLB flush code, not here.
	 *
	 * PTEs are stored big-endian, so the set/clr masks are converted
	 * with cpu_to_be64() before being combined in the asm.
	 */
	__asm__ __volatile__(
	"1:	ldarx	%0,0,%3		# pte_update\n"
	"	andc	%1,%0,%5	\n"
	"	or	%1,%1,%4	\n"
	"	stdcx.	%1,0,%3		\n"
	"	bne-	1b"
	: "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
	: "r" (ptep), "r" (cpu_to_be64(set)), "r" (cpu_to_be64(clr))
	: "cc" );

	/* Return the pre-update PTE value in CPU byte order. */
	return be64_to_cpu(old_be);
}
|
||||
|
||||
|
||||
static inline unsigned long radix__pte_update(struct mm_struct *mm,
|
||||
unsigned long addr,
|
||||
pte_t *ptep, unsigned long clr,
|
||||
|
|
|
@ -57,8 +57,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
|||
* in switch_slb(), and/or the store of paca->mm_ctx_id in
|
||||
* copy_mm_to_paca().
|
||||
*
|
||||
* On the read side the barrier is in pte_xchg(), which orders
|
||||
* the store to the PTE vs the load of mm_cpumask.
|
||||
* On the other side, the barrier is in mm/tlb-radix.c for
|
||||
* radix which orders earlier stores to clear the PTEs vs
|
||||
* the load of mm_cpumask. And pte_xchg which does the same
|
||||
* thing for hash.
|
||||
*
|
||||
* This full barrier is needed by membarrier when switching
|
||||
* between processes after store to rq->curr, before user-space
|
||||
|
|
|
@ -524,6 +524,11 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
|
|||
return;
|
||||
|
||||
preempt_disable();
|
||||
/*
|
||||
* Order loads of mm_cpumask vs previous stores to clear ptes before
|
||||
* the invalidate. See barrier in switch_mm_irqs_off
|
||||
*/
|
||||
smp_mb();
|
||||
if (!mm_is_thread_local(mm)) {
|
||||
if (mm_needs_flush_escalation(mm))
|
||||
_tlbie_pid(pid, RIC_FLUSH_ALL);
|
||||
|
@ -544,6 +549,7 @@ void radix__flush_all_mm(struct mm_struct *mm)
|
|||
return;
|
||||
|
||||
preempt_disable();
|
||||
smp_mb(); /* see radix__flush_tlb_mm */
|
||||
if (!mm_is_thread_local(mm))
|
||||
_tlbie_pid(pid, RIC_FLUSH_ALL);
|
||||
else
|
||||
|
@ -568,6 +574,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
|
|||
return;
|
||||
|
||||
preempt_disable();
|
||||
smp_mb(); /* see radix__flush_tlb_mm */
|
||||
if (!mm_is_thread_local(mm))
|
||||
_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
|
||||
else
|
||||
|
@ -630,6 +637,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
|
|||
return;
|
||||
|
||||
preempt_disable();
|
||||
smp_mb(); /* see radix__flush_tlb_mm */
|
||||
if (mm_is_thread_local(mm)) {
|
||||
local = true;
|
||||
full = (end == TLB_FLUSH_ALL ||
|
||||
|
@ -791,6 +799,7 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
|
|||
return;
|
||||
|
||||
preempt_disable();
|
||||
smp_mb(); /* see radix__flush_tlb_mm */
|
||||
if (mm_is_thread_local(mm)) {
|
||||
local = true;
|
||||
full = (end == TLB_FLUSH_ALL ||
|
||||
|
@ -849,7 +858,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
|
|||
|
||||
/* Otherwise first do the PWC, then iterate the pages. */
|
||||
preempt_disable();
|
||||
|
||||
smp_mb(); /* see radix__flush_tlb_mm */
|
||||
if (mm_is_thread_local(mm)) {
|
||||
_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue