powerpc/mm: Switch to generic RCU get_user_pages_fast
This patch switches the ppc arch to use the generic RCU-based gup implementation. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
parent
f30c59e921
commit
b30e759072
|
@ -149,6 +149,7 @@ config PPC
|
|||
select ARCH_SUPPORTS_ATOMIC_RMW
|
||||
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
|
||||
select NO_BOOTMEM
|
||||
select HAVE_GENERIC_RCU_GUP
|
||||
|
||||
config GENERIC_CSUM
|
||||
def_bool CPU_LITTLE_ENDIAN
|
||||
|
|
|
@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd)
|
|||
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||
|
||||
|
||||
static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
|
||||
static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
|
||||
unsigned pdshift)
|
||||
{
|
||||
/*
|
||||
|
@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
|
|||
*/
|
||||
unsigned long idx = 0;
|
||||
|
||||
pte_t *dir = hugepd_page(*hpdp);
|
||||
pte_t *dir = hugepd_page(hpd);
|
||||
#ifndef CONFIG_PPC_FSL_BOOK3E
|
||||
idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
|
||||
idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
|
||||
#endif
|
||||
|
||||
return dir + idx;
|
||||
|
@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
|
|||
}
|
||||
|
||||
#define hugepd_shift(x) 0
|
||||
static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
|
||||
static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
|
||||
unsigned pdshift)
|
||||
{
|
||||
return 0;
|
||||
|
|
|
@ -379,13 +379,14 @@ static inline int hugepd_ok(hugepd_t hpd)
|
|||
}
|
||||
#endif
|
||||
|
||||
#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep))))
|
||||
#define is_hugepd(hpd) (hugepd_ok(hpd))
|
||||
#define pgd_huge pgd_huge
|
||||
int pgd_huge(pgd_t pgd);
|
||||
#else /* CONFIG_HUGETLB_PAGE */
|
||||
#define is_hugepd(pdep) 0
|
||||
#define pgd_huge(pgd) 0
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
#define __hugepd(x) ((hugepd_t) { (x) })
|
||||
|
||||
struct page;
|
||||
extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
|
||||
|
|
|
@ -600,6 +600,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
|
|||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
|
||||
|
|
|
@ -274,11 +274,9 @@ extern void paging_init(void);
|
|||
*/
|
||||
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
|
||||
|
||||
extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr);
|
||||
|
||||
extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr);
|
||||
unsigned long end, int write,
|
||||
struct page **pages, int *nr);
|
||||
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
#define pmd_large(pmd) 0
|
||||
#define has_transparent_hugepage() 0
|
||||
|
|
|
@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
|
|||
|
||||
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
|
||||
|
||||
obj-y := fault.o mem.o pgtable.o gup.o mmap.o \
|
||||
obj-y := fault.o mem.o pgtable.o mmap.o \
|
||||
init_$(CONFIG_WORD_SIZE).o \
|
||||
pgtable_$(CONFIG_WORD_SIZE).o
|
||||
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
|
||||
|
|
|
@ -1,235 +0,0 @@
|
|||
/*
|
||||
* Lockless get_user_pages_fast for powerpc
|
||||
*
|
||||
* Copyright (C) 2008 Nick Piggin
|
||||
* Copyright (C) 2008 Novell Inc.
|
||||
*/
|
||||
#undef DEBUG
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/vmstat.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
#ifdef __HAVE_ARCH_PTE_SPECIAL
|
||||
|
||||
/*
|
||||
* The performance critical leaf functions are made noinline otherwise gcc
|
||||
* inlines everything into a single function which results in too much
|
||||
* register pressure.
|
||||
*/
|
||||
/*
 * gup_pte_range - collect the pages mapped by the ptes under one pmd entry.
 *
 * @pmd:   snapshot of the pmd entry whose pte table is walked
 * @addr:  first address to process
 * @end:   address at which the walk stops (exclusive)
 * @write: non-zero if the caller needs write access; adds _PAGE_RW to the
 *         bits every pte must have
 * @pages: output array; each accepted page is stored at pages[*nr]
 * @nr:    in/out count of entries already stored in @pages
 *
 * Walks [addr, end) in PAGE_SIZE steps. Returns 1 if every pte in the range
 * was accepted, 0 as soon as one is rejected. On a 0 return, entries already
 * stored in @pages and counted in *@nr are left in place.
 */
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long mask, result;
|
||||
pte_t *ptep;
|
||||
|
||||
result = _PAGE_PRESENT|_PAGE_USER;
|
||||
if (write)
|
||||
result |= _PAGE_RW;
|
||||
/* _PAGE_SPECIAL is in mask but not in result, so a pte with it set fails the compare below */
mask = result | _PAGE_SPECIAL;
|
||||
|
||||
ptep = pte_offset_kernel(&pmd, addr);
|
||||
do {
|
||||
/* Work on a single snapshot of the pte; it is re-read below to detect changes */
pte_t pte = ACCESS_ONCE(*ptep);
|
||||
struct page *page;
|
||||
/*
|
||||
* Similar to the PMD case, NUMA hinting must take slow path
|
||||
*/
|
||||
if (pte_numa(pte))
|
||||
return 0;
|
||||
|
||||
if ((pte_val(pte) & mask) != result)
|
||||
return 0;
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
page = pte_page(pte);
|
||||
if (!page_cache_get_speculative(page))
|
||||
return 0;
|
||||
/* The pte no longer matches the snapshot: drop the reference just taken and fail */
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
|
||||
put_page(page);
|
||||
return 0;
|
||||
}
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
|
||||
} while (ptep++, addr += PAGE_SIZE, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * gup_pmd_range - walk the pmd entries under one pud entry.
 *
 * @pud:   snapshot of the pud entry whose pmd table is walked
 * @addr:  first address to process
 * @end:   address at which the walk stops (exclusive)
 * @write: passed through unchanged to the leaf helpers
 * @pages: output array, passed through to the leaf helpers
 * @nr:    in/out count of entries stored in @pages
 *
 * Each pmd entry is dispatched to gup_hugepte() (pmd_huge/pmd_large),
 * gup_hugepd() (is_hugepd) or gup_pte_range() (everything else).
 * Returns 1 if the whole range was handled, 0 as soon as an entry is
 * empty, is a splitting huge page, is a NUMA-hinting huge entry, or a
 * helper returns 0.
 */
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pmd_t *pmdp;
|
||||
|
||||
pmdp = pmd_offset(&pud, addr);
|
||||
do {
|
||||
pmd_t pmd = ACCESS_ONCE(*pmdp);
|
||||
|
||||
next = pmd_addr_end(addr, end);
|
||||
/*
|
||||
* If we find a splitting transparent hugepage we
|
||||
* return zero. That will result in taking the slow
|
||||
* path which will call wait_split_huge_page()
|
||||
* if the pmd is still in splitting state
|
||||
*/
|
||||
if (pmd_none(pmd) || pmd_trans_splitting(pmd))
|
||||
return 0;
|
||||
if (pmd_huge(pmd) || pmd_large(pmd)) {
|
||||
/*
|
||||
* NUMA hinting faults need to be handled in the GUP
|
||||
* slowpath for accounting purposes and so that they
|
||||
* can be serialised against THP migration.
|
||||
*/
|
||||
if (pmd_numa(pmd))
|
||||
return 0;
|
||||
|
||||
if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
|
||||
write, pages, nr))
|
||||
return 0;
|
||||
} else if (is_hugepd(pmdp)) {
|
||||
if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
|
||||
addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} while (pmdp++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * gup_pud_range - walk the pud entries under one pgd entry.
 *
 * @pgd:   snapshot of the pgd entry whose pud table is walked
 * @addr:  first address to process
 * @end:   address at which the walk stops (exclusive)
 * @write: passed through unchanged to the lower-level helpers
 * @pages: output array, passed through to the lower-level helpers
 * @nr:    in/out count of entries stored in @pages
 *
 * Each pud entry is dispatched to gup_hugepte() (pud_huge), gup_hugepd()
 * (is_hugepd) or gup_pmd_range() (everything else). Returns 1 if the whole
 * range was handled, 0 as soon as an entry is empty or a helper returns 0.
 */
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pud_t *pudp;
|
||||
|
||||
pudp = pud_offset(&pgd, addr);
|
||||
do {
|
||||
pud_t pud = ACCESS_ONCE(*pudp);
|
||||
|
||||
next = pud_addr_end(addr, end);
|
||||
if (pud_none(pud))
|
||||
return 0;
|
||||
if (pud_huge(pud)) {
|
||||
if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next,
|
||||
write, pages, nr))
|
||||
return 0;
|
||||
} else if (is_hugepd(pudp)) {
|
||||
if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
|
||||
addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} while (pudp++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * __get_user_pages_fast - page-table walk of current->mm with local
 * interrupts disabled; no fallback to get_user_pages().
 *
 * @start:    user address; rounded down with PAGE_MASK before use
 * @nr_pages: number of pages requested, starting at @start
 * @write:    non-zero if write access is required (selects VERIFY_WRITE
 *            for access_ok() and is passed to the walk helpers)
 * @pages:    output array filled with the pages found
 *
 * Returns the number of entries stored in @pages, which may be fewer than
 * @nr_pages (including 0) if access_ok() fails or the walk stops early.
 * Never returns a negative value.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
struct page **pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long addr, len, end;
|
||||
unsigned long next;
|
||||
unsigned long flags;
|
||||
pgd_t *pgdp;
|
||||
int nr = 0;
|
||||
|
||||
pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
|
||||
|
||||
start &= PAGE_MASK;
|
||||
addr = start;
|
||||
/* nr_pages is widened to unsigned long before the shift */
len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
end = start + len;
|
||||
|
||||
if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
|
||||
start, len)))
|
||||
return 0;
|
||||
|
||||
pr_devel(" aligned: %lx .. %lx\n", start, end);
|
||||
|
||||
/*
|
||||
* XXX: batch / limit 'nr', to avoid large irq off latency
|
||||
* needs some instrumenting to determine the common sizes used by
|
||||
* important workloads (eg. DB2), and whether limiting the batch size
|
||||
* will decrease performance.
|
||||
*
|
||||
* It seems like we're in the clear for the moment. Direct-IO is
|
||||
* the main guy that batches up lots of get_user_pages, and even
|
||||
* they are limited to 64-at-a-time which is not so many.
|
||||
*/
|
||||
/*
|
||||
* This doesn't prevent pagetable teardown, but does prevent
|
||||
* the pagetables from being freed on powerpc.
|
||||
*
|
||||
* So long as we atomically load page table pointers versus teardown,
|
||||
* we can follow the address down to the page and take a ref on it.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
do {
|
||||
pgd_t pgd = ACCESS_ONCE(*pgdp);
|
||||
|
||||
pr_devel(" %016lx: normal pgd %p\n", addr,
|
||||
(void *)pgd_val(pgd));
|
||||
next = pgd_addr_end(addr, end);
|
||||
/* Any failure below leaves the loop with nr holding the pages collected so far */
if (pgd_none(pgd))
|
||||
break;
|
||||
if (pgd_huge(pgd)) {
|
||||
if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
|
||||
write, pages, &nr))
|
||||
break;
|
||||
} else if (is_hugepd(pgdp)) {
|
||||
if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
|
||||
addr, next, write, pages, &nr))
|
||||
break;
|
||||
} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
|
||||
break;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
/*
 * get_user_pages_fast - try __get_user_pages_fast() first, then fall back
 * to get_user_pages() for whatever it could not supply.
 *
 * @start:    user address; rounded down with PAGE_MASK before use
 * @nr_pages: number of pages requested
 * @write:    non-zero if write access is required
 * @pages:    output array filled with the pages obtained
 *
 * Return value:
 *  - if the fast path supplied all @nr_pages, that count;
 *  - otherwise, if the fast path supplied nr > 0 pages: nr when the
 *    get_user_pages() call returned a negative value, else nr plus its
 *    return value;
 *  - otherwise (fast path supplied nothing): the get_user_pages() return
 *    value unchanged, which may be negative.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
struct page **pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
int nr, ret;
|
||||
|
||||
start &= PAGE_MASK;
|
||||
nr = __get_user_pages_fast(start, nr_pages, write, pages);
|
||||
ret = nr;
|
||||
|
||||
if (nr < nr_pages) {
|
||||
pr_devel(" slow path ! nr = %d\n", nr);
|
||||
|
||||
/* Try to get the remaining pages with get_user_pages */
|
||||
/*
 * NOTE(review): nr is an int, so this shift is evaluated in int before
 * being added to start; __get_user_pages_fast() casts to unsigned long
 * before the equivalent shift. Confirm nr cannot be large enough to
 * overflow here.
 */
start += nr << PAGE_SHIFT;
|
||||
pages += nr;
|
||||
|
||||
/* The get_user_pages() call is made with mm->mmap_sem held for read */
down_read(&mm->mmap_sem);
|
||||
ret = get_user_pages(current, mm, start,
|
||||
nr_pages - nr, write, 0, pages, NULL);
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
/* Have to be a bit careful with return values */
|
||||
if (nr > 0) {
|
||||
if (ret < 0)
|
||||
ret = nr;
|
||||
else
|
||||
ret += nr;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* __HAVE_ARCH_PTE_SPECIAL */
|
|
@ -233,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
|
|||
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
|
||||
return NULL;
|
||||
|
||||
return hugepte_offset(hpdp, addr, pdshift);
|
||||
return hugepte_offset(*hpdp, addr, pdshift);
|
||||
}
|
||||
|
||||
#else
|
||||
|
@ -273,7 +273,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
|
|||
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
|
||||
return NULL;
|
||||
|
||||
return hugepte_offset(hpdp, addr, pdshift);
|
||||
return hugepte_offset(*hpdp, addr, pdshift);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -541,7 +541,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
|
|||
do {
|
||||
pmd = pmd_offset(pud, addr);
|
||||
next = pmd_addr_end(addr, end);
|
||||
if (!is_hugepd(pmd)) {
|
||||
if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
|
||||
/*
|
||||
* if it is not hugepd pointer, we should already find
|
||||
* it cleared.
|
||||
|
@ -590,7 +590,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
|
|||
do {
|
||||
pud = pud_offset(pgd, addr);
|
||||
next = pud_addr_end(addr, end);
|
||||
if (!is_hugepd(pud)) {
|
||||
if (!is_hugepd(__hugepd(pud_val(*pud)))) {
|
||||
if (pud_none_or_clear_bad(pud))
|
||||
continue;
|
||||
hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
|
||||
|
@ -656,7 +656,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
|
|||
do {
|
||||
next = pgd_addr_end(addr, end);
|
||||
pgd = pgd_offset(tlb->mm, addr);
|
||||
if (!is_hugepd(pgd)) {
|
||||
if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
|
||||
if (pgd_none_or_clear_bad(pgd))
|
||||
continue;
|
||||
hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
|
||||
|
@ -716,12 +716,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
|
|||
return (__boundary - 1 < end - 1) ? __boundary : end;
|
||||
}
|
||||
|
||||
int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
|
||||
unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
pte_t *ptep;
|
||||
unsigned long sz = 1UL << hugepd_shift(*hugepd);
|
||||
unsigned long sz = 1UL << hugepd_shift(hugepd);
|
||||
unsigned long next;
|
||||
|
||||
ptep = hugepte_offset(hugepd, addr, pdshift);
|
||||
|
@ -964,7 +963,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
|
|||
else if (pgd_huge(pgd)) {
|
||||
ret_pte = (pte_t *) pgdp;
|
||||
goto out;
|
||||
} else if (is_hugepd(&pgd))
|
||||
} else if (is_hugepd(__hugepd(pgd_val(pgd))))
|
||||
hpdp = (hugepd_t *)&pgd;
|
||||
else {
|
||||
/*
|
||||
|
@ -981,7 +980,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
|
|||
else if (pud_huge(pud)) {
|
||||
ret_pte = (pte_t *) pudp;
|
||||
goto out;
|
||||
} else if (is_hugepd(&pud))
|
||||
} else if (is_hugepd(__hugepd(pud_val(pud))))
|
||||
hpdp = (hugepd_t *)&pud;
|
||||
else {
|
||||
pdshift = PMD_SHIFT;
|
||||
|
@ -1002,7 +1001,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
|
|||
if (pmd_huge(pmd) || pmd_large(pmd)) {
|
||||
ret_pte = (pte_t *) pmdp;
|
||||
goto out;
|
||||
} else if (is_hugepd(&pmd))
|
||||
} else if (is_hugepd(__hugepd(pmd_val(pmd))))
|
||||
hpdp = (hugepd_t *)&pmd;
|
||||
else
|
||||
return pte_offset_kernel(&pmd, ea);
|
||||
|
@ -1011,7 +1010,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
|
|||
if (!hpdp)
|
||||
return NULL;
|
||||
|
||||
ret_pte = hugepte_offset(hpdp, ea, pdshift);
|
||||
ret_pte = hugepte_offset(*hpdp, ea, pdshift);
|
||||
pdshift = hugepd_shift(*hpdp);
|
||||
out:
|
||||
if (shift)
|
||||
|
@ -1041,14 +1040,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
|||
if ((pte_val(pte) & mask) != mask)
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
/*
|
||||
* check for splitting here
|
||||
*/
|
||||
if (pmd_trans_splitting(pte_pmd(pte)))
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
/* hugepages are never "special" */
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
|
||||
|
|
Loading…
Reference in New Issue