linux/include/asm-powerpc/pgtable-4k.h

/*
 * Entries per page directory level.  The PTE level must use a 64b record
 * for each page table entry.  The PMD and PGD level use a 32b record for
 * each entry by assuming that each entry is page aligned.
 */
/*
 * log2 of the number of entries held by one table at each level; the
 * PTRS_PER_* counts and the *_SHIFT values are built from these.  The four
 * levels together translate 9 + 7 + 7 + 9 = 32 address bits above the
 * page offset.
 */
#define PTE_INDEX_SIZE  9
#define PMD_INDEX_SIZE  7
#define PUD_INDEX_SIZE  7
#define PGD_INDEX_SIZE  9

/*
 * Size in bytes of one table at each level: the size of a single entry
 * multiplied by the number of entries (entry size << index bits).
 */
#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)

/*
 * Number of entries in one table at each level (1 << *_INDEX_SIZE).
 * Each count is derived from the index size of its own level so that the
 * levels can be resized independently.
 */
#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)

/*
 * PMD_SHIFT determines what a second-level page table entry can map:
 * one complete PTE table, i.e. PTRS_PER_PTE pages.  PMD_SIZE is that span
 * in bytes and PMD_MASK clears the offset bits within it.
 * NOTE(review): PAGE_SHIFT is defined elsewhere; if it is 12 for this 4k
 * configuration, PMD_SHIFT is 21 (2MB per PMD entry) -- confirm.
 */
#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE-1))

/*
 * With 4k base page size, hugepage PTEs go at the PMD level, so
 * MIN_HUGEPTE_SHIFT is simply the span of one PMD entry.
 */
#define MIN_HUGEPTE_SHIFT	PMD_SHIFT

/*
 * PUD_SHIFT determines what a third-level page table entry can map:
 * one complete PMD table, hence PMD_SHIFT plus the PMD index bits.
 * PUD_SIZE is that span in bytes and PUD_MASK clears the offset bits
 * within it.
 */
#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
#define PUD_SIZE	(1UL << PUD_SHIFT)
#define PUD_MASK	(~(PUD_SIZE-1))

/*
 * PGDIR_SHIFT determines what a fourth-level page table entry can map:
 * one complete PUD table, hence PUD_SHIFT plus the PUD index bits.
 * PGDIR_SIZE is that span in bytes and PGDIR_MASK clears the offset bits
 * within it.
 */
#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE-1))

/*
 * PTE bits
 *
 * _PAGE_SECONDARY (0x8000, bit 15) and _PAGE_GROUP_IX (0x7000, bits 12-14)
 * are software bits that record where the HPTE for this PTE was placed.
 * _PAGE_F_SECOND and _PAGE_F_GIX are aliases for the same two masks.
 */
#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
#define _PAGE_GROUP_IX  0x7000 /* software: HPTE index within group */
#define _PAGE_F_SECOND  _PAGE_SECONDARY
#define _PAGE_F_GIX     _PAGE_GROUP_IX

/*
 * PTE flags to conserve for HPTE identification.
 * _PAGE_BUSY and _PAGE_HASHPTE are not defined in this file.
 */
#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
			 _PAGE_SECONDARY | _PAGE_GROUP_IX)

/*
 * PAGE_MASK gives the right answer below, but only by accident.
 * It should be preserving the high 48 bits and then specifically
 * preserving _PAGE_SECONDARY | _PAGE_GROUP_IX.
 *
 * NOTE(review): PAGE_MASK is defined elsewhere; presumably it keeps bits 12
 * and above for 4k pages, which happens to cover the 0xf000 software bits
 * as well as the bits above them -- confirm before relying on it.
 */
#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \
                         _PAGE_HPTEFLAGS)

/*
 * Low-order bits to strip from a directory entry before using it as the
 * address of the next-level table.  All three are zero here, so nothing is
 * stripped.  Within this file only PGD_MASKED_BITS is consumed, by
 * pgd_page_vaddr().
 */
/* Bits to mask out from a PMD to get to the PTE page */
#define PMD_MASKED_BITS		0
/* Bits to mask out from a PUD to get to the PMD page */
#define PUD_MASKED_BITS		0
/* Bits to mask out from a PGD to get to the PUD page */
#define PGD_MASKED_BITS		0

/*
 * Shift to put page number into pte: the page number is shifted left by
 * this many bits when a PTE value is built.  The code that applies the
 * shift is not in this file.
 */
#define PTE_RPN_SHIFT	(17)

/*
 * Conversions between a pte_t and its "real" form (real_pte_t).
 *
 * __real_pte(e,p)   - build the real form from pte value e; the second
 *                     argument p is ignored in both variants.
 * __rpte_to_pte(r)  - recover the pte_t from the real form.
 */
#ifdef STRICT_MM_TYPECHECKS
/* Strict build: real_pte_t is an aggregate whose .pte member holds the pte. */
#define __real_pte(e,p)		((real_pte_t){(e)})
#define __rpte_to_pte(r)	((r).pte)
#else
/* Non-strict build: the real form is the value itself, rewrapped via __pte(). */
#define __real_pte(e,p)		(e)
#define __rpte_to_pte(r)	(__pte(r))
#endif
/*
 * Hash index information for a real pte: the pte value shifted right by 12,
 * which moves the _PAGE_GROUP_IX/_PAGE_SECONDARY bits (0x7000/0x8000) down
 * to bits 0-3.  The index argument is ignored, and the result is not masked,
 * so any bits above bit 15 of the pte remain in it.
 * NOTE(review): callers presumably mask the result themselves -- confirm.
 */
#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> 12)

/*
 * pte_iterate_hashed_subpages() / pte_iterate_hashed_end() bracket a block
 * of caller code.  The opening macro deliberately leaves a "do {" open and
 * the closing macro supplies the matching "} while(0)", so the two must
 * always be used as a pair and the enclosed body runs exactly once.
 *
 * Before the body runs, index is set to 0 and shift to
 * mmu_psize_defs[psize].shift.  The rpte and va arguments are not used by
 * this variant.
 *
 * The trailing backslash on the last line of the opening macro continues
 * onto the blank line that follows it; keep that blank line in place.
 */
#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
	do {							         \
		index = 0;					         \
		shift = mmu_psize_defs[psize].shift;		         \

#define pte_iterate_hashed_end() } while(0)

/* Page size index for a pte: always MMU_PAGE_4K here; the argument is ignored. */
#define pte_pagesize_index(pte)	MMU_PAGE_4K

/*
 * 4-level page tables related bits: helpers for top-level (PGD) entries.
 *
 * An entry counts as empty when its value is zero and as present when it
 * is non-zero.  Note that pgd_bad() is true exactly when pgd_none() is.
 */

/* Emptiness / validity tests on a PGD entry value. */
#define pgd_none(pgd)		(pgd_val(pgd) == 0)
#define pgd_bad(pgd)		(!pgd_val(pgd))
#define pgd_present(pgd)	(!pgd_none(pgd))

/* Zero the entry that pgdp points to. */
#define pgd_clear(pgdp)		(pgd_val(*(pgdp)) = 0)

/* Address held in the entry, with PGD_MASKED_BITS cleared. */
#define pgd_page_vaddr(pgd)	(pgd_val(pgd) & ~PGD_MASKED_BITS)

/* The same address passed through virt_to_page(). */
#define pgd_page(pgd)		virt_to_page(pgd_page_vaddr(pgd))

/*
 * pud_offset - locate the PUD entry for an address
 * @pgdp: pointer to the PGD entry whose table is to be indexed
 * @addr: address being looked up
 *
 * Treats pgd_page_vaddr(*pgdp) as the base of an array of pud_t and
 * returns a pointer to the element selected by the address bits starting
 * at PUD_SHIFT, masked to PTRS_PER_PUD entries.
 */
#define pud_offset(pgdp, addr)	\
  (&((pud_t *) pgd_page_vaddr(*(pgdp))) \
    [((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)])

/* Print a "bad pud" message with the source file, line and raw entry value. */
#define pud_ERROR(e) \
	printk("%s:%d: bad pud %08lx.\n", \
	       __FILE__, __LINE__, pud_val(e))
[PATCH] ppc64: support 64k pages Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper way to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-11-07 08:06:55 +08:00			`/*`
			`* Entries per page directory level. The PTE level must use a 64b record`
			`* for each page table entry. The PMD and PGD level use a 32b record for`
			`* each entry by assuming that each entry is page aligned.`
			`*/`
			`#define PTE_INDEX_SIZE 9`
			`#define PMD_INDEX_SIZE 7`
			`#define PUD_INDEX_SIZE 7`
			`#define PGD_INDEX_SIZE 9`

			`#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)`
			`#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)`
			`#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)`
			`#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)`

			`#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)`
			`#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)`
			`#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE)`
			`#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)`

			`/* PMD_SHIFT determines what a second-level page table entry can map */`
			`#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)`
			`#define PMD_SIZE (1UL << PMD_SHIFT)`
			`#define PMD_MASK (~(PMD_SIZE-1))`

[PATCH] ppc64: Fix bug in SLB miss handler for hugepages This patch, however, should be applied on top of the 64k-page-size patch to fix some problems with hugepage (some pre-existing, another introduced by this patch). The patch fixes a bug in the SLB miss handler for hugepages on ppc64 introduced by the dynamic hugepage patch (commit id c594adad5653491813959277fb87a2fef54c4e05) due to a misunderstanding of the srd instruction's behaviour (mea culpa). The problem arises when a 64-bit process maps some hugepages in the low 4GB of the address space (unusual). In this case, as well as the 256M segment in question being marked for hugepages, other segments at 32G intervals will be incorrectly marked for hugepages. In the process, this patch tweaks the semantics of the hugepage bitmaps to be more sensible. Previously, an address below 4G was marked for hugepages if the appropriate segment bit in the "low areas" bitmask was set or if the low bit in the "high areas" bitmap was set (which would mark all addresses below 1TB for hugepage). With this patch, any given address is governed by a single bitmap. Addresses below 4GB are marked for hugepage if and only if their bit is set in the "low areas" bitmap (256M granularity). Addresses between 4GB and 1TB are marked for hugepage iff the low bit in the "high areas" bitmap is set. Higher addresses are marked for hugepage iff their bit in the "high areas" bitmap is set (1TB granularity). To avoid conflicts, this patch must be applied on top of BenH's pending patch for 64k base page size [0]. As such, this patch also addresses a hugepage problem introduced by that patch. That patch allows hugepages of 1MB in size on hardware which supports it, however, that won't work when using 4k pages (4 level pagetable), because in that case hugepage PTEs are stored at the PMD level, and each PMD entry maps 2MB. This patch simply disallows hugepages in that case (we can do something cleverer to re-enable them some other day). 
Built, booted, and a handful of hugepage related tests passed on POWER5 LPAR (both ARCH=powerpc and ARCH=ppc64). [0] http://gate.crashing.org/~benh/ppc64-64k-pages.diff Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-11-07 16:57:52 +08:00			`/* With 4k base page size, hugepage PTEs go at the PMD level */`
			`#define MIN_HUGEPTE_SHIFT PMD_SHIFT`

[PATCH] ppc64: support 64k pages Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper way to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-11-07 08:06:55 +08:00			`/* PUD_SHIFT determines what a third-level page table entry can map */`
			`#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)`
			`#define PUD_SIZE (1UL << PUD_SHIFT)`
			`#define PUD_MASK (~(PUD_SIZE-1))`

			`/* PGDIR_SHIFT determines what a fourth-level page table entry can map */`
			`#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)`
			`#define PGDIR_SIZE (1UL << PGDIR_SHIFT)`
			`#define PGDIR_MASK (~(PGDIR_SIZE-1))`

			`/* PTE bits */`
			`#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */`
			`#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */`
			`#define _PAGE_F_SECOND _PAGE_SECONDARY`
			`#define _PAGE_F_GIX _PAGE_GROUP_IX`

			`/* PTE flags to conserve for HPTE identification */`
			`#define _PAGE_HPTEFLAGS (_PAGE_BUSY \| _PAGE_HASHPTE \| \`
			`_PAGE_SECONDARY \| _PAGE_GROUP_IX)`

			`/* PAGE_MASK gives the right answer below, but only by accident */`
			`/* It should be preserving the high 48 bits and then specifically */`
			`/* preserving _PAGE_SECONDARY \| _PAGE_GROUP_IX */`
			`#define _PAGE_CHG_MASK (PAGE_MASK \| _PAGE_ACCESSED \| _PAGE_DIRTY \| \`
			`_PAGE_HPTEFLAGS)`

			`/* Bits to mask out from a PMD to get to the PTE page */`
			`#define PMD_MASKED_BITS 0`
			`/* Bits to mask out from a PUD to get to the PMD page */`
			`#define PUD_MASKED_BITS 0`
			`/* Bits to mask out from a PGD to get to the PUD page */`
			`#define PGD_MASKED_BITS 0`

			`/* shift to put page number into pte */`
			`#define PTE_RPN_SHIFT (17)`

[PATCH] powerpc: Fixup for STRICT_MM_TYPECHECKS Currently ARCH=powerpc will not compile when STRICT_MM_TYPECHECKS is turned on and CONFIG_64K_PAGES is turned off. This corrects the problem. Signed-off-by: David Gibson <dwg@au1.ibm.com> Signed-off-by: Paul Mackerras <paulus@samba.org> 2006-02-20 11:05:56 +08:00			`#ifdef STRICT_MM_TYPECHECKS`
			`#define __real_pte(e,p) ((real_pte_t){(e)})`
			`#define __rpte_to_pte(r) ((r).pte)`
			`#else`
			`#define __real_pte(e,p) (e)`
			`#define __rpte_to_pte(r) (__pte(r))`
			`#endif`
			`#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> 12)`
[PATCH] ppc64: support 64k pages Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper way to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-11-07 08:06:55 +08:00
			`#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \`
			`do { \`
			`index = 0; \`
			`shift = mmu_psize_defs[psize].shift; \`

			`#define pte_iterate_hashed_end() } while(0)`

powerpc: Use 64k pages without needing cache-inhibited large pages Some POWER5+ machines can do 64k hardware pages for normal memory but not for cache-inhibited pages. This patch lets us use 64k hardware pages for most user processes on such machines (assuming the kernel has been configured with CONFIG_PPC_64K_PAGES=y). User processes start out using 64k pages and get switched to 4k pages if they use any non-cacheable mappings. With this, we use 64k pages for the vmalloc region and 4k pages for the imalloc region. If anything creates a non-cacheable mapping in the vmalloc region, the vmalloc region will get switched to 4k pages. I don't know of any driver other than the DRM that would do this, though, and these machines don't have AGP. When a region gets switched from 64k pages to 4k pages, we do not have to clear out all the 64k HPTEs from the hash table immediately. We use the _PAGE_COMBO bit in the Linux PTE to indicate whether the page was hashed in as a 64k page or a set of 4k pages. If hash_page is trying to insert a 4k page for a Linux PTE and it sees that it has already been inserted as a 64k page, it first invalidates the 64k HPTE before inserting the 4k HPTE. The hash invalidation routines also use the _PAGE_COMBO bit, to determine whether to look for a 64k HPTE or a set of 4k HPTEs to remove. With those two changes, we can tolerate a mix of 4k and 64k HPTEs in the hash table, and they will all get removed when the address space is torn down. Signed-off-by: Paul Mackerras <paulus@samba.org> 2006-06-15 08:45:18 +08:00			`#define pte_pagesize_index(pte) MMU_PAGE_4K`

[PATCH] ppc64: support 64k pages Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper way to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-11-07 08:06:55 +08:00			`/*`
			`* 4-level page tables related bits`
			`*/`

			`#define pgd_none(pgd) (!pgd_val(pgd))`
			`#define pgd_bad(pgd) (pgd_val(pgd) == 0)`
			`#define pgd_present(pgd) (pgd_val(pgd) != 0)`
			`#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)`
[PATCH] Standardize pxx_page macros One of the changes necessary for shared page tables is to standardize the pxx_page macros. pte_page and pmd_page have always returned the struct page associated with their entry, while pte_page_kernel and pmd_page_kernel have returned the kernel virtual address. pud_page and pgd_page, on the other hand, return the kernel virtual address. Shared page tables needs pud_page and pgd_page to return the actual page structures. There are very few actual users of these functions, so it is simple to standardize their usage. Since this is basic cleanup, I am submitting these changes as a standalone patch. Per Hugh Dickins' comments about it, I am also changing the pxx_page_kernel macros to pxx_page_vaddr to clarify their meaning. Signed-off-by: Dave McCracken <dmccr@us.ibm.com> Cc: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-09-26 14:31:48 +08:00			`#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)`
			`#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd))`
[PATCH] ppc64: support 64k pages Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper way to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-11-07 08:06:55 +08:00
			`#define pud_offset(pgdp, addr) \`
[PATCH] Standardize pxx_page macros One of the changes necessary for shared page tables is to standardize the pxx_page macros. pte_page and pmd_page have always returned the struct page associated with their entry, while pte_page_kernel and pmd_page_kernel have returned the kernel virtual address. pud_page and pgd_page, on the other hand, return the kernel virtual address. Shared page tables needs pud_page and pgd_page to return the actual page structures. There are very few actual users of these functions, so it is simple to standardize their usage. Since this is basic cleanup, I am submitting these changes as a standalone patch. Per Hugh Dickins' comments about it, I am also changing the pxx_page_kernel macros to pxx_page_vaddr to clarify their meaning. Signed-off-by: Dave McCracken <dmccr@us.ibm.com> Cc: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-09-26 14:31:48 +08:00			`(((pud_t *) pgd_page_vaddr(*(pgdp))) + \`
[PATCH] ppc64: support 64k pages Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper way to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-11-07 08:06:55 +08:00			`(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))`

			`#define pud_ERROR(e) \`
[PATCH] powerpc: Fix incorrect pud_ERROR() message The powerpc pud_ERROR() function misleadingly prints a message indicating a pmd error. This patch fixes it. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Paul Mackerras <paulus@samba.org> 2006-03-03 13:24:06 +08:00			`printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))`