2005-11-11 11:25:24 +08:00
|
|
|
#ifndef _ASM_POWERPC_PAGE_H
|
|
|
|
#define _ASM_POWERPC_PAGE_H
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Copyright (C) 2001,2005 IBM Corporation.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
2008-10-20 11:16:55 +08:00
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <linux/types.h>
|
powerpc: fix linux-next build failure
Today's linux-next build (powerpc allyesconfig) failed like this:
In file included from arch/powerpc/include/asm/mmu-hash64.h:17,
from arch/powerpc/include/asm/mmu.h:8,
from arch/powerpc/include/asm/pgtable.h:8,
from arch/powerpc/mm/slb.c:20:
arch/powerpc/include/asm/page.h:76: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'memstart_addr'
arch/powerpc/include/asm/page.h:77: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'kernstart_addr'
Caused by commit 600715dcdf567c86f8b2c6173fcfb4b873e25a19 ("generic: add
phys_addr_t for holding physical addresses") from the tip-core tree.
This only fails if CONFIG_RELOCATABLE is set.
So include that instead of asm/types.h in asm/page.h for
the CONFIG_RELOCATABLE case.
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: ppc-dev <linuxppc-dev@ozlabs.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-10-16 17:29:07 +08:00
|
|
|
#else
|
|
|
|
#include <asm/types.h>
|
2008-10-20 11:16:55 +08:00
|
|
|
#endif
|
2005-11-11 11:25:24 +08:00
|
|
|
#include <asm/asm-compat.h>
|
2006-05-17 16:00:49 +08:00
|
|
|
#include <asm/kdump.h>
|
2005-11-11 11:25:24 +08:00
|
|
|
|
|
|
|
/*
|
powerpc/44x: Support for 256KB PAGE_SIZE
This patch adds support for 256KB pages on ppc44x-based boards.
For simplification of implementation with 256KB pages we still assume
2-level paging. As a side effect this leads to wasting extra memory space
reserved for PTE tables: only 1/4 of pages allocated for PTEs are
actually used. But this may be an acceptable trade-off to achieve the
high performance we have with big PAGE_SIZEs in some applications (e.g.
RAID).
Also with 256KB PAGE_SIZE we increase THREAD_SIZE up to 32KB to minimize
the risk of stack overflows in the cases of on-stack arrays, which size
depends on the page size (e.g. multipage BIOs, NTFS, etc.).
With 256KB PAGE_SIZE we need to decrease the PKMAP_ORDER at least down
to 9, otherwise all high memory (2 ^ 10 * PAGE_SIZE == 256MB) will be
occupied by PKMAP addresses leaving no place for vmalloc. We do not
separate PKMAP_ORDER for 256K from 16K/64K PAGE_SIZE here; actually that
value of 10 in support for 16K/64K had been selected rather intuitively.
Thus now for all cases of PAGE_SIZE on ppc44x (including the default, 4KB,
one) we have 512 pages for PKMAP.
Because ELF standard supports only page sizes up to 64K, then you should
use binutils later than 2.17.50.0.3 with '-zmax-page-size' set to 256K
for building applications, which are to be run with the 256KB-page sized
kernel. If using the older binutils, then you should patch them like follows:
--- binutils/bfd/elf32-ppc.c.orig
+++ binutils/bfd/elf32-ppc.c
-#define ELF_MAXPAGESIZE 0x10000
+#define ELF_MAXPAGESIZE 0x40000
One more restriction we currently have with 256KB page sizes is inability
to use shmem safely, so, for now, the 256KB is available only if you turn
the CONFIG_SHMEM option off (another variant is to use BROKEN).
Though, if you need shmem with 256KB pages, you can always remove the !SHMEM
dependency in 'config PPC_256K_PAGES', and use the workaround available here:
http://lkml.org/lkml/2008/12/19/20
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
2009-01-29 09:40:44 +08:00
|
|
|
* On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages
|
2008-12-11 09:55:41 +08:00
|
|
|
* on PPC44x). For PPC64 we support either 4K or 64K software
|
2005-11-11 11:25:24 +08:00
|
|
|
* page size. When using 64K pages however, whether we are really supporting
|
|
|
|
* 64K pages in HW or not is irrelevant to those definitions.
|
|
|
|
*/
|
powerpc/44x: Support for 256KB PAGE_SIZE
This patch adds support for 256KB pages on ppc44x-based boards.
For simplification of implementation with 256KB pages we still assume
2-level paging. As a side effect this leads to wasting extra memory space
reserved for PTE tables: only 1/4 of pages allocated for PTEs are
actually used. But this may be an acceptable trade-off to achieve the
high performance we have with big PAGE_SIZEs in some applications (e.g.
RAID).
Also with 256KB PAGE_SIZE we increase THREAD_SIZE up to 32KB to minimize
the risk of stack overflows in the cases of on-stack arrays, which size
depends on the page size (e.g. multipage BIOs, NTFS, etc.).
With 256KB PAGE_SIZE we need to decrease the PKMAP_ORDER at least down
to 9, otherwise all high memory (2 ^ 10 * PAGE_SIZE == 256MB) will be
occupied by PKMAP addresses leaving no place for vmalloc. We do not
separate PKMAP_ORDER for 256K from 16K/64K PAGE_SIZE here; actually that
value of 10 in support for 16K/64K had been selected rather intuitively.
Thus now for all cases of PAGE_SIZE on ppc44x (including the default, 4KB,
one) we have 512 pages for PKMAP.
Because ELF standard supports only page sizes up to 64K, then you should
use binutils later than 2.17.50.0.3 with '-zmax-page-size' set to 256K
for building applications, which are to be run with the 256KB-page sized
kernel. If using the older binutils, then you should patch them like follows:
--- binutils/bfd/elf32-ppc.c.orig
+++ binutils/bfd/elf32-ppc.c
-#define ELF_MAXPAGESIZE 0x10000
+#define ELF_MAXPAGESIZE 0x40000
One more restriction we currently have with 256KB page sizes is inability
to use shmem safely, so, for now, the 256KB is available only if you turn
the CONFIG_SHMEM option off (another variant is to use BROKEN).
Though, if you need shmem with 256KB pages, you can always remove the !SHMEM
dependency in 'config PPC_256K_PAGES', and use the workaround available here:
http://lkml.org/lkml/2008/12/19/20
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
2009-01-29 09:40:44 +08:00
|
|
|
/*
 * PAGE_SHIFT is log2 of the base page size (PAGE_SIZE is 1 << PAGE_SHIFT).
 * The first matching CONFIG_PPC_*_PAGES option wins: 18 (256K), 16 (64K)
 * or 14 (16K); with none of them set the shift is 12 (4K).
 */
#if defined(CONFIG_PPC_256K_PAGES)
|
|
|
|
#define PAGE_SHIFT 18
|
|
|
|
#elif defined(CONFIG_PPC_64K_PAGES)
|
2005-11-11 11:25:24 +08:00
|
|
|
#define PAGE_SHIFT 16
|
2008-12-11 09:55:41 +08:00
|
|
|
#elif defined(CONFIG_PPC_16K_PAGES)
|
|
|
|
#define PAGE_SHIFT 14
|
2005-11-11 11:25:24 +08:00
|
|
|
#else
|
|
|
|
#define PAGE_SHIFT 12
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * Size of one base page in bytes, derived from PAGE_SHIFT.
 * NOTE(review): ASM_CONST() is defined outside this file (presumably in
 * <asm/asm-compat.h>) to keep the constant usable from assembly - confirm.
 */
#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
|
|
|
|
|
2011-06-28 17:54:48 +08:00
|
|
|
/*
 * Huge page geometry. Guarded by !__ASSEMBLY__ because the block contains
 * a C declaration.
 */
#ifndef __ASSEMBLY__
|
|
|
|
#ifdef CONFIG_HUGETLB_PAGE
|
|
|
|
/* With hugetlb support the shift is a variable defined elsewhere. */
extern unsigned int HPAGE_SHIFT;
|
|
|
|
#else
|
|
|
|
/* Without hugetlb support a "huge" page is just a base page. */
#define HPAGE_SHIFT PAGE_SHIFT
|
|
|
|
#endif
|
|
|
|
/* Size of a huge page in bytes. */
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
|
|
|
|
/* Mask that clears the offset-within-huge-page bits of an address. */
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
|
|
|
|
/* log2 of the number of base pages in one huge page. */
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
|
|
|
|
/*
 * NOTE(review): MMU_PAGE_COUNT is defined outside this file; presumably
 * the number of MMU page sizes - confirm.
 */
#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1)
|
|
|
|
#endif
|
|
|
|
|
2005-11-11 11:25:24 +08:00
|
|
|
/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
|
|
|
|
/*
 * NOTE(review): presumably tells generic mm code that this architecture
 * supplies its own gate-area helpers - confirm against the users of
 * this macro.
 */
#define __HAVE_ARCH_GATE_AREA 1
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
|
|
|
|
* assign PAGE_MASK to a larger type it gets extended the way we want
|
|
|
|
* (i.e. with 1s in the high bits)
|
|
|
|
*/
|
|
|
|
/*
 * Keep the literal 1 a plain int here (do not switch to 1UL or
 * ASM_CONST): the resulting negative int is what sign-extends to all-ones
 * high bits when the mask is widened.
 */
#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1))
|
|
|
|
|
2005-12-06 00:24:33 +08:00
|
|
|
/*
|
|
|
|
* KERNELBASE is the virtual address of the start of the kernel, it's often
|
|
|
|
* the same as PAGE_OFFSET, but _might not be_.
|
|
|
|
*
|
|
|
|
* The kdump dump kernel is one example where KERNELBASE != PAGE_OFFSET.
|
|
|
|
*
|
2008-04-22 02:22:34 +08:00
|
|
|
* PAGE_OFFSET is the virtual address of the start of lowmem.
|
|
|
|
*
|
|
|
|
* PHYSICAL_START is the physical address of the start of the kernel.
|
|
|
|
*
|
|
|
|
* MEMORY_START is the physical address of the start of lowmem.
|
|
|
|
*
|
|
|
|
* KERNELBASE, PAGE_OFFSET, and PHYSICAL_START are all configurable on
|
|
|
|
* ppc32 and based on how they are set we determine MEMORY_START.
|
|
|
|
*
|
|
|
|
* For the linear mapping the following equation should be true:
|
|
|
|
* KERNELBASE - PAGE_OFFSET = PHYSICAL_START - MEMORY_START
|
|
|
|
*
|
|
|
|
* Also, KERNELBASE >= PAGE_OFFSET and PHYSICAL_START >= MEMORY_START
|
|
|
|
*
|
|
|
|
* There are two ways to determine a physical address from a virtual one:
|
|
|
|
* va = pa + PAGE_OFFSET - MEMORY_START
|
|
|
|
* va = pa + KERNELBASE - PHYSICAL_START
|
2005-12-06 00:24:33 +08:00
|
|
|
*
|
|
|
|
* If you want to know something's offset from the start of the kernel you
|
|
|
|
* should subtract KERNELBASE.
|
|
|
|
*
|
|
|
|
* If you want to test if something's a kernel address, use is_kernel_addr().
|
|
|
|
*/
|
2005-12-04 15:39:23 +08:00
|
|
|
|
2008-04-22 02:22:34 +08:00
|
|
|
/* Virtual address of the start of the kernel, taken from Kconfig. */
#define KERNELBASE ASM_CONST(CONFIG_KERNEL_START)
|
|
|
|
/* Virtual address of the start of lowmem, taken from Kconfig. */
#define PAGE_OFFSET ASM_CONST(CONFIG_PAGE_OFFSET)
|
|
|
|
/*
 * Configured kernel virtual start minus configured kernel physical start.
 * Always built from the Kconfig constants, even where PHYSICAL_START is
 * a variable.
 */
#define LOAD_OFFSET ASM_CONST((CONFIG_KERNEL_START-CONFIG_PHYSICAL_START))
|
|
|
|
|
2011-12-15 06:57:15 +08:00
|
|
|
#if defined(CONFIG_NONSTATIC_KERNEL)
|
2008-04-22 02:22:34 +08:00
|
|
|
#ifndef __ASSEMBLY__
|
2008-10-20 11:16:55 +08:00
|
|
|
|
2008-04-22 02:22:34 +08:00
|
|
|
/*
 * Variables defined elsewhere that back MEMORY_START (memstart_addr) and
 * PHYSICAL_START (kernstart_addr) when CONFIG_NONSTATIC_KERNEL is set.
 */
extern phys_addr_t memstart_addr;
|
|
|
|
extern phys_addr_t kernstart_addr;
|
powerpc: Define virtual-physical translations for RELOCATABLE
We find the runtime address of _stext and relocate ourselves based
on the following calculation.
virtual_base = ALIGN(KERNELBASE,KERNEL_TLB_PIN_SIZE) +
MODULO(_stext.run,KERNEL_TLB_PIN_SIZE)
relocate() is called with the Effective Virtual Base Address (as
shown below)
| Phys. Addr| Virt. Addr |
Page |------------------------|
Boundary | | |
| | |
| | |
Kernel Load |___________|_ __ _ _ _ _|<- Effective
Addr(_stext)| | ^ |Virt. Base Addr
| | | |
| | | |
| |reloc_offset|
| | | |
| | | |
| |______v_____|<-(KERNELBASE)%TLB_SIZE
| | |
| | |
| | |
Page |-----------|------------|
Boundary | | |
On BookE, we need __va() & __pa() early in the boot process to access
the device tree.
Currently this has been defined as :
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) -
PHYSICAL_START + KERNELBASE)
where:
PHYSICAL_START is kernstart_addr - a variable updated at runtime.
KERNELBASE is the compile time Virtual base address of kernel.
This won't work for us, as kernstart_addr is dynamic and will yield different
results for __va()/__pa() for same mapping.
e.g.,
Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as
PAGE_OFFSET).
In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M
Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
= 0xbc100000 , which is wrong.
it should be : 0xc0000000 + 0x100000 = 0xc0100000
On platforms which support AMP, like PPC_47x (based on 44x), the kernel
could be loaded at highmem. Hence we cannot always depend on the compile
time constants for mapping.
Here are the possible solutions:
1) Update kernstart_addr(PHYSICAL_START) to match the Physical address of
compile time KERNELBASE value, instead of the actual Physical_Address(_stext).
The disadvantage is that we may break other users of PHYSICAL_START. They
could be replaced with __pa(_stext).
2) Redefine __va() & __pa() with relocation offset
#ifdef CONFIG_RELOCATABLE_PPC32
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + (KERNELBASE + RELOC_OFFSET)))
#define __pa(x) ((unsigned long)(x) + PHYSICAL_START - (KERNELBASE + RELOC_OFFSET))
#endif
where, RELOC_OFFSET could be
a) A variable, say relocation_offset (like kernstart_addr), updated
at boot time. This impacts performance, as we have to load an additional
variable from memory.
OR
b) #define RELOC_OFFSET ((PHYSICAL_START & PPC_PIN_SIZE_OFFSET_MASK) - \
(KERNELBASE & PPC_PIN_SIZE_OFFSET_MASK))
This introduces more calculations for doing the translation.
3) Redefine __va() & __pa() with a new variable
i.e,
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
where VIRT_PHYS_OFFSET :
#ifdef CONFIG_RELOCATABLE_PPC32
#define VIRT_PHYS_OFFSET virt_phys_offset
#else
#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
#endif /* CONFIG_RELOCATABLE_PPC32 */
where virt_phys_offset is updated at runtime to :
Effective KERNELBASE - kernstart_addr.
Taking our example, above:
virt_phys_offset = effective_kernelstart_vaddr - kernstart_addr
= 0xc4000000 - 0x4000000
= 0xc0000000
and
__va(0x100000) = 0xc0000000 + 0x100000 = 0xc0100000
which is what we want.
I have implemented (3) in the following patch which has same cost of
operation as the existing one.
I have tested the patches on 440x platforms only. However this should
work fine for PPC_47x also, as we only depend on the runtime address
and the current TLB XLAT entry for the startup code, which is available
in r25. I don't have access to a 47x board yet. So, it would be great if
somebody could test this on 47x.
Signed-off-by: Suzuki K. Poulose <suzuki@in.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
2011-12-15 06:58:37 +08:00
|
|
|
|
|
|
|
/*
 * Variable defined elsewhere; VIRT_PHYS_OFFSET expands to it when
 * CONFIG_RELOCATABLE_PPC32 is set.
 */
#ifdef CONFIG_RELOCATABLE_PPC32
|
|
|
|
extern long long virt_phys_offset;
|
2008-04-22 02:22:34 +08:00
|
|
|
#endif
|
powerpc: Define virtual-physical translations for RELOCATABLE
We find the runtime address of _stext and relocate ourselves based
on the following calculation.
virtual_base = ALIGN(KERNELBASE,KERNEL_TLB_PIN_SIZE) +
MODULO(_stext.run,KERNEL_TLB_PIN_SIZE)
relocate() is called with the Effective Virtual Base Address (as
shown below)
| Phys. Addr| Virt. Addr |
Page |------------------------|
Boundary | | |
| | |
| | |
Kernel Load |___________|_ __ _ _ _ _|<- Effective
Addr(_stext)| | ^ |Virt. Base Addr
| | | |
| | | |
| |reloc_offset|
| | | |
| | | |
| |______v_____|<-(KERNELBASE)%TLB_SIZE
| | |
| | |
| | |
Page |-----------|------------|
Boundary | | |
On BookE, we need __va() & __pa() early in the boot process to access
the device tree.
Currently this has been defined as :
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) -
PHYSICAL_START + KERNELBASE)
where:
PHYSICAL_START is kernstart_addr - a variable updated at runtime.
KERNELBASE is the compile time Virtual base address of kernel.
This won't work for us, as kernstart_addr is dynamic and will yield different
results for __va()/__pa() for same mapping.
e.g.,
Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as
PAGE_OFFSET).
In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M
Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
= 0xbc100000 , which is wrong.
it should be : 0xc0000000 + 0x100000 = 0xc0100000
On platforms which support AMP, like PPC_47x (based on 44x), the kernel
could be loaded at highmem. Hence we cannot always depend on the compile
time constants for mapping.
Here are the possible solutions:
1) Update kernstart_addr(PHYSICAL_START) to match the Physical address of
compile time KERNELBASE value, instead of the actual Physical_Address(_stext).
The disadvantage is that we may break other users of PHYSICAL_START. They
could be replaced with __pa(_stext).
2) Redefine __va() & __pa() with relocation offset
#ifdef CONFIG_RELOCATABLE_PPC32
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + (KERNELBASE + RELOC_OFFSET)))
#define __pa(x) ((unsigned long)(x) + PHYSICAL_START - (KERNELBASE + RELOC_OFFSET))
#endif
where, RELOC_OFFSET could be
a) A variable, say relocation_offset (like kernstart_addr), updated
at boot time. This impacts performance, as we have to load an additional
variable from memory.
OR
b) #define RELOC_OFFSET ((PHYSICAL_START & PPC_PIN_SIZE_OFFSET_MASK) - \
(KERNELBASE & PPC_PIN_SIZE_OFFSET_MASK))
This introduces more calculations for doing the translation.
3) Redefine __va() & __pa() with a new variable
i.e,
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
where VIRT_PHYS_OFFSET :
#ifdef CONFIG_RELOCATABLE_PPC32
#define VIRT_PHYS_OFFSET virt_phys_offset
#else
#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
#endif /* CONFIG_RELOCATABLE_PPC32 */
where virt_phys_offset is updated at runtime to :
Effective KERNELBASE - kernstart_addr.
Taking our example, above:
virt_phys_offset = effective_kernelstart_vaddr - kernstart_addr
= 0xc4000000 - 0x4000000
= 0xc0000000
and
__va(0x100000) = 0xc0000000 + 0x100000 = 0xc0100000
which is what we want.
I have implemented (3) in the following patch which has same cost of
operation as the existing one.
I have tested the patches on 440x platforms only. However this should
work fine for PPC_47x also, as we only depend on the runtime address
and the current TLB XLAT entry for the startup code, which is available
in r25. I don't have access to a 47x board yet. So, it would be great if
somebody could test this on 47x.
Signed-off-by: Suzuki K. Poulose <suzuki@in.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
2011-12-15 06:58:37 +08:00
|
|
|
|
|
|
|
#endif /* __ASSEMBLY__ */
|
2008-04-22 02:22:34 +08:00
|
|
|
/*
 * Non-static kernel: the kernel's physical start address is read from the
 * kernstart_addr variable instead of being a build-time constant.
 */
#define PHYSICAL_START kernstart_addr
|
powerpc: Define virtual-physical translations for RELOCATABLE
We find the runtime address of _stext and relocate ourselves based
on the following calculation.
virtual_base = ALIGN(KERNELBASE,KERNEL_TLB_PIN_SIZE) +
MODULO(_stext.run,KERNEL_TLB_PIN_SIZE)
relocate() is called with the Effective Virtual Base Address (as
shown below)
| Phys. Addr| Virt. Addr |
Page |------------------------|
Boundary | | |
| | |
| | |
Kernel Load |___________|_ __ _ _ _ _|<- Effective
Addr(_stext)| | ^ |Virt. Base Addr
| | | |
| | | |
| |reloc_offset|
| | | |
| | | |
| |______v_____|<-(KERNELBASE)%TLB_SIZE
| | |
| | |
| | |
Page |-----------|------------|
Boundary | | |
On BookE, we need __va() & __pa() early in the boot process to access
the device tree.
Currently this has been defined as :
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) -
PHYSICAL_START + KERNELBASE)
where:
PHYSICAL_START is kernstart_addr - a variable updated at runtime.
KERNELBASE is the compile time Virtual base address of kernel.
This won't work for us, as kernstart_addr is dynamic and will yield different
results for __va()/__pa() for same mapping.
e.g.,
Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as
PAGE_OFFSET).
In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M
Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
= 0xbc100000 , which is wrong.
it should be : 0xc0000000 + 0x100000 = 0xc0100000
On platforms which support AMP, like PPC_47x (based on 44x), the kernel
could be loaded at highmem. Hence we cannot always depend on the compile
time constants for mapping.
Here are the possible solutions:
1) Update kernstart_addr(PHYSICAL_START) to match the Physical address of
compile time KERNELBASE value, instead of the actual Physical_Address(_stext).
The disadvantage is that we may break other users of PHYSICAL_START. They
could be replaced with __pa(_stext).
2) Redefine __va() & __pa() with relocation offset
#ifdef CONFIG_RELOCATABLE_PPC32
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + (KERNELBASE + RELOC_OFFSET)))
#define __pa(x) ((unsigned long)(x) + PHYSICAL_START - (KERNELBASE + RELOC_OFFSET))
#endif
where, RELOC_OFFSET could be
a) A variable, say relocation_offset (like kernstart_addr), updated
at boot time. This impacts performance, as we have to load an additional
variable from memory.
OR
b) #define RELOC_OFFSET ((PHYSICAL_START & PPC_PIN_SIZE_OFFSET_MASK) - \
(KERNELBASE & PPC_PIN_SIZE_OFFSET_MASK))
This introduces more calculations for doing the translation.
3) Redefine __va() & __pa() with a new variable
i.e,
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
where VIRT_PHYS_OFFSET :
#ifdef CONFIG_RELOCATABLE_PPC32
#define VIRT_PHYS_OFFSET virt_phys_offset
#else
#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
#endif /* CONFIG_RELOCATABLE_PPC32 */
where virt_phys_offset is updated at runtime to :
Effective KERNELBASE - kernstart_addr.
Taking our example, above:
virt_phys_offset = effective_kernelstart_vaddr - kernstart_addr
= 0xc4000000 - 0x4000000
= 0xc0000000
and
__va(0x100000) = 0xc0000000 + 0x100000 = 0xc0100000
which is what we want.
I have implemented (3) in the following patch which has same cost of
operation as the existing one.
I have tested the patches on 440x platforms only. However this should
work fine for PPC_47x also, as we only depend on the runtime address
and the current TLB XLAT entry for the startup code, which is available
in r25. I don't have access to a 47x board yet. So, it would be great if
somebody could test this on 47x.
Signed-off-by: Suzuki K. Poulose <suzuki@in.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
2011-12-15 06:58:37 +08:00
|
|
|
|
|
|
|
#else /* !CONFIG_NONSTATIC_KERNEL */
|
2008-04-22 02:22:34 +08:00
|
|
|
/* Static kernel: the physical start address is the Kconfig constant. */
#define PHYSICAL_START ASM_CONST(CONFIG_PHYSICAL_START)
|
powerpc: Make the 64-bit kernel as a position-independent executable
This implements CONFIG_RELOCATABLE for 64-bit by making the kernel as
a position-independent executable (PIE) when it is set. This involves
processing the dynamic relocations in the image in the early stages of
booting, even if the kernel is being run at the address it is linked at,
since the linker does not necessarily fill in words in the image for
which there are dynamic relocations. (In fact the linker does fill in
such words for 64-bit executables, though not for 32-bit executables,
so in principle we could avoid calling relocate() entirely when we're
running a 64-bit kernel at the linked address.)
The dynamic relocations are processed by a new function relocate(addr),
where the addr parameter is the virtual address where the image will be
run. In fact we call it twice; once before calling prom_init, and again
when starting the main kernel. This means that reloc_offset() returns
0 in prom_init (since it has been relocated to the address it is running
at), which necessitated a few adjustments.
This also changes __va and __pa to use an equivalent definition that is
simpler. With the relocatable kernel, PAGE_OFFSET and MEMORY_START are
constants (for 64-bit) whereas PHYSICAL_START is a variable (and
KERNELBASE ideally should be too, but isn't yet).
With this, relocatable kernels still copy themselves down to physical
address 0 and run there.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-08-30 09:43:47 +08:00
|
|
|
#endif
|
|
|
|
|
powerpc: Define virtual-physical translations for RELOCATABLE
We find the runtime address of _stext and relocate ourselves based
on the following calculation.
virtual_base = ALIGN(KERNELBASE,KERNEL_TLB_PIN_SIZE) +
MODULO(_stext.run,KERNEL_TLB_PIN_SIZE)
relocate() is called with the Effective Virtual Base Address (as
shown below)
| Phys. Addr| Virt. Addr |
Page |------------------------|
Boundary | | |
| | |
| | |
Kernel Load |___________|_ __ _ _ _ _|<- Effective
Addr(_stext)| | ^ |Virt. Base Addr
| | | |
| | | |
| |reloc_offset|
| | | |
| | | |
| |______v_____|<-(KERNELBASE)%TLB_SIZE
| | |
| | |
| | |
Page |-----------|------------|
Boundary | | |
On BookE, we need __va() & __pa() early in the boot process to access
the device tree.
Currently this has been defined as :
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) -
PHYSICAL_START + KERNELBASE)
where:
PHYSICAL_START is kernstart_addr - a variable updated at runtime.
KERNELBASE is the compile time Virtual base address of kernel.
This won't work for us, as kernstart_addr is dynamic and will yield different
results for __va()/__pa() for same mapping.
e.g.,
Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as
PAGE_OFFSET).
In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M
Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
= 0xbc100000 , which is wrong.
it should be : 0xc0000000 + 0x100000 = 0xc0100000
On platforms which support AMP, like PPC_47x (based on 44x), the kernel
could be loaded at highmem. Hence we cannot always depend on the compile
time constants for mapping.
Here are the possible solutions:
1) Update kernstart_addr(PHYSICAL_START) to match the Physical address of
compile time KERNELBASE value, instead of the actual Physical_Address(_stext).
The disadvantage is that we may break other users of PHYSICAL_START. They
could be replaced with __pa(_stext).
2) Redefine __va() & __pa() with relocation offset
#ifdef CONFIG_RELOCATABLE_PPC32
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + (KERNELBASE + RELOC_OFFSET)))
#define __pa(x) ((unsigned long)(x) + PHYSICAL_START - (KERNELBASE + RELOC_OFFSET))
#endif
where, RELOC_OFFSET could be
a) A variable, say relocation_offset (like kernstart_addr), updated
at boot time. This impacts performance, as we have to load an additional
variable from memory.
OR
b) #define RELOC_OFFSET ((PHYSICAL_START & PPC_PIN_SIZE_OFFSET_MASK) - \
(KERNELBASE & PPC_PIN_SIZE_OFFSET_MASK))
This introduces more calculations for doing the translation.
3) Redefine __va() & __pa() with a new variable
i.e,
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
where VIRT_PHYS_OFFSET :
#ifdef CONFIG_RELOCATABLE_PPC32
#define VIRT_PHYS_OFFSET virt_phys_offset
#else
#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
#endif /* CONFIG_RELOCATABLE_PPC32 */
where virt_phys_offset is updated at runtime to :
Effective KERNELBASE - kernstart_addr.
Taking our example, above:
virt_phys_offset = effective_kernelstart_vaddr - kernstart_addr
= 0xc4000000 - 0x4000000
= 0xc0000000
and
__va(0x100000) = 0xc0000000 + 0x100000 = 0xc0100000
which is what we want.
I have implemented (3) in the following patch which has same cost of
operation as the existing one.
I have tested the patches on 440x platforms only. However this should
work fine for PPC_47x also, as we only depend on the runtime address
and the current TLB XLAT entry for the startup code, which is available
in r25. I don't have access to a 47x board yet. So, it would be great if
somebody could test this on 47x.
Signed-off-by: Suzuki K. Poulose <suzuki@in.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
2011-12-15 06:58:37 +08:00
|
|
|
/* See Description below for VIRT_PHYS_OFFSET */
|
|
|
|
/*
 * VIRT_PHYS_OFFSET is the virtual-minus-physical offset of the kernel
 * mapping.
 * NOTE(review): presumably consumed by __va()/__pa(), which are defined
 * outside this excerpt - confirm.
 */
#ifdef CONFIG_RELOCATABLE_PPC32
|
|
|
|
/* Relocatable 32-bit kernel: read the offset from a variable. */
#define VIRT_PHYS_OFFSET virt_phys_offset
|
|
|
|
#else
|
|
|
|
/* Otherwise compute it from KERNELBASE and PHYSICAL_START. */
#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
powerpc: Make the 64-bit kernel as a position-independent executable
This implements CONFIG_RELOCATABLE for 64-bit by making the kernel as
a position-independent executable (PIE) when it is set. This involves
processing the dynamic relocations in the image in the early stages of
booting, even if the kernel is being run at the address it is linked at,
since the linker does not necessarily fill in words in the image for
which there are dynamic relocations. (In fact the linker does fill in
such words for 64-bit executables, though not for 32-bit executables,
so in principle we could avoid calling relocate() entirely when we're
running a 64-bit kernel at the linked address.)
The dynamic relocations are processed by a new function relocate(addr),
where the addr parameter is the virtual address where the image will be
run. In fact we call it twice; once before calling prom_init, and again
when starting the main kernel. This means that reloc_offset() returns
0 in prom_init (since it has been relocated to the address it is running
at), which necessitated a few adjustments.
This also changes __va and __pa to use an equivalent definition that is
simpler. With the relocatable kernel, PAGE_OFFSET and MEMORY_START are
constants (for 64-bit) whereas PHYSICAL_START is a variable (and
KERNELBASE ideally should be too, but isn't yet).
With this, relocatable kernels still copy themselves down to physical
address 0 and run there.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-08-30 09:43:47 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
/* 64-bit: the physical start of lowmem is fixed at 0. */
#define MEMORY_START 0UL
|
2011-12-15 06:57:15 +08:00
|
|
|
#elif defined(CONFIG_NONSTATIC_KERNEL)
|
powerpc: Make the 64-bit kernel as a position-independent executable
This implements CONFIG_RELOCATABLE for 64-bit by making the kernel as
a position-independent executable (PIE) when it is set. This involves
processing the dynamic relocations in the image in the early stages of
booting, even if the kernel is being run at the address it is linked at,
since the linker does not necessarily fill in words in the image for
which there are dynamic relocations. (In fact the linker does fill in
such words for 64-bit executables, though not for 32-bit executables,
so in principle we could avoid calling relocate() entirely when we're
running a 64-bit kernel at the linked address.)
The dynamic relocations are processed by a new function relocate(addr),
where the addr parameter is the virtual address where the image will be
run. In fact we call it twice; once before calling prom_init, and again
when starting the main kernel. This means that reloc_offset() returns
0 in prom_init (since it has been relocated to the address it is running
at), which necessitated a few adjustments.
This also changes __va and __pa to use an equivalent definition that is
simpler. With the relocatable kernel, PAGE_OFFSET and MEMORY_START are
constants (for 64-bit) whereas PHYSICAL_START is a variable (and
KERNELBASE ideally should be too, but isn't yet).
With this, relocatable kernels still copy themselves down to physical
address 0 and run there.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-08-30 09:43:47 +08:00
|
|
|
/*
 * Non-64-bit with CONFIG_NONSTATIC_KERNEL: the physical start of lowmem
 * is read from the memstart_addr variable.
 */
#define MEMORY_START memstart_addr
|
|
|
|
#else
|
2008-04-22 02:22:34 +08:00
|
|
|
/*
 * Static non-64-bit kernel: derived from the linear-mapping identity
 *   KERNELBASE - PAGE_OFFSET = PHYSICAL_START - MEMORY_START
 */
#define MEMORY_START (PHYSICAL_START + PAGE_OFFSET - KERNELBASE)
|
|
|
|
#endif
|
2005-11-11 11:25:24 +08:00
|
|
|
|
|
|
|
/* Page-frame-number helpers, only defined when CONFIG_FLATMEM is set. */
#ifdef CONFIG_FLATMEM
|
2011-03-24 19:51:19 +08:00
|
|
|
/* Page frame number corresponding to MEMORY_START. */
#define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT))
|
2011-01-27 18:31:38 +08:00
|
|
|
/*
 * True when pfn lies in [ARCH_PFN_OFFSET, max_mapnr).
 * Note: the argument is evaluated twice, so avoid side effects in it.
 */
#define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr)
|
2005-11-11 11:25:24 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Result of pfn_to_page() for the page frame that contains __pa(kaddr). */
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
|
|
|
|
/* __va() of the first byte of page frame pfn. */
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
|
|
|
|
/* pfn_valid() applied to the page frame that contains __pa(kaddr). */
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
|
|
|
|
|
2010-04-21 15:12:58 +08:00
|
|
|
/*
|
|
|
|
* On Book-E parts we need __va to parse the device tree and we can't
|
|
|
|
* determine MEMORY_START until then. However we can determine PHYSICAL_START
|
|
|
|
* from information at hand (program counter, TLB lookup).
|
|
|
|
*
|
powerpc: Define virtual-physical translations for RELOCATABLE
We find the runtime address of _stext and relocate ourselves based
on the following calculation.
virtual_base = ALIGN(KERNELBASE,KERNEL_TLB_PIN_SIZE) +
MODULO(_stext.run,KERNEL_TLB_PIN_SIZE)
relocate() is called with the Effective Virtual Base Address (as
shown below)
| Phys. Addr| Virt. Addr |
Page |------------------------|
Boundary | | |
| | |
| | |
Kernel Load |___________|_ __ _ _ _ _|<- Effective
Addr(_stext)| | ^ |Virt. Base Addr
| | | |
| | | |
| |reloc_offset|
| | | |
| | | |
| |______v_____|<-(KERNELBASE)%TLB_SIZE
| | |
| | |
| | |
Page |-----------|------------|
Boundary | | |
On BookE, we need __va() & __pa() early in the boot process to access
the device tree.
Currently this has been defined as :
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) -
PHYSICAL_START + KERNELBASE))
where:
PHYSICAL_START is kernstart_addr - a variable updated at runtime.
KERNELBASE is the compile time Virtual base address of kernel.
This won't work for us, as kernstart_addr is dynamic and will yield different
results for __va()/__pa() for same mapping.
e.g.,
Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as
PAGE_OFFSET).
In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M
Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
= 0xbc100000 , which is wrong.
it should be : 0xc0000000 + 0x100000 = 0xc0100000
On platforms which support AMP, like PPC_47x (based on 44x), the kernel
could be loaded at highmem. Hence we cannot always depend on the compile
time constants for mapping.
Here are the possible solutions:
1) Update kernstart_addr(PHYSICAL_START) to match the Physical address of
compile time KERNELBASE value, instead of the actual Physical_Address(_stext).
The disadvantage is that we may break other users of PHYSICAL_START. They
could be replaced with __pa(_stext).
2) Redefine __va() & __pa() with relocation offset
#ifdef CONFIG_RELOCATABLE_PPC32
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + (KERNELBASE + RELOC_OFFSET)))
#define __pa(x) ((unsigned long)(x) + PHYSICAL_START - (KERNELBASE + RELOC_OFFSET))
#endif
where, RELOC_OFFSET could be
a) A variable, say relocation_offset (like kernstart_addr), updated
at boot time. This impacts performance, as we have to load an additional
variable from memory.
OR
b) #define RELOC_OFFSET ((PHYSICAL_START & PPC_PIN_SIZE_OFFSET_MASK) - \
(KERNELBASE & PPC_PIN_SIZE_OFFSET_MASK))
This introduces more calculations for doing the translation.
3) Redefine __va() & __pa() with a new variable
i.e,
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
where VIRT_PHYS_OFFSET :
#ifdef CONFIG_RELOCATABLE_PPC32
#define VIRT_PHYS_OFFSET virt_phys_offset
#else
#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
#endif /* CONFIG_RELOCATABLE_PPC32 */
where virt_phys_offset is updated at runtime to :
Effective KERNELBASE - kernstart_addr.
Taking our example, above:
virt_phys_offset = effective_kernelstart_vaddr - kernstart_addr
= 0xc4000000 - 0x4000000
= 0xc0000000
and
__va(0x100000) = 0xc0000000 + 0x100000 = 0xc0100000
which is what we want.
I have implemented (3) in the following patch which has same cost of
operation as the existing one.
I have tested the patches on 440x platforms only. However this should
work fine for PPC_47x also, as we only depend on the runtime address
and the current TLB XLAT entry for the startup code, which is available
in r25. I don't have access to a 47x board yet. So, it would be great if
somebody could test this on 47x.
Signed-off-by: Suzuki K. Poulose <suzuki@in.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
2011-12-15 06:58:37 +08:00
|
|
|
* On BookE with RELOCATABLE (RELOCATABLE_PPC32)
|
|
|
|
*
|
|
|
|
* With RELOCATABLE_PPC32, we support loading the kernel at any physical
|
|
|
|
* address without any restriction on the page alignment.
|
|
|
|
*
|
|
|
|
* We find the runtime address of _stext and relocate ourselves based on
|
|
|
|
* the following calculation:
|
|
|
|
*
|
|
|
|
* virtual_base = ALIGN_DOWN(KERNELBASE,256M) +
|
|
|
|
* MODULO(_stext.run,256M)
|
|
|
|
* and create the following mapping:
|
|
|
|
*
|
|
|
|
* ALIGN_DOWN(_stext.run,256M) => ALIGN_DOWN(KERNELBASE,256M)
|
|
|
|
*
|
|
|
|
* When we process relocations, we cannot depend on the
|
|
|
|
* existing equation for the __va()/__pa() translations:
|
|
|
|
*
|
|
|
|
* __va(x) = (x) - PHYSICAL_START + KERNELBASE
|
|
|
|
*
|
|
|
|
* Where:
|
|
|
|
* PHYSICAL_START = kernstart_addr = Physical address of _stext
|
|
|
|
* KERNELBASE = Compiled virtual address of _stext.
|
|
|
|
*
|
|
|
|
* This formula holds true iff the kernel load address is TLB page aligned.
|
|
|
|
*
|
|
|
|
* In our case, we need to also account for the shift in the kernel Virtual
|
|
|
|
* address.
|
|
|
|
*
|
|
|
|
* E.g.,
|
|
|
|
*
|
|
|
|
* Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as PAGE_OFFSET).
|
|
|
|
* In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M
|
|
|
|
*
|
|
|
|
* Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
|
|
|
|
* = 0xbc100000 , which is wrong.
|
|
|
|
*
|
|
|
|
* Rather, it should be : 0xc0000000 + 0x100000 = 0xc0100000
|
|
|
|
* according to our mapping.
|
|
|
|
*
|
|
|
|
* Hence we use the following formula to get the translations right:
|
|
|
|
*
|
|
|
|
* __va(x) = (x) - [ PHYSICAL_START - Effective KERNELBASE ]
|
|
|
|
*
|
|
|
|
* Where :
|
|
|
|
* PHYSICAL_START = dynamic load address.(kernstart_addr variable)
|
|
|
|
* Effective KERNELBASE = virtual_base =
|
|
|
|
* = ALIGN_DOWN(KERNELBASE,256M) +
|
|
|
|
* MODULO(PHYSICAL_START,256M)
|
|
|
|
*
|
|
|
|
* To make the cost of __va() / __pa() more light weight, we introduce
|
|
|
|
* a new variable virt_phys_offset, which will hold :
|
|
|
|
*
|
|
|
|
* virt_phys_offset = Effective KERNELBASE - PHYSICAL_START
|
|
|
|
* = ALIGN_DOWN(KERNELBASE,256M) -
|
|
|
|
* ALIGN_DOWN(PHYSICAL_START,256M)
|
|
|
|
*
|
|
|
|
* Hence :
|
|
|
|
*
|
|
|
|
* __va(x) = x - PHYSICAL_START + Effective KERNELBASE
|
|
|
|
* = x + virt_phys_offset
|
|
|
|
*
|
|
|
|
* and
|
|
|
|
* __pa(x) = x + PHYSICAL_START - Effective KERNELBASE
|
|
|
|
* = x - virt_phys_offset
|
|
|
|
*
|
2010-04-21 15:12:58 +08:00
|
|
|
* On non-Book-E PPC64 PAGE_OFFSET and MEMORY_START are constants so use
|
|
|
|
* the other definitions for __va & __pa.
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_BOOKE
|
powerpc: Define virtual-physical translations for RELOCATABLE
We find the runtime address of _stext and relocate ourselves based
on the following calculation.
virtual_base = ALIGN(KERNELBASE,KERNEL_TLB_PIN_SIZE) +
MODULO(_stext.run,KERNEL_TLB_PIN_SIZE)
relocate() is called with the Effective Virtual Base Address (as
shown below)
| Phys. Addr| Virt. Addr |
Page |------------------------|
Boundary | | |
| | |
| | |
Kernel Load |___________|_ __ _ _ _ _|<- Effective
Addr(_stext)| | ^ |Virt. Base Addr
| | | |
| | | |
| |reloc_offset|
| | | |
| | | |
| |______v_____|<-(KERNELBASE)%TLB_SIZE
| | |
| | |
| | |
Page |-----------|------------|
Boundary | | |
On BookE, we need __va() & __pa() early in the boot process to access
the device tree.
Currently this has been defined as :
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) -
PHYSICAL_START + KERNELBASE))
where:
PHYSICAL_START is kernstart_addr - a variable updated at runtime.
KERNELBASE is the compile time Virtual base address of kernel.
This won't work for us, as kernstart_addr is dynamic and will yield different
results for __va()/__pa() for same mapping.
e.g.,
Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as
PAGE_OFFSET).
In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M
Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
= 0xbc100000 , which is wrong.
it should be : 0xc0000000 + 0x100000 = 0xc0100000
On platforms which support AMP, like PPC_47x (based on 44x), the kernel
could be loaded at highmem. Hence we cannot always depend on the compile
time constants for mapping.
Here are the possible solutions:
1) Update kernstart_addr(PHYSICAL_START) to match the Physical address of
compile time KERNELBASE value, instead of the actual Physical_Address(_stext).
The disadvantage is that we may break other users of PHYSICAL_START. They
could be replaced with __pa(_stext).
2) Redefine __va() & __pa() with relocation offset
#ifdef CONFIG_RELOCATABLE_PPC32
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + (KERNELBASE + RELOC_OFFSET)))
#define __pa(x) ((unsigned long)(x) + PHYSICAL_START - (KERNELBASE + RELOC_OFFSET))
#endif
where, RELOC_OFFSET could be
a) A variable, say relocation_offset (like kernstart_addr), updated
at boot time. This impacts performance, as we have to load an additional
variable from memory.
OR
b) #define RELOC_OFFSET ((PHYSICAL_START & PPC_PIN_SIZE_OFFSET_MASK) - \
(KERNELBASE & PPC_PIN_SIZE_OFFSET_MASK))
This introduces more calculations for doing the translation.
3) Redefine __va() & __pa() with a new variable
i.e,
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
where VIRT_PHYS_OFFSET :
#ifdef CONFIG_RELOCATABLE_PPC32
#define VIRT_PHYS_OFFSET virt_phys_offset
#else
#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
#endif /* CONFIG_RELOCATABLE_PPC32 */
where virt_phys_offset is updated at runtime to :
Effective KERNELBASE - kernstart_addr.
Taking our example, above:
virt_phys_offset = effective_kernelstart_vaddr - kernstart_addr
= 0xc4000000 - 0x4000000
= 0xc0000000
and
__va(0x100000) = 0xc0000000 + 0x100000 = 0xc0100000
which is what we want.
I have implemented (3) in the following patch which has same cost of
operation as the existing one.
I have tested the patches on 440x platforms only. However this should
work fine for PPC_47x also, as we only depend on the runtime address
and the current TLB XLAT entry for the startup code, which is available
in r25. I don't have access to a 47x board yet. So, it would be great if
somebody could test this on 47x.
Signed-off-by: Suzuki K. Poulose <suzuki@in.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
2011-12-15 06:58:37 +08:00
|
|
|
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
|
|
|
|
#define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET)
|
2010-04-21 15:12:58 +08:00
|
|
|
#else
|
|
|
|
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START))
|
powerpc: Make the 64-bit kernel as a position-independent executable
This implements CONFIG_RELOCATABLE for 64-bit by making the kernel as
a position-independent executable (PIE) when it is set. This involves
processing the dynamic relocations in the image in the early stages of
booting, even if the kernel is being run at the address it is linked at,
since the linker does not necessarily fill in words in the image for
which there are dynamic relocations. (In fact the linker does fill in
such words for 64-bit executables, though not for 32-bit executables,
so in principle we could avoid calling relocate() entirely when we're
running a 64-bit kernel at the linked address.)
The dynamic relocations are processed by a new function relocate(addr),
where the addr parameter is the virtual address where the image will be
run. In fact we call it twice; once before calling prom_init, and again
when starting the main kernel. This means that reloc_offset() returns
0 in prom_init (since it has been relocated to the address it is running
at), which necessitated a few adjustments.
This also changes __va and __pa to use an equivalent definition that is
simpler. With the relocatable kernel, PAGE_OFFSET and MEMORY_START are
constants (for 64-bit) whereas PHYSICAL_START is a variable (and
KERNELBASE ideally should be too, but isn't yet).
With this, relocatable kernels still copy themselves down to physical
address 0 and run there.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-08-30 09:43:47 +08:00
|
|
|
#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START)
|
2010-04-21 15:12:58 +08:00
|
|
|
#endif
|
2005-11-11 11:25:24 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
|
|
|
|
* and needs to be executable. This means the whole heap ends
|
|
|
|
* up being executable.
|
|
|
|
*/
|
|
|
|
#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
|
|
|
|
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
|
|
|
|
|
|
|
|
#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
|
|
|
|
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
|
|
|
|
|
|
|
|
#ifdef __powerpc64__
|
|
|
|
#include <asm/page_64.h>
|
|
|
|
#else
|
|
|
|
#include <asm/page_32.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* align addr on a size boundary - adjust address up/down if needed */
|
|
|
|
/*
 * Round addr up to the next multiple of size; size must be a power of two.
 *
 * The mask is built in addr's own type.  If it were built in size's type,
 * a size narrower than addr (e.g. a 32-bit unsigned size with a 64-bit addr)
 * would yield a zero-extended mask that clears the upper bits of the result.
 * Both arguments are expanded more than once, so avoid side effects.
 */
#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((__typeof__(addr))(size)-1)))
|
|
|
|
/*
 * Round addr down to a multiple of size; size must be a power of two.
 *
 * The mask is built in addr's own type.  If it were built in size's type,
 * a size narrower than addr (e.g. a 32-bit unsigned size with a 64-bit addr)
 * would yield a zero-extended mask that clears the upper bits of the result.
 * size is expanded once and addr twice (once inside __typeof__, unevaluated).
 */
#define _ALIGN_DOWN(addr,size) ((addr)&(~((__typeof__(addr))(size)-1)))
|
|
|
|
|
|
|
|
/* align addr on a size boundary - adjust address up if needed */
|
|
|
|
#define _ALIGN(addr,size) _ALIGN_UP(addr,size)
|
|
|
|
|
2005-12-04 15:39:15 +08:00
|
|
|
/*
|
|
|
|
* Don't compare things with KERNELBASE or PAGE_OFFSET to test for
|
|
|
|
* "kernelness", use is_kernel_addr() - it should do what you want.
|
|
|
|
*/
|
2009-07-28 09:59:34 +08:00
|
|
|
#ifdef CONFIG_PPC_BOOK3E_64
|
|
|
|
#define is_kernel_addr(x) ((x) >= 0x8000000000000000ul)
|
|
|
|
#else
|
2005-12-04 15:39:15 +08:00
|
|
|
#define is_kernel_addr(x) ((x) >= PAGE_OFFSET)
|
2009-07-28 09:59:34 +08:00
|
|
|
#endif
|
2005-12-04 15:39:15 +08:00
|
|
|
|
2013-04-28 17:37:29 +08:00
|
|
|
#ifndef CONFIG_PPC_BOOK3S_64
|
2011-06-28 17:54:48 +08:00
|
|
|
/*
|
|
|
|
* Use the top bit of the higher-level page table entries to indicate whether
|
|
|
|
* the entries we point to contain hugepages. This works because we know that
|
|
|
|
* the page tables live in kernel space. If we ever decide to support having
|
|
|
|
* page tables at arbitrary addresses, this breaks and will have to change.
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
#define PD_HUGE 0x8000000000000000
|
|
|
|
#else
|
|
|
|
#define PD_HUGE 0x80000000
|
|
|
|
#endif
|
2013-04-28 17:37:29 +08:00
|
|
|
#endif /* CONFIG_PPC_BOOK3S_64 */
|
2011-06-28 17:54:48 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Some number of bits at the level of the page table that points to
|
|
|
|
* a hugepte are used to encode the size. This masks those bits.
|
|
|
|
*/
|
|
|
|
#define HUGEPD_SHIFT_MASK 0x3f
|
|
|
|
|
2005-11-11 11:25:24 +08:00
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
|
|
|
#undef STRICT_MM_TYPECHECKS
|
|
|
|
|
|
|
|
#ifdef STRICT_MM_TYPECHECKS
|
|
|
|
/* These are used to make use of C type-checking. */
|
|
|
|
|
|
|
|
/* PTE level */
|
|
|
|
typedef struct { pte_basic_t pte; } pte_t;
|
|
|
|
#define pte_val(x) ((x).pte)
|
|
|
|
#define __pte(x) ((pte_t) { (x) })
|
|
|
|
|
|
|
|
/* 64k pages additionally define a bigger "real PTE" type that gathers
|
|
|
|
* the "second half" part of the PTE for pseudo 64k pages
|
|
|
|
*/
|
2008-12-11 09:55:41 +08:00
|
|
|
#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
|
2005-11-11 11:25:24 +08:00
|
|
|
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
|
|
|
|
#else
|
|
|
|
typedef struct { pte_t pte; } real_pte_t;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* PMD level */
|
2007-05-08 10:46:49 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
2005-11-11 11:25:24 +08:00
|
|
|
typedef struct { unsigned long pmd; } pmd_t;
|
|
|
|
#define pmd_val(x) ((x).pmd)
|
|
|
|
#define __pmd(x) ((pmd_t) { (x) })
|
|
|
|
|
|
|
|
/* PUD level exists only on 4k pages */
|
2007-05-08 10:46:49 +08:00
|
|
|
#ifndef CONFIG_PPC_64K_PAGES
|
2005-11-11 11:25:24 +08:00
|
|
|
typedef struct { unsigned long pud; } pud_t;
|
|
|
|
#define pud_val(x) ((x).pud)
|
|
|
|
#define __pud(x) ((pud_t) { (x) })
|
2007-05-08 10:46:49 +08:00
|
|
|
#endif /* !CONFIG_PPC_64K_PAGES */
|
|
|
|
#endif /* CONFIG_PPC64 */
|
2005-11-11 11:25:24 +08:00
|
|
|
|
|
|
|
/* PGD level */
|
|
|
|
typedef struct { unsigned long pgd; } pgd_t;
|
|
|
|
#define pgd_val(x) ((x).pgd)
|
|
|
|
#define __pgd(x) ((pgd_t) { (x) })
|
|
|
|
|
|
|
|
/* Page protection bits */
|
|
|
|
typedef struct { unsigned long pgprot; } pgprot_t;
|
|
|
|
#define pgprot_val(x) ((x).pgprot)
|
|
|
|
#define __pgprot(x) ((pgprot_t) { (x) })
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
/*
|
|
|
|
* .. while these make it easier on the compiler
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef pte_basic_t pte_t;
|
|
|
|
#define pte_val(x) (x)
|
|
|
|
#define __pte(x) (x)
|
|
|
|
|
2008-12-11 09:55:41 +08:00
|
|
|
#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
|
2005-11-11 11:25:24 +08:00
|
|
|
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
|
|
|
|
#else
|
2008-12-11 09:55:41 +08:00
|
|
|
typedef pte_t real_pte_t;
|
2005-11-11 11:25:24 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
2007-05-08 10:46:49 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
2005-11-11 11:25:24 +08:00
|
|
|
typedef unsigned long pmd_t;
|
|
|
|
#define pmd_val(x) (x)
|
|
|
|
#define __pmd(x) (x)
|
|
|
|
|
2007-05-08 10:46:49 +08:00
|
|
|
#ifndef CONFIG_PPC_64K_PAGES
|
2005-11-11 11:25:24 +08:00
|
|
|
typedef unsigned long pud_t;
|
|
|
|
#define pud_val(x) (x)
|
|
|
|
#define __pud(x) (x)
|
2007-05-08 10:46:49 +08:00
|
|
|
#endif /* !CONFIG_PPC_64K_PAGES */
|
|
|
|
#endif /* CONFIG_PPC64 */
|
2005-11-11 11:25:24 +08:00
|
|
|
|
|
|
|
typedef unsigned long pgd_t;
|
|
|
|
#define pgd_val(x) (x)
|
|
|
|
#define pgprot_val(x) (x)
|
|
|
|
|
|
|
|
typedef unsigned long pgprot_t;
|
|
|
|
#define __pgd(x) (x)
|
|
|
|
#define __pgprot(x) (x)
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
powerpc/mm: Allow more flexible layouts for hugepage pagetables
Currently each available hugepage size uses a slightly different
pagetable layout: that is, the bottom level table of pointers to
hugepages is a different size, and may branch off from the normal page
tables at a different level. Every hugepage aware path that needs to
walk the pagetables must therefore look up the hugepage size from the
slice info first, and work out the correct way to walk the pagetables
accordingly. Future hardware is likely to add more possible hugepage
sizes, more layout options and more mess.
This patch, therefore reworks the handling of hugepage pagetables to
reduce this complexity. In the new scheme, instead of having to
consult the slice mask, pagetable walking code can check a flag in the
PGD/PUD/PMD entries to see where to branch off to hugepage pagetables,
and the entry also contains the information (essentially hugepage
shift) necessary to then interpret that table without recourse to the
slice mask. This scheme can be extended neatly to handle multiple
levels of self-describing "special" hugepage pagetables, although for
now we assume only one level exists.
This approach means that only the pagetable allocation path needs to
know how the pagetables should be set out. All other (hugepage)
pagetable walking paths can just interpret the structure as they go.
There already was a flag bit in PGD/PUD/PMD entries for hugepage
directory pointers, but it was only used for debug. We alter that
flag bit to instead be a 0 in the MSB to indicate a hugepage pagetable
pointer (normally it would be 1 since the pointer lies in the linear
mapping). This means that asm pagetable walking can test for (and
punt on) hugepage pointers with the same test that checks for
unpopulated page directory entries (beq becomes bge), since hugepage
pointers will always be positive, and normal pointers always negative.
While we're at it, we get rid of the confusing (and grep defeating)
#defining of hugepte_shift to be the same thing as mmu_huge_psizes.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2009-10-27 03:24:31 +08:00
|
|
|
typedef struct { signed long pd; } hugepd_t;
|
|
|
|
|
|
|
|
#ifdef CONFIG_HUGETLB_PAGE
|
2013-04-28 17:37:29 +08:00
|
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
|
|
|
static inline int hugepd_ok(hugepd_t hpd)
{
	/* A hugepd pointer always has its two low-order bits clear. */
	if ((hpd.pd & 0x3) != 0x0)
		return 0;

	/*
	 * The next four bits encode the size of the table; the entry is
	 * only a hugepd pointer when that size field is non-zero.
	 */
	return (hpd.pd & HUGEPD_SHIFT_MASK) != 0;
}
|
|
|
|
#else
|
powerpc/mm: Allow more flexible layouts for hugepage pagetables
Currently each available hugepage size uses a slightly different
pagetable layout: that is, the bottom level table of pointers to
hugepages is a different size, and may branch off from the normal page
tables at a different level. Every hugepage aware path that needs to
walk the pagetables must therefore look up the hugepage size from the
slice info first, and work out the correct way to walk the pagetables
accordingly. Future hardware is likely to add more possible hugepage
sizes, more layout options and more mess.
This patch, therefore reworks the handling of hugepage pagetables to
reduce this complexity. In the new scheme, instead of having to
consult the slice mask, pagetable walking code can check a flag in the
PGD/PUD/PMD entries to see where to branch off to hugepage pagetables,
and the entry also contains the information (essentially hugepage
shift) necessary to then interpret that table without recourse to the
slice mask. This scheme can be extended neatly to handle multiple
levels of self-describing "special" hugepage pagetables, although for
now we assume only one level exists.
This approach means that only the pagetable allocation path needs to
know how the pagetables should be set out. All other (hugepage)
pagetable walking paths can just interpret the structure as they go.
There already was a flag bit in PGD/PUD/PMD entries for hugepage
directory pointers, but it was only used for debug. We alter that
flag bit to instead be a 0 in the MSB to indicate a hugepage pagetable
pointer (normally it would be 1 since the pointer lies in the linear
mapping). This means that asm pagetable walking can test for (and
punt on) hugepage pointers with the same test that checks for
unpopulated page directory entries (beq becomes bge), since hugepage
pointers will always be positive, and normal pointers always negative.
While we're at it, we get rid of the confusing (and grep defeating)
#defining of hugepte_shift to be the same thing as mmu_huge_psizes.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2009-10-27 03:24:31 +08:00
|
|
|
static inline int hugepd_ok(hugepd_t hpd)
{
	/*
	 * hpd.pd is a signed long: only a strictly positive value
	 * (non-zero, top bit clear) counts as a hugepd pointer.
	 */
	if (hpd.pd <= 0)
		return 0;

	return 1;
}
|
2013-04-28 17:37:29 +08:00
|
|
|
#endif
|
powerpc/mm: Allow more flexible layouts for hugepage pagetables
Currently each available hugepage size uses a slightly different
pagetable layout: that is, the bottom level table of pointers to
hugepages is a different size, and may branch off from the normal page
tables at a different level. Every hugepage aware path that needs to
walk the pagetables must therefore look up the hugepage size from the
slice info first, and work out the correct way to walk the pagetables
accordingly. Future hardware is likely to add more possible hugepage
sizes, more layout options and more mess.
This patch, therefore reworks the handling of hugepage pagetables to
reduce this complexity. In the new scheme, instead of having to
consult the slice mask, pagetable walking code can check a flag in the
PGD/PUD/PMD entries to see where to branch off to hugepage pagetables,
and the entry also contains the information (essentially hugepage
shift) necessary to then interpret that table without recourse to the
slice mask. This scheme can be extended neatly to handle multiple
levels of self-describing "special" hugepage pagetables, although for
now we assume only one level exists.
This approach means that only the pagetable allocation path needs to
know how the pagetables should be set out. All other (hugepage)
pagetable walking paths can just interpret the structure as they go.
There already was a flag bit in PGD/PUD/PMD entries for hugepage
directory pointers, but it was only used for debug. We alter that
flag bit to instead be a 0 in the MSB to indicate a hugepage pagetable
pointer (normally it would be 1 since the pointer lies in the linear
mapping). This means that asm pagetable walking can test for (and
punt on) hugepage pointers with the same test that checks for
unpopulated page directory entries (beq becomes bge), since hugepage
pointers will always be positive, and normal pointers always negative.
While we're at it, we get rid of the confusing (and grep defeating)
#defining of hugepte_shift to be the same thing as mmu_huge_psizes.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2009-10-27 03:24:31 +08:00
|
|
|
|
|
|
|
/*
 * Reinterpret the page-directory entry that pdep points to as a hugepd_t
 * and test it with hugepd_ok().  This is the CONFIG_HUGETLB_PAGE variant.
 */
#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep))))
|
2013-04-28 17:37:30 +08:00
|
|
|
int pgd_huge(pgd_t pgd);
|
powerpc/mm: Allow more flexible layouts for hugepage pagetables
Currently each available hugepage size uses a slightly different
pagetable layout: that is, the bottom level table of pointers to
hugepages is a different size, and may branch off from the normal page
tables at a different level. Every hugepage aware path that needs to
walk the pagetables must therefore look up the hugepage size from the
slice info first, and work out the correct way to walk the pagetables
accordingly. Future hardware is likely to add more possible hugepage
sizes, more layout options and more mess.
This patch, therefore reworks the handling of hugepage pagetables to
reduce this complexity. In the new scheme, instead of having to
consult the slice mask, pagetable walking code can check a flag in the
PGD/PUD/PMD entries to see where to branch off to hugepage pagetables,
and the entry also contains the information (essentially hugepage
shift) necessary to then interpret that table without recourse to the
slice mask. This scheme can be extended neatly to handle multiple
levels of self-describing "special" hugepage pagetables, although for
now we assume only one level exists.
This approach means that only the pagetable allocation path needs to
know how the pagetables should be set out. All other (hugepage)
pagetable walking paths can just interpret the structure as they go.
There already was a flag bit in PGD/PUD/PMD entries for hugepage
directory pointers, but it was only used for debug. We alter that
flag bit to instead be a 0 in the MSB to indicate a hugepage pagetable
pointer (normally it would be 1 since the pointer lies in the linear
mapping). This means that asm pagetable walking can test for (and
punt on) hugepage pointers with the same test that checks for
unpopulated page directory entries (beq becomes bge), since hugepage
pointers will always be positive, and normal pointers always negative.
While we're at it, we get rid of the confusing (and grep defeating)
#defining of hugepte_shift to be the same thing as mmu_huge_psizes.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2009-10-27 03:24:31 +08:00
|
|
|
#else /* CONFIG_HUGETLB_PAGE */
|
|
|
|
#define is_hugepd(pdep) 0
|
2013-04-28 17:37:30 +08:00
|
|
|
#define pgd_huge(pgd) 0
|
powerpc/mm: Allow more flexible layouts for hugepage pagetables
Currently each available hugepage size uses a slightly different
pagetable layout: that is, the bottom level table of pointers to
hugepages is a different size, and may branch off from the normal page
tables at a different level. Every hugepage aware path that needs to
walk the pagetables must therefore look up the hugepage size from the
slice info first, and work out the correct way to walk the pagetables
accordingly. Future hardware is likely to add more possible hugepage
sizes, more layout options and more mess.
This patch, therefore reworks the handling of hugepage pagetables to
reduce this complexity. In the new scheme, instead of having to
consult the slice mask, pagetable walking code can check a flag in the
PGD/PUD/PMD entries to see where to branch off to hugepage pagetables,
and the entry also contains the information (essentially hugepage
shift) necessary to then interpret that table without recourse to the
slice mask. This scheme can be extended neatly to handle multiple
levels of self-describing "special" hugepage pagetables, although for
now we assume only one level exists.
This approach means that only the pagetable allocation path needs to
know how the pagetables should be set out. All other (hugepage)
pagetable walking paths can just interpret the structure as they go.
There already was a flag bit in PGD/PUD/PMD entries for hugepage
directory pointers, but it was only used for debug. We alter that
flag bit to instead be a 0 in the MSB to indicate a hugepage pagetable
pointer (normally it would be 1 since the pointer lies in the linear
mapping). This means that asm pagetable walking can test for (and
punt on) hugepage pointers with the same test that checks for
unpopulated page directory entries (beq becomes bge), since hugepage
pointers will always be positive, and normal pointers always negative.
While we're at it, we get rid of the confusing (and grep defeating)
#defining of hugepte_shift to be the same thing as mmu_huge_psizes.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2009-10-27 03:24:31 +08:00
|
|
|
#endif /* CONFIG_HUGETLB_PAGE */
|
|
|
|
|
2005-11-11 11:25:24 +08:00
|
|
|
struct page;
|
|
|
|
extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
|
|
|
|
extern void copy_user_page(void *to, void *from, unsigned long vaddr,
|
|
|
|
struct page *p);
|
|
|
|
extern int page_is_ram(unsigned long pfn);
|
2011-08-30 17:19:17 +08:00
|
|
|
extern int devmem_is_allowed(unsigned long pfn);
|
2005-11-11 11:25:24 +08:00
|
|
|
|
2009-04-15 13:55:32 +08:00
|
|
|
#ifdef CONFIG_PPC_SMLPAR
|
|
|
|
void arch_free_page(struct page *page, int order);
|
|
|
|
#define HAVE_ARCH_FREE_PAGE
|
|
|
|
#endif
|
|
|
|
|
2006-05-30 11:51:37 +08:00
|
|
|
struct vm_area_struct;
|
|
|
|
|
2013-04-28 17:37:33 +08:00
|
|
|
#ifdef CONFIG_PPC_64K_PAGES
|
|
|
|
typedef pte_t *pgtable_t;
|
|
|
|
#else
|
2008-02-08 20:22:04 +08:00
|
|
|
typedef struct page *pgtable_t;
|
2013-04-28 17:37:33 +08:00
|
|
|
#endif
|
2008-02-08 20:22:04 +08:00
|
|
|
|
2006-03-27 17:15:35 +08:00
|
|
|
#include <asm-generic/memory_model.h>
|
2005-11-11 11:25:24 +08:00
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
|
|
|
|
#endif /* _ASM_POWERPC_PAGE_H */
|