2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 1995 Linus Torvalds
|
|
|
|
*
|
|
|
|
* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
|
|
|
|
*
|
|
|
|
* Memory region support
|
|
|
|
* David Parsons <orc@pell.chi.il.us>, July-August 1999
|
|
|
|
*
|
|
|
|
* Added E820 sanitization routine (removes overlapping memory regions);
|
|
|
|
* Brian Moyle <bmoyle@mvista.com>, February 2001
|
|
|
|
*
|
|
|
|
* Moved CPU detection code to cpu/${cpu}.c
|
|
|
|
* Patrick Mochel <mochel@osdl.org>, March 2002
|
|
|
|
*
|
|
|
|
* Provisions for empty E820 memory regions (reported by certain BIOSes).
|
|
|
|
* Alex Achenbach <xela@slit.de>, December 2002.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This file handles the architecture-dependent parts of initialization
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/mm.h>
|
2005-06-23 15:07:57 +08:00
|
|
|
#include <linux/mmzone.h>
|
2006-07-10 19:44:13 +08:00
|
|
|
#include <linux/screen_info.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/ioport.h>
|
|
|
|
#include <linux/acpi.h>
|
2009-08-15 03:23:29 +08:00
|
|
|
#include <linux/sfi.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/apm_bios.h>
|
|
|
|
#include <linux/initrd.h>
|
|
|
|
#include <linux/bootmem.h>
|
2010-08-26 04:39:17 +08:00
|
|
|
#include <linux/memblock.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/seq_file.h>
|
|
|
|
#include <linux/console.h>
|
|
|
|
#include <linux/root_dev.h>
|
|
|
|
#include <linux/highmem.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/efi.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/edd.h>
|
2008-04-10 10:50:41 +08:00
|
|
|
#include <linux/iscsi_ibft.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/nodemask.h>
|
2005-06-26 05:58:01 +08:00
|
|
|
#include <linux/kexec.h>
|
2006-01-12 05:43:33 +08:00
|
|
|
#include <linux/dmi.h>
|
2006-03-27 17:16:04 +08:00
|
|
|
#include <linux/pfn.h>
|
2008-01-30 20:30:16 +08:00
|
|
|
#include <linux/pci.h>
|
2008-06-26 08:51:29 +08:00
|
|
|
#include <asm/pci-direct.h>
|
x86: early boot debugging via FireWire (ohci1394_dma=early)
This patch adds a new configuration option, which adds support for a new
early_param which gets checked in arch/x86/kernel/setup_{32,64}.c:setup_arch()
to decide whether OHCI-1394 FireWire controllers should be initialized and
enabled for physical DMA access to allow remote debugging of early problems
like issues ACPI or other subsystems which are executed very early.
If the config option is not enabled, no code is changed, and if the boot
parameter is not given, no new code is executed, and independent of that,
all new code is freed after boot, so the config option can be even enabled
in standard, non-debug kernels.
With specialized tools, it is then possible to get debugging information
from machines which have no serial ports (notebooks) such as the printk
buffer contents, or any data which can be referenced from global pointers,
if it is stored below the 4GB limit and even memory dumps of the physical
RAM region below the 4GB limit can be taken without any cooperation from the
CPU of the host, so the machine can be crashed early, it does not matter.
In the extreme, even kernel debuggers can be accessed in this way. I wrote
a small kgdb module and an accompanying gdb stub for FireWire which allows
gdb to talk to kgdb using remote memory reads and writes over FireWire.
A version of the gdb stub for FireWire is able to read all global data
from a system which is running a normal kernel without any kernel debugger,
without any interruption or support of the system's CPU. That way, e.g. the
task struct and so on can be read and even manipulated when the physical DMA
access is granted.
A HOWTO is included in this patch, in Documentation/debugging-via-ohci1394.txt
and I've put a copy online at
ftp://ftp.suse.de/private/bk/firewire/docs/debugging-via-ohci1394.txt
It also has links to all the tools which are available to make use of it
another copy of it is online at:
ftp://ftp.suse.de/private/bk/firewire/kernel/ohci1394_dma_early-v2.diff
Signed-Off-By: Bernhard Kaindl <bk@suse.de>
Tested-By: Thomas Renninger <trenn@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 20:34:11 +08:00
|
|
|
#include <linux/init_ohci1394_dma.h>
|
2008-02-16 03:52:48 +08:00
|
|
|
#include <linux/kvm_para.h>
|
2011-12-29 20:09:51 +08:00
|
|
|
#include <linux/dma-contiguous.h>
|
2005-06-26 05:58:01 +08:00
|
|
|
|
2008-06-26 08:51:29 +08:00
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/stddef.h>
|
|
|
|
#include <linux/unistd.h>
|
|
|
|
#include <linux/ptrace.h>
|
|
|
|
#include <linux/user.h>
|
|
|
|
#include <linux/delay.h>
|
|
|
|
|
|
|
|
#include <linux/kallsyms.h>
|
|
|
|
#include <linux/cpufreq.h>
|
|
|
|
#include <linux/dma-mapping.h>
|
|
|
|
#include <linux/ctype.h>
|
|
|
|
#include <linux/uaccess.h>
|
|
|
|
|
|
|
|
#include <linux/percpu.h>
|
|
|
|
#include <linux/crash_dump.h>
|
2009-09-02 09:25:07 +08:00
|
|
|
#include <linux/tboot.h>
|
jiffies: Remove compile time assumptions about CLOCK_TICK_RATE
CLOCK_TICK_RATE is used to accurately calculate exactly how
long a tick will be at a given HZ.
This is useful, because while we'd expect NSEC_PER_SEC/HZ,
the underlying hardware will have some granularity limit,
so we won't be able to have exactly HZ ticks per second.
This slight error can cause timekeeping quality problems
when using the jiffies or other jiffies driven clocksources.
Thus we currently use compile time CLOCK_TICK_RATE value to
generate SHIFTED_HZ and NSEC_PER_JIFFIES, which we then use
to adjust the jiffies clocksource to correct this error.
Unfortunately though, since CLOCK_TICK_RATE is a compile
time value, and the jiffies clocksource is registered very
early during boot, there are a number of cases where there
are different possible hardware timers that have different
tick rates. This causes problems in cases like ARM where
there are numerous different types of hardware, each having
their own compile-time CLOCK_TICK_RATE, making it hard to
accurately support different hardware with a single kernel.
For the most part, this doesn't matter all that much, as not
too many systems actually utilize the jiffies or jiffies driven
clocksource. Usually there are other highres clocksources
whose granularity error is negligible.
Even so, we have some complicated calculations that we do
everywhere to handle these edge cases.
This patch removes the compile time SHIFTED_HZ value, and
introduces a register_refined_jiffies() function. This results
in the default jiffies clock as being assumed a perfect HZ
freq, and allows architectures that care about jiffies accuracy
to call register_refined_jiffies() with the tick rate, specified
dynamically at boot.
This allows us, where necessary, to not have a compile time
CLOCK_TICK_RATE constant, simplifies the jiffies code, and
still provides a way to have an accurate jiffies clock.
NOTE: Since this patch does not add register_refined_jiffies()
calls for every arch, it may cause time quality regressions
in some cases. It's likely these will not be noticeable, but
if they are an issue, adding the following to the end of
setup_arch() should resolve the regression:
register_refined_jiffies(CLOCK_TICK_RATE)
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
2012-09-05 00:42:27 +08:00
|
|
|
#include <linux/jiffies.h>
|
2008-06-26 08:51:29 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <video/edid.h>
|
2005-06-26 05:58:01 +08:00
|
|
|
|
2008-01-30 20:33:32 +08:00
|
|
|
#include <asm/mtrr.h>
|
2005-06-26 05:57:41 +08:00
|
|
|
#include <asm/apic.h>
|
2012-05-09 02:22:26 +08:00
|
|
|
#include <asm/realmode.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/e820.h>
|
|
|
|
#include <asm/mpspec.h>
|
|
|
|
#include <asm/setup.h>
|
2008-06-26 08:54:23 +08:00
|
|
|
#include <asm/efi.h>
|
2009-02-23 07:34:39 +08:00
|
|
|
#include <asm/timer.h>
|
|
|
|
#include <asm/i8259.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/sections.h>
|
2008-06-18 06:41:45 +08:00
|
|
|
#include <asm/dmi.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/io_apic.h>
|
|
|
|
#include <asm/ist.h>
|
2009-01-29 02:34:09 +08:00
|
|
|
#include <asm/setup_arch.h>
|
2008-03-18 03:08:17 +08:00
|
|
|
#include <asm/bios_ebda.h>
|
2007-10-22 07:42:01 +08:00
|
|
|
#include <asm/cacheflush.h>
|
2008-03-05 02:57:42 +08:00
|
|
|
#include <asm/processor.h>
|
2008-06-17 07:11:08 +08:00
|
|
|
#include <asm/bugs.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-06-26 08:51:29 +08:00
|
|
|
#include <asm/vsyscall.h>
|
2009-01-07 20:41:35 +08:00
|
|
|
#include <asm/cpu.h>
|
2008-06-26 08:51:29 +08:00
|
|
|
#include <asm/desc.h>
|
|
|
|
#include <asm/dma.h>
|
2008-07-11 09:23:42 +08:00
|
|
|
#include <asm/iommu.h>
|
2008-11-28 01:39:15 +08:00
|
|
|
#include <asm/gart.h>
|
2008-06-26 08:51:29 +08:00
|
|
|
#include <asm/mmu_context.h>
|
|
|
|
#include <asm/proto.h>
|
|
|
|
|
|
|
|
#include <asm/paravirt.h>
|
2008-10-28 01:41:46 +08:00
|
|
|
#include <asm/hypervisor.h>
|
2010-06-19 05:46:53 +08:00
|
|
|
#include <asm/olpc_ofw.h>
|
2008-06-26 08:51:29 +08:00
|
|
|
|
|
|
|
#include <asm/percpu.h>
|
|
|
|
#include <asm/topology.h>
|
|
|
|
#include <asm/apicdef.h>
|
2010-09-18 00:03:43 +08:00
|
|
|
#include <asm/amd_nb.h>
|
2008-06-26 08:54:23 +08:00
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
#include <asm/numa_64.h>
|
|
|
|
#endif
|
2009-11-10 09:38:24 +08:00
|
|
|
#include <asm/mce.h>
|
2010-09-17 23:08:51 +08:00
|
|
|
#include <asm/alternative.h>
|
2011-02-23 04:07:37 +08:00
|
|
|
#include <asm/prom.h>
|
2008-06-26 08:51:29 +08:00
|
|
|
|
2009-04-28 21:00:49 +08:00
|
|
|
/*
|
|
|
|
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
|
|
|
|
* The direct mapping extends to max_pfn_mapped, so that we can directly access
|
|
|
|
* apertures, ACPI and other tables without having to play with fixmaps.
|
|
|
|
*/
|
|
|
|
unsigned long max_low_pfn_mapped;
|
|
|
|
unsigned long max_pfn_mapped;
|
|
|
|
|
2010-02-10 07:38:45 +08:00
|
|
|
#ifdef CONFIG_DMI
|
2009-03-13 07:09:49 +08:00
|
|
|
RESERVE_BRK(dmi_alloc, 65536);
|
2010-02-10 07:38:45 +08:00
|
|
|
#endif
|
2009-03-13 07:09:49 +08:00
|
|
|
|
2009-01-28 00:13:05 +08:00
|
|
|
|
x86: add brk allocation for very, very early allocations
Impact: new interface
Add a brk()-like allocator which effectively extends the bss in order
to allow very early code to do dynamic allocations. This is better than
using statically allocated arrays for data in subsystems which may never
get used.
The space for brk allocations is in the bss ELF segment, so that the
space is mapped properly by the code which maps the kernel, and so
that bootloaders keep the space free rather than putting a ramdisk or
something into it.
The bss itself, delimited by __bss_stop, ends before the brk area
(__brk_base to __brk_limit). The kernel text, data and bss is reserved
up to __bss_stop.
Any brk-allocated data is reserved separately just before the kernel
pagetable is built, as that code allocates from unreserved spaces
in the e820 map, potentially allocating from any unused brk memory.
Ultimately any unused memory in the brk area is used in the general
kernel memory pool.
Initially the brk space is set to 1MB, which is probably much larger
than any user needs (the largest current user is i386 head_32.S's code
to build the pagetables to map the kernel, which can get fairly large
with a big kernel image and no PSE support). So long as the system
has sufficient memory for the bootloader to reserve the kernel+1MB brk,
there are no bad effects resulting from an over-large brk.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-02-27 09:35:44 +08:00
|
|
|
static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
|
|
|
|
unsigned long _brk_end = (unsigned long)__brk_base;
|
|
|
|
|
2009-01-28 00:13:05 +08:00
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
/*
 * Out-of-line entry point that forwards @mps_cpu to
 * __default_cpu_present_to_apicid() and hands back its result unchanged.
 */
int default_cpu_present_to_apicid(int mps_cpu)
{
	int apicid = __default_cpu_present_to_apicid(mps_cpu);

	return apicid;
}
|
|
|
|
|
2009-08-31 21:18:40 +08:00
|
|
|
/*
 * Out-of-line entry point that forwards @phys_apicid to
 * __default_check_phys_apicid_present() and returns its result unchanged.
 */
int default_check_phys_apicid_present(int phys_apicid)
{
	int present = __default_check_phys_apicid_present(phys_apicid);

	return present;
}
|
|
|
|
#endif
|
|
|
|
|
2008-06-26 08:55:20 +08:00
|
|
|
#ifndef CONFIG_DEBUG_BOOT_PARAMS
|
|
|
|
struct boot_params __initdata boot_params;
|
|
|
|
#else
|
|
|
|
struct boot_params boot_params;
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Machine setup..
|
|
|
|
*/
|
2008-01-30 20:30:32 +08:00
|
|
|
static struct resource data_resource = {
|
|
|
|
.name = "Kernel data",
|
|
|
|
.start = 0,
|
|
|
|
.end = 0,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct resource code_resource = {
|
|
|
|
.name = "Kernel code",
|
|
|
|
.start = 0,
|
|
|
|
.end = 0,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct resource bss_resource = {
|
|
|
|
.name = "Kernel bss",
|
|
|
|
.start = 0,
|
|
|
|
.end = 0,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
|
|
};
|
|
|
|
|
2008-06-26 08:50:06 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_X86_32
|
2005-04-17 06:20:36 +08:00
|
|
|
/* cpu data as detected by the assembly code in head.S */
|
2008-06-26 08:50:06 +08:00
|
|
|
struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
|
2005-04-17 06:20:36 +08:00
|
|
|
/* common cpu data for all cpus */
|
2008-06-26 08:50:06 +08:00
|
|
|
struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1};
|
2005-06-23 15:08:33 +08:00
|
|
|
EXPORT_SYMBOL(boot_cpu_data);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-03-28 04:55:04 +08:00
|
|
|
unsigned int def_to_bigsmp;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* for MCA, but anyone else can use it if they want */
|
|
|
|
unsigned int machine_id;
|
|
|
|
unsigned int machine_submodel_id;
|
|
|
|
unsigned int BIOS_revision;
|
|
|
|
|
2008-06-26 08:50:06 +08:00
|
|
|
struct apm_info apm_info;
|
|
|
|
EXPORT_SYMBOL(apm_info);
|
|
|
|
|
|
|
|
#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
|
|
|
|
defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
|
|
|
|
struct ist_info ist_info;
|
|
|
|
EXPORT_SYMBOL(ist_info);
|
|
|
|
#else
|
|
|
|
struct ist_info ist_info;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#else
|
2009-03-05 08:16:51 +08:00
|
|
|
struct cpuinfo_x86 boot_cpu_data __read_mostly = {
|
|
|
|
.x86_phys_bits = MAX_PHYSMEM_BITS,
|
|
|
|
};
|
2008-06-26 08:50:06 +08:00
|
|
|
EXPORT_SYMBOL(boot_cpu_data);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
|
|
|
|
unsigned long mmu_cr4_features;
|
|
|
|
#else
|
|
|
|
unsigned long mmu_cr4_features = X86_CR4_PAE;
|
|
|
|
#endif
|
|
|
|
|
2009-05-08 07:54:11 +08:00
|
|
|
/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
|
|
|
|
int bootloader_type, bootloader_version;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Setup options
|
|
|
|
*/
|
|
|
|
struct screen_info screen_info;
|
2005-06-23 15:08:33 +08:00
|
|
|
EXPORT_SYMBOL(screen_info);
|
2005-04-17 06:20:36 +08:00
|
|
|
struct edid_info edid_info;
|
2005-09-10 04:04:34 +08:00
|
|
|
EXPORT_SYMBOL_GPL(edid_info);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
extern int root_mountflags;
|
|
|
|
|
2008-04-11 05:28:10 +08:00
|
|
|
unsigned long saved_video_mode;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-01-30 20:32:51 +08:00
|
|
|
#define RAMDISK_IMAGE_START_MASK 0x07FF
|
2005-04-17 06:20:36 +08:00
|
|
|
#define RAMDISK_PROMPT_FLAG 0x8000
|
2008-01-30 20:32:51 +08:00
|
|
|
#define RAMDISK_LOAD_FLAG 0x4000
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-02-12 16:54:11 +08:00
|
|
|
static char __initdata command_line[COMMAND_LINE_SIZE];
|
2008-08-13 03:52:36 +08:00
|
|
|
#ifdef CONFIG_CMDLINE_BOOL
|
|
|
|
static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
|
|
|
|
struct edd edd;
|
|
|
|
#ifdef CONFIG_EDD_MODULE
|
|
|
|
EXPORT_SYMBOL(edd);
|
|
|
|
#endif
|
|
|
|
/**
|
|
|
|
* copy_edd() - Copy the BIOS EDD information
|
|
|
|
* from boot_params into a safe place.
|
|
|
|
*
|
|
|
|
*/
|
2009-11-30 18:33:51 +08:00
|
|
|
static inline void __init copy_edd(void)
{
	/* Entry counts first ... */
	edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
	edd.edd_info_nr = boot_params.eddbuf_entries;

	/* ... then the tables; each copy is sized by its destination array. */
	memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
	       sizeof(edd.mbr_signature));
	memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
}
|
|
|
|
#else
|
2009-11-30 18:33:51 +08:00
|
|
|
/* Neither CONFIG_EDD nor CONFIG_EDD_MODULE is set: nothing to copy. */
static inline void __init copy_edd(void) { }
|
|
|
|
#endif
|
|
|
|
|
2009-03-15 08:19:51 +08:00
|
|
|
/*
 * extend_brk() - take @size zero-filled bytes from the early brk area.
 * @size:  number of bytes to allocate
 * @align: required alignment of the returned pointer
 *
 * Rounds _brk_end up to @align, checks that the request still fits below
 * __brk_limit, advances _brk_end past the new chunk and clears it.
 *
 * BUG()s if the brk area has been locked down (_brk_start == 0), if
 * @align has more than one bit set, or if the request would run past
 * __brk_limit.  Note that the power-of-two test does not reject
 * @align == 0.
 *
 * Returns a pointer to the zeroed chunk.
 */
void * __init extend_brk(size_t size, size_t align)
{
	const size_t align_mask = align - 1;
	void *chunk;

	BUG_ON(!_brk_start);
	BUG_ON(align & align_mask);

	/* Round the current end of the brk area up to the alignment. */
	_brk_end += align_mask;
	_brk_end &= ~align_mask;
	BUG_ON((char *)(_brk_end + size) > __brk_limit);

	chunk = (void *)_brk_end;
	_brk_end += size;

	memset(chunk, 0, size);

	return chunk;
}
|
|
|
|
|
2009-06-22 22:39:41 +08:00
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
/*
 * Keep direct_gbpages set only when it was requested and the CPU reports
 * GB page support; otherwise force it back to 0.
 */
static void __init init_gbpages(void)
{
	if (!direct_gbpages || !cpu_has_gbpages) {
		direct_gbpages = 0;
		return;
	}

	printk(KERN_INFO "Using GB pages for direct mapping\n");
}
|
|
|
|
#else
|
|
|
|
/* CONFIG_X86_64 is not set: empty stub. */
static inline void init_gbpages(void) { }
|
2011-02-18 19:30:30 +08:00
|
|
|
/* CONFIG_X86_64 is not set: empty stub. */
static void __init cleanup_highmap(void) { }
|
2009-06-22 22:39:41 +08:00
|
|
|
#endif
|
|
|
|
|
2009-03-15 08:19:51 +08:00
|
|
|
/*
 * Reserve whatever part of the brk area has been handed out so far
 * (the range _brk_start.._brk_end) in memblock, then close the brk
 * allocator.
 */
static void __init reserve_brk(void)
{
	if (_brk_start < _brk_end) {
		memblock_reserve(__pa(_brk_start),
				 __pa(_brk_end) - __pa(_brk_start));
	}

	/*
	 * A zero _brk_start marks the brk area as locked down;
	 * extend_brk() BUG()s on any allocation attempted after this.
	 */
	_brk_start = 0;
}
|
|
|
|
|
2008-01-30 20:32:51 +08:00
|
|
|
#ifdef CONFIG_BLK_DEV_INITRD
|
|
|
|
|
2008-06-26 08:49:26 +08:00
|
|
|
#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
|
|
|
|
/*
 * Move the initrd described by boot_params.hdr into a freshly reserved,
 * page-aligned region below the end of mapped lowmem, and point
 * initrd_start/initrd_end at the copy.  Panics when no suitable region
 * can be found.
 */
static void __init relocate_initrd(void)
{
	/* Assume only the end of the ramdisk is not page aligned */
	const u64 ramdisk_image = boot_params.hdr.ramdisk_image;
	const u64 ramdisk_size = boot_params.hdr.ramdisk_size;
	u64 area_size = PAGE_ALIGN(ramdisk_size);
	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
	u64 ramdisk_here;
	u64 src = ramdisk_image;	/* physical address still to be copied */
	u64 left = ramdisk_size;	/* bytes still to be copied */
	unsigned long slop, clen, mapaddr;
	char *p, *dst;

	/* We need to move the initrd down into lowmem */
	ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
					      PAGE_SIZE);

	if (!ramdisk_here)
		panic("Cannot find place for new RAMDISK of size %lld\n",
		      ramdisk_size);

	/*
	 * Note: this includes all the lowmem currently occupied by
	 * the initrd, we rely on that fact to keep the data intact.
	 */
	memblock_reserve(ramdisk_here, area_size);
	initrd_start = ramdisk_here + PAGE_OFFSET;
	initrd_end = initrd_start + ramdisk_size;
	printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n",
	       ramdisk_here, ramdisk_here + ramdisk_size - 1);

	dst = (char *)initrd_start;

	/* Copy any lowmem portion of the initrd straight from its mapping */
	if (src < end_of_lowmem) {
		clen = end_of_lowmem - src;
		p = (char *)__va(src);
		memcpy(dst, p, clen);
		dst += clen;
		src += clen;
		left -= clen;
	}

	/*
	 * Copy the highmem portion of the initrd through temporary early
	 * mappings, at most MAX_MAP_CHUNK bytes (including the leading
	 * in-page offset) per mapping.
	 */
	while (left) {
		mapaddr = src & PAGE_MASK;
		slop = src & ~PAGE_MASK;
		clen = left;
		if (clen > MAX_MAP_CHUNK - slop)
			clen = MAX_MAP_CHUNK - slop;

		p = early_memremap(mapaddr, clen + slop);
		memcpy(dst, p + slop, clen);
		early_iounmap(p, clen + slop);

		dst += clen;
		src += clen;
		left -= clen;
	}

	/* high pages is not converted by early_res_to_bootmem */
	printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n",
	       ramdisk_image, ramdisk_image + ramdisk_size - 1,
	       ramdisk_here, ramdisk_here + ramdisk_size - 1);
}
|
2008-06-14 11:07:03 +08:00
|
|
|
|
2008-06-26 08:49:26 +08:00
|
|
|
/*
 * Set up initrd_start/initrd_end from the ramdisk described in
 * boot_params.hdr.  A ramdisk that already ends within mapped lowmem is
 * used in place; otherwise it is relocated into lowmem and its original
 * range is released back to memblock.  Panics when the ramdisk is at
 * least half the size of mapped lowmem.
 */
static void __init reserve_initrd(void)
{
	/* Assume only the end of the ramdisk is not page aligned */
	u64 image = boot_params.hdr.ramdisk_image;
	u64 size = boot_params.hdr.ramdisk_size;
	u64 image_end = PAGE_ALIGN(image + size);
	u64 lowmem_end = max_low_pfn_mapped << PAGE_SHIFT;
	u64 lowmem_half = lowmem_end >> 1;

	/* No initrd provided by bootloader */
	if (!(boot_params.hdr.type_of_loader && image && size))
		return;

	initrd_start = 0;

	if (size >= lowmem_half)
		panic("initrd too large to handle, disabling initrd (%lld needed, %lld available)\n",
		      size, lowmem_half);

	printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", image,
	       image_end - 1);

	if (image_end > lowmem_end) {
		/* Part of it lies above lowmem: copy down, free the old range */
		relocate_initrd();

		memblock_free(image, image_end - image);
		return;
	}

	/*
	 * All in lowmem, easy case: don't need to reserve again,
	 * already reserved early in i386_start_kernel
	 */
	initrd_start = image + PAGE_OFFSET;
	initrd_end = initrd_start + size;
}
|
2008-06-22 17:46:58 +08:00
|
|
|
#else
|
2008-06-26 08:49:26 +08:00
|
|
|
/* CONFIG_BLK_DEV_INITRD is not set: nothing to reserve. */
static void __init reserve_initrd(void) { }
|
2008-01-30 20:32:51 +08:00
|
|
|
#endif /* CONFIG_BLK_DEV_INITRD */
|
|
|
|
|
2008-06-26 09:00:22 +08:00
|
|
|
/*
 * Walk the setup_data list handed over in boot_params and dispatch every
 * node to the handler matching its type.
 *
 * Each node is first mapped with a size that is at least one header and
 * reaches the end of the page it starts in; if the node's payload extends
 * past that window, the mapping is torn down and redone with the full
 * node size before the node is handed to its handler.
 */
static void __init parse_setup_data(void)
{
	u64 cur_pa, next_pa;

	/* Nothing to do when the boot header version is older than 0x0209. */
	if (boot_params.hdr.version < 0x0209)
		return;

	for (cur_pa = boot_params.hdr.setup_data; cur_pa; cur_pa = next_pa) {
		struct setup_data *node;
		u32 mapped, needed;

		mapped = max(PAGE_SIZE - (cur_pa & ~PAGE_MASK),
			     (u64)sizeof(struct setup_data));
		node = early_memremap(cur_pa, mapped);

		/* Header plus payload; remap if the first window was too small. */
		needed = node->len + sizeof(struct setup_data);
		if (needed > mapped) {
			early_iounmap(node, mapped);
			node = early_memremap(cur_pa, needed);
			mapped = needed;
		}

		/* Unknown node types are silently ignored. */
		if (node->type == SETUP_E820_EXT)
			parse_e820_ext(node);
		else if (node->type == SETUP_DTB)
			add_dtb(cur_pa);

		/* Fetch the link before the mapping goes away. */
		next_pa = node->next;
		early_iounmap(node, mapped);
	}
}
|
|
|
|
|
2008-07-04 02:37:13 +08:00
|
|
|
/*
 * Re-type the memory occupied by every setup_data node from E820_RAM to
 * E820_RESERVED_KERN in the e820 map.  If at least one node was present,
 * the map is sanitized, copied into e820_saved and printed.
 */
static void __init e820_reserve_setup_data(void)
{
	u64 cur_pa, next_pa;

	if (boot_params.hdr.version < 0x0209)
		return;

	/* An empty list leaves the e820 map (and e820_saved) untouched. */
	if (!boot_params.hdr.setup_data)
		return;

	for (cur_pa = boot_params.hdr.setup_data; cur_pa; cur_pa = next_pa) {
		struct setup_data *node;

		/* Only the header is mapped; it carries both len and next. */
		node = early_memremap(cur_pa, sizeof(*node));
		e820_update_range(cur_pa, sizeof(*node) + node->len,
				  E820_RAM, E820_RESERVED_KERN);
		next_pa = node->next;
		early_iounmap(node, sizeof(*node));
	}

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
	memcpy(&e820_saved, &e820, sizeof(struct e820map));
	printk(KERN_INFO "extended physical RAM map:\n");
	e820_print_map("reserve setup_data");
}
|
|
|
|
|
2010-08-26 04:39:17 +08:00
|
|
|
/*
 * Reserve, in memblock, the physical range (header plus payload) of every
 * node on the setup_data list found in boot_params.
 */
static void __init memblock_x86_reserve_range_setup_data(void)
{
	u64 cur_pa, next_pa;

	if (boot_params.hdr.version < 0x0209)
		return;

	for (cur_pa = boot_params.hdr.setup_data; cur_pa; cur_pa = next_pa) {
		struct setup_data *node;

		/* Map just the header to learn the node length and the link. */
		node = early_memremap(cur_pa, sizeof(*node));
		memblock_reserve(cur_pa, sizeof(*node) + node->len);
		next_pa = node->next;
		early_iounmap(node, sizeof(*node));
	}
}
|
|
|
|
|
2008-06-26 08:57:13 +08:00
|
|
|
/*
|
|
|
|
* --------- Crashkernel reservation ------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef CONFIG_KEXEC
|
2008-06-27 03:54:08 +08:00
|
|
|
|
2010-12-17 11:20:41 +08:00
|
|
|
/*
|
|
|
|
* Keep the crash kernel below this limit. On 32 bits earlier kernels
|
|
|
|
* would limit the kernel to the low 512 MiB due to mapping restrictions.
|
|
|
|
* On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
|
|
|
|
* limit once kexec-tools are fixed.
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_X86_32
|
|
|
|
# define CRASH_KERNEL_ADDR_MAX (512 << 20)
|
|
|
|
#else
|
|
|
|
# define CRASH_KERNEL_ADDR_MAX (896 << 20)
|
|
|
|
#endif
|
|
|
|
|
2008-06-26 09:00:22 +08:00
|
|
|
/*
 * Reserve the crash kernel region described on the boot command line.
 *
 * parse_crashkernel() yields a size and a base.  A non-zero base must be
 * available exactly as requested; a zero base lets memblock pick a
 * 16M-aligned area below CRASH_KERNEL_ADDR_MAX.  On success the range is
 * reserved in memblock and published through crashk_res.
 */
static void __init reserve_crashkernel(void)
{
	unsigned long long ram_bytes = memblock_phys_mem_size();
	unsigned long long size, base;

	if (parse_crashkernel(boot_command_line, ram_bytes, &size, &base))
		return;
	if (!size)
		return;

	if (base) {
		/* Fixed address requested: the search must land exactly on it. */
		unsigned long long got;

		got = memblock_find_in_range(base, base + size, size, 1<<20);
		if (got != base) {
			pr_info("crashkernel reservation failed - memory is in use.\n");
			return;
		}
	} else {
		/* 0 means: find the address automatically */
		const unsigned long long align = 16<<20;	/* 16M */

		/* kexec wants the bzImage to sit below CRASH_KERNEL_ADDR_MAX */
		base = memblock_find_in_range(align, CRASH_KERNEL_ADDR_MAX,
					      size, align);
		if (!base) {
			pr_info("crashkernel reservation failed - No suitable area found.\n");
			return;
		}
	}

	memblock_reserve(base, size);

	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
			"for crashkernel (System RAM: %ldMB)\n",
			(unsigned long)(size >> 20),
			(unsigned long)(base >> 20),
			(unsigned long)(ram_bytes >> 20));

	/* Resource end is inclusive. */
	crashk_res.start = base;
	crashk_res.end = base + size - 1;
	insert_resource(&iomem_resource, &crashk_res);
}
|
|
|
|
#else
|
2008-06-26 09:00:22 +08:00
|
|
|
/* Empty stand-in used when CONFIG_KEXEC is not set. */
static void __init reserve_crashkernel(void)
{
}
|
|
|
|
#endif
|
|
|
|
|
2008-06-26 08:58:02 +08:00
|
|
|
static struct resource standard_io_resources[] = {
|
|
|
|
{ .name = "dma1", .start = 0x00, .end = 0x1f,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "pic1", .start = 0x20, .end = 0x21,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "timer0", .start = 0x40, .end = 0x43,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "timer1", .start = 0x50, .end = 0x53,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "keyboard", .start = 0x60, .end = 0x60,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "keyboard", .start = 0x64, .end = 0x64,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "dma page reg", .start = 0x80, .end = 0x8f,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "pic2", .start = 0xa0, .end = 0xa1,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "dma2", .start = 0xc0, .end = 0xdf,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "fpu", .start = 0xf0, .end = 0xff,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO }
|
|
|
|
};
|
|
|
|
|
2009-08-19 20:55:50 +08:00
|
|
|
/*
 * Request every range listed in standard_io_resources[] from
 * ioport_resource.  The result of each request is not checked.
 */
void __init reserve_standard_io_resources(void)
{
	struct resource *res = standard_io_resources;
	struct resource *const last = res + ARRAY_SIZE(standard_io_resources);

	/* request I/O space for devices used on all i[345]86 PCs */
	for (; res < last; res++)
		request_resource(&ioport_resource, res);
}
|
|
|
|
|
2010-04-02 05:32:43 +08:00
|
|
|
/*
 * Ask find_ibft_region() for an iBFT region and, when it reports a
 * non-zero size, reserve that range in memblock.
 */
static __init void reserve_ibft_region(void)
{
	unsigned long len = 0;
	unsigned long base = find_ibft_region(&len);

	/* A size of zero means there is nothing to reserve. */
	if (!len)
		return;

	memblock_reserve(base, len);
}
|
|
|
|
|
2010-08-26 07:38:20 +08:00
|
|
|
/*
 * Amount of low memory, in bytes, that trim_bios_range() marks as reserved.
 * Starts out as CONFIG_X86_RESERVE_LOW scaled by 1024 and can be replaced
 * by the "reservelow" early parameter.
 */
static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
|
2008-09-16 15:29:09 +08:00
|
|
|
|
2010-01-22 11:21:04 +08:00
|
|
|
/*
 * Take BIOS-owned low memory out of the usable RAM described by the e820
 * map, then re-sanitize the map.
 */
static void __init trim_bios_range(void)
{
	/* Whole pages only: round the configured amount up to a page. */
	const unsigned low_bytes = ALIGN(reserve_low, PAGE_SIZE);

	/*
	 * The first 4Kb of memory is a special case: it belongs to the
	 * BIOS rather than being kernel ram, yet the E820 table generally
	 * does not say so.
	 *
	 * More than that is typically reserved (64KiB by default) because
	 * some BIOSes are known to corrupt low memory.  See the Kconfig
	 * help text for X86_RESERVE_LOW.
	 */
	e820_update_range(0, low_bytes, E820_RAM, E820_RESERVED);

	/*
	 * Another special case: some BIOSen report the PC BIOS area
	 * (640->1Mb) as ram even though it is not.  Remove it.
	 */
	e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
|
|
|
|
|
2010-08-26 07:38:20 +08:00
|
|
|
static int __init parse_reservelow(char *p)
|
|
|
|
{
|
|
|
|
unsigned long long size;
|
|
|
|
|
|
|
|
if (!p)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
size = memparse(p, &p);
|
|
|
|
|
|
|
|
if (size < 4096)
|
|
|
|
size = 4096;
|
|
|
|
|
|
|
|
if (size > 640*1024)
|
|
|
|
size = 640*1024;
|
|
|
|
|
|
|
|
reserve_low = size;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
early_param("reservelow", parse_reservelow);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Determine if we were loaded by an EFI loader. If so, then we have also been
|
|
|
|
* passed the efi memmap, systab, etc., so we should use these data structures
|
|
|
|
* for initialization. Note, the efi init code path is determined by the
|
|
|
|
* global efi_enabled. This allows the same kernel image to be used on existing
|
|
|
|
* systems (with a traditional BIOS) as well as on EFI systems.
|
|
|
|
*/
|
2008-06-26 08:52:35 +08:00
|
|
|
/*
|
|
|
|
* setup_arch - architecture-specific boot-time initializations
|
|
|
|
*
|
|
|
|
* Note: On x86_64, fixmaps are ready for use even before this is called.
|
|
|
|
*/
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
void __init setup_arch(char **cmdline_p)
|
|
|
|
{
|
2008-06-26 08:52:35 +08:00
|
|
|
#ifdef CONFIG_X86_32
|
2005-04-17 06:20:36 +08:00
|
|
|
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
|
2008-07-10 23:30:40 +08:00
|
|
|
visws_early_detect();
|
2010-08-28 21:58:33 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* copy kernel address range established so far and switch
|
|
|
|
* to the proper swapper page table
|
|
|
|
*/
|
|
|
|
clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
|
|
|
initial_page_table + KERNEL_PGD_BOUNDARY,
|
|
|
|
KERNEL_PGD_PTRS);
|
|
|
|
|
|
|
|
load_cr3(swapper_pg_dir);
|
|
|
|
__flush_tlb_all();
|
2008-06-26 08:52:35 +08:00
|
|
|
#else
|
|
|
|
printk(KERN_INFO "Command line: %s\n", boot_command_line);
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2010-08-24 05:49:11 +08:00
|
|
|
/*
|
|
|
|
* If we have OLPC OFW, we might end up relocating the fixmap due to
|
|
|
|
* reserve_top(), so do this before touching the ioremap area.
|
|
|
|
*/
|
2010-06-19 05:46:53 +08:00
|
|
|
olpc_ofw_detect();
|
|
|
|
|
2010-05-21 10:04:29 +08:00
|
|
|
early_trap_init();
|
2008-07-22 07:49:54 +08:00
|
|
|
early_cpu_init();
|
2008-06-30 11:02:44 +08:00
|
|
|
early_ioremap_init();
|
|
|
|
|
2010-06-19 05:46:53 +08:00
|
|
|
setup_olpc_ofw_pgd();
|
|
|
|
|
2007-10-16 08:13:22 +08:00
|
|
|
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
|
|
|
|
screen_info = boot_params.screen_info;
|
|
|
|
edid_info = boot_params.edid_info;
|
2008-06-26 08:52:35 +08:00
|
|
|
#ifdef CONFIG_X86_32
|
2007-10-16 08:13:22 +08:00
|
|
|
apm_info.bios = boot_params.apm_bios_info;
|
|
|
|
ist_info = boot_params.ist_info;
|
2008-06-26 08:52:35 +08:00
|
|
|
if (boot_params.sys_desc_table.length != 0) {
|
2007-10-16 08:13:22 +08:00
|
|
|
machine_id = boot_params.sys_desc_table.table[0];
|
|
|
|
machine_submodel_id = boot_params.sys_desc_table.table[1];
|
|
|
|
BIOS_revision = boot_params.sys_desc_table.table[2];
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2008-06-26 08:52:35 +08:00
|
|
|
#endif
|
|
|
|
saved_video_mode = boot_params.hdr.vid_mode;
|
2007-10-16 08:13:22 +08:00
|
|
|
bootloader_type = boot_params.hdr.type_of_loader;
|
2009-05-08 07:54:11 +08:00
|
|
|
if ((bootloader_type >> 4) == 0xe) {
|
|
|
|
bootloader_type &= 0xf;
|
|
|
|
bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
|
|
|
|
}
|
|
|
|
bootloader_version = bootloader_type & 0xf;
|
|
|
|
bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_BLK_DEV_RAM
|
2007-10-16 08:13:22 +08:00
|
|
|
rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
|
|
|
|
rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
|
|
|
|
rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
2008-06-24 10:53:33 +08:00
|
|
|
#ifdef CONFIG_EFI
|
|
|
|
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
|
2012-02-13 05:24:29 +08:00
|
|
|
"EL32", 4)) {
|
2008-06-24 10:53:33 +08:00
|
|
|
efi_enabled = 1;
|
2012-02-13 05:24:29 +08:00
|
|
|
efi_64bit = false;
|
|
|
|
} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
|
|
|
|
"EL64", 4)) {
|
|
|
|
efi_enabled = 1;
|
|
|
|
efi_64bit = true;
|
2008-06-24 10:53:33 +08:00
|
|
|
}
|
2012-02-13 05:24:29 +08:00
|
|
|
if (efi_enabled && efi_memblock_x86_reserve_range())
|
|
|
|
efi_enabled = 0;
|
2008-06-24 10:53:33 +08:00
|
|
|
#endif
|
|
|
|
|
2009-08-20 19:04:10 +08:00
|
|
|
x86_init.oem.arch_setup();
|
2008-01-30 20:31:19 +08:00
|
|
|
|
2010-10-27 05:41:49 +08:00
|
|
|
iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
|
2008-04-23 21:09:05 +08:00
|
|
|
setup_memory_map();
|
2008-07-01 07:20:54 +08:00
|
|
|
parse_setup_data();
|
2008-07-04 02:37:13 +08:00
|
|
|
/* update the e820_saved too */
|
|
|
|
e820_reserve_setup_data();
|
2008-07-01 07:20:54 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
copy_edd();
|
|
|
|
|
2007-10-16 08:13:22 +08:00
|
|
|
if (!boot_params.hdr.root_flags)
|
2005-04-17 06:20:36 +08:00
|
|
|
root_mountflags &= ~MS_RDONLY;
|
|
|
|
init_mm.start_code = (unsigned long) _text;
|
|
|
|
init_mm.end_code = (unsigned long) _etext;
|
|
|
|
init_mm.end_data = (unsigned long) _edata;
|
x86: add brk allocation for very, very early allocations
Impact: new interface
Add a brk()-like allocator which effectively extends the bss in order
to allow very early code to do dynamic allocations. This is better than
using statically allocated arrays for data in subsystems which may never
get used.
The space for brk allocations is in the bss ELF segment, so that the
space is mapped properly by the code which maps the kernel, and so
that bootloaders keep the space free rather than putting a ramdisk or
something into it.
The bss itself, delimited by __bss_stop, ends before the brk area
(__brk_base to __brk_limit). The kernel text, data and bss is reserved
up to __bss_stop.
Any brk-allocated data is reserved separately just before the kernel
pagetable is built, as that code allocates from unreserved spaces
in the e820 map, potentially allocating from any unused brk memory.
Ultimately any unused memory in the brk area is used in the general
kernel memory pool.
Initially the brk space is set to 1MB, which is probably much larger
than any user needs (the largest current user is i386 head_32.S's code
to build the pagetables to map the kernel, which can get fairly large
with a big kernel image and no PSE support). So long as the system
has sufficient memory for the bootloader to reserve the kernel+1MB brk,
there are no bad effects resulting from an over-large brk.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-02-27 09:35:44 +08:00
|
|
|
init_mm.brk = _brk_end;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
code_resource.start = virt_to_phys(_text);
|
|
|
|
code_resource.end = virt_to_phys(_etext)-1;
|
|
|
|
data_resource.start = virt_to_phys(_etext);
|
|
|
|
data_resource.end = virt_to_phys(_edata)-1;
|
2007-10-22 07:42:01 +08:00
|
|
|
bss_resource.start = virt_to_phys(&__bss_start);
|
|
|
|
bss_resource.end = virt_to_phys(&__bss_stop)-1;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-08-13 03:52:36 +08:00
|
|
|
#ifdef CONFIG_CMDLINE_BOOL
|
|
|
|
#ifdef CONFIG_CMDLINE_OVERRIDE
|
|
|
|
strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
|
|
|
|
#else
|
|
|
|
if (builtin_cmdline[0]) {
|
|
|
|
/* append boot loader cmdline to builtin */
|
|
|
|
strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
|
|
|
|
strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
|
|
|
|
strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
2009-09-20 02:07:57 +08:00
|
|
|
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
|
|
|
|
*cmdline_p = command_line;
|
|
|
|
|
|
|
|
/*
|
2009-11-14 07:28:17 +08:00
|
|
|
* x86_configure_nx() is called before parse_early_param() to detect
|
|
|
|
* whether hardware doesn't support NX (so that the early EHCI debug
|
|
|
|
* console setup can safely call set_fixmap()). It may then be called
|
|
|
|
* again from within noexec_setup() during parsing early parameters
|
|
|
|
* to honor the respective command line option.
|
2009-09-20 02:07:57 +08:00
|
|
|
*/
|
2009-11-14 07:28:16 +08:00
|
|
|
x86_configure_nx();
|
2009-09-20 02:07:57 +08:00
|
|
|
|
|
|
|
parse_early_param();
|
|
|
|
|
2009-11-14 07:28:17 +08:00
|
|
|
x86_report_nx();
|
2008-09-12 07:42:00 +08:00
|
|
|
|
2008-07-01 07:20:54 +08:00
|
|
|
/* after early param, so could get panic from serial */
|
2010-08-26 04:39:17 +08:00
|
|
|
memblock_x86_reserve_range_setup_data();
|
2008-07-01 07:20:54 +08:00
|
|
|
|
2008-06-26 08:52:35 +08:00
|
|
|
if (acpi_mps_check()) {
|
2008-06-24 04:19:22 +08:00
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
2008-06-26 08:52:35 +08:00
|
|
|
disable_apic = 1;
|
2008-06-24 04:19:22 +08:00
|
|
|
#endif
|
2008-07-22 02:21:43 +08:00
|
|
|
setup_clear_cpu_cap(X86_FEATURE_APIC);
|
2008-06-21 07:11:20 +08:00
|
|
|
}
|
|
|
|
|
2008-07-17 08:25:46 +08:00
|
|
|
#ifdef CONFIG_PCI
|
|
|
|
if (pci_early_dump_regs)
|
|
|
|
early_dump_pci_devices();
|
|
|
|
#endif
|
|
|
|
|
2008-04-23 21:09:05 +08:00
|
|
|
finish_e820_parsing();
|
2006-09-26 16:52:32 +08:00
|
|
|
|
2009-03-04 10:55:31 +08:00
|
|
|
if (efi_enabled)
|
|
|
|
efi_init();
|
|
|
|
|
2008-09-22 17:52:26 +08:00
|
|
|
dmi_scan_machine();
|
|
|
|
|
2008-10-28 01:41:46 +08:00
|
|
|
/*
|
|
|
|
* VMware detection requires dmi to be available, so this
|
|
|
|
* needs to be done after dmi_scan_machine, for the BP.
|
|
|
|
*/
|
2009-08-20 23:06:25 +08:00
|
|
|
init_hypervisor_platform();
|
2008-10-28 01:41:46 +08:00
|
|
|
|
2009-08-19 20:43:56 +08:00
|
|
|
x86_init.resources.probe_roms();
|
2008-06-17 04:03:31 +08:00
|
|
|
|
|
|
|
/* after parse_early_param, so could debug it */
|
|
|
|
insert_resource(&iomem_resource, &code_resource);
|
|
|
|
insert_resource(&iomem_resource, &data_resource);
|
|
|
|
insert_resource(&iomem_resource, &bss_resource);
|
|
|
|
|
2010-01-22 11:21:04 +08:00
|
|
|
trim_bios_range();
|
2008-06-26 08:52:35 +08:00
|
|
|
#ifdef CONFIG_X86_32
|
2008-06-17 07:11:08 +08:00
|
|
|
if (ppro_with_ram_bug()) {
|
|
|
|
e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
|
|
|
|
E820_RESERVED);
|
|
|
|
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
|
|
|
|
printk(KERN_INFO "fixed physical RAM map:\n");
|
|
|
|
e820_print_map("bad_ppro");
|
|
|
|
}
|
2008-06-26 08:52:35 +08:00
|
|
|
#else
|
|
|
|
early_gart_iommu_check();
|
|
|
|
#endif
|
2008-06-17 07:11:08 +08:00
|
|
|
|
2008-06-04 10:35:04 +08:00
|
|
|
/*
|
|
|
|
* partially used pages are not usable - thus
|
|
|
|
* we are rounding upwards:
|
|
|
|
*/
|
2008-07-11 11:38:26 +08:00
|
|
|
max_pfn = e820_end_of_ram_pfn();
|
2008-06-04 10:35:04 +08:00
|
|
|
|
2008-01-30 20:33:32 +08:00
|
|
|
/* update e820 for memory not covered by WB MTRRs */
|
|
|
|
mtrr_bp_init();
|
2008-07-09 09:56:38 +08:00
|
|
|
if (mtrr_trim_uncached_memory(max_pfn))
|
2008-07-11 11:38:26 +08:00
|
|
|
max_pfn = e820_end_of_ram_pfn();
|
2008-03-23 15:16:49 +08:00
|
|
|
|
2008-06-26 08:52:35 +08:00
|
|
|
#ifdef CONFIG_X86_32
|
2008-06-25 03:18:14 +08:00
|
|
|
/* max_low_pfn get updated here */
|
2008-06-23 18:05:30 +08:00
|
|
|
find_low_pfn_range();
|
2008-06-26 08:52:35 +08:00
|
|
|
#else
|
|
|
|
num_physpages = max_pfn;
|
|
|
|
|
2009-02-17 09:29:58 +08:00
|
|
|
check_x2apic();
|
2008-06-26 08:52:35 +08:00
|
|
|
|
|
|
|
/* How many end-of-memory variables you have, grandma! */
|
|
|
|
/* need this before calling reserve_initrd */
|
2008-07-11 11:38:26 +08:00
|
|
|
if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
|
|
|
|
max_low_pfn = e820_end_of_low_ram_pfn();
|
|
|
|
else
|
|
|
|
max_low_pfn = max_pfn;
|
|
|
|
|
2008-06-26 08:52:35 +08:00
|
|
|
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
|
2008-09-07 16:51:32 +08:00
|
|
|
#endif
|
|
|
|
|
2009-12-11 05:07:22 +08:00
|
|
|
/*
|
|
|
|
* Find and reserve possible boot-time SMP configuration:
|
|
|
|
*/
|
|
|
|
find_smp_config();
|
|
|
|
|
2010-04-02 05:32:43 +08:00
|
|
|
reserve_ibft_region();
|
|
|
|
|
2010-08-26 04:39:17 +08:00
|
|
|
/*
|
|
|
|
* Need to conclude brk, before memblock_x86_fill()
|
|
|
|
* it could use memblock_find_in_range, could overlap with
|
|
|
|
* brk area.
|
|
|
|
*/
|
|
|
|
reserve_brk();
|
|
|
|
|
2011-02-18 19:30:30 +08:00
|
|
|
cleanup_highmap();
|
|
|
|
|
2010-08-26 04:39:17 +08:00
|
|
|
memblock.current_limit = get_max_mapped();
|
|
|
|
memblock_x86_fill();
|
|
|
|
|
x86, efi: Retain boot service code until after switching to virtual mode
UEFI stands for "Unified Extensible Firmware Interface", where "Firmware"
is an ancient African word meaning "Why do something right when you can
do it so wrong that children will weep and brave adults will cower before
you", and "UEI" is Celtic for "We missed DOS so we burned it into your
ROMs". The UEFI specification provides for runtime services (ie, another
way for the operating system to be forced to depend on the firmware) and
we rely on these for certain trivial tasks such as setting up the
bootloader. But some hardware fails to work if we attempt to use these
runtime services from physical mode, and so we have to switch into virtual
mode. So far so dreadful.
The specification makes it clear that the operating system is free to do
whatever it wants with boot services code after ExitBootServices() has been
called. SetVirtualAddressMap() can't be called until ExitBootServices() has
been. So, obviously, a whole bunch of EFI implementations call into boot
services code when we do that. Since we've been charmingly naive and
trusted that the specification may be somehow relevant to the real world,
we've already stuffed a picture of a penguin or something in that address
space. And just to make things more entertaining, we've also marked it
non-executable.
This patch allocates the boot services regions during EFI init and makes
sure that they're executable. Then, after SetVirtualAddressMap(), it
discards them and everyone lives happily ever after. Except for the ones
who have to work on EFI, who live sad lives haunted by the knowledge that
someone's eventually going to write yet another firmware specification.
[ hpa: adding this to urgent with a stable tag since it fixes currently-broken
hardware. However, I do not know what the dependencies are and so I do
not know which -stable versions this may be a candidate for. ]
Signed-off-by: Matthew Garrett <mjg@redhat.com>
Link: http://lkml.kernel.org/r/1306331593-28715-1-git-send-email-mjg@redhat.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: <stable@kernel.org>
2011-05-25 21:53:13 +08:00
|
|
|
/*
|
|
|
|
* The EFI specification says that boot service code won't be called
|
|
|
|
* after ExitBootServices(). This is, in fact, a lie.
|
|
|
|
*/
|
|
|
|
if (efi_enabled)
|
|
|
|
efi_reserve_boot_services();
|
|
|
|
|
2010-08-26 04:39:17 +08:00
|
|
|
/* preallocate 4k for mptable mpc */
|
|
|
|
early_reserve_e820_mpc_new();
|
|
|
|
|
|
|
|
#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
|
|
|
|
setup_bios_corruption_check();
|
|
|
|
#endif
|
|
|
|
|
2012-05-30 06:06:29 +08:00
|
|
|
printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
|
|
|
|
(max_pfn_mapped<<PAGE_SHIFT) - 1);
|
2010-08-26 04:39:17 +08:00
|
|
|
|
2012-05-09 02:22:26 +08:00
|
|
|
setup_real_mode();
|
2009-12-11 05:07:22 +08:00
|
|
|
|
2009-06-22 22:39:41 +08:00
|
|
|
init_gbpages();
|
|
|
|
|
2008-06-25 03:18:14 +08:00
|
|
|
/* max_pfn_mapped is updated here */
|
2008-07-11 11:38:26 +08:00
|
|
|
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
|
|
|
|
max_pfn_mapped = max_low_pfn_mapped;
|
|
|
|
|
2011-03-04 17:26:36 +08:00
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
if (max_pfn > max_low_pfn) {
|
2011-10-21 05:15:26 +08:00
|
|
|
int i;
|
2012-10-23 07:35:18 +08:00
|
|
|
unsigned long start, end;
|
|
|
|
unsigned long start_pfn, end_pfn;
|
2011-10-21 05:15:26 +08:00
|
|
|
|
2012-10-23 07:35:18 +08:00
|
|
|
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
|
|
|
|
NULL) {
|
2011-10-21 05:15:26 +08:00
|
|
|
|
2012-10-23 07:35:18 +08:00
|
|
|
end = PFN_PHYS(end_pfn);
|
|
|
|
if (end <= (1UL<<32))
|
2011-10-21 05:15:26 +08:00
|
|
|
continue;
|
|
|
|
|
2012-10-23 07:35:18 +08:00
|
|
|
start = PFN_PHYS(start_pfn);
|
2011-10-21 05:15:26 +08:00
|
|
|
max_pfn_mapped = init_memory_mapping(
|
2012-10-23 07:35:18 +08:00
|
|
|
max((1UL<<32), start), end);
|
2011-10-21 05:15:26 +08:00
|
|
|
}
|
|
|
|
|
2011-03-04 17:26:36 +08:00
|
|
|
/* can we preserve max_low_pfn? */
|
|
|
|
max_low_pfn = max_pfn;
|
|
|
|
}
|
|
|
|
#endif
|
2010-08-26 04:39:17 +08:00
|
|
|
memblock.current_limit = get_max_mapped();
|
2011-12-29 20:09:51 +08:00
|
|
|
dma_contiguous_reserve(0);
|
2008-06-25 03:18:14 +08:00
|
|
|
|
2008-06-26 12:51:28 +08:00
|
|
|
/*
|
|
|
|
* NOTE: On x86-32, only from this point on, fixmaps are ready for use.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
|
|
|
|
if (init_ohci1394_dma_early)
|
|
|
|
init_ohci1394_dma_on_all_controllers();
|
|
|
|
#endif
|
2011-05-25 08:13:20 +08:00
|
|
|
/* Allocate bigger log buffer */
|
|
|
|
setup_log_buf(1);
|
2008-06-26 12:51:28 +08:00
|
|
|
|
2008-06-23 18:05:30 +08:00
|
|
|
reserve_initrd();
|
|
|
|
|
2009-11-23 09:18:49 +08:00
|
|
|
reserve_crashkernel();
|
|
|
|
|
2008-06-26 08:52:35 +08:00
|
|
|
vsmp_init();
|
|
|
|
|
2008-06-18 06:41:45 +08:00
|
|
|
io_delay_init();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Parse the ACPI tables for possible boot-time SMP configuration.
|
|
|
|
*/
|
|
|
|
acpi_boot_table_init();
|
|
|
|
|
2008-09-24 04:37:13 +08:00
|
|
|
early_acpi_boot_init();
|
|
|
|
|
2011-02-16 19:13:06 +08:00
|
|
|
initmem_init();
|
2010-08-26 04:39:18 +08:00
|
|
|
memblock_find_dma_reserve();
|
2008-07-19 01:07:53 +08:00
|
|
|
|
2012-08-17 04:00:19 +08:00
|
|
|
#ifdef CONFIG_KVM_GUEST
|
2008-02-16 03:52:48 +08:00
|
|
|
kvmclock_init();
|
|
|
|
#endif
|
|
|
|
|
2012-08-22 04:22:38 +08:00
|
|
|
x86_init.paging.pagetable_init();
|
x86: early boot debugging via FireWire (ohci1394_dma=early)
This patch adds a new configuration option, which adds support for a new
early_param which gets checked in arch/x86/kernel/setup_{32,64}.c:setup_arch()
to decide wether OHCI-1394 FireWire controllers should be initialized and
enabled for physical DMA access to allow remote debugging of early problems
like issues ACPI or other subsystems which are executed very early.
If the config option is not enabled, no code is changed, and if the boot
paramenter is not given, no new code is executed, and independent of that,
all new code is freed after boot, so the config option can be even enabled
in standard, non-debug kernels.
With specialized tools, it is then possible to get debugging information
from machines which have no serial ports (notebooks) such as the printk
buffer contents, or any data which can be referenced from global pointers,
if it is stored below the 4GB limit and even memory dumps of of the physical
RAM region below the 4GB limit can be taken without any cooperation from the
CPU of the host, so the machine can be crashed early, it does not matter.
In the extreme, even kernel debuggers can be accessed in this way. I wrote
a small kgdb module and an accompanying gdb stub for FireWire which allows
to gdb to talk to kgdb using remote remory reads and writes over FireWire.
An version of the gdb stub fore FireWire is able to read all global data
from a system which is running a a normal kernel without any kernel debugger,
without any interruption or support of the system's CPU. That way, e.g. the
task struct and so on can be read and even manipulated when the physical DMA
access is granted.
A HOWTO is included in this patch, in Documentation/debugging-via-ohci1394.txt
and I've put a copy online at
ftp://ftp.suse.de/private/bk/firewire/docs/debugging-via-ohci1394.txt
It also has links to all the tools which are available to make use of it
another copy of it is online at:
ftp://ftp.suse.de/private/bk/firewire/kernel/ohci1394_dma_early-v2.diff
Signed-Off-By: Bernhard Kaindl <bk@suse.de>
Tested-By: Thomas Renninger <trenn@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 20:34:11 +08:00
|
|
|
|
2011-04-07 04:10:02 +08:00
|
|
|
if (boot_cpu_data.cpuid_level >= 0) {
|
|
|
|
/* A CPU has %cr4 if and only if it has CPUID */
|
|
|
|
mmu_cr4_features = read_cr4();
|
2012-05-09 02:22:46 +08:00
|
|
|
if (trampoline_cr4_features)
|
|
|
|
*trampoline_cr4_features = mmu_cr4_features;
|
2011-04-07 04:10:02 +08:00
|
|
|
}
|
|
|
|
|
2010-08-28 21:58:33 +08:00
|
|
|
#ifdef CONFIG_X86_32
|
|
|
|
/* sync back kernel address range */
|
|
|
|
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
|
|
|
|
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
|
|
|
KERNEL_PGD_PTRS);
|
|
|
|
#endif
|
x86-32: Separate 1:1 pagetables from swapper_pg_dir
This patch fixes machine crashes which occur when heavily exercising the
CPU hotplug codepaths on a 32-bit kernel. These crashes are caused by
AMD Erratum 383 and result in a fatal machine check exception. Here's
the scenario:
1. On 32-bit, the swapper_pg_dir page table is used as the initial page
table for booting a secondary CPU.
2. To make this work, swapper_pg_dir needs a direct mapping of physical
memory in it (the low mappings). By adding those low, large page (2M)
mappings (PAE kernel), we create the necessary conditions for Erratum
383 to occur.
3. Other CPUs which do not participate in the off- and onlining game may
use swapper_pg_dir while the low mappings are present (when leave_mm is
called). For all steps below, the CPU referred to is a CPU that is using
swapper_pg_dir, and not the CPU which is being onlined.
4. The presence of the low mappings in swapper_pg_dir can result
in TLB entries for addresses below __PAGE_OFFSET being established
speculatively. These TLB entries are marked global and large.
5. When the CPU with such TLB entry switches to another page table, this
TLB entry remains because it is global.
6. The process then generates an access to an address covered by the
above TLB entry but there is a permission mismatch - the TLB entry
covers a large global page not accessible to userspace.
7. Due to this permission mismatch a new 4kb, user TLB entry gets
established. Further, Erratum 383 provides for a small window of time
where both TLB entries are present. This results in an uncorrectable
machine check exception signalling a TLB multimatch which panics the
machine.
There are two ways to fix this issue:
1. Always do a global TLB flush when a new cr3 is loaded and the
old page table was swapper_pg_dir. I consider this a hack hard
to understand and with performance implications
2. Do not use swapper_pg_dir to boot secondary CPUs like 64-bit
does.
This patch implements solution 2. It introduces a trampoline_pg_dir
which has the same layout as swapper_pg_dir with low_mappings. This page
table is used as the initial page table of the booting CPU. Later in the
bringup process, it switches to swapper_pg_dir and does a global TLB
flush. This fixes the crashes in our test cases.
-v2: switch to swapper_pg_dir right after entering start_secondary() so
that we are able to access percpu data which might not be mapped in the
trampoline page table.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
LKML-Reference: <20100816123833.GB28147@aftab>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-08-16 20:38:33 +08:00
|
|
|
|
x86, intel_txt: Intel TXT boot support
This patch adds kernel configuration and boot support for Intel Trusted
Execution Technology (Intel TXT).
Intel's technology for safer computing, Intel Trusted Execution
Technology (Intel TXT), defines platform-level enhancements that
provide the building blocks for creating trusted platforms.
Intel TXT was formerly known by the code name LaGrande Technology (LT).
Intel TXT in Brief:
o Provides dynamic root of trust for measurement (DRTM)
o Data protection in case of improper shutdown
o Measurement and verification of launched environment
Intel TXT is part of the vPro(TM) brand and is also available on some
non-vPro systems. It is currently available on desktop systems based on
the Q35, X38, Q45, and Q43 Express chipsets (e.g. Dell Optiplex 755, HP
dc7800, etc.) and mobile systems based on the GM45, PM45, and GS45
Express chipsets.
For more information, see http://www.intel.com/technology/security/.
This site also has a link to the Intel TXT MLE Developers Manual, which
has been updated for the newly released platforms.
A much more complete description of how these patches support TXT, how to
configure a system for it, etc. is in the Documentation/intel_txt.txt file
in this patch.
This patch provides the TXT support routines for complete functionality,
documentation for TXT support and for the changes to the boot_params structure,
and boot detection of a TXT launch. Attempts to shutdown (reboot, Sx) the system
will result in platform resets; subsequent patches will support these shutdown modes
properly.
Documentation/intel_txt.txt | 210 +++++++++++++++++++++
Documentation/x86/zero-page.txt | 1
arch/x86/include/asm/bootparam.h | 3
arch/x86/include/asm/fixmap.h | 3
arch/x86/include/asm/tboot.h | 197 ++++++++++++++++++++
arch/x86/kernel/Makefile | 1
arch/x86/kernel/setup.c | 4
arch/x86/kernel/tboot.c | 379 +++++++++++++++++++++++++++++++++++++++
security/Kconfig | 30 +++
9 files changed, 827 insertions(+), 1 deletion(-)
Signed-off-by: Joseph Cihula <joseph.cihula@intel.com>
Signed-off-by: Shane Wang <shane.wang@intel.com>
Signed-off-by: Gang Wei <gang.wei@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-07-01 10:30:59 +08:00
|
|
|
tboot_probe();
|
|
|
|
|
2008-06-26 08:52:35 +08:00
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
map_vsyscall();
|
|
|
|
#endif
|
|
|
|
|
2006-09-26 16:52:32 +08:00
|
|
|
generic_apic_probe();
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-10-20 02:35:03 +08:00
|
|
|
early_quirks();
|
2006-06-08 15:43:38 +08:00
|
|
|
|
2008-06-24 10:55:05 +08:00
|
|
|
/*
|
|
|
|
* Read APIC and some other early information from ACPI tables.
|
|
|
|
*/
|
2005-04-17 06:20:36 +08:00
|
|
|
acpi_boot_init();
|
2009-08-15 03:23:29 +08:00
|
|
|
sfi_init();
|
2011-02-25 23:09:31 +08:00
|
|
|
x86_dtb_init();
|
2008-06-21 16:38:41 +08:00
|
|
|
|
2008-06-24 10:55:05 +08:00
|
|
|
/*
|
|
|
|
* get boot-time SMP configuration:
|
|
|
|
*/
|
2008-06-09 09:29:22 +08:00
|
|
|
if (smp_found_config)
|
|
|
|
get_smp_config();
|
2008-06-26 08:52:35 +08:00
|
|
|
|
2008-07-03 09:54:40 +08:00
|
|
|
prefill_possible_map();
|
2008-08-20 11:50:02 +08:00
|
|
|
|
2008-07-03 09:53:44 +08:00
|
|
|
init_cpu_to_node();
|
|
|
|
|
2008-06-26 08:52:35 +08:00
|
|
|
init_apic_mappings();
|
2012-05-08 01:19:56 +08:00
|
|
|
if (x86_io_apic_ops.init)
|
|
|
|
x86_io_apic_ops.init();
|
2008-08-20 11:50:52 +08:00
|
|
|
|
2008-06-24 10:55:05 +08:00
|
|
|
kvm_guest_init();
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-06-17 04:03:31 +08:00
|
|
|
e820_reserve_resources();
|
2008-05-21 11:10:58 +08:00
|
|
|
e820_mark_nosave_regions(max_low_pfn);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2009-08-19 20:55:50 +08:00
|
|
|
x86_init.resources.reserve_resources();
|
2008-06-17 04:03:31 +08:00
|
|
|
|
|
|
|
e820_setup_gap();
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#ifdef CONFIG_VT
|
|
|
|
#if defined(CONFIG_VGA_CONSOLE)
|
|
|
|
if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
|
|
|
|
conswitchp = &vga_con;
|
|
|
|
#elif defined(CONFIG_DUMMY_CONSOLE)
|
|
|
|
conswitchp = &dummy_con;
|
|
|
|
#endif
|
|
|
|
#endif
|
2009-08-20 19:19:57 +08:00
|
|
|
x86_init.oem.banner();
|
2009-11-10 09:38:24 +08:00
|
|
|
|
2011-02-15 00:13:31 +08:00
|
|
|
x86_init.timers.wallclock_init();
|
|
|
|
|
2009-11-10 09:38:24 +08:00
|
|
|
mcheck_init();
|
2010-09-17 23:08:51 +08:00
|
|
|
|
2011-04-19 06:19:51 +08:00
|
|
|
arch_init_ideal_nops();
|
jiffies: Remove compile time assumptions about CLOCK_TICK_RATE
CLOCK_TICK_RATE is used to accurately calculate exactly how long
a tick will be at a given HZ.
This is useful, because while we'd expect NSEC_PER_SEC/HZ,
the underlying hardware will have some granularity limit,
so we won't be able to have exactly HZ ticks per second.
This slight error can cause timekeeping quality problems
when using the jiffies or other jiffies driven clocksources.
Thus we currently use compile time CLOCK_TICK_RATE value to
generate SHIFTED_HZ and NSEC_PER_JIFFIES, which we then use
to adjust the jiffies clocksource to correct this error.
Unfortunately though, since CLOCK_TICK_RATE is a compile
time value, and the jiffies clocksource is registered very
early during boot, there are a number of cases where there
are different possible hardware timers that have different
tick rates. This causes problems in cases like ARM where
there are numerous different types of hardware, each having
their own compile-time CLOCK_TICK_RATE, making it hard to
accurately support different hardware with a single kernel.
For the most part, this doesn't matter all that much, as not
too many systems actually utilize the jiffies or jiffies driven
clocksource. Usually there are other highres clocksources
whose granularity error is negligible.
Even so, we have some complicated calculations that we do
everywhere to handle these edge cases.
This patch removes the compile time SHIFTED_HZ value, and
introduces a register_refined_jiffies() function. This results
in the default jiffies clock being assumed to have a perfect HZ
freq, and allows architectures that care about jiffies accuracy
to call register_refined_jiffies() with the tick rate, specified
dynamically at boot.
This allows us, where necessary, to not have a compile time
CLOCK_TICK_RATE constant, simplifies the jiffies code, and
still provides a way to have an accurate jiffies clock.
NOTE: Since this patch does not add register_refined_jiffies()
calls for every arch, it may cause time quality regressions
in some cases. It's likely these will not be noticeable, but
if they are an issue, adding the following to the end of
setup_arch() should resolve the regression:
register_refined_jiffies(CLOCK_TICK_RATE)
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
2012-09-05 00:42:27 +08:00
|
|
|
|
|
|
|
register_refined_jiffies(CLOCK_TICK_RATE);
|
2012-10-25 01:00:44 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_EFI
|
|
|
|
/* Once setup is done above, disable efi_enabled on mismatched
|
|
|
|
* firmware/kernel architectures since there is no support for
|
|
|
|
* runtime services.
|
|
|
|
*/
|
|
|
|
if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) {
|
|
|
|
pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
|
|
|
|
efi_unmap_memmap();
|
|
|
|
efi_enabled = 0;
|
|
|
|
}
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2008-09-16 15:29:09 +08:00
|
|
|
|
2009-02-18 06:12:48 +08:00
|
|
|
#ifdef CONFIG_X86_32
|
|
|
|
|
2009-08-19 20:55:50 +08:00
|
|
|
/*
 * Memory resource named "Video RAM area" with .start 0xa0000 and
 * .end 0xbffff, flagged IORESOURCE_BUSY | IORESOURCE_MEM.  It is passed to
 * request_resource() by i386_reserve_resources() and is only built when
 * CONFIG_X86_32 is defined.
 */
static struct resource video_ram_resource = {
|
|
|
|
.name = "Video RAM area",
|
|
|
|
.start = 0xa0000,
|
|
|
|
.end = 0xbffff,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
2009-02-18 06:12:48 +08:00
|
|
|
};
|
|
|
|
|
2009-08-19 20:55:50 +08:00
|
|
|
/*
 * i386_reserve_resources - reserve the fixed resources of the 32-bit build.
 *
 * Requests video_ram_resource from iomem_resource and then calls
 * reserve_standard_io_resources().  The return value of request_resource()
 * is not checked, so a failed request is silently ignored here.
 *
 * NOTE(review): presumably reached through
 * x86_init.resources.reserve_resources() in setup_arch() - confirm where
 * the hook is assigned.
 */
void __init i386_reserve_resources(void)
|
2009-02-18 06:12:48 +08:00
|
|
|
{
|
2009-08-19 20:55:50 +08:00
|
|
|
request_resource(&iomem_resource, &video_ram_resource);
|
|
|
|
reserve_standard_io_resources();
|
2009-02-18 06:12:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* CONFIG_X86_32 */
|