Merge branch 'for-next' into for-linus

* pcpu_chunk_page_occupied() doesn't exist in for-next.
* pcpu_chunk_addr_search() updated to use raw_smp_processor_id().

Conflicts:
	mm/percpu.c
This commit is contained in:
Tejun Heo 2009-09-15 09:57:19 +09:00
commit 5579fd7e6a
80 changed files with 1912 additions and 1230 deletions

View File

@ -1919,11 +1919,12 @@ and is between 256 and 4096 characters. It is defined in the file
Format: { 0 | 1 } Format: { 0 | 1 }
See arch/parisc/kernel/pdc_chassis.c See arch/parisc/kernel/pdc_chassis.c
percpu_alloc= [X86] Select which percpu first chunk allocator to use. percpu_alloc= Select which percpu first chunk allocator to use.
Allowed values are one of "lpage", "embed" and "4k". Currently supported values are "embed" and "page".
See comments in arch/x86/kernel/setup_percpu.c for Archs may support subset or none of the selections.
details on each allocator. This parameter is primarily See comments in mm/percpu.c for details on each
for debugging and performance comparison. allocator. This parameter is primarily for debugging
and performance comparison.
pf. [PARIDE] pf. [PARIDE]
See Documentation/blockdev/paride.txt. See Documentation/blockdev/paride.txt.

View File

@ -325,7 +325,7 @@ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
MODFLAGS = -DMODULE MODFLAGS = -DMODULE
CFLAGS_MODULE = $(MODFLAGS) CFLAGS_MODULE = $(MODFLAGS)
AFLAGS_MODULE = $(MODFLAGS) AFLAGS_MODULE = $(MODFLAGS)
LDFLAGS_MODULE = LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds
CFLAGS_KERNEL = CFLAGS_KERNEL =
AFLAGS_KERNEL = AFLAGS_KERNEL =
CFLAGS_GCOV = -fprofile-arcs -ftest-coverage CFLAGS_GCOV = -fprofile-arcs -ftest-coverage

View File

@ -1,102 +1,18 @@
#ifndef __ALPHA_PERCPU_H #ifndef __ALPHA_PERCPU_H
#define __ALPHA_PERCPU_H #define __ALPHA_PERCPU_H
#include <linux/compiler.h>
#include <linux/threads.h>
#include <linux/percpu-defs.h>
/* /*
* Determine the real variable name from the name visible in the * To calculate addresses of locally defined variables, GCC uses
* kernel sources. * 32-bit displacement from the GP. Which doesn't work for per cpu
*/ * variables in modules, as an offset to the kernel per cpu area is
#define per_cpu_var(var) per_cpu__##var * way above 4G.
#ifdef CONFIG_SMP
/*
* per_cpu_offset() is the offset that has to be added to a
* percpu variable to get to the instance for a certain processor.
*/
extern unsigned long __per_cpu_offset[NR_CPUS];
#define per_cpu_offset(x) (__per_cpu_offset[x])
#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id())
#ifdef CONFIG_DEBUG_PREEMPT
#define my_cpu_offset per_cpu_offset(smp_processor_id())
#else
#define my_cpu_offset __my_cpu_offset
#endif
#ifndef MODULE
#define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset))
#define PER_CPU_DEF_ATTRIBUTES
#else
/*
* To calculate addresses of locally defined variables, GCC uses 32-bit
* displacement from the GP. Which doesn't work for per cpu variables in
* modules, as an offset to the kernel per cpu area is way above 4G.
* *
* This forces allocation of a GOT entry for per cpu variable using * Always use weak definitions for percpu variables in modules.
* ldq instruction with a 'literal' relocation.
*/ */
#define SHIFT_PERCPU_PTR(var, offset) ({ \ #if defined(MODULE) && defined(CONFIG_SMP)
extern int simple_identifier_##var(void); \ #define ARCH_NEEDS_WEAK_PER_CPU
unsigned long __ptr, tmp_gp; \
asm ( "br %1, 1f \n\
1: ldgp %1, 0(%1) \n\
ldq %0, per_cpu__" #var"(%1)\t!literal" \
: "=&r"(__ptr), "=&r"(tmp_gp)); \
(typeof(&per_cpu_var(var)))(__ptr + (offset)); })
#define PER_CPU_DEF_ATTRIBUTES __used
#endif /* MODULE */
/*
* A percpu variable may point to a discarded regions. The following are
* established ways to produce a usable pointer from the percpu variable
* offset.
*/
#define per_cpu(var, cpu) \
(*SHIFT_PERCPU_PTR(var, per_cpu_offset(cpu)))
#define __get_cpu_var(var) \
(*SHIFT_PERCPU_PTR(var, my_cpu_offset))
#define __raw_get_cpu_var(var) \
(*SHIFT_PERCPU_PTR(var, __my_cpu_offset))
#else /* ! SMP */
#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var)))
#define __get_cpu_var(var) per_cpu_var(var)
#define __raw_get_cpu_var(var) per_cpu_var(var)
#define PER_CPU_DEF_ATTRIBUTES
#endif /* SMP */
#ifdef CONFIG_SMP
#define PER_CPU_BASE_SECTION ".data.percpu"
#else
#define PER_CPU_BASE_SECTION ".data"
#endif #endif
#ifdef CONFIG_SMP #include <asm-generic/percpu.h>
#ifdef MODULE
#define PER_CPU_SHARED_ALIGNED_SECTION ""
#else
#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
#endif
#define PER_CPU_FIRST_SECTION ".first"
#else
#define PER_CPU_SHARED_ALIGNED_SECTION ""
#define PER_CPU_FIRST_SECTION ""
#endif
#define PER_CPU_ATTRIBUTES
#endif /* __ALPHA_PERCPU_H */ #endif /* __ALPHA_PERCPU_H */

View File

@ -2,6 +2,7 @@
#define _ALPHA_TLBFLUSH_H #define _ALPHA_TLBFLUSH_H
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/sched.h>
#include <asm/compiler.h> #include <asm/compiler.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>

View File

@ -134,13 +134,6 @@ SECTIONS
__bss_stop = .; __bss_stop = .;
_end = .; _end = .;
/* Sections to be discarded */
/DISCARD/ : {
EXIT_TEXT
EXIT_DATA
*(.exitcall.exit)
}
.mdebug 0 : { .mdebug 0 : {
*(.mdebug) *(.mdebug)
} }
@ -150,4 +143,6 @@ SECTIONS
STABS_DEBUG STABS_DEBUG
DWARF_DEBUG DWARF_DEBUG
DISCARDS
} }

View File

@ -83,6 +83,7 @@ SECTIONS
EXIT_TEXT EXIT_TEXT
EXIT_DATA EXIT_DATA
*(.exitcall.exit) *(.exitcall.exit)
*(.discard)
*(.ARM.exidx.exit.text) *(.ARM.exidx.exit.text)
*(.ARM.extab.exit.text) *(.ARM.extab.exit.text)
#ifndef CONFIG_HOTPLUG_CPU #ifndef CONFIG_HOTPLUG_CPU

View File

@ -124,14 +124,11 @@ SECTIONS
_end = .; _end = .;
} }
DWARF_DEBUG
/* When something in the kernel is NOT compiled as a module, the module /* When something in the kernel is NOT compiled as a module, the module
* cleanup code and data are put into these segments. Both can then be * cleanup code and data are put into these segments. Both can then be
* thrown away, as cleanup code is never called unless it's a module. * thrown away, as cleanup code is never called unless it's a module.
*/ */
/DISCARD/ : { DISCARDS
EXIT_DATA
*(.exitcall.exit)
}
DWARF_DEBUG
} }

View File

@ -277,8 +277,5 @@ SECTIONS
DWARF_DEBUG DWARF_DEBUG
/DISCARD/ : DISCARDS
{
*(.exitcall.exit)
}
} }

View File

@ -42,9 +42,9 @@
#include <asm/mem_map.h> #include <asm/mem_map.h>
#include "blackfin_sram.h" #include "blackfin_sram.h"
static DEFINE_PER_CPU(spinlock_t, l1sram_lock) ____cacheline_aligned_in_smp; static DEFINE_PER_CPU_SHARED_ALIGNED(spinlock_t, l1sram_lock);
static DEFINE_PER_CPU(spinlock_t, l1_data_sram_lock) ____cacheline_aligned_in_smp; static DEFINE_PER_CPU_SHARED_ALIGNED(spinlock_t, l1_data_sram_lock);
static DEFINE_PER_CPU(spinlock_t, l1_inst_sram_lock) ____cacheline_aligned_in_smp; static DEFINE_PER_CPU_SHARED_ALIGNED(spinlock_t, l1_inst_sram_lock);
static spinlock_t l2_sram_lock ____cacheline_aligned_in_smp; static spinlock_t l2_sram_lock ____cacheline_aligned_in_smp;
/* the data structure for L1 scratchpad and DATA SRAM */ /* the data structure for L1 scratchpad and DATA SRAM */

View File

@ -17,7 +17,8 @@ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* registers like cr3 on the i386 * registers like cr3 on the i386
*/ */
extern volatile DEFINE_PER_CPU(pgd_t *,current_pgd); /* defined in arch/cris/mm/fault.c */ /* defined in arch/cris/mm/fault.c */
DECLARE_PER_CPU(pgd_t *, current_pgd);
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{ {

View File

@ -140,12 +140,7 @@ SECTIONS
_end = .; _end = .;
__end = .; __end = .;
/* Sections to be discarded */
/DISCARD/ : {
EXIT_TEXT
EXIT_DATA
*(.exitcall.exit)
}
dram_end = dram_start + (CONFIG_ETRAX_DRAM_SIZE - __CONFIG_ETRAX_VMEM_SIZE)*1024*1024; dram_end = dram_start + (CONFIG_ETRAX_DRAM_SIZE - __CONFIG_ETRAX_VMEM_SIZE)*1024*1024;
DISCARDS
} }

View File

@ -29,7 +29,7 @@ extern void die_if_kernel(const char *, struct pt_regs *, long);
/* current active page directory */ /* current active page directory */
volatile DEFINE_PER_CPU(pgd_t *,current_pgd); DEFINE_PER_CPU(pgd_t *, current_pgd);
unsigned long cris_signal_return_page; unsigned long cris_signal_return_page;
/* /*

View File

@ -177,6 +177,8 @@ SECTIONS
.debug_ranges 0 : { *(.debug_ranges) } .debug_ranges 0 : { *(.debug_ranges) }
.comment 0 : { *(.comment) } .comment 0 : { *(.comment) }
DISCARDS
} }
__kernel_image_size_no_bss = __bss_start - __kernel_image_start; __kernel_image_size_no_bss = __bss_start - __kernel_image_start;

View File

@ -152,9 +152,6 @@ SECTIONS
__end = . ; __end = . ;
__ramstart = .; __ramstart = .;
} }
/DISCARD/ : {
*(.exitcall.exit)
}
.romfs : .romfs :
{ {
*(.romfs*) *(.romfs*)
@ -165,4 +162,6 @@ SECTIONS
COMMAND_START = . - 0x200 ; COMMAND_START = . - 0x200 ;
__ramend = . ; __ramend = . ;
} }
DISCARDS
} }

View File

@ -89,6 +89,9 @@ config GENERIC_TIME_VSYSCALL
bool bool
default y default y
config HAVE_LEGACY_PER_CPU_AREA
def_bool y
config HAVE_SETUP_PER_CPU_AREA config HAVE_SETUP_PER_CPU_AREA
def_bool y def_bool y

View File

@ -855,11 +855,17 @@ identify_cpu (struct cpuinfo_ia64 *c)
c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
} }
/*
* In UP configuration, setup_per_cpu_areas() is defined in
* include/linux/percpu.h
*/
#ifdef CONFIG_SMP
void __init void __init
setup_per_cpu_areas (void) setup_per_cpu_areas (void)
{ {
/* start_kernel() requires this... */ /* start_kernel() requires this... */
} }
#endif
/* /*
* Do the following calculations: * Do the following calculations:

View File

@ -58,7 +58,8 @@ static struct local_tlb_flush_counts {
unsigned int count; unsigned int count;
} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; } __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];
static DEFINE_PER_CPU(unsigned short, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned; static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS],
shadow_flush_counts);
#define IPI_CALL_FUNC 0 #define IPI_CALL_FUNC 0
#define IPI_CPU_STOP 1 #define IPI_CPU_STOP 1

View File

@ -24,14 +24,14 @@ PHDRS {
} }
SECTIONS SECTIONS
{ {
/* Sections to be discarded */ /* unwind exit sections must be discarded before the rest of the
sections get included. */
/DISCARD/ : { /DISCARD/ : {
EXIT_TEXT
EXIT_DATA
*(.exitcall.exit)
*(.IA_64.unwind.exit.text) *(.IA_64.unwind.exit.text)
*(.IA_64.unwind_info.exit.text) *(.IA_64.unwind_info.exit.text)
} *(.comment)
*(.note)
}
v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
phys_start = _start - LOAD_OFFSET; phys_start = _start - LOAD_OFFSET;
@ -316,7 +316,7 @@ SECTIONS
.debug_funcnames 0 : { *(.debug_funcnames) } .debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) } .debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) } .debug_varnames 0 : { *(.debug_varnames) }
/* These must appear regardless of . */
/DISCARD/ : { *(.comment) } /* Default discards */
/DISCARD/ : { *(.note) } DISCARDS
} }

View File

@ -71,7 +71,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
EXPORT_PER_CPU_SYMBOL(__sn_hub_info); EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); DEFINE_PER_CPU(short [MAX_COMPACT_NODES], __sn_cnodeid_to_nasid);
EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);

View File

@ -120,13 +120,6 @@ SECTIONS
_end = . ; _end = . ;
/* Sections to be discarded */
/DISCARD/ : {
EXIT_TEXT
EXIT_DATA
*(.exitcall.exit)
}
/* Stabs debugging sections. */ /* Stabs debugging sections. */
.stab 0 : { *(.stab) } .stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) } .stabstr 0 : { *(.stabstr) }
@ -135,4 +128,7 @@ SECTIONS
.stab.index 0 : { *(.stab.index) } .stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) } .stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) } .comment 0 : { *(.comment) }
/* Sections to be discarded */
DISCARDS
} }

View File

@ -82,13 +82,6 @@ SECTIONS
_end = . ; _end = . ;
/* Sections to be discarded */
/DISCARD/ : {
EXIT_TEXT
EXIT_DATA
*(.exitcall.exit)
}
/* Stabs debugging sections. */ /* Stabs debugging sections. */
.stab 0 : { *(.stab) } .stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) } .stabstr 0 : { *(.stabstr) }
@ -97,4 +90,7 @@ SECTIONS
.stab.index 0 : { *(.stab.index) } .stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) } .stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) } .comment 0 : { *(.comment) }
/* Sections to be discarded */
DISCARDS
} }

View File

@ -77,13 +77,6 @@ __init_begin = .;
_end = . ; _end = . ;
/* Sections to be discarded */
/DISCARD/ : {
EXIT_TEXT
EXIT_DATA
*(.exitcall.exit)
}
.crap : { .crap : {
/* Stabs debugging sections. */ /* Stabs debugging sections. */
*(.stab) *(.stab)
@ -96,4 +89,6 @@ __init_begin = .;
*(.note) *(.note)
} }
/* Sections to be discarded */
DISCARDS
} }

View File

@ -184,12 +184,6 @@ SECTIONS {
__init_end = .; __init_end = .;
} > INIT } > INIT
/DISCARD/ : {
EXIT_TEXT
EXIT_DATA
*(.exitcall.exit)
}
.bss : { .bss : {
. = ALIGN(4); . = ALIGN(4);
_sbss = . ; _sbss = . ;
@ -200,5 +194,6 @@ SECTIONS {
_end = . ; _end = . ;
} > BSS } > BSS
DISCARDS
} }

View File

@ -23,8 +23,8 @@ SECTIONS {
_stext = . ; _stext = . ;
*(.text .text.*) *(.text .text.*)
*(.fixup) *(.fixup)
EXIT_TEXT
*(.exitcall.exit) EXIT_CALL
SCHED_TEXT SCHED_TEXT
LOCK_TEXT LOCK_TEXT
KPROBES_TEXT KPROBES_TEXT
@ -162,4 +162,6 @@ SECTIONS {
} }
. = ALIGN(4096); . = ALIGN(4096);
_end = .; _end = .;
DISCARDS
} }

View File

@ -176,17 +176,6 @@ SECTIONS
_end = . ; _end = . ;
/* Sections to be discarded */
/DISCARD/ : {
*(.exitcall.exit)
/* ABI crap starts here */
*(.MIPS.options)
*(.options)
*(.pdr)
*(.reginfo)
}
/* These mark the ABI of the kernel for debuggers. */ /* These mark the ABI of the kernel for debuggers. */
.mdebug.abi32 : { .mdebug.abi32 : {
KEEP(*(.mdebug.abi32)) KEEP(*(.mdebug.abi32))
@ -212,4 +201,14 @@ SECTIONS
*(.gptab.bss) *(.gptab.bss)
*(.gptab.sbss) *(.gptab.sbss)
} }
/* Sections to be discarded */
DISCARDS
/DISCARD/ : {
/* ABI crap starts here */
*(.MIPS.options)
*(.options)
*(.pdr)
*(.reginfo)
}
} }

View File

@ -115,12 +115,10 @@ SECTIONS
. = ALIGN(PAGE_SIZE); . = ALIGN(PAGE_SIZE);
pg0 = .; pg0 = .;
/* Sections to be discarded */
/DISCARD/ : {
EXIT_CALL
}
STABS_DEBUG STABS_DEBUG
DWARF_DEBUG DWARF_DEBUG
/* Sections to be discarded */
DISCARDS
} }

View File

@ -237,9 +237,12 @@ SECTIONS
/* freed after init ends here */ /* freed after init ends here */
_end = . ; _end = . ;
STABS_DEBUG
.note 0 : { *(.note) }
/* Sections to be discarded */ /* Sections to be discarded */
DISCARDS
/DISCARD/ : { /DISCARD/ : {
*(.exitcall.exit)
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
/* temporary hack until binutils is fixed to not emit these /* temporary hack until binutils is fixed to not emit these
* for static binaries * for static binaries
@ -252,7 +255,4 @@ SECTIONS
*(.gnu.hash) *(.gnu.hash)
#endif #endif
} }
STABS_DEBUG
.note 0 : { *(.note) }
} }

View File

@ -49,6 +49,9 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
config HAVE_SETUP_PER_CPU_AREA config HAVE_SETUP_PER_CPU_AREA
def_bool PPC64 def_bool PPC64
config NEED_PER_CPU_EMBED_FIRST_CHUNK
def_bool PPC64
config IRQ_PER_CPU config IRQ_PER_CPU
bool bool
default y default y

View File

@ -57,6 +57,7 @@
#include <asm/cache.h> #include <asm/cache.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/mmu.h> #include <asm/mmu.h>
#include <asm/mmu-hash64.h>
#include <asm/firmware.h> #include <asm/firmware.h>
#include <asm/xmon.h> #include <asm/xmon.h>
#include <asm/udbg.h> #include <asm/udbg.h>
@ -569,25 +570,53 @@ void cpu_die(void)
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define PCPU_DYN_SIZE ()
static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
__pa(MAX_DMA_ADDRESS));
}
static void __init pcpu_fc_free(void *ptr, size_t size)
{
free_bootmem(__pa(ptr), size);
}
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (cpu_to_node(from) == cpu_to_node(to))
return LOCAL_DISTANCE;
else
return REMOTE_DISTANCE;
}
void __init setup_per_cpu_areas(void) void __init setup_per_cpu_areas(void)
{ {
int i; const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
unsigned long size; size_t atom_size;
char *ptr; unsigned long delta;
unsigned int cpu;
int rc;
/* Copy section for each CPU (we discard the original) */ /*
size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
#ifdef CONFIG_MODULES * to group units. For larger mappings, use 1M atom which
if (size < PERCPU_ENOUGH_ROOM) * should be large enough to contain a number of units.
size = PERCPU_ENOUGH_ROOM; */
#endif if (mmu_linear_psize == MMU_PAGE_4K)
atom_size = PAGE_SIZE;
else
atom_size = 1 << 20;
for_each_possible_cpu(i) { rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); pcpu_fc_alloc, pcpu_fc_free);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
paca[i].data_offset = ptr - __per_cpu_start; delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); for_each_possible_cpu(cpu)
} paca[cpu].data_offset = delta + pcpu_unit_offsets[cpu];
} }
#endif #endif

View File

@ -37,12 +37,6 @@ jiffies = jiffies_64 + 4;
#endif #endif
SECTIONS SECTIONS
{ {
/* Sections to be discarded. */
/DISCARD/ : {
*(.exitcall.exit)
EXIT_DATA
}
. = KERNELBASE; . = KERNELBASE;
/* /*
@ -298,4 +292,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE); . = ALIGN(PAGE_SIZE);
_end = . ; _end = . ;
PROVIDE32 (end = .); PROVIDE32 (end = .);
/* Sections to be discarded. */
DISCARDS
} }

View File

@ -31,7 +31,7 @@ struct stab_entry {
#define NR_STAB_CACHE_ENTRIES 8 #define NR_STAB_CACHE_ENTRIES 8
static DEFINE_PER_CPU(long, stab_cache_ptr); static DEFINE_PER_CPU(long, stab_cache_ptr);
static DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); static DEFINE_PER_CPU(long [NR_STAB_CACHE_ENTRIES], stab_cache);
/* /*
* Create a segment table entry for the given esid/vsid pair. * Create a segment table entry for the given esid/vsid pair.

View File

@ -37,7 +37,7 @@
*/ */
#define MSG_COUNT 4 #define MSG_COUNT 4
static DEFINE_PER_CPU(unsigned int, ps3_ipi_virqs[MSG_COUNT]); static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs);
static void do_message_pass(int target, int msg) static void do_message_pass(int target, int msg)
{ {

View File

@ -1,37 +1,21 @@
#ifndef __ARCH_S390_PERCPU__ #ifndef __ARCH_S390_PERCPU__
#define __ARCH_S390_PERCPU__ #define __ARCH_S390_PERCPU__
#include <linux/compiler.h>
#include <asm/lowcore.h>
/* /*
* s390 uses its own implementation for per cpu data, the offset of * s390 uses its own implementation for per cpu data, the offset of
* the cpu local data area is cached in the cpu's lowcore memory. * the cpu local data area is cached in the cpu's lowcore memory.
* For 64 bit module code s390 forces the use of a GOT slot for the
* address of the per cpu variable. This is needed because the module
* may be more than 4G above the per cpu area.
*/ */
#if defined(__s390x__) && defined(MODULE)
#define SHIFT_PERCPU_PTR(ptr,offset) (({ \
extern int simple_identifier_##var(void); \
unsigned long *__ptr; \
asm ( "larl %0, %1@GOTENT" \
: "=a" (__ptr) : "X" (ptr) ); \
(typeof(ptr))((*__ptr) + (offset)); }))
#else
#define SHIFT_PERCPU_PTR(ptr, offset) (({ \
extern int simple_identifier_##var(void); \
unsigned long __ptr; \
asm ( "" : "=a" (__ptr) : "0" (ptr) ); \
(typeof(ptr)) (__ptr + (offset)); }))
#endif
#define __my_cpu_offset S390_lowcore.percpu_offset #define __my_cpu_offset S390_lowcore.percpu_offset
/*
* For 64 bit module code, the module may be more than 4G above the
* per cpu area, use weak definitions to force the compiler to
* generate external references.
*/
#if defined(CONFIG_SMP) && defined(__s390x__) && defined(MODULE)
#define ARCH_NEEDS_WEAK_PER_CPU
#endif
#include <asm-generic/percpu.h> #include <asm-generic/percpu.h>
#endif /* __ARCH_S390_PERCPU__ */ #endif /* __ARCH_S390_PERCPU__ */

View File

@ -157,13 +157,10 @@ SECTIONS
_end = . ; _end = . ;
/* Sections to be discarded */
/DISCARD/ : {
EXIT_DATA
*(.exitcall.exit)
}
/* Debugging sections. */ /* Debugging sections. */
STABS_DEBUG STABS_DEBUG
DWARF_DEBUG DWARF_DEBUG
/* Sections to be discarded */
DISCARDS
} }

View File

@ -163,16 +163,14 @@ SECTIONS
_end = . ; _end = . ;
} }
STABS_DEBUG
DWARF_DEBUG
/* /*
* When something in the kernel is NOT compiled as a module, the * When something in the kernel is NOT compiled as a module, the
* module cleanup code and data are put into these segments. Both * module cleanup code and data are put into these segments. Both
* can then be thrown away, as cleanup code is never called unless * can then be thrown away, as cleanup code is never called unless
* it's a module. * it's a module.
*/ */
/DISCARD/ : { DISCARDS
*(.exitcall.exit)
}
STABS_DEBUG
DWARF_DEBUG
} }

View File

@ -95,7 +95,7 @@ config AUDIT_ARCH
config HAVE_SETUP_PER_CPU_AREA config HAVE_SETUP_PER_CPU_AREA
def_bool y if SPARC64 def_bool y if SPARC64
config HAVE_DYNAMIC_PER_CPU_AREA config NEED_PER_CPU_EMBED_FIRST_CHUNK
def_bool y if SPARC64 def_bool y if SPARC64
config GENERIC_HARDIRQS_NO__DO_IRQ config GENERIC_HARDIRQS_NO__DO_IRQ

View File

@ -1389,8 +1389,8 @@ void smp_send_stop(void)
* RETURNS: * RETURNS:
* Pointer to the allocated area on success, NULL on failure. * Pointer to the allocated area on success, NULL on failure.
*/ */
static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
unsigned long align) size_t align)
{ {
const unsigned long goal = __pa(MAX_DMA_ADDRESS); const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NEED_MULTIPLE_NODES #ifdef CONFIG_NEED_MULTIPLE_NODES
@ -1415,127 +1415,35 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
#endif #endif
} }
static size_t pcpur_size __initdata; static void __init pcpu_free_bootmem(void *ptr, size_t size)
static void **pcpur_ptrs __initdata;
static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
{ {
size_t off = (size_t)pageno << PAGE_SHIFT; free_bootmem(__pa(ptr), size);
if (off >= pcpur_size)
return NULL;
return virt_to_page(pcpur_ptrs[cpu] + off);
} }
#define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL) static int pcpu_cpu_distance(unsigned int from, unsigned int to)
static void __init pcpu_map_range(unsigned long start, unsigned long end,
struct page *page)
{ {
unsigned long pfn = page_to_pfn(page); if (cpu_to_node(from) == cpu_to_node(to))
unsigned long pte_base; return LOCAL_DISTANCE;
else
BUG_ON((pfn<<PAGE_SHIFT)&(PCPU_CHUNK_SIZE - 1UL)); return REMOTE_DISTANCE;
pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
_PAGE_CP_4U | _PAGE_CV_4U |
_PAGE_P_4U | _PAGE_W_4U);
if (tlb_type == hypervisor)
pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
while (start < end) {
pgd_t *pgd = pgd_offset_k(start);
unsigned long this_end;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pud = pud_offset(pgd, start);
if (pud_none(*pud)) {
pmd_t *new;
new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
pud_populate(&init_mm, pud, new);
}
pmd = pmd_offset(pud, start);
if (!pmd_present(*pmd)) {
pte_t *new;
new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
pmd_populate_kernel(&init_mm, pmd, new);
}
pte = pte_offset_kernel(pmd, start);
this_end = (start + PMD_SIZE) & PMD_MASK;
if (this_end > end)
this_end = end;
while (start < this_end) {
unsigned long paddr = pfn << PAGE_SHIFT;
pte_val(*pte) = (paddr | pte_base);
start += PAGE_SIZE;
pte++;
pfn++;
}
}
} }
void __init setup_per_cpu_areas(void) void __init setup_per_cpu_areas(void)
{ {
size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; unsigned long delta;
static struct vm_struct vm; unsigned int cpu;
unsigned long delta, cpu; int rc;
size_t pcpu_unit_size;
size_t ptrs_size;
pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE); PERCPU_DYNAMIC_RESERVE, 4 << 20,
dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE; pcpu_cpu_distance, pcpu_alloc_bootmem,
pcpu_free_bootmem);
if (rc)
ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpur_ptrs[0])); panic("failed to initialize first chunk (%d)", rc);
pcpur_ptrs = alloc_bootmem(ptrs_size);
for_each_possible_cpu(cpu) {
pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE,
PCPU_CHUNK_SIZE);
free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size),
PCPU_CHUNK_SIZE - pcpur_size);
memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size);
}
/* allocate address and map */
vm.flags = VM_ALLOC;
vm.size = nr_cpu_ids * PCPU_CHUNK_SIZE;
vm_area_register_early(&vm, PCPU_CHUNK_SIZE);
for_each_possible_cpu(cpu) {
unsigned long start = (unsigned long) vm.addr;
unsigned long end;
start += cpu * PCPU_CHUNK_SIZE;
end = start + PCPU_CHUNK_SIZE;
pcpu_map_range(start, end, virt_to_page(pcpur_ptrs[cpu]));
}
pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size,
PERCPU_MODULE_RESERVE, dyn_size,
PCPU_CHUNK_SIZE, vm.addr, NULL);
free_bootmem(__pa(pcpur_ptrs), ptrs_size);
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu)
__per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; __per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
}
/* Setup %g5 for the boot cpu. */ /* Setup %g5 for the boot cpu. */
__local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); __local_per_cpu_offset = __per_cpu_offset(smp_processor_id());

View File

@ -171,12 +171,8 @@ SECTIONS
} }
_end = . ; _end = . ;
/DISCARD/ : {
EXIT_TEXT
EXIT_DATA
*(.exitcall.exit)
}
STABS_DEBUG STABS_DEBUG
DWARF_DEBUG DWARF_DEBUG
DISCARDS
} }

View File

@ -123,8 +123,3 @@
__initramfs_end = .; __initramfs_end = .;
} }
/* Sections to be discarded */
/DISCARD/ : {
*(.exitcall.exit)
}

View File

@ -156,4 +156,6 @@ SECTIONS
STABS_DEBUG STABS_DEBUG
DWARF_DEBUG DWARF_DEBUG
DISCARDS
} }

View File

@ -100,4 +100,6 @@ SECTIONS
STABS_DEBUG STABS_DEBUG
DWARF_DEBUG DWARF_DEBUG
DISCARDS
} }

View File

@ -150,7 +150,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config HAVE_SETUP_PER_CPU_AREA config HAVE_SETUP_PER_CPU_AREA
def_bool y def_bool y
config HAVE_DYNAMIC_PER_CPU_AREA config NEED_PER_CPU_EMBED_FIRST_CHUNK
def_bool y
config NEED_PER_CPU_PAGE_FIRST_CHUNK
def_bool y def_bool y
config HAVE_CPUMASK_OF_CPU_MAP config HAVE_CPUMASK_OF_CPU_MAP

View File

@ -156,15 +156,6 @@ do { \
/* We can use this directly for local CPU (faster). */ /* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off); DECLARE_PER_CPU(unsigned long, this_cpu_off);
#ifdef CONFIG_NEED_MULTIPLE_NODES
void *pcpu_lpage_remapped(void *kaddr);
#else
static inline void *pcpu_lpage_remapped(void *kaddr)
{
return NULL;
}
#endif
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP

View File

@ -30,8 +30,8 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/desc.h> #include <asm/desc.h>
static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr);
static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr);
static DEFINE_PER_CPU(int, cpu_priv_count); static DEFINE_PER_CPU(int, cpu_priv_count);
static DEFINE_MUTEX(cpu_debug_lock); static DEFINE_MUTEX(cpu_debug_lock);

View File

@ -1091,7 +1091,7 @@ void mce_log_therm_throt_event(__u64 status)
*/ */
static int check_interval = 5 * 60; /* 5 minutes */ static int check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer); static DEFINE_PER_CPU(struct timer_list, mce_timer);
static void mcheck_timer(unsigned long data) static void mcheck_timer(unsigned long data)
@ -1110,7 +1110,7 @@ static void mcheck_timer(unsigned long data)
* Alert userspace if needed. If we logged an MCE, reduce the * Alert userspace if needed. If we logged an MCE, reduce the
* polling interval, otherwise increase the polling interval. * polling interval, otherwise increase the polling interval.
*/ */
n = &__get_cpu_var(next_interval); n = &__get_cpu_var(mce_next_interval);
if (mce_notify_irq()) if (mce_notify_irq())
*n = max(*n/2, HZ/100); *n = max(*n/2, HZ/100);
else else
@ -1311,7 +1311,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
static void mce_init_timer(void) static void mce_init_timer(void)
{ {
struct timer_list *t = &__get_cpu_var(mce_timer); struct timer_list *t = &__get_cpu_var(mce_timer);
int *n = &__get_cpu_var(next_interval); int *n = &__get_cpu_var(mce_next_interval);
if (mce_ignore_ce) if (mce_ignore_ce)
return; return;
@ -1912,7 +1912,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DOWN_FAILED: case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN: case CPU_DOWN_FAILED_FROZEN:
t->expires = round_jiffies(jiffies + t->expires = round_jiffies(jiffies +
__get_cpu_var(next_interval)); __get_cpu_var(mce_next_interval));
add_timer_on(t, cpu); add_timer_on(t, cpu);
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break; break;

View File

@ -69,7 +69,7 @@ struct threshold_bank {
struct threshold_block *blocks; struct threshold_block *blocks;
cpumask_var_t cpus; cpumask_var_t cpus;
}; };
static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
static unsigned char shared_bank[NR_BANKS] = { static unsigned char shared_bank[NR_BANKS] = {

View File

@ -976,7 +976,7 @@ amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
x86_pmu_disable_counter(hwc, idx); x86_pmu_disable_counter(hwc, idx);
} }
static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
/* /*
* Set the next IRQ period, based on the hwc->period_left value. * Set the next IRQ period, based on the hwc->period_left value.
@ -1015,7 +1015,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
if (left > x86_pmu.max_period) if (left > x86_pmu.max_period)
left = x86_pmu.max_period; left = x86_pmu.max_period;
per_cpu(prev_left[idx], smp_processor_id()) = left; per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
/* /*
* The hw counter starts counting from this counter offset, * The hw counter starts counting from this counter offset,
@ -1211,7 +1211,7 @@ void perf_counter_print_debug(void)
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
rdmsrl(x86_pmu.perfctr + idx, pmc_count); rdmsrl(x86_pmu.perfctr + idx, pmc_count);
prev_left = per_cpu(prev_left[idx], cpu); prev_left = per_cpu(pmc_prev_left[idx], cpu);
pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
cpu, idx, pmc_ctrl); cpu, idx, pmc_ctrl);
@ -1798,8 +1798,8 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
entry->ip[entry->nr++] = ip; entry->ip[entry->nr++] = ip;
} }
static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
static DEFINE_PER_CPU(int, in_nmi_frame); static DEFINE_PER_CPU(int, in_nmi_frame);
@ -1952,9 +1952,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
struct perf_callchain_entry *entry; struct perf_callchain_entry *entry;
if (in_nmi()) if (in_nmi())
entry = &__get_cpu_var(nmi_entry); entry = &__get_cpu_var(pmc_nmi_entry);
else else
entry = &__get_cpu_var(irq_entry); entry = &__get_cpu_var(pmc_irq_entry);
entry->nr = 0; entry->nr = 0;

View File

@ -55,6 +55,7 @@ EXPORT_SYMBOL(__per_cpu_offset);
#define PERCPU_FIRST_CHUNK_RESERVE 0 #define PERCPU_FIRST_CHUNK_RESERVE 0
#endif #endif
#ifdef CONFIG_X86_32
/** /**
* pcpu_need_numa - determine percpu allocation needs to consider NUMA * pcpu_need_numa - determine percpu allocation needs to consider NUMA
* *
@ -83,6 +84,7 @@ static bool __init pcpu_need_numa(void)
#endif #endif
return false; return false;
} }
#endif
/** /**
* pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
@ -124,308 +126,35 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
} }
/* /*
* Large page remap allocator * Helpers for first chunk memory allocation
*
* This allocator uses PMD page as unit. A PMD page is allocated for
* each cpu and each is remapped into vmalloc area using PMD mapping.
* As PMD page is quite large, only part of it is used for the first
* chunk. Unused part is returned to the bootmem allocator.
*
* So, the PMD pages are mapped twice - once to the physical mapping
* and to the vmalloc area for the first percpu chunk. The double
* mapping does add one more PMD TLB entry pressure but still is much
* better than only using 4k mappings while still being NUMA friendly.
*/ */
/*
 * First-chunk allocation callback. setup_per_cpu_areas() passes this to
 * pcpu_embed_first_chunk() and pcpu_page_first_chunk(). It forwards @cpu,
 * @size and @align unchanged to pcpu_alloc_bootmem() and returns whatever
 * that helper returns.
 */
static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
return pcpu_alloc_bootmem(cpu, size, align);
}
/*
 * First-chunk free callback, passed alongside pcpu_fc_alloc() to the
 * first-chunk setup helpers. Releases @size bytes starting at @ptr through
 * free_bootmem(); @ptr is converted with __pa() before being handed over.
 */
static void __init pcpu_fc_free(void *ptr, size_t size)
{
free_bootmem(__pa(ptr), size);
}
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES #ifdef CONFIG_NEED_MULTIPLE_NODES
struct pcpul_ent { if (early_cpu_to_node(from) == early_cpu_to_node(to))
unsigned int cpu; return LOCAL_DISTANCE;
void *ptr; else
}; return REMOTE_DISTANCE;
static size_t pcpul_size;
static struct pcpul_ent *pcpul_map;
static struct vm_struct pcpul_vm;
static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
{
size_t off = (size_t)pageno << PAGE_SHIFT;
if (off >= pcpul_size)
return NULL;
return virt_to_page(pcpul_map[cpu].ptr + off);
}
static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
{
size_t map_size, dyn_size;
unsigned int cpu;
int i, j;
ssize_t ret;
if (!chosen) {
size_t vm_size = VMALLOC_END - VMALLOC_START;
size_t tot_size = nr_cpu_ids * PMD_SIZE;
/* on non-NUMA, embedding is better */
if (!pcpu_need_numa())
return -EINVAL;
/* don't consume more than 20% of vmalloc area */
if (tot_size > vm_size / 5) {
pr_info("PERCPU: too large chunk size %zuMB for "
"large page remap\n", tot_size >> 20);
return -EINVAL;
}
}
/* need PSE */
if (!cpu_has_pse) {
pr_warning("PERCPU: lpage allocator requires PSE\n");
return -EINVAL;
}
/*
* Currently supports only single page. Supporting multiple
* pages won't be too difficult if it ever becomes necessary.
*/
pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
PERCPU_DYNAMIC_RESERVE);
if (pcpul_size > PMD_SIZE) {
pr_warning("PERCPU: static data is larger than large page, "
"can't use large page\n");
return -EINVAL;
}
dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
/* allocate pointer array and alloc large pages */
map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0]));
pcpul_map = alloc_bootmem(map_size);
for_each_possible_cpu(cpu) {
pcpul_map[cpu].cpu = cpu;
pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
PMD_SIZE);
if (!pcpul_map[cpu].ptr) {
pr_warning("PERCPU: failed to allocate large page "
"for cpu%u\n", cpu);
goto enomem;
}
/*
* Only use pcpul_size bytes and give back the rest.
*
* Ingo: The 2MB up-rounding bootmem is needed to make
* sure the partial 2MB page is still fully RAM - it's
* not well-specified to have a PAT-incompatible area
* (unmapped RAM, device memory, etc.) in that hole.
*/
free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size),
PMD_SIZE - pcpul_size);
memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size);
}
/* allocate address and map */
pcpul_vm.flags = VM_ALLOC;
pcpul_vm.size = nr_cpu_ids * PMD_SIZE;
vm_area_register_early(&pcpul_vm, PMD_SIZE);
for_each_possible_cpu(cpu) {
pmd_t *pmd, pmd_v;
pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr +
cpu * PMD_SIZE);
pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)),
PAGE_KERNEL_LARGE);
set_pmd(pmd, pmd_v);
}
/* we're ready, commit */
pr_info("PERCPU: Remapped at %p with large pages, static data "
"%zu bytes\n", pcpul_vm.addr, static_size);
ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
PMD_SIZE, pcpul_vm.addr, NULL);
/* sort pcpul_map array for pcpu_lpage_remapped() */
for (i = 0; i < nr_cpu_ids - 1; i++)
for (j = i + 1; j < nr_cpu_ids; j++)
if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
struct pcpul_ent tmp = pcpul_map[i];
pcpul_map[i] = pcpul_map[j];
pcpul_map[j] = tmp;
}
return ret;
enomem:
for_each_possible_cpu(cpu)
if (pcpul_map[cpu].ptr)
free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
free_bootmem(__pa(pcpul_map), map_size);
return -ENOMEM;
}
/**
* pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
* @kaddr: the kernel address in question
*
* Determine whether @kaddr falls in the pcpul recycled area. This is
* used by pageattr to detect VM aliases and break up the pcpu PMD
* mapping such that the same physical page is not mapped under
* different attributes.
*
* The recycled area is always at the tail of a partially used PMD
* page.
*
* RETURNS:
* Address of corresponding remapped pcpu address if match is found;
* otherwise, NULL.
*/
void *pcpu_lpage_remapped(void *kaddr)
{
void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
int left = 0, right = nr_cpu_ids - 1;
int pos;
/* pcpul in use at all? */
if (!pcpul_map)
return NULL;
/* okay, perform binary search */
while (left <= right) {
pos = (left + right) / 2;
if (pcpul_map[pos].ptr < pmd_addr)
left = pos + 1;
else if (pcpul_map[pos].ptr > pmd_addr)
right = pos - 1;
else {
/* it shouldn't be in the area for the first chunk */
WARN_ON(offset < pcpul_size);
return pcpul_vm.addr +
pcpul_map[pos].cpu * PMD_SIZE + offset;
}
}
return NULL;
}
#else #else
static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) return LOCAL_DISTANCE;
{
return -EINVAL;
}
#endif #endif
/*
* Embedding allocator
*
* The first chunk is sized to just contain the static area plus
* module and dynamic reserves and embedded into linear physical
* mapping so that it can use PMD mapping without additional TLB
* pressure.
*/
static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen)
{
size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
/*
* If large page isn't supported, there's no benefit in doing
* this. Also, embedding allocation doesn't play well with
* NUMA.
*/
if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
return -EINVAL;
return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
reserve - PERCPU_FIRST_CHUNK_RESERVE, -1);
} }
/* static void __init pcpup_populate_pte(unsigned long addr)
* 4k page allocator
*
* This is the basic allocator. Static percpu area is allocated
* page-by-page and most of initialization is done by the generic
* setup function.
*/
static struct page **pcpu4k_pages __initdata;
static int pcpu4k_nr_static_pages __initdata;
static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno)
{
if (pageno < pcpu4k_nr_static_pages)
return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno];
return NULL;
}
static void __init pcpu4k_populate_pte(unsigned long addr)
{ {
populate_extra_pte(addr); populate_extra_pte(addr);
} }
static ssize_t __init setup_pcpu_4k(size_t static_size)
{
size_t pages_size;
unsigned int cpu;
int i, j;
ssize_t ret;
pcpu4k_nr_static_pages = PFN_UP(static_size);
/* unaligned allocations can't be freed, round up to page size */
pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids
* sizeof(pcpu4k_pages[0]));
pcpu4k_pages = alloc_bootmem(pages_size);
/* allocate and copy */
j = 0;
for_each_possible_cpu(cpu)
for (i = 0; i < pcpu4k_nr_static_pages; i++) {
void *ptr;
ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
if (!ptr) {
pr_warning("PERCPU: failed to allocate "
"4k page for cpu%u\n", cpu);
goto enomem;
}
memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
pcpu4k_pages[j++] = virt_to_page(ptr);
}
/* we're ready, commit */
pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n",
pcpu4k_nr_static_pages, static_size);
ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE, -1,
-1, NULL, pcpu4k_populate_pte);
goto out_free_ar;
enomem:
while (--j >= 0)
free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE);
ret = -ENOMEM;
out_free_ar:
free_bootmem(__pa(pcpu4k_pages), pages_size);
return ret;
}
/* for explicit first chunk allocator selection */
static char pcpu_chosen_alloc[16] __initdata;
static int __init percpu_alloc_setup(char *str)
{
strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1);
return 0;
}
early_param("percpu_alloc", percpu_alloc_setup);
static inline void setup_percpu_segment(int cpu) static inline void setup_percpu_segment(int cpu)
{ {
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
@ -441,52 +170,49 @@ static inline void setup_percpu_segment(int cpu)
void __init setup_per_cpu_areas(void) void __init setup_per_cpu_areas(void)
{ {
size_t static_size = __per_cpu_end - __per_cpu_start;
unsigned int cpu; unsigned int cpu;
unsigned long delta; unsigned long delta;
size_t pcpu_unit_size; int rc;
ssize_t ret;
pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
/* /*
* Allocate percpu area. If PSE is supported, try to make use * Allocate percpu area. Embedding allocator is our favorite;
* of large page mappings. Please read comments on top of * however, on NUMA configurations, it can result in very
* each allocator for details. * sparse unit mapping and vmalloc area isn't spacious enough
* on 32bit. Use page in that case.
*/ */
ret = -EINVAL; #ifdef CONFIG_X86_32
if (strlen(pcpu_chosen_alloc)) { if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
if (strcmp(pcpu_chosen_alloc, "4k")) { pcpu_chosen_fc = PCPU_FC_PAGE;
if (!strcmp(pcpu_chosen_alloc, "lpage")) #endif
ret = setup_pcpu_lpage(static_size, true); rc = -EINVAL;
else if (!strcmp(pcpu_chosen_alloc, "embed")) if (pcpu_chosen_fc != PCPU_FC_PAGE) {
ret = setup_pcpu_embed(static_size, true); const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE;
else const size_t dyn_size = PERCPU_MODULE_RESERVE +
pr_warning("PERCPU: unknown allocator %s " PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE;
"specified\n", pcpu_chosen_alloc);
if (ret < 0)
pr_warning("PERCPU: %s allocator failed (%zd), "
"falling back to 4k\n",
pcpu_chosen_alloc, ret);
}
} else {
ret = setup_pcpu_lpage(static_size, false);
if (ret < 0)
ret = setup_pcpu_embed(static_size, false);
}
if (ret < 0)
ret = setup_pcpu_4k(static_size);
if (ret < 0)
panic("cannot allocate static percpu area (%zu bytes, err=%zd)",
static_size, ret);
pcpu_unit_size = ret; rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
dyn_size, atom_size,
pcpu_cpu_distance,
pcpu_fc_alloc, pcpu_fc_free);
if (rc < 0)
pr_warning("PERCPU: %s allocator failed (%d), "
"falling back to page size\n",
pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
pcpu_fc_alloc, pcpu_fc_free,
pcpup_populate_pte);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
/* alrighty, percpu areas up and running */ /* alrighty, percpu areas up and running */
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu; per_cpu(cpu_number, cpu) = cpu;
setup_percpu_segment(cpu); setup_percpu_segment(cpu);

View File

@ -380,15 +380,12 @@ SECTIONS
_end = .; _end = .;
} }
/* Sections to be discarded */
/DISCARD/ : {
*(.exitcall.exit)
*(.eh_frame)
*(.discard)
}
STABS_DEBUG STABS_DEBUG
DWARF_DEBUG DWARF_DEBUG
/* Sections to be discarded */
DISCARDS
/DISCARD/ : { *(.eh_frame) }
} }

View File

@ -12,6 +12,7 @@
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/pfn.h> #include <linux/pfn.h>
#include <linux/percpu.h>
#include <asm/e820.h> #include <asm/e820.h>
#include <asm/processor.h> #include <asm/processor.h>
@ -686,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
{ {
struct cpa_data alias_cpa; struct cpa_data alias_cpa;
unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
unsigned long vaddr, remapped; unsigned long vaddr;
int ret; int ret;
if (cpa->pfn >= max_pfn_mapped) if (cpa->pfn >= max_pfn_mapped)
@ -744,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa)
} }
#endif #endif
/*
* If the PMD page was partially used for per-cpu remapping,
* the recycled area needs to be split and modified. Because
* the area is always proper subset of a PMD page
* cpa->numpages is guaranteed to be 1 for these areas, so
* there's no need to loop over and check for further remaps.
*/
remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
if (remapped) {
WARN_ON(cpa->numpages > 1);
alias_cpa = *cpa;
alias_cpa.vaddr = &remapped;
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
ret = __change_page_attr_set_clr(&alias_cpa, 0);
if (ret)
return ret;
}
return 0; return 0;
} }

View File

@ -280,15 +280,6 @@ SECTIONS
*(.ResetVector.text) *(.ResetVector.text)
} }
/* Sections to be discarded */
/DISCARD/ :
{
*(.exit.literal)
EXIT_TEXT
EXIT_DATA
*(.exitcall.exit)
}
.xt.lit : { *(.xt.lit) } .xt.lit : { *(.xt.lit) }
.xt.prop : { *(.xt.prop) } .xt.prop : { *(.xt.prop) }
@ -321,4 +312,8 @@ SECTIONS
*(.xt.lit) *(.xt.lit)
*(.gnu.linkonce.p*) *(.gnu.linkonce.p*)
} }
/* Sections to be discarded */
DISCARDS
/DISCARD/ : { *(.exit.literal) }
} }

View File

@ -146,7 +146,7 @@ enum arq_state {
#define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2) #define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2)
#define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state) #define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state)
static DEFINE_PER_CPU(unsigned long, ioc_count); static DEFINE_PER_CPU(unsigned long, as_ioc_count);
static struct completion *ioc_gone; static struct completion *ioc_gone;
static DEFINE_SPINLOCK(ioc_gone_lock); static DEFINE_SPINLOCK(ioc_gone_lock);
@ -161,7 +161,7 @@ static void as_antic_stop(struct as_data *ad);
static void free_as_io_context(struct as_io_context *aic) static void free_as_io_context(struct as_io_context *aic)
{ {
kfree(aic); kfree(aic);
elv_ioc_count_dec(ioc_count); elv_ioc_count_dec(as_ioc_count);
if (ioc_gone) { if (ioc_gone) {
/* /*
* AS scheduler is exiting, grab exit lock and check * AS scheduler is exiting, grab exit lock and check
@ -169,7 +169,7 @@ static void free_as_io_context(struct as_io_context *aic)
* complete ioc_gone and set it back to NULL. * complete ioc_gone and set it back to NULL.
*/ */
spin_lock(&ioc_gone_lock); spin_lock(&ioc_gone_lock);
if (ioc_gone && !elv_ioc_count_read(ioc_count)) { if (ioc_gone && !elv_ioc_count_read(as_ioc_count)) {
complete(ioc_gone); complete(ioc_gone);
ioc_gone = NULL; ioc_gone = NULL;
} }
@ -211,7 +211,7 @@ static struct as_io_context *alloc_as_io_context(void)
ret->seek_total = 0; ret->seek_total = 0;
ret->seek_samples = 0; ret->seek_samples = 0;
ret->seek_mean = 0; ret->seek_mean = 0;
elv_ioc_count_inc(ioc_count); elv_ioc_count_inc(as_ioc_count);
} }
return ret; return ret;
@ -1507,7 +1507,7 @@ static void __exit as_exit(void)
ioc_gone = &all_gone; ioc_gone = &all_gone;
/* ioc_gone's update must be visible before reading ioc_count */ /* ioc_gone's update must be visible before reading ioc_count */
smp_wmb(); smp_wmb();
if (elv_ioc_count_read(ioc_count)) if (elv_ioc_count_read(as_ioc_count))
wait_for_completion(&all_gone); wait_for_completion(&all_gone);
synchronize_rcu(); synchronize_rcu();
} }

View File

@ -48,7 +48,7 @@ static int cfq_slice_idle = HZ / 125;
static struct kmem_cache *cfq_pool; static struct kmem_cache *cfq_pool;
static struct kmem_cache *cfq_ioc_pool; static struct kmem_cache *cfq_ioc_pool;
static DEFINE_PER_CPU(unsigned long, ioc_count); static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
static struct completion *ioc_gone; static struct completion *ioc_gone;
static DEFINE_SPINLOCK(ioc_gone_lock); static DEFINE_SPINLOCK(ioc_gone_lock);
@ -1427,7 +1427,7 @@ static void cfq_cic_free_rcu(struct rcu_head *head)
cic = container_of(head, struct cfq_io_context, rcu_head); cic = container_of(head, struct cfq_io_context, rcu_head);
kmem_cache_free(cfq_ioc_pool, cic); kmem_cache_free(cfq_ioc_pool, cic);
elv_ioc_count_dec(ioc_count); elv_ioc_count_dec(cfq_ioc_count);
if (ioc_gone) { if (ioc_gone) {
/* /*
@ -1436,7 +1436,7 @@ static void cfq_cic_free_rcu(struct rcu_head *head)
* complete ioc_gone and set it back to NULL * complete ioc_gone and set it back to NULL
*/ */
spin_lock(&ioc_gone_lock); spin_lock(&ioc_gone_lock);
if (ioc_gone && !elv_ioc_count_read(ioc_count)) { if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) {
complete(ioc_gone); complete(ioc_gone);
ioc_gone = NULL; ioc_gone = NULL;
} }
@ -1562,7 +1562,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
INIT_HLIST_NODE(&cic->cic_list); INIT_HLIST_NODE(&cic->cic_list);
cic->dtor = cfq_free_io_context; cic->dtor = cfq_free_io_context;
cic->exit = cfq_exit_io_context; cic->exit = cfq_exit_io_context;
elv_ioc_count_inc(ioc_count); elv_ioc_count_inc(cfq_ioc_count);
} }
return cic; return cic;
@ -2668,7 +2668,7 @@ static void __exit cfq_exit(void)
* this also protects us from entering cfq_slab_kill() with * this also protects us from entering cfq_slab_kill() with
* pending RCU callbacks * pending RCU callbacks
*/ */
if (elv_ioc_count_read(ioc_count)) if (elv_ioc_count_read(cfq_ioc_count))
wait_for_completion(&all_gone); wait_for_completion(&all_gone);
cfq_slab_kill(); cfq_slab_kill();
} }

View File

@ -71,7 +71,7 @@ struct cpu_dbs_info_s {
*/ */
struct mutex timer_mutex; struct mutex timer_mutex;
}; };
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info);
static unsigned int dbs_enable; /* number of CPUs using this policy */ static unsigned int dbs_enable; /* number of CPUs using this policy */
@ -137,7 +137,7 @@ dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data) void *data)
{ {
struct cpufreq_freqs *freq = data; struct cpufreq_freqs *freq = data;
struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info,
freq->cpu); freq->cpu);
struct cpufreq_policy *policy; struct cpufreq_policy *policy;
@ -297,7 +297,7 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
/* we need to re-evaluate prev_cpu_idle */ /* we need to re-evaluate prev_cpu_idle */
for_each_online_cpu(j) { for_each_online_cpu(j) {
struct cpu_dbs_info_s *dbs_info; struct cpu_dbs_info_s *dbs_info;
dbs_info = &per_cpu(cpu_dbs_info, j); dbs_info = &per_cpu(cs_cpu_dbs_info, j);
dbs_info->prev_cpu_idle = get_cpu_idle_time(j, dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&dbs_info->prev_cpu_wall); &dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice) if (dbs_tuners_ins.ignore_nice)
@ -387,7 +387,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
cputime64_t cur_wall_time, cur_idle_time; cputime64_t cur_wall_time, cur_idle_time;
unsigned int idle_time, wall_time; unsigned int idle_time, wall_time;
j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
@ -521,7 +521,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
unsigned int j; unsigned int j;
int rc; int rc;
this_dbs_info = &per_cpu(cpu_dbs_info, cpu); this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);
switch (event) { switch (event) {
case CPUFREQ_GOV_START: case CPUFREQ_GOV_START:
@ -538,7 +538,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
for_each_cpu(j, policy->cpus) { for_each_cpu(j, policy->cpus) {
struct cpu_dbs_info_s *j_dbs_info; struct cpu_dbs_info_s *j_dbs_info;
j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
j_dbs_info->cur_policy = policy; j_dbs_info->cur_policy = policy;
j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,

View File

@ -78,7 +78,7 @@ struct cpu_dbs_info_s {
*/ */
struct mutex timer_mutex; struct mutex timer_mutex;
}; };
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);
static unsigned int dbs_enable; /* number of CPUs using this policy */ static unsigned int dbs_enable; /* number of CPUs using this policy */
@ -149,7 +149,8 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
unsigned int freq_hi, freq_lo; unsigned int freq_hi, freq_lo;
unsigned int index = 0; unsigned int index = 0;
unsigned int jiffies_total, jiffies_hi, jiffies_lo; unsigned int jiffies_total, jiffies_hi, jiffies_lo;
struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu); struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
policy->cpu);
if (!dbs_info->freq_table) { if (!dbs_info->freq_table) {
dbs_info->freq_lo = 0; dbs_info->freq_lo = 0;
@ -192,7 +193,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
static void ondemand_powersave_bias_init_cpu(int cpu) static void ondemand_powersave_bias_init_cpu(int cpu)
{ {
struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu); struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
dbs_info->freq_table = cpufreq_frequency_get_table(cpu); dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
dbs_info->freq_lo = 0; dbs_info->freq_lo = 0;
} }
@ -297,7 +298,7 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
/* we need to re-evaluate prev_cpu_idle */ /* we need to re-evaluate prev_cpu_idle */
for_each_online_cpu(j) { for_each_online_cpu(j) {
struct cpu_dbs_info_s *dbs_info; struct cpu_dbs_info_s *dbs_info;
dbs_info = &per_cpu(cpu_dbs_info, j); dbs_info = &per_cpu(od_cpu_dbs_info, j);
dbs_info->prev_cpu_idle = get_cpu_idle_time(j, dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&dbs_info->prev_cpu_wall); &dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice) if (dbs_tuners_ins.ignore_nice)
@ -388,7 +389,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
unsigned int load, load_freq; unsigned int load, load_freq;
int freq_avg; int freq_avg;
j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
@ -535,7 +536,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
unsigned int j; unsigned int j;
int rc; int rc;
this_dbs_info = &per_cpu(cpu_dbs_info, cpu); this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
switch (event) { switch (event) {
case CPUFREQ_GOV_START: case CPUFREQ_GOV_START:
@ -553,7 +554,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
dbs_enable++; dbs_enable++;
for_each_cpu(j, policy->cpus) { for_each_cpu(j, policy->cpus) {
struct cpu_dbs_info_s *j_dbs_info; struct cpu_dbs_info_s *j_dbs_info;
j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
j_dbs_info->cur_policy = policy; j_dbs_info->cur_policy = policy;
j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,

View File

@ -47,10 +47,10 @@
static DEFINE_SPINLOCK(irq_mapping_update_lock); static DEFINE_SPINLOCK(irq_mapping_update_lock);
/* IRQ <-> VIRQ mapping. */ /* IRQ <-> VIRQ mapping. */
static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
/* IRQ <-> IPI mapping */ /* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1}; static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
/* Interrupt types. */ /* Interrupt types. */
enum xen_irq_type { enum xen_irq_type {
@ -602,6 +602,8 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
static DEFINE_PER_CPU(unsigned, xed_nesting_count);
/* /*
* Search the CPUs pending events bitmasks. For each one found, map * Search the CPUs pending events bitmasks. For each one found, map
* the event number to an irq, and feed it into do_IRQ() for * the event number to an irq, and feed it into do_IRQ() for
@ -617,7 +619,6 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs); struct pt_regs *old_regs = set_irq_regs(regs);
struct shared_info *s = HYPERVISOR_shared_info; struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
static DEFINE_PER_CPU(unsigned, nesting_count);
unsigned count; unsigned count;
exit_idle(); exit_idle();
@ -628,7 +629,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
vcpu_info->evtchn_upcall_pending = 0; vcpu_info->evtchn_upcall_pending = 0;
if (__get_cpu_var(nesting_count)++) if (__get_cpu_var(xed_nesting_count)++)
goto out; goto out;
#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
@ -653,8 +654,8 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
BUG_ON(!irqs_disabled()); BUG_ON(!irqs_disabled());
count = __get_cpu_var(nesting_count); count = __get_cpu_var(xed_nesting_count);
__get_cpu_var(nesting_count) = 0; __get_cpu_var(xed_nesting_count) = 0;
} while(count != 1); } while(count != 1);
out: out:

View File

@ -33,13 +33,10 @@
* BSS_SECTION(0, 0, 0) * BSS_SECTION(0, 0, 0)
* _end = .; * _end = .;
* *
* /DISCARD/ : {
* EXIT_TEXT
* EXIT_DATA
* EXIT_CALL
* }
* STABS_DEBUG * STABS_DEBUG
* DWARF_DEBUG * DWARF_DEBUG
*
* DISCARDS // must be the last
* } * }
* *
* [__init_begin, __init_end] is the init section that may be freed after init * [__init_begin, __init_end] is the init section that may be freed after init
@ -626,6 +623,23 @@
#define INIT_RAM_FS #define INIT_RAM_FS
#endif #endif
/*
 * Default discarded sections.
 *
 * Expands to a /DISCARD/ output section collecting EXIT_TEXT, EXIT_DATA,
 * EXIT_CALL and *(.discard).
 *
 * Some archs want to discard exit text/data at runtime rather than at
 * link time because of cross-section references such as alt instructions,
 * bug table, eh_frame, etc. DISCARDS must therefore be the last output
 * section definition in the linker script, so that such archs can place
 * those input sections in earlier output section definitions instead.
 */
#define DISCARDS \
/DISCARD/ : { \
EXIT_TEXT \
EXIT_DATA \
EXIT_CALL \
*(.discard) \
}
/** /**
* PERCPU_VADDR - define output section for percpu area * PERCPU_VADDR - define output section for percpu area
* @vaddr: explicit base address (optional) * @vaddr: explicit base address (optional)

View File

@ -10,22 +10,70 @@
/* /*
* Base implementations of per-CPU variable declarations and definitions, where * Base implementations of per-CPU variable declarations and definitions, where
* the section in which the variable is to be placed is provided by the * the section in which the variable is to be placed is provided by the
* 'section' argument. This may be used to affect the parameters governing the * 'sec' argument. This may be used to affect the parameters governing the
* variable's storage. * variable's storage.
* *
* NOTE! The sections for the DECLARE and for the DEFINE must match, lest * NOTE! The sections for the DECLARE and for the DEFINE must match, lest
* linkage errors occur due to the compiler generating the wrong code to access * linkage errors occur due to the compiler generating the wrong code to access
* that section. * that section.
*/ */
#define DECLARE_PER_CPU_SECTION(type, name, section) \ #define __PCPU_ATTRS(sec) \
extern \ __attribute__((section(PER_CPU_BASE_SECTION sec))) \
__attribute__((__section__(PER_CPU_BASE_SECTION section))) \ PER_CPU_ATTRIBUTES
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
#define DEFINE_PER_CPU_SECTION(type, name, section) \ #define __PCPU_DUMMY_ATTRS \
__attribute__((__section__(PER_CPU_BASE_SECTION section))) \ __attribute__((section(".discard"), unused))
PER_CPU_ATTRIBUTES PER_CPU_DEF_ATTRIBUTES \
/*
* s390 and alpha modules require percpu variables to be defined as
* weak to force the compiler to generate GOT based external
* references for them. This is necessary because percpu sections
* will be located outside of the usually addressable area.
*
* This definition puts the following two extra restrictions when
* defining percpu variables.
*
* 1. The symbol must be globally unique, even the static ones.
* 2. Static percpu variables cannot be defined inside a function.
*
* Archs which need weak percpu definitions should define
* ARCH_NEEDS_WEAK_PER_CPU in asm/percpu.h when necessary.
*
* To ensure that the generic code observes the above two
* restrictions, if CONFIG_DEBUG_FORCE_WEAK_PER_CPU is set weak
* definition is used for all cases.
*/
#if defined(ARCH_NEEDS_WEAK_PER_CPU) || defined(CONFIG_DEBUG_FORCE_WEAK_PER_CPU)
/*
* __pcpu_scope_* dummy variable is used to enforce scope. It
* receives the static modifier when it's used in front of
* DEFINE_PER_CPU() and will trigger build failure if
* DECLARE_PER_CPU() is used for the same variable.
*
* __pcpu_unique_* dummy variable is used to enforce symbol uniqueness
* such that hidden weak symbol collision, which will cause unrelated
* variables to share the same address, can be detected during build.
*/
#define DECLARE_PER_CPU_SECTION(type, name, sec) \
extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \
extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name
#define DEFINE_PER_CPU_SECTION(type, name, sec) \
__PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \
__PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \
__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak \
__typeof__(type) per_cpu__##name __typeof__(type) per_cpu__##name
#else
/*
* Normal declaration and definition macros.
*/
#define DECLARE_PER_CPU_SECTION(type, name, sec) \
extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name
#define DEFINE_PER_CPU_SECTION(type, name, sec) \
__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES \
__typeof__(type) per_cpu__##name
#endif
/* /*
* Variant on the per-CPU variable declaration/definition theme used for * Variant on the per-CPU variable declaration/definition theme used for

View File

@ -34,7 +34,7 @@
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA #ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
/* minimum unit size, also is the maximum supported allocation size */ /* minimum unit size, also is the maximum supported allocation size */
#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10)
@ -57,19 +57,70 @@
#endif #endif
extern void *pcpu_base_addr; extern void *pcpu_base_addr;
extern const unsigned long *pcpu_unit_offsets;
typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); struct pcpu_group_info {
typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); int nr_units; /* aligned # of units */
unsigned long base_offset; /* base address offset */
unsigned int *cpu_map; /* unit->cpu map, empty
* entries contain NR_CPUS */
};
extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, struct pcpu_alloc_info {
size_t static_size, size_t reserved_size, size_t static_size;
ssize_t dyn_size, ssize_t unit_size, size_t reserved_size;
void *base_addr, size_t dyn_size;
pcpu_populate_pte_fn_t populate_pte_fn); size_t unit_size;
size_t atom_size;
size_t alloc_size;
size_t __ai_size; /* internal, don't use */
int nr_groups; /* 0 if grouping unnecessary */
struct pcpu_group_info groups[];
};
extern ssize_t __init pcpu_embed_first_chunk( enum pcpu_fc {
size_t static_size, size_t reserved_size, PCPU_FC_AUTO,
ssize_t dyn_size, ssize_t unit_size); PCPU_FC_EMBED,
PCPU_FC_PAGE,
PCPU_FC_NR,
};
extern const char *pcpu_fc_names[PCPU_FC_NR];
extern enum pcpu_fc pcpu_chosen_fc;
typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
size_t align);
typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
int nr_units);
extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai);
extern struct pcpu_alloc_info * __init pcpu_build_alloc_info(
size_t reserved_size, ssize_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn);
extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
void *base_addr);
#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
extern int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
pcpu_fc_alloc_fn_t alloc_fn,
pcpu_fc_free_fn_t free_fn);
#endif
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
extern int __init pcpu_page_first_chunk(size_t reserved_size,
pcpu_fc_alloc_fn_t alloc_fn,
pcpu_fc_free_fn_t free_fn,
pcpu_fc_populate_pte_fn_t populate_pte_fn);
#endif
/* /*
* Use this to get to a cpu's version of the per-cpu object * Use this to get to a cpu's version of the per-cpu object
@ -80,7 +131,7 @@ extern ssize_t __init pcpu_embed_first_chunk(
extern void *__alloc_reserved_percpu(size_t size, size_t align); extern void *__alloc_reserved_percpu(size_t size, size_t align);
#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ #else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
struct percpu_data { struct percpu_data {
void *ptrs[1]; void *ptrs[1];
@ -99,11 +150,15 @@ struct percpu_data {
(__typeof__(ptr))__p->ptrs[(cpu)]; \ (__typeof__(ptr))__p->ptrs[(cpu)]; \
}) })
#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ #endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
extern void *__alloc_percpu(size_t size, size_t align); extern void *__alloc_percpu(size_t size, size_t align);
extern void free_percpu(void *__pdata); extern void free_percpu(void *__pdata);
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
extern void __init setup_per_cpu_areas(void);
#endif
#else /* CONFIG_SMP */ #else /* CONFIG_SMP */
#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
@ -124,6 +179,13 @@ static inline void free_percpu(void *p)
kfree(p); kfree(p);
} }
static inline void __init setup_per_cpu_areas(void) { }
/* !CONFIG_SMP stub: ignores @kaddr and always returns NULL. */
static inline void *pcpu_lpage_remapped(void *kaddr)
{
return NULL;
}
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \

View File

@ -115,4 +115,10 @@ extern rwlock_t vmlist_lock;
extern struct vm_struct *vmlist; extern struct vm_struct *vmlist;
extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
const size_t *sizes, int nr_vms,
size_t align, gfp_t gfp_mask);
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms);
#endif /* _LINUX_VMALLOC_H */ #endif /* _LINUX_VMALLOC_H */

View File

@ -353,7 +353,6 @@ static void __init smp_init(void)
#define smp_init() do { } while (0) #define smp_init() do { } while (0)
#endif #endif
static inline void setup_per_cpu_areas(void) { }
static inline void setup_nr_cpu_ids(void) { } static inline void setup_nr_cpu_ids(void) { }
static inline void smp_prepare_cpus(unsigned int maxcpus) { } static inline void smp_prepare_cpus(unsigned int maxcpus) { }
@ -374,29 +373,6 @@ static void __init setup_nr_cpu_ids(void)
nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
} }
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
/*
* Generic percpu area setup: allocate one page-aligned copy of the
* percpu section per possible CPU from bootmem and record each copy's
* offset from __per_cpu_start in __per_cpu_offset[].
*/
static void __init setup_per_cpu_areas(void)
{
unsigned long size, i;
char *ptr;
unsigned long nr_possible_cpus = num_possible_cpus();
/* Copy section for each CPU (we discard the original) */
size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
/* a single bootmem allocation holds all per-CPU copies back to back */
ptr = alloc_bootmem_pages(size * nr_possible_cpus);
for_each_possible_cpu(i) {
/* offset that maps the original symbol address to this CPU's copy */
__per_cpu_offset[i] = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
ptr += size;
}
}
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
/* Called by boot processor to activate the rest. */ /* Called by boot processor to activate the rest. */
static void __init smp_init(void) static void __init smp_init(void)
{ {

View File

@ -364,7 +364,7 @@ EXPORT_SYMBOL_GPL(find_module);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA #ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
static void *percpu_modalloc(unsigned long size, unsigned long align, static void *percpu_modalloc(unsigned long size, unsigned long align,
const char *name) const char *name)
@ -389,7 +389,7 @@ static void percpu_modfree(void *freeme)
free_percpu(freeme); free_percpu(freeme);
} }
#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ #else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */
/* Number of blocks used and allocated. */ /* Number of blocks used and allocated. */
static unsigned int pcpu_num_used, pcpu_num_allocated; static unsigned int pcpu_num_used, pcpu_num_allocated;
@ -535,7 +535,7 @@ static int percpu_modinit(void)
} }
__initcall(percpu_modinit); __initcall(percpu_modinit);
#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ #endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
static unsigned int find_pcpusec(Elf_Ehdr *hdr, static unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs, Elf_Shdr *sechdrs,

View File

@ -100,16 +100,16 @@ hw_perf_group_sched_in(struct perf_counter *group_leader,
void __weak perf_counter_print_debug(void) { } void __weak perf_counter_print_debug(void) { }
static DEFINE_PER_CPU(int, disable_count); static DEFINE_PER_CPU(int, perf_disable_count);
void __perf_disable(void) void __perf_disable(void)
{ {
__get_cpu_var(disable_count)++; __get_cpu_var(perf_disable_count)++;
} }
bool __perf_enable(void) bool __perf_enable(void)
{ {
return !--__get_cpu_var(disable_count); return !--__get_cpu_var(perf_disable_count);
} }
void perf_disable(void) void perf_disable(void)

View File

@ -318,12 +318,12 @@ struct task_group root_task_group;
/* Default task group's sched entity on each cpu */ /* Default task group's sched entity on each cpu */
static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
/* Default task group's cfs_rq on each cpu */ /* Default task group's cfs_rq on each cpu */
static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_cfs_rq);
#endif /* CONFIG_FAIR_GROUP_SCHED */ #endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED #ifdef CONFIG_RT_GROUP_SCHED
static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);
#endif /* CONFIG_RT_GROUP_SCHED */ #endif /* CONFIG_RT_GROUP_SCHED */
#else /* !CONFIG_USER_SCHED */ #else /* !CONFIG_USER_SCHED */
#define root_task_group init_task_group #define root_task_group init_task_group

View File

@ -1334,7 +1334,7 @@ static __init void event_trace_self_tests(void)
#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_FUNCTION_TRACER
static DEFINE_PER_CPU(atomic_t, test_event_disable); static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
static void static void
function_test_events_call(unsigned long ip, unsigned long parent_ip) function_test_events_call(unsigned long ip, unsigned long parent_ip)
@ -1350,7 +1350,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
pc = preempt_count(); pc = preempt_count();
resched = ftrace_preempt_disable(); resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id(); cpu = raw_smp_processor_id();
disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu)); disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
if (disabled != 1) if (disabled != 1)
goto out; goto out;
@ -1368,7 +1368,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
trace_nowake_buffer_unlock_commit(event, flags, pc); trace_nowake_buffer_unlock_commit(event, flags, pc);
out: out:
atomic_dec(&per_cpu(test_event_disable, cpu)); atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
ftrace_preempt_enable(resched); ftrace_preempt_enable(resched);
} }

View File

@ -790,6 +790,21 @@ config DEBUG_BLOCK_EXT_DEVT
Say N if you are unsure. Say N if you are unsure.
config DEBUG_FORCE_WEAK_PER_CPU
bool "Force weak per-cpu definitions"
depends on DEBUG_KERNEL
help
s390 and alpha require percpu variables in modules to be
defined weak to work around an addressing range issue, which
puts the following two restrictions on percpu variable
definitions.
1. percpu symbols must be unique whether static or not
2. percpu variables can't be defined inside a function
To ensure that generic code follows the above rules, this
option forces all percpu variables to be defined as weak.
config LKDTM config LKDTM
tristate "Linux Kernel Dump Test Tool Module" tristate "Linux Kernel Dump Test Tool Module"
depends on DEBUG_KERNEL depends on DEBUG_KERNEL

View File

@ -33,7 +33,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_MIGRATION) += migrate.o
ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
obj-$(CONFIG_SMP) += percpu.o obj-$(CONFIG_SMP) += percpu.o
else else
obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_SMP) += allocpercpu.o

View File

@ -5,6 +5,8 @@
*/ */
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/bootmem.h>
#include <asm/sections.h>
#ifndef cache_line_size #ifndef cache_line_size
#define cache_line_size() L1_CACHE_BYTES #define cache_line_size() L1_CACHE_BYTES
@ -147,3 +149,29 @@ void free_percpu(void *__pdata)
kfree(__percpu_disguise(__pdata)); kfree(__percpu_disguise(__pdata));
} }
EXPORT_SYMBOL_GPL(free_percpu); EXPORT_SYMBOL_GPL(free_percpu);
/*
* Generic percpu area setup.
*/
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
/*
 * Generic percpu area setup.
 *
 * Carve one page-aligned unit per possible CPU out of a single bootmem
 * allocation, copy the percpu section into each unit and remember every
 * unit's offset from __per_cpu_start in __per_cpu_offset[].
 */
void __init setup_per_cpu_areas(void)
{
	const unsigned long unit_size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
	const unsigned long nr_units = num_possible_cpus();
	unsigned long cpu;
	char *unit;

	/* Copy section for each CPU (we discard the original) */
	unit = alloc_bootmem_pages(unit_size * nr_units);

	for_each_possible_cpu(cpu) {
		__per_cpu_offset[cpu] = unit - __per_cpu_start;
		memcpy(unit, __per_cpu_start, __per_cpu_end - __per_cpu_start);
		unit += unit_size;
	}
}
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */

View File

@ -36,7 +36,7 @@ struct test_node {
}; };
static LIST_HEAD(test_list); static LIST_HEAD(test_list);
static DEFINE_PER_CPU(void *, test_pointer); static DEFINE_PER_CPU(void *, kmemleak_test_pointer);
/* /*
* Some very simple testing. This function needs to be extended for * Some very simple testing. This function needs to be extended for
@ -86,9 +86,9 @@ static int __init kmemleak_test_init(void)
} }
for_each_possible_cpu(i) { for_each_possible_cpu(i) {
per_cpu(test_pointer, i) = kmalloc(129, GFP_KERNEL); per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL);
pr_info("kmemleak: kmalloc(129) = %p\n", pr_info("kmemleak: kmalloc(129) = %p\n",
per_cpu(test_pointer, i)); per_cpu(kmemleak_test_pointer, i));
} }
return 0; return 0;

View File

@ -610,6 +610,8 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
} }
} }
static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;
/** /**
* balance_dirty_pages_ratelimited_nr - balance dirty memory state * balance_dirty_pages_ratelimited_nr - balance dirty memory state
* @mapping: address_space which was dirtied * @mapping: address_space which was dirtied
@ -627,7 +629,6 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
unsigned long nr_pages_dirtied) unsigned long nr_pages_dirtied)
{ {
static DEFINE_PER_CPU(unsigned long, ratelimits) = 0;
unsigned long ratelimit; unsigned long ratelimit;
unsigned long *p; unsigned long *p;
@ -640,7 +641,7 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
* tasks in balance_dirty_pages(). Period. * tasks in balance_dirty_pages(). Period.
*/ */
preempt_disable(); preempt_disable();
p = &__get_cpu_var(ratelimits); p = &__get_cpu_var(bdp_ratelimits);
*p += nr_pages_dirtied; *p += nr_pages_dirtied;
if (unlikely(*p >= ratelimit)) { if (unlikely(*p >= ratelimit)) {
*p = 0; *p = 0;

File diff suppressed because it is too large Load Diff

View File

@ -19,7 +19,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/quicklist.h> #include <linux/quicklist.h>
DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK]; DEFINE_PER_CPU(struct quicklist [CONFIG_NR_QUICK], quicklist);
#define FRACTION_OF_NODE_MEM 16 #define FRACTION_OF_NODE_MEM 16

View File

@ -2091,8 +2091,8 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
*/ */
#define NR_KMEM_CACHE_CPU 100 #define NR_KMEM_CACHE_CPU 100
static DEFINE_PER_CPU(struct kmem_cache_cpu, static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
kmem_cache_cpu)[NR_KMEM_CACHE_CPU]; kmem_cache_cpu);
static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS); static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);

View File

@ -265,6 +265,7 @@ struct vmap_area {
static DEFINE_SPINLOCK(vmap_area_lock); static DEFINE_SPINLOCK(vmap_area_lock);
static struct rb_root vmap_area_root = RB_ROOT; static struct rb_root vmap_area_root = RB_ROOT;
static LIST_HEAD(vmap_area_list); static LIST_HEAD(vmap_area_list);
static unsigned long vmap_area_pcpu_hole;
static struct vmap_area *__find_vmap_area(unsigned long addr) static struct vmap_area *__find_vmap_area(unsigned long addr)
{ {
@ -431,6 +432,15 @@ static void __free_vmap_area(struct vmap_area *va)
RB_CLEAR_NODE(&va->rb_node); RB_CLEAR_NODE(&va->rb_node);
list_del_rcu(&va->list); list_del_rcu(&va->list);
/*
* Track the highest possible candidate for pcpu area
* allocation. Areas outside of vmalloc area can be returned
* here too, consider only end addresses which fall inside
* vmalloc area proper.
*/
if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
call_rcu(&va->rcu_head, rcu_free_va); call_rcu(&va->rcu_head, rcu_free_va);
} }
@ -1038,6 +1048,9 @@ void __init vmalloc_init(void)
va->va_end = va->va_start + tmp->size; va->va_end = va->va_start + tmp->size;
__insert_vmap_area(va); __insert_vmap_area(va);
} }
vmap_area_pcpu_hole = VMALLOC_END;
vmap_initialized = true; vmap_initialized = true;
} }
@ -1122,13 +1135,34 @@ EXPORT_SYMBOL_GPL(map_vm_area);
DEFINE_RWLOCK(vmlist_lock); DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist; struct vm_struct *vmlist;
/*
 * Fill in @vm from the range already assigned to @va, attach it to @va
 * and link it into the global vmlist in front of the first entry whose
 * address is not lower than @vm's.
 */
static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
			      unsigned long flags, void *caller)
{
	struct vm_struct **link;

	/* describe the area using the range chosen for @va */
	vm->flags = flags;
	vm->addr = (void *)va->va_start;
	vm->size = va->va_end - va->va_start;
	vm->caller = caller;

	/* make the vmap_area point at its vm_struct and mark it as such */
	va->private = vm;
	va->flags |= VM_VM_AREA;

	write_lock(&vmlist_lock);
	link = &vmlist;
	while (*link && (*link)->addr < vm->addr)
		link = &(*link)->next;
	vm->next = *link;
	*link = vm;
	write_unlock(&vmlist_lock);
}
static struct vm_struct *__get_vm_area_node(unsigned long size, static struct vm_struct *__get_vm_area_node(unsigned long size,
unsigned long flags, unsigned long start, unsigned long end, unsigned long flags, unsigned long start, unsigned long end,
int node, gfp_t gfp_mask, void *caller) int node, gfp_t gfp_mask, void *caller)
{ {
static struct vmap_area *va; static struct vmap_area *va;
struct vm_struct *area; struct vm_struct *area;
struct vm_struct *tmp, **p;
unsigned long align = 1; unsigned long align = 1;
BUG_ON(in_interrupt()); BUG_ON(in_interrupt());
@ -1147,7 +1181,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
if (unlikely(!size)) if (unlikely(!size))
return NULL; return NULL;
area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
if (unlikely(!area)) if (unlikely(!area))
return NULL; return NULL;
@ -1162,25 +1196,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
return NULL; return NULL;
} }
area->flags = flags; insert_vmalloc_vm(area, va, flags, caller);
area->addr = (void *)va->va_start;
area->size = size;
area->pages = NULL;
area->nr_pages = 0;
area->phys_addr = 0;
area->caller = caller;
va->private = area;
va->flags |= VM_VM_AREA;
write_lock(&vmlist_lock);
for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
if (tmp->addr >= area->addr)
break;
}
area->next = *p;
*p = area;
write_unlock(&vmlist_lock);
return area; return area;
} }
@ -1818,6 +1834,286 @@ void free_vm_area(struct vm_struct *area)
} }
EXPORT_SYMBOL_GPL(free_vm_area); EXPORT_SYMBOL_GPL(free_vm_area);
/* Map an rb-tree node to the vmap_area embedding it; NULL maps to NULL. */
static struct vmap_area *node_to_va(struct rb_node *n)
{
	if (!n)
		return NULL;

	return rb_entry(n, struct vmap_area, rb_node);
}
/**
 * pvm_find_next_prev - find the next and prev vmap_area surrounding @end
 * @end: target address
 * @pnext: out arg for the next vmap_area
 * @pprev: out arg for the previous vmap_area
 *
 * Returns: %true if either or both of next and prev are found,
 *	    %false if no vmap_area exists
 *
 * Find the vmap_areas whose end addresses enclose @end, i.e. if not
 * NULL, (*pnext)->va_end > @end and (*pprev)->va_end <= @end.
 *
 * When no vmap_area exists, both *@pnext and *@pprev are set to NULL
 * so that a caller which does not check the return value never
 * operates on uninitialized pointers.
 */
static bool pvm_find_next_prev(unsigned long end,
			       struct vmap_area **pnext,
			       struct vmap_area **pprev)
{
	struct rb_node *n = vmap_area_root.rb_node;
	struct vmap_area *va = NULL;

	/*
	 * Walk down the tree comparing @end against each node's va_end.
	 * On exit @va is the last node visited (NULL for an empty tree).
	 */
	while (n) {
		va = rb_entry(n, struct vmap_area, rb_node);
		if (end < va->va_end)
			n = n->rb_left;
		else if (end > va->va_end)
			n = n->rb_right;
		else
			break;
	}

	if (!va) {
		*pnext = NULL;
		*pprev = NULL;
		return false;
	}

	/* the last visited node is one neighbour; its tree neighbour the other */
	if (va->va_end > end) {
		*pnext = va;
		*pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
	} else {
		*pprev = va;
		*pnext = node_to_va(rb_next(&(*pprev)->rb_node));
	}
	return true;
}
/**
 * pvm_determine_end - find the highest aligned address between two vmap_areas
 * @pnext: in/out arg for the next vmap_area
 * @pprev: in/out arg for the previous vmap_area
 * @align: alignment
 *
 * Returns: determined end address
 *
 * The candidate address is the start of *@pnext aligned down to
 * @align, capped at VMALLOC_END aligned down to @align (which is also
 * used when *@pnext is NULL).  *@pnext and *@pprev are then walked
 * towards lower addresses until *@pprev is NULL or ends at or below
 * the candidate.
 *
 * Please note that the address returned by this function may fall
 * inside *@pnext vmap_area.  The caller is responsible for checking
 * that.
 */
static unsigned long pvm_determine_end(struct vmap_area **pnext,
				       struct vmap_area **pprev,
				       unsigned long align)
{
	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	unsigned long addr = vmalloc_end;

	if (*pnext)
		addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);

	for (;;) {
		struct vmap_area *below = *pprev;

		if (!below || below->va_end <= addr)
			break;

		/* prev still ends above addr: shift the pair one step down */
		*pnext = below;
		*pprev = node_to_va(rb_prev(&below->rb_node));
	}

	return addr;
}
/**
* pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
* @offsets: array containing offset of each area
* @sizes: array containing size of each area
* @nr_vms: the number of areas to allocate
* @align: alignment, all entries in @offsets and @sizes must be aligned to this
* @gfp_mask: allocation mask
*
* Returns: kmalloc'd vm_struct pointer array pointing to allocated
* vm_structs on success, %NULL on failure
*
* Percpu allocator wants to use congruent vm areas so that it can
* maintain the offsets among percpu areas. This function allocates
* congruent vmalloc areas for it. These areas tend to be scattered
* pretty far, distance between two areas easily going up to
* gigabytes. To avoid interacting with regular vmallocs, these areas
* are allocated from top.
*
* Despite its complicated look, this allocator is rather simple. It
* does everything top-down and scans areas from the end looking for
* matching slot. While scanning, if any of the areas overlaps with
* existing vmap_area, the base address is pulled down to fit the
* area. Scanning is repeated till all the areas fit and then all
* necessary data structures are inserted and the result is returned.
*/
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
const size_t *sizes, int nr_vms,
size_t align, gfp_t gfp_mask)
{
const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
struct vmap_area **vas, *prev, *next;
struct vm_struct **vms;
int area, area2, last_area, term_area;
unsigned long base, start, end, last_end;
bool purged = false;
gfp_mask &= GFP_RECLAIM_MASK;
/* verify parameters and allocate data structures */
BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
for (last_area = 0, area = 0; area < nr_vms; area++) {
start = offsets[area];
end = start + sizes[area];
/* is everything aligned properly? */
BUG_ON(!IS_ALIGNED(offsets[area], align));
BUG_ON(!IS_ALIGNED(sizes[area], align));
/* detect the area with the highest address */
if (start > offsets[last_area])
last_area = area;
/* no other requested area may start or end inside this one */
for (area2 = 0; area2 < nr_vms; area2++) {
unsigned long start2 = offsets[area2];
unsigned long end2 = start2 + sizes[area2];
if (area2 == area)
continue;
BUG_ON(start2 >= start && start2 < end);
BUG_ON(end2 <= end && end2 > start);
}
}
last_end = offsets[last_area] + sizes[last_area];
/* the highest area's end offset must fit in the aligned vmalloc range */
if (vmalloc_end - vmalloc_start < last_end) {
WARN_ON(true);
return NULL;
}
vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask);
vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask);
if (!vas || !vms)
goto err_free;
/* preallocate every vmap_area and vm_struct before taking the lock */
for (area = 0; area < nr_vms; area++) {
vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask);
vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask);
if (!vas[area] || !vms[area])
goto err_free;
}
retry:
spin_lock(&vmap_area_lock);
/* start scanning - we scan from the top, begin with the last area */
area = term_area = last_area;
start = offsets[area];
end = start + sizes[area];
/* no vmap_area at all: place the highest area right below vmalloc_end */
if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
base = vmalloc_end - last_end;
goto found;
}
base = pvm_determine_end(&next, &prev, align) - end;
while (true) {
BUG_ON(next && next->va_end <= base + end);
BUG_ON(prev && prev->va_end > base + end);
/*
* base might have underflowed, add last_end before
* comparing.
*/
if (base + last_end < vmalloc_start + last_end) {
spin_unlock(&vmap_area_lock);
/* out of room: purge lazily freed areas once, then give up */
if (!purged) {
purge_vmap_area_lazy();
purged = true;
goto retry;
}
goto err_free;
}
/*
* If next overlaps, move base downwards so that it's
* right below next and then recheck.
*/
if (next && next->va_start < base + end) {
base = pvm_determine_end(&next, &prev, align) - end;
term_area = area;
continue;
}
/*
* If prev overlaps, shift down next and prev and move
* base so that it's right below new next and then
* recheck.
*/
if (prev && prev->va_end > base + start) {
next = prev;
prev = node_to_va(rb_prev(&next->rb_node));
base = pvm_determine_end(&next, &prev, align) - end;
term_area = area;
continue;
}
/*
* This area fits, move on to the previous one. If
* the previous one is the terminal one, we're done.
*/
/* step to the previous index, wrapping from 0 to nr_vms - 1 */
area = (area + nr_vms - 1) % nr_vms;
if (area == term_area)
break;
start = offsets[area];
end = start + sizes[area];
pvm_find_next_prev(base + end, &next, &prev);
}
found:
/* we've found a fitting base, insert all va's */
for (area = 0; area < nr_vms; area++) {
struct vmap_area *va = vas[area];
va->va_start = base + offsets[area];
va->va_end = va->va_start + sizes[area];
__insert_vmap_area(va);
}
/* remember the start of the highest area as the next search hint */
vmap_area_pcpu_hole = base + offsets[last_area];
spin_unlock(&vmap_area_lock);
/* insert all vm's */
for (area = 0; area < nr_vms; area++)
insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
pcpu_get_vm_areas);
/* only the pointer array is freed; the vmap_areas were inserted above */
kfree(vas);
return vms;
err_free:
/* vas/vms may be NULL if allocating the arrays themselves failed */
for (area = 0; area < nr_vms; area++) {
if (vas)
kfree(vas[area]);
if (vms)
kfree(vms[area]);
}
kfree(vas);
kfree(vms);
return NULL;
}
/**
 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
 * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
 * @nr_vms: the number of allocated areas
 *
 * Hand each of the @nr_vms vm_structs to free_vm_area() and then
 * kfree() the pointer array itself.
 */
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
{
	int idx = 0;

	while (idx < nr_vms) {
		free_vm_area(vms[idx]);
		idx++;
	}

	kfree(vms);
}
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos) static void *s_start(struct seq_file *m, loff_t *pos)

View File

@ -37,12 +37,13 @@ __initcall(init_syncookies);
#define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS]; static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
ipv4_cookie_scratch);
static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 count, int c) u32 count, int c)
{ {
__u32 *tmp = __get_cpu_var(cookie_scratch); __u32 *tmp = __get_cpu_var(ipv4_cookie_scratch);
memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c])); memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c]));
tmp[0] = (__force u32)saddr; tmp[0] = (__force u32)saddr;

View File

@ -74,12 +74,13 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
return child; return child;
} }
static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS]; static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
ipv6_cookie_scratch);
static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr, static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr,
__be16 sport, __be16 dport, u32 count, int c) __be16 sport, __be16 dport, u32 count, int c)
{ {
__u32 *tmp = __get_cpu_var(cookie_scratch); __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch);
/* /*
* we have 320 bits of information to hash, copy in the remaining * we have 320 bits of information to hash, copy in the remaining

View File

@ -37,7 +37,7 @@
#include "rds.h" #include "rds.h"
#include "ib.h" #include "ib.h"
DEFINE_PER_CPU(struct rds_ib_statistics, rds_ib_stats) ____cacheline_aligned; DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats);
static char *rds_ib_stat_names[] = { static char *rds_ib_stat_names[] = {
"ib_connect_raced", "ib_connect_raced",

View File

@ -37,7 +37,7 @@
#include "rds.h" #include "rds.h"
#include "iw.h" #include "iw.h"
DEFINE_PER_CPU(struct rds_iw_statistics, rds_iw_stats) ____cacheline_aligned; DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_iw_statistics, rds_iw_stats);
static char *rds_iw_stat_names[] = { static char *rds_iw_stat_names[] = {
"iw_connect_raced", "iw_connect_raced",

View File

@ -39,7 +39,7 @@ struct rds_page_remainder {
unsigned long r_offset; unsigned long r_offset;
}; };
DEFINE_PER_CPU(struct rds_page_remainder, rds_page_remainders) ____cacheline_aligned; DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
/* /*
* returns 0 on success or -errno on failure. * returns 0 on success or -errno on failure.

View File

@ -0,0 +1,8 @@
/*
* Common module linker script, always used when linking a module.
* Archs are free to supply their own linker scripts. ld will
* combine them automatically.
*/
SECTIONS {
/DISCARD/ : { *(.discard) }
}