mirror of https://gitee.com/openkylin/linux.git
x86: Unify NUMA initialization between 32 and 64bit
Now that everything else is unified, NUMA initialization can be unified too.

* numa_init_array() and init_cpu_to_node() are moved from numa_64 to numa.

* numa_32::initmem_init() is updated to call numa_init_array() and setup_arch() to call init_cpu_to_node() on 32bit too.

* x86_cpu_to_node_map is now initialized to NUMA_NO_NODE on 32bit too. This is safe now as numa_init_array() will initialize it early during boot.

This makes the NUMA mapping fully initialized before setup_per_cpu_areas() on 32bit too, so the first percpu chunk, which contains all the static variables and some of the dynamic area, is allocated with NUMA affinity correctly taken into account.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: yinghai@kernel.org
Cc: brgerst@gmail.com
Cc: gorcunov@gmail.com
Cc: shaohui.zheng@intel.com
Cc: rientjes@google.com
LKML-Reference: <1295789862-25482-17-git-send-email-tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
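The round-robin fallback that numa_init_array() applies (visible in full in the diff below) is small enough to model outside the kernel. The following is a minimal user-space C sketch, not kernel code: the CPU and node counts, the cpu_to_node[] array, and the modulo wrap-around standing in for first_node()/next_node() over node_online_map are all illustrative assumptions. It only shows how CPUs whose node is still NUMA_NO_NODE get spread across the online nodes before setup_per_cpu_areas() runs.

#include <stdio.h>

#define NUMA_NO_NODE    (-1)
#define NR_CPUS          8   /* hypothetical possible-CPU count */
#define NR_ONLINE_NODES  2   /* hypothetical online-node count, nodes 0..1 */

/* Stand-in for the early x86_cpu_to_node_map: some CPUs already have a
 * node from firmware tables, the rest are still NUMA_NO_NODE. */
static int cpu_to_node[NR_CPUS] = {
	0, NUMA_NO_NODE, 1, NUMA_NO_NODE,
	NUMA_NO_NODE, NUMA_NO_NODE, NUMA_NO_NODE, NUMA_NO_NODE,
};

/* Mirrors the numa_init_array() loop: CPUs without a known node are
 * assigned round robin over the online nodes (modulo replaces the
 * kernel's first_node()/next_node() walk, assuming contiguous IDs). */
static void numa_init_array_model(void)
{
	int rr = 0, i;

	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_to_node[i] != NUMA_NO_NODE)
			continue;
		cpu_to_node[i] = rr;
		rr = (rr + 1) % NR_ONLINE_NODES;
	}
}

int main(void)
{
	int i;

	numa_init_array_model();
	for (i = 0; i < NR_CPUS; i++)
		printf("cpu %d -> node %d\n", i, cpu_to_node[i]);
	return 0;
}

After this commit the same fallback runs on 32bit as well, so every possible CPU has some node assigned by the time the first percpu chunk is laid out.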
parent de2d9445f1
commit 8db78cc4b4
@@ -34,11 +34,15 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 #ifdef CONFIG_NUMA
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
+extern void __init numa_init_array(void);
+extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_add_cpu(int cpu);
 extern void __cpuinit numa_remove_cpu(int cpu);
 #else /* CONFIG_NUMA */
 static inline void numa_set_node(int cpu, int node) { }
 static inline void numa_clear_node(int cpu) { }
+static inline void numa_init_array(void) { }
+static inline void init_cpu_to_node(void) { }
 static inline void numa_add_cpu(int cpu) { }
 static inline void numa_remove_cpu(int cpu) { }
 #endif /* CONFIG_NUMA */
@@ -13,7 +13,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
-extern void numa_init_array(void);
 extern int numa_off;
 
 extern unsigned long numa_free_all_bootmem(void);
@@ -28,7 +27,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
  */
 #define NODE_MIN_SIZE (4*1024*1024)
 
-extern void __init init_cpu_to_node(void);
 extern int __cpuinit numa_cpu_node(int cpu);
 
 #ifdef CONFIG_NUMA_EMU
@@ -37,7 +35,6 @@ extern int __cpuinit numa_cpu_node(int cpu);
 void numa_emu_cmdline(char *);
 #endif /* CONFIG_NUMA_EMU */
 #else
-static inline void init_cpu_to_node(void) { }
 static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
 #endif
 
@@ -1040,9 +1040,7 @@ void __init setup_arch(char **cmdline_p)
 
 	prefill_possible_map();
 
-#ifdef CONFIG_X86_64
 	init_cpu_to_node();
-#endif
 
 	init_apic_mappings();
 	ioapic_and_gsi_init();
@@ -38,11 +38,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
 /*
  * Map cpu index to node index
  */
-#ifdef CONFIG_X86_32
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, 0);
-#else
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-#endif
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
 void __cpuinit numa_set_node(int cpu, int node)
@@ -99,6 +95,78 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+	int rr, i;
+
+	rr = first_node(node_online_map);
+	for (i = 0; i < nr_cpu_ids; i++) {
+		if (early_cpu_to_node(i) != NUMA_NO_NODE)
+			continue;
+		numa_set_node(i, rr);
+		rr = next_node(rr, node_online_map);
+		if (rr == MAX_NUMNODES)
+			rr = first_node(node_online_map);
+	}
+}
+
+static __init int find_near_online_node(int node)
+{
+	int n, val;
+	int min_val = INT_MAX;
+	int best_node = -1;
+
+	for_each_online_node(n) {
+		val = node_distance(node, n);
+
+		if (val < min_val) {
+			min_val = val;
+			best_node = n;
+		}
+	}
+
+	return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+	int cpu;
+	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+	BUG_ON(cpu_to_apicid == NULL);
+
+	for_each_possible_cpu(cpu) {
+		int node = numa_cpu_node(cpu);
+
+		if (node == NUMA_NO_NODE)
+			continue;
+		if (!node_online(node))
+			node = find_near_online_node(node);
+		numa_set_node(cpu, node);
+	}
+}
+
 #ifndef CONFIG_DEBUG_PER_CPU_MAPS
 
 # ifndef CONFIG_NUMA_EMU
@@ -367,6 +367,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
	 */
 
 	get_memcfg_numa();
+	numa_init_array();
 
 	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
@@ -224,28 +224,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 	node_set_online(nodeid);
 }
 
-/*
- * There are unfortunately some poorly designed mainboards around that
- * only connect memory to a single CPU. This breaks the 1:1 cpu->node
- * mapping. To avoid this fill in the mapping for all possible CPUs,
- * as the number of CPUs is not known yet. We round robin the existing
- * nodes.
- */
-void __init numa_init_array(void)
-{
-	int rr, i;
-
-	rr = first_node(node_online_map);
-	for (i = 0; i < nr_cpu_ids; i++) {
-		if (early_cpu_to_node(i) != NUMA_NO_NODE)
-			continue;
-		numa_set_node(i, rr);
-		rr = next_node(rr, node_online_map);
-		if (rr == MAX_NUMNODES)
-			rr = first_node(node_online_map);
-	}
-}
-
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
@@ -664,59 +642,6 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 }
 
-#ifdef CONFIG_NUMA
-
-static __init int find_near_online_node(int node)
-{
-	int n, val;
-	int min_val = INT_MAX;
-	int best_node = -1;
-
-	for_each_online_node(n) {
-		val = node_distance(node, n);
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	return best_node;
-}
-
-/*
- * Setup early cpu_to_node.
- *
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
- *
- * Called before the per_cpu areas are setup.
- */
-void __init init_cpu_to_node(void)
-{
-	int cpu;
-	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-
-	BUG_ON(cpu_to_apicid == NULL);
-
-	for_each_possible_cpu(cpu) {
-		int node = numa_cpu_node(cpu);
-
-		if (node == NUMA_NO_NODE)
-			continue;
-		if (!node_online(node))
-			node = find_near_online_node(node);
-		numa_set_node(cpu, node);
-	}
-}
-#endif
-
 int __cpuinit numa_cpu_node(int cpu)
 {
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);