From 158f424f427e686816bc64cd623e3bfc3390dfb0 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 8 Jul 2017 09:30:57 +0800 Subject: [PATCH 1/3] x86/numa_emulation: Refine the calculation of max_emu_nid and dfl_phys_nid max_emu_nid and dfl_phys_nid are calculated from emu_nid_to_phys[], which is filled in by split_nodes_xxx_interleave(). From the logic in these functions, emu_nid_to_phys[] is guaranteed to hold meaningful values if they return successfully, which ensures that dfl_phys_nid gets a valid value. This patch removes the error branch that checks for an invalid dfl_phys_nid and moves the calculation into a separate function for readability. Signed-off-by: Wei Yang Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: bp@alien8.de Cc: kirill@shutemov.name Cc: rientjes@google.com Cc: tj@kernel.org Link: http://lkml.kernel.org/r/20170708013059.29708-2-richard.weiyang@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/mm/numa_emulation.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index a8f90ce3dedf..a6d55308660f 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -280,6 +280,22 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, return 0; } +int __init setup_emu2phys_nid(int *dfl_phys_nid) +{ + int i, max_emu_nid = 0; + + *dfl_phys_nid = NUMA_NO_NODE; + for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) { + if (emu_nid_to_phys[i] != NUMA_NO_NODE) { + max_emu_nid = i; + if (*dfl_phys_nid == NUMA_NO_NODE) + *dfl_phys_nid = emu_nid_to_phys[i]; + } + } + + return max_emu_nid; +} + /** * numa_emulation - Emulate NUMA nodes * @numa_meminfo: NUMA configuration to massage @@ -376,19 +392,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) * Determine the max emulated nid and the default phys nid to use * for unmapped nodes. 
*/ - max_emu_nid = 0; - dfl_phys_nid = NUMA_NO_NODE; - for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) { - if (emu_nid_to_phys[i] != NUMA_NO_NODE) { - max_emu_nid = i; - if (dfl_phys_nid == NUMA_NO_NODE) - dfl_phys_nid = emu_nid_to_phys[i]; - } - } - if (dfl_phys_nid == NUMA_NO_NODE) { - pr_warning("NUMA: Warning: can't determine default physical node, disabling emulation\n"); - goto no_emu; - } + max_emu_nid = setup_emu2phys_nid(&dfl_phys_nid); /* commit */ *numa_meminfo = ei; From d80a9eb3c78d7d0c823a8224cd6e3b37ebdfd8cd Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 8 Jul 2017 09:30:58 +0800 Subject: [PATCH 2/3] x86/numa_emulation: Assign physnode_mask directly from numa_nodes_parsed numa_init() has already called init_func(), which is responsible for setting numa_nodes_parsed, so use this nodemask instead of re-finding it when calling numa_emulation(). This patch gets the physnode_mask directly from numa_nodes_parsed. At the same time, it corrects the comment of these two functions. Signed-off-by: Wei Yang Reviewed-by: Borislav Petkov Acked-by: Thomas Gleixner Acked-by: David Rientjes Cc: Linus Torvalds Cc: Peter Zijlstra Cc: bp@alien8.de Cc: kirill@shutemov.name Cc: tj@kernel.org Link: http://lkml.kernel.org/r/20170708013059.29708-3-richard.weiyang@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/mm/numa_emulation.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index a6d55308660f..80904ede2e7f 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -75,13 +75,15 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei, /* * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr - * to max_addr. The return value is the number of nodes allocated. + * to max_addr. + * + * Returns zero on success or negative on error. 
*/ static int __init split_nodes_interleave(struct numa_meminfo *ei, struct numa_meminfo *pi, u64 addr, u64 max_addr, int nr_nodes) { - nodemask_t physnode_mask = NODE_MASK_NONE; + nodemask_t physnode_mask = numa_nodes_parsed; u64 size; int big; int nid = 0; @@ -116,9 +118,6 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, return -1; } - for (i = 0; i < pi->nr_blks; i++) - node_set(pi->blk[i].nid, physnode_mask); - /* * Continue to fill physical nodes with fake nodes until there is no * memory left on any of them. @@ -200,13 +199,15 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size) /* * Sets up fake nodes of `size' interleaved over physical nodes ranging from - * `addr' to `max_addr'. The return value is the number of nodes allocated. + * `addr' to `max_addr'. + * + * Returns zero on success or negative on error. */ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, struct numa_meminfo *pi, u64 addr, u64 max_addr, u64 size) { - nodemask_t physnode_mask = NODE_MASK_NONE; + nodemask_t physnode_mask = numa_nodes_parsed; u64 min_size; int nid = 0; int i, ret; @@ -231,9 +232,6 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, } size &= FAKE_NODE_MIN_HASH_MASK; - for (i = 0; i < pi->nr_blks; i++) - node_set(pi->blk[i].nid, physnode_mask); - /* * Fill physical nodes with fake nodes of size until there is no memory * left on any of them. From 4f167201edda7cd7525cc7f23944731ef5dd99a8 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 8 Jul 2017 09:30:59 +0800 Subject: [PATCH 3/3] x86/numa_emulation: Recalculate numa_nodes_parsed from emulated nodes When emulating NUMA, the kernel's emulated NUMA configuration may contain more or less nodes than there are physical nodes. In numa_emulation(), we recalculate numa_meminfo/numa_distance/__apicid_to_node according to the number of emulated nodes, except numa_nodes_parsed, which is arguably an omission. Recalculate numa_nodes_parsed as well. 
Signed-off-by: Wei Yang Acked-by: Thomas Gleixner Acked-by: David Rientjes Cc: Linus Torvalds Cc: Peter Zijlstra Cc: bp@alien8.de Cc: kirill@shutemov.name Cc: tj@kernel.org Link: http://lkml.kernel.org/r/20170708013059.29708-4-richard.weiyang@gmail.com [ Changelog fixes. ] Signed-off-by: Ingo Molnar --- arch/x86/mm/numa_emulation.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index 80904ede2e7f..d805162e6045 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -395,6 +395,13 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) /* commit */ *numa_meminfo = ei; + /* Make sure numa_nodes_parsed only contains emulated nodes */ + nodes_clear(numa_nodes_parsed); + for (i = 0; i < ARRAY_SIZE(ei.blk); i++) + if (ei.blk[i].start != ei.blk[i].end && + ei.blk[i].nid != NUMA_NO_NODE) + node_set(ei.blk[i].nid, numa_nodes_parsed); + /* * Transform __apicid_to_node table to use emulated nids by * reverse-mapping phys_nid. The maps should always exist but fall