x86, numa: Fake apicid and pxm mappings for NUMA emulation
This patch adds the equivalent of acpi_fake_nodes() for AMD Northbridge platforms. The goal is to fake the apicid-to-node mappings for NUMA emulation so that the physical topology of the machine is correctly maintained within the kernel. This change also fakes proximity domains for both the ACPI and k8 code so that the physical distance between emulated nodes is maintained via node_distance(). This exports the correct distances via /sys/devices/system/node/.../distance based on the underlying topology. A new helper function, fake_physnodes(), is introduced to invoke the appropriate NUMA code to fake these two mappings based on the system type. If there is no underlying NUMA configuration, all cpus are mapped to node 0 for local distance. Since acpi_fake_nodes() is no longer called without CONFIG_ACPI_NUMA, its no-op stub can be removed from the header file for such a configuration. Signed-off-by: David Rientjes <rientjes@google.com> LKML-Reference: <alpine.DEB.2.00.1012221701360.3701@chino.kir.corp.google.com> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
parent
4e76f4e67a
commit
f51bf3073a
|
@ -193,11 +193,6 @@ extern int acpi_scan_nodes(unsigned long start, unsigned long end);
|
||||||
extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
|
extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
|
||||||
int num_nodes);
|
int num_nodes);
|
||||||
#endif
|
#endif
|
||||||
#else
|
|
||||||
/*
 * No-op variant of acpi_fake_nodes() provided on the #else side of the
 * CONFIG_ACPI_NUMA conditional: both arguments are accepted and ignored.
 */
static inline void
acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
{
	/* Intentionally empty. */
}
|
|
||||||
#endif /* CONFIG_ACPI_NUMA */
|
#endif /* CONFIG_ACPI_NUMA */
|
||||||
|
|
||||||
#define acpi_unlazy_tlb(x) leave_mm(x)
|
#define acpi_unlazy_tlb(x) leave_mm(x)
|
||||||
|
|
|
@ -13,6 +13,7 @@ extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
|
||||||
extern int amd_scan_nodes(void);
|
extern int amd_scan_nodes(void);
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA_EMU
|
#ifdef CONFIG_NUMA_EMU
|
||||||
|
extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
|
||||||
extern int amd_get_nodes(struct bootnode *nodes);
|
extern int amd_get_nodes(struct bootnode *nodes);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include <asm/amd_nb.h>
|
#include <asm/amd_nb.h>
|
||||||
|
|
||||||
static struct bootnode __initdata nodes[8];
|
static struct bootnode __initdata nodes[8];
|
||||||
|
static unsigned char __initdata nodeids[8];
|
||||||
static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
|
static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
|
||||||
|
|
||||||
static __init int find_northbridge(void)
|
static __init int find_northbridge(void)
|
||||||
|
@ -69,21 +70,6 @@ static __init void early_get_boot_cpu_id(void)
|
||||||
early_init_lapic_mapping();
|
early_init_lapic_mapping();
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA_EMU
|
|
||||||
/*
 * Copy the address range of every node set in nodes_parsed from the
 * file-scope nodes[] table into physnodes[], packing the results from
 * index 0 upwards.
 *
 * Returns the number of entries written.  No bound is checked on physnodes,
 * so the caller has to supply enough room.
 */
int __init amd_get_nodes(struct bootnode *physnodes)
{
	int count = 0;
	int nid;

	for_each_node_mask(nid, nodes_parsed) {
		struct bootnode *dst = &physnodes[count++];

		dst->start = nodes[nid].start;
		dst->end = nodes[nid].end;
	}

	return count;
}
|
|
||||||
#endif /* CONFIG_NUMA_EMU */
|
|
||||||
|
|
||||||
int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
|
int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
|
||||||
{
|
{
|
||||||
unsigned long start = PFN_PHYS(start_pfn);
|
unsigned long start = PFN_PHYS(start_pfn);
|
||||||
|
@ -116,7 +102,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
|
||||||
base = read_pci_config(0, nb, 1, 0x40 + i*8);
|
base = read_pci_config(0, nb, 1, 0x40 + i*8);
|
||||||
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
|
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
|
||||||
|
|
||||||
nodeid = limit & 7;
|
nodeids[i] = nodeid = limit & 7;
|
||||||
if ((base & 3) == 0) {
|
if ((base & 3) == 0) {
|
||||||
if (i < numnodes)
|
if (i < numnodes)
|
||||||
pr_info("Skipping disabled node %d\n", i);
|
pr_info("Skipping disabled node %d\n", i);
|
||||||
|
@ -196,6 +182,79 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_NUMA_EMU
|
||||||
|
/*
 * Init-time apicid-to-node table with one slot per possible local APIC id;
 * every slot starts out as NUMA_NO_NODE.
 */
static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
	[0 ... MAX_LOCAL_APIC - 1] = NUMA_NO_NODE,
};
|
||||||
|
|
||||||
|
/*
 * Copy the address range of every node set in nodes_parsed from the
 * file-scope nodes[] table into physnodes[], packing the results from
 * index 0 upwards.
 *
 * Returns the number of entries written.  No bound is checked on physnodes,
 * so the caller has to supply enough room.
 */
int __init amd_get_nodes(struct bootnode *physnodes)
{
	int count = 0;
	int nid;

	for_each_node_mask(nid, nodes_parsed) {
		struct bootnode *dst = &physnodes[count++];

		dst->start = nodes[nid].start;
		dst->end = nodes[nid].end;
	}

	return count;
}
|
||||||
|
|
||||||
|
/*
 * Find the entry of the file-scope nodes[] table whose range contains addr.
 *
 * An entry matches when start <= addr < end.  Returns the index (0-7) of the
 * first matching entry, or NUMA_NO_NODE if none of the eight entries match.
 */
static int __init find_node_by_addr(unsigned long addr)
{
	int idx = 0;

	while (idx < 8) {
		if (nodes[idx].start <= addr && addr < nodes[idx].end)
			return idx;
		idx++;
	}

	return NUMA_NO_NODE;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
|
||||||
|
* setup to represent the physical topology but reflect the emulated
|
||||||
|
* environment. For each emulated node, the real node which it appears on is
|
||||||
|
* found and a fake pxm to nid mapping is created which mirrors the actual
|
||||||
|
* locality. node_distance() then represents the correct distances between
|
||||||
|
* emulated nodes by using the fake acpi mappings to pxms.
|
||||||
|
*/
|
||||||
|
void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
|
||||||
|
{
|
||||||
|
unsigned int bits;
|
||||||
|
unsigned int cores;
|
||||||
|
unsigned int apicid_base = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
bits = boot_cpu_data.x86_coreid_bits;
|
||||||
|
cores = 1 << bits;
|
||||||
|
early_get_boot_cpu_id();
|
||||||
|
if (boot_cpu_physical_apicid > 0)
|
||||||
|
apicid_base = boot_cpu_physical_apicid;
|
||||||
|
|
||||||
|
for (i = 0; i < nr_nodes; i++) {
|
||||||
|
int index;
|
||||||
|
int nid;
|
||||||
|
int j;
|
||||||
|
|
||||||
|
nid = find_node_by_addr(nodes[i].start);
|
||||||
|
if (nid == NUMA_NO_NODE)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
index = nodeids[nid] << bits;
|
||||||
|
if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
|
||||||
|
for (j = apicid_base; j < cores + apicid_base; j++)
|
||||||
|
fake_apicid_to_node[index + j] = i;
|
||||||
|
#ifdef CONFIG_ACPI_NUMA
|
||||||
|
__acpi_map_pxm_to_node(nid, i);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_NUMA_EMU */
|
||||||
|
|
||||||
int __init amd_scan_nodes(void)
|
int __init amd_scan_nodes(void)
|
||||||
{
|
{
|
||||||
unsigned int bits;
|
unsigned int bits;
|
||||||
|
|
|
@ -324,6 +324,24 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Fake the physical node mappings for the emulated nodes held in the
 * file-scope nodes[] table, choosing the backend from the flags.
 *
 * @acpi:     non-zero to pass the nodes to acpi_fake_nodes()
 *            (only when built with CONFIG_ACPI_NUMA)
 * @amd:      non-zero to pass the nodes to amd_fake_nodes()
 *            (only when built with CONFIG_AMD_NUMA)
 * @nr_nodes: number of nodes, forwarded to the chosen backend
 *
 * @acpi and @amd must not both be set (BUG otherwise).  When neither is set,
 * every cpu id below nr_cpu_ids is assigned to node 0.
 */
static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
{
	BUG_ON(acpi && amd);

	if (!acpi && !amd) {
		int cpu;

		for (cpu = 0; cpu < nr_cpu_ids; cpu++)
			numa_set_node(cpu, 0);
		return;
	}

#ifdef CONFIG_ACPI_NUMA
	if (acpi)
		acpi_fake_nodes(nodes, nr_nodes);
#endif
#ifdef CONFIG_AMD_NUMA
	if (amd)
		amd_fake_nodes(nodes, nr_nodes);
#endif
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Setups up nid to range from addr to addr + size. If the end
|
* Setups up nid to range from addr to addr + size. If the end
|
||||||
* boundary is greater than max_addr, then max_addr is used instead.
|
* boundary is greater than max_addr, then max_addr is used instead.
|
||||||
|
@ -595,7 +613,7 @@ static int __init numa_emulation(unsigned long start_pfn,
|
||||||
nodes[i].end >> PAGE_SHIFT);
|
nodes[i].end >> PAGE_SHIFT);
|
||||||
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
|
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
|
||||||
}
|
}
|
||||||
acpi_fake_nodes(nodes, num_nodes);
|
fake_physnodes(acpi, amd, num_nodes);
|
||||||
numa_init_array();
|
numa_init_array();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -497,8 +497,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
|
|
||||||
"topology.\n");
|
|
||||||
for (i = 0; i < num_nodes; i++) {
|
for (i = 0; i < num_nodes; i++) {
|
||||||
int nid, pxm;
|
int nid, pxm;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue