2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/pci.h>
|
2008-07-03 04:50:26 +08:00
|
|
|
#include <linux/topology.h>
|
2008-06-13 02:19:23 +08:00
|
|
|
#include "pci.h"
|
|
|
|
|
|
|
|
#ifdef CONFIG_X86_64
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
#include <asm/pci-direct.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/mpspec.h>
|
|
|
|
#include <linux/cpumask.h>
|
2008-07-03 04:50:26 +08:00
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This discovers the pcibus <-> node mapping on AMD K8.
|
2008-02-19 19:21:20 +08:00
|
|
|
* It also collects the peer root bus resources for io and mmio.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
|
|
|
|
#define BUS_NR 256
|
|
|
|
|
2008-07-03 04:50:26 +08:00
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
static int mp_bus_to_node[BUS_NR];
|
|
|
|
|
|
|
|
void set_mp_bus_to_node(int busnum, int node)
|
|
|
|
{
|
|
|
|
if (busnum >= 0 && busnum < BUS_NR)
|
|
|
|
mp_bus_to_node[busnum] = node;
|
|
|
|
}
|
|
|
|
|
|
|
|
int get_mp_bus_to_node(int busnum)
|
|
|
|
{
|
|
|
|
int node = -1;
|
|
|
|
|
|
|
|
if (busnum < 0 || busnum > (BUS_NR - 1))
|
|
|
|
return node;
|
|
|
|
|
|
|
|
node = mp_bus_to_node[busnum];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* let numa_node_id to decide it later in dma_alloc_pages
|
|
|
|
* if there is no ram on that node
|
|
|
|
*/
|
|
|
|
if (node != -1 && !node_online(node))
|
|
|
|
node = -1;
|
|
|
|
|
|
|
|
return node;
|
|
|
|
}
|
2008-07-03 04:50:25 +08:00
|
|
|
|
2008-07-03 04:50:26 +08:00
|
|
|
#else /* CONFIG_X86_32 */
|
|
|
|
|
|
|
|
static unsigned char mp_bus_to_node[BUS_NR];
|
|
|
|
|
|
|
|
void set_mp_bus_to_node(int busnum, int node)
|
|
|
|
{
|
|
|
|
if (busnum >= 0 && busnum < BUS_NR)
|
|
|
|
mp_bus_to_node[busnum] = (unsigned char) node;
|
|
|
|
}
|
|
|
|
|
|
|
|
int get_mp_bus_to_node(int busnum)
|
|
|
|
{
|
|
|
|
int node;
|
|
|
|
|
|
|
|
if (busnum < 0 || busnum > (BUS_NR - 1))
|
|
|
|
return 0;
|
|
|
|
node = mp_bus_to_node[busnum];
|
|
|
|
return node;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* CONFIG_X86_32 */
|
|
|
|
|
|
|
|
#endif /* CONFIG_NUMA */
|
|
|
|
|
|
|
|
#ifdef CONFIG_X86_64
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
|
2008-07-03 04:50:25 +08:00
|
|
|
/*
|
|
|
|
* sub bus (transparent) will use entries from 3 to store extra from root,
|
|
|
|
* so need to make sure there are enough slots there, increase PCI_BUS_NUM_RESOURCES?
|
|
|
|
*/
|
|
|
|
#define RES_NUM 16
|
|
|
|
struct pci_root_info {
|
|
|
|
char name[12];
|
|
|
|
unsigned int res_num;
|
|
|
|
struct resource res[RES_NUM];
|
|
|
|
int bus_min;
|
|
|
|
int bus_max;
|
|
|
|
int node;
|
|
|
|
int link;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* 4 at this time; it may grow to 32 */
|
|
|
|
#define PCI_ROOT_NR 4
|
|
|
|
static int pci_root_num;
|
|
|
|
static struct pci_root_info pci_root_info[PCI_ROOT_NR];
|
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
void set_pci_bus_resources_arch_default(struct pci_bus *b)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int j;
|
|
|
|
struct pci_root_info *info;
|
|
|
|
|
2008-04-12 06:14:52 +08:00
|
|
|
/* if only one root bus, don't need to anything */
|
|
|
|
if (pci_root_num < 2)
|
2008-02-19 19:21:20 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < pci_root_num; i++) {
|
|
|
|
if (pci_root_info[i].bus_min == b->number)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i == pci_root_num)
|
|
|
|
return;
|
|
|
|
|
|
|
|
info = &pci_root_info[i];
|
|
|
|
for (j = 0; j < info->res_num; j++) {
|
|
|
|
struct resource *res;
|
|
|
|
struct resource *root;
|
|
|
|
|
|
|
|
res = &info->res[j];
|
|
|
|
b->resource[j] = res;
|
|
|
|
if (res->flags & IORESOURCE_IO)
|
|
|
|
root = &ioport_resource;
|
|
|
|
else
|
|
|
|
root = &iomem_resource;
|
|
|
|
insert_resource(root, res);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#define RANGE_NUM 16
|
|
|
|
|
|
|
|
/*
 * One inclusive address range [start, end] in a fixed-size range table.
 * A slot whose end is 0 is treated as unused.
 */
struct res_range {
	size_t start;	/* first address of the range */
	size_t end;	/* last address of the range; 0 marks a free slot */
};
|
|
|
|
|
|
|
|
/*
 * Remove [start, end] from every used slot of the RANGE_NUM-entry table
 * @range.
 *
 * A slot that is fully covered is cleared, a slot overlapped at one edge
 * is trimmed, and a slot that strictly contains [start, end] is split:
 * the lower part stays in place and the upper part goes to a free slot.
 * If no free slot is left an error is logged and the upper part is lost.
 */
static void __init update_range(struct res_range *range, size_t start,
				size_t end)
{
	int cur;

	for (cur = 0; cur < RANGE_NUM; cur++) {
		struct res_range *r = &range[cur];
		int spare;

		/* unused slot */
		if (r->end == 0)
			continue;

		if (start <= r->start) {
			if (end >= r->end) {
				/* the whole slot is covered */
				r->start = 0;
				r->end = 0;
			} else if (r->start < end + 1) {
				/* head of the slot is covered */
				r->start = end + 1;
			}
			continue;
		}

		/* from here on start lies above r->start */
		if (end >= r->end) {
			/* tail of the slot is covered, if they overlap at all */
			if (r->end > start - 1)
				r->end = start - 1;
			continue;
		}

		/* [start, end] is strictly inside the slot: find the new spare */
		for (spare = 0; spare < RANGE_NUM; spare++) {
			if (range[spare].end == 0)
				break;
		}

		if (spare < RANGE_NUM) {
			range[spare].end = r->end;
			range[spare].start = end + 1;
		} else {
			printk(KERN_ERR "run of slot in ranges\n");
		}

		r->end = start - 1;
	}
}
|
|
|
|
|
|
|
|
/*
 * Record the inclusive range [start, end] as a resource of root bus @info.
 *
 * @info:  root bus entry whose resource table is updated
 * @start: first address of the range
 * @end:   last address of the range
 * @flags: resource flags to store; also used to pick merge candidates
 * @merge: when non-zero, first try to fold the range into an existing
 *         entry with identical flags that overlaps or directly adjoins it
 *
 * Inverted ranges (start > end) are ignored.  When nothing is merged the
 * range is appended as a new entry; it is dropped if all RES_NUM slots
 * are already in use.
 */
static void __init update_res(struct pci_root_info *info, size_t start,
			      size_t end, unsigned long flags, int merge)
{
	int i;
	struct resource *res;

	/*
	 * An inverted range describes nothing.  Callers can produce one,
	 * e.g. by clamping end without checking start, so reject it here
	 * rather than recording a bogus resource.
	 */
	if (start > end)
		return;

	if (!merge)
		goto addit;

	/* try to merge it with old one */
	for (i = 0; i < info->res_num; i++) {
		size_t final_start, final_end;
		size_t common_start, common_end;

		res = &info->res[i];
		if (res->flags != flags)
			continue;

		/* skip entries that neither overlap nor adjoin the new range */
		common_start = max((size_t)res->start, start);
		common_end = min((size_t)res->end, end);
		if (common_start > common_end + 1)
			continue;

		final_start = min((size_t)res->start, start);
		final_end = max((size_t)res->end, end);

		res->start = final_start;
		res->end = final_end;
		return;
	}

addit:

	/* need to add that */
	if (info->res_num >= RES_NUM)
		return;

	res = &info->res[info->res_num];
	res->name = info->name;
	res->flags = flags;
	res->start = start;
	res->end = end;
	res->child = NULL;
	info->res_num++;
}
|
|
|
|
|
|
|
|
struct pci_hostbridge_probe {
|
|
|
|
u32 bus;
|
|
|
|
u32 slot;
|
|
|
|
u32 vendor;
|
|
|
|
u32 device;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Locations tried in order when looking for the AMD host bridge. */
static struct pci_hostbridge_probe pci_probes[] __initdata = {
	{ .bus = 0,    .slot = 0x18, .vendor = PCI_VENDOR_ID_AMD, .device = 0x1100 },
	{ .bus = 0,    .slot = 0x18, .vendor = PCI_VENDOR_ID_AMD, .device = 0x1200 },
	{ .bus = 0xff, .slot = 0,    .vendor = PCI_VENDOR_ID_AMD, .device = 0x1200 },
	{ .bus = 0,    .slot = 0x18, .vendor = PCI_VENDOR_ID_AMD, .device = 0x1300 },
};
|
|
|
|
|
2008-03-06 17:15:31 +08:00
|
|
|
static u64 __initdata fam10h_mmconf_start;
|
|
|
|
static u64 __initdata fam10h_mmconf_end;
|
|
|
|
/*
 * Read the MMCONFIG window from MSR_FAM10H_MMIO_CONF_BASE and store it in
 * fam10h_mmconf_start / fam10h_mmconf_end (inclusive).
 *
 * Both variables are left untouched when the boot CPU family is below
 * 0x10 or when the enable bit in the MSR is clear.  The window size is
 * 2^(busrange + 20) bytes, where busrange is the bus range field of the
 * MSR.
 */
static void __init get_pci_mmcfg_amd_fam10h_range(void)
{
	u64 base, msr;
	unsigned segn_busn_bits;

	/* assume all cpus from fam10h have mmconf */
	if (boot_cpu_data.x86 < 0x10)
		return;

	rdmsrl(MSR_FAM10H_MMIO_CONF_BASE, msr);

	/* mmconfig is not enabled */
	if (!(msr & FAM10H_MMIO_CONF_ENABLE))
		return;

	/*
	 * Widen the mask to 64 bits before shifting: if the macro is a
	 * plain int constant, shifting it in its own type would overflow
	 * and lose the high base address bits.
	 */
	base = msr & ((u64)FAM10H_MMIO_CONF_BASE_MASK <<
		      FAM10H_MMIO_CONF_BASE_SHIFT);

	segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
			 FAM10H_MMIO_CONF_BUSRANGE_MASK;

	fam10h_mmconf_start = base;
	fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/**
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
* early_fill_mp_bus_to_node()
|
|
|
|
* called before pcibios_scan_root and pci_scan_bus
|
2005-04-17 06:20:36 +08:00
|
|
|
* fills the mp_bus_to_cpumask array based according to the LDT Bus Number
|
|
|
|
* Registers found in the K8 northbridge
|
|
|
|
*/
|
2008-02-19 19:21:20 +08:00
|
|
|
static int __init early_fill_mp_bus_info(void)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2008-02-19 19:21:20 +08:00
|
|
|
int i;
|
|
|
|
int j;
|
|
|
|
unsigned bus;
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
unsigned slot;
|
2008-02-19 19:21:20 +08:00
|
|
|
int found;
|
2008-02-19 19:15:08 +08:00
|
|
|
int node;
|
2008-02-19 19:21:20 +08:00
|
|
|
int link;
|
|
|
|
int def_node;
|
|
|
|
int def_link;
|
|
|
|
struct pci_root_info *info;
|
|
|
|
u32 reg;
|
|
|
|
struct resource *res;
|
|
|
|
size_t start;
|
|
|
|
size_t end;
|
|
|
|
struct res_range range[RANGE_NUM];
|
|
|
|
u64 val;
|
|
|
|
u32 address;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
#ifdef CONFIG_NUMA
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
for (i = 0; i < BUS_NR; i++)
|
|
|
|
mp_bus_to_node[i] = -1;
|
2008-02-19 19:21:20 +08:00
|
|
|
#endif
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
|
|
|
|
if (!early_pci_allowed())
|
|
|
|
return -1;
|
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
found = 0;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
|
|
|
|
u32 id;
|
|
|
|
u16 device;
|
|
|
|
u16 vendor;
|
2008-02-19 19:15:08 +08:00
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
bus = pci_probes[i].bus;
|
|
|
|
slot = pci_probes[i].slot;
|
|
|
|
id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID);
|
2008-02-19 19:15:08 +08:00
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
vendor = id & 0xffff;
|
|
|
|
device = (id>>16) & 0xffff;
|
|
|
|
if (pci_probes[i].vendor == vendor &&
|
|
|
|
pci_probes[i].device == device) {
|
|
|
|
found = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!found)
|
|
|
|
return 0;
|
2008-02-19 19:15:08 +08:00
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
pci_root_num = 0;
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
int min_bus;
|
|
|
|
int max_bus;
|
|
|
|
reg = read_pci_config(bus, slot, 1, 0xe0 + (i << 2));
|
2008-02-19 19:15:08 +08:00
|
|
|
|
|
|
|
/* Check if that register is enabled for bus range */
|
2008-02-19 19:21:20 +08:00
|
|
|
if ((reg & 7) != 3)
|
2008-02-19 19:15:08 +08:00
|
|
|
continue;
|
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
min_bus = (reg >> 16) & 0xff;
|
|
|
|
max_bus = (reg >> 24) & 0xff;
|
|
|
|
node = (reg >> 4) & 0x07;
|
|
|
|
#ifdef CONFIG_NUMA
|
2008-02-19 19:15:08 +08:00
|
|
|
for (j = min_bus; j <= max_bus; j++)
|
|
|
|
mp_bus_to_node[j] = (unsigned char) node;
|
2008-02-19 19:21:20 +08:00
|
|
|
#endif
|
|
|
|
link = (reg >> 8) & 0x03;
|
|
|
|
|
|
|
|
info = &pci_root_info[pci_root_num];
|
|
|
|
info->bus_min = min_bus;
|
|
|
|
info->bus_max = max_bus;
|
|
|
|
info->node = node;
|
|
|
|
info->link = link;
|
|
|
|
sprintf(info->name, "PCI Bus #%02x", min_bus);
|
|
|
|
pci_root_num++;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
/* get the default node and link for left over res */
|
|
|
|
reg = read_pci_config(bus, slot, 0, 0x60);
|
|
|
|
def_node = (reg >> 8) & 0x07;
|
|
|
|
reg = read_pci_config(bus, slot, 0, 0x64);
|
|
|
|
def_link = (reg >> 8) & 0x03;
|
|
|
|
|
|
|
|
memset(range, 0, sizeof(range));
|
|
|
|
range[0].end = 0xffff;
|
|
|
|
/* io port resource */
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3));
|
|
|
|
if (!(reg & 3))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
start = reg & 0xfff000;
|
|
|
|
reg = read_pci_config(bus, slot, 1, 0xc4 + (i << 3));
|
|
|
|
node = reg & 0x07;
|
|
|
|
link = (reg >> 4) & 0x03;
|
|
|
|
end = (reg & 0xfff000) | 0xfff;
|
|
|
|
|
|
|
|
/* find the position */
|
|
|
|
for (j = 0; j < pci_root_num; j++) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
if (info->node == node && info->link == link)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j == pci_root_num)
|
|
|
|
continue; /* not found */
|
|
|
|
|
|
|
|
info = &pci_root_info[j];
|
2008-03-06 17:15:31 +08:00
|
|
|
printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n",
|
|
|
|
node, link, (u64)start, (u64)end);
|
2008-04-13 16:41:58 +08:00
|
|
|
|
|
|
|
/* kernel only handle 16 bit only */
|
|
|
|
if (end > 0xffff)
|
|
|
|
end = 0xffff;
|
|
|
|
update_res(info, start, end, IORESOURCE_IO, 1);
|
2008-02-19 19:21:20 +08:00
|
|
|
update_range(range, start, end);
|
|
|
|
}
|
|
|
|
/* add left over io port range to def node/link, [0, 0xffff] */
|
|
|
|
/* find the position */
|
|
|
|
for (j = 0; j < pci_root_num; j++) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
if (info->node == def_node && info->link == def_link)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j < pci_root_num) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
for (i = 0; i < RANGE_NUM; i++) {
|
|
|
|
if (!range[i].end)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
update_res(info, range[i].start, range[i].end,
|
|
|
|
IORESOURCE_IO, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
memset(range, 0, sizeof(range));
|
|
|
|
/* 0xfd00000000-0xffffffffff for HT */
|
2008-03-06 17:15:31 +08:00
|
|
|
range[0].end = (0xfdULL<<32) - 1;
|
2008-02-19 19:21:20 +08:00
|
|
|
|
|
|
|
/* need to take out [0, TOM) for RAM*/
|
|
|
|
address = MSR_K8_TOP_MEM1;
|
|
|
|
rdmsrl(address, val);
|
2008-05-13 08:40:39 +08:00
|
|
|
end = (val & 0xffffff800000ULL);
|
2008-02-19 19:21:20 +08:00
|
|
|
printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20);
|
|
|
|
if (end < (1ULL<<32))
|
|
|
|
update_range(range, 0, end - 1);
|
|
|
|
|
2008-03-06 17:15:31 +08:00
|
|
|
/* get mmconfig */
|
|
|
|
get_pci_mmcfg_amd_fam10h_range();
|
|
|
|
/* need to take out mmconf range */
|
|
|
|
if (fam10h_mmconf_end) {
|
|
|
|
printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end);
|
|
|
|
update_range(range, fam10h_mmconf_start, fam10h_mmconf_end);
|
|
|
|
}
|
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
/* mmio resource */
|
|
|
|
for (i = 0; i < 8; i++) {
|
|
|
|
reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3));
|
|
|
|
if (!(reg & 3))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
start = reg & 0xffffff00; /* 39:16 on 31:8*/
|
|
|
|
start <<= 8;
|
|
|
|
reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3));
|
|
|
|
node = reg & 0x07;
|
|
|
|
link = (reg >> 4) & 0x03;
|
|
|
|
end = (reg & 0xffffff00);
|
|
|
|
end <<= 8;
|
|
|
|
end |= 0xffff;
|
|
|
|
|
|
|
|
/* find the position */
|
|
|
|
for (j = 0; j < pci_root_num; j++) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
if (info->node == node && info->link == link)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j == pci_root_num)
|
|
|
|
continue; /* not found */
|
|
|
|
|
|
|
|
info = &pci_root_info[j];
|
2008-03-06 17:15:31 +08:00
|
|
|
|
|
|
|
printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]",
|
|
|
|
node, link, (u64)start, (u64)end);
|
|
|
|
/*
|
|
|
|
* some sick allocation would have range overlap with fam10h
|
|
|
|
* mmconf range, so need to update start and end.
|
|
|
|
*/
|
|
|
|
if (fam10h_mmconf_end) {
|
|
|
|
int changed = 0;
|
|
|
|
u64 endx = 0;
|
|
|
|
if (start >= fam10h_mmconf_start &&
|
|
|
|
start <= fam10h_mmconf_end) {
|
|
|
|
start = fam10h_mmconf_end + 1;
|
|
|
|
changed = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (end >= fam10h_mmconf_start &&
|
|
|
|
end <= fam10h_mmconf_end) {
|
|
|
|
end = fam10h_mmconf_start - 1;
|
|
|
|
changed = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (start < fam10h_mmconf_start &&
|
|
|
|
end > fam10h_mmconf_end) {
|
|
|
|
/* we got a hole */
|
|
|
|
endx = fam10h_mmconf_start - 1;
|
|
|
|
update_res(info, start, endx, IORESOURCE_MEM, 0);
|
|
|
|
update_range(range, start, endx);
|
|
|
|
printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx);
|
|
|
|
start = fam10h_mmconf_end + 1;
|
|
|
|
changed = 1;
|
|
|
|
}
|
|
|
|
if (changed) {
|
|
|
|
if (start <= end) {
|
|
|
|
printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end);
|
|
|
|
} else {
|
|
|
|
printk(KERN_CONT "%s\n", endx?"":" ==> none");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-04-13 16:41:58 +08:00
|
|
|
update_res(info, start, end, IORESOURCE_MEM, 1);
|
2008-02-19 19:21:20 +08:00
|
|
|
update_range(range, start, end);
|
2008-03-06 17:15:31 +08:00
|
|
|
printk(KERN_CONT "\n");
|
2008-02-19 19:21:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* need to take out [4G, TOM2) for RAM*/
|
|
|
|
/* SYS_CFG */
|
|
|
|
address = MSR_K8_SYSCFG;
|
|
|
|
rdmsrl(address, val);
|
|
|
|
/* TOP_MEM2 is enabled? */
|
|
|
|
if (val & (1<<21)) {
|
|
|
|
/* TOP_MEM2 */
|
|
|
|
address = MSR_K8_TOP_MEM2;
|
|
|
|
rdmsrl(address, val);
|
2008-05-13 08:40:39 +08:00
|
|
|
end = (val & 0xffffff800000ULL);
|
2008-02-19 19:21:20 +08:00
|
|
|
printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20);
|
|
|
|
update_range(range, 1ULL<<32, end - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* add left over mmio range to def node/link ?
|
|
|
|
* that is tricky, just record range in from start_min to 4G
|
|
|
|
*/
|
|
|
|
for (j = 0; j < pci_root_num; j++) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
if (info->node == def_node && info->link == def_link)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j < pci_root_num) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
|
|
|
|
for (i = 0; i < RANGE_NUM; i++) {
|
|
|
|
if (!range[i].end)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
update_res(info, range[i].start, range[i].end,
|
|
|
|
IORESOURCE_MEM, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < pci_root_num; i++) {
|
|
|
|
int res_num;
|
|
|
|
int busnum;
|
|
|
|
|
|
|
|
info = &pci_root_info[i];
|
|
|
|
res_num = info->res_num;
|
|
|
|
busnum = info->bus_min;
|
|
|
|
printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n",
|
|
|
|
info->bus_min, info->bus_max, info->node, info->link);
|
|
|
|
for (j = 0; j < res_num; j++) {
|
|
|
|
res = &info->res[j];
|
|
|
|
printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n",
|
|
|
|
busnum, j,
|
|
|
|
(res->flags & IORESOURCE_IO)?"io port":"mmio",
|
|
|
|
res->start, res->end);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
postcore_initcall(early_fill_mp_bus_info);
|
2008-06-13 02:19:23 +08:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* common 32/64 bit code */
|
|
|
|
|
|
|
|
#define ENABLE_CF8_EXT_CFG (1ULL << 46)
|
|
|
|
|
|
|
|
static void enable_pci_io_ecs_per_cpu(void *unused)
|
|
|
|
{
|
|
|
|
u64 reg;
|
|
|
|
rdmsrl(MSR_AMD64_NB_CFG, reg);
|
|
|
|
if (!(reg & ENABLE_CF8_EXT_CFG)) {
|
|
|
|
reg |= ENABLE_CF8_EXT_CFG;
|
|
|
|
wrmsrl(MSR_AMD64_NB_CFG, reg);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * On family 0x10 and later boot CPUs, run enable_pci_io_ecs_per_cpu() on
 * every CPU and set PCI_HAS_IO_ECS in pci_probe.  Always returns 0.
 */
static int __init enable_pci_io_ecs(void)
{
	/* assume all cpus from fam10h have IO ECS */
	if (boot_cpu_data.x86 >= 0x10) {
		on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1);
		pci_probe |= PCI_HAS_IO_ECS;
	}

	return 0;
}
|
|
|
|
|
|
|
|
postcore_initcall(enable_pci_io_ecs);
|