2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/pci.h>
|
2008-07-03 04:50:26 +08:00
|
|
|
#include <linux/topology.h>
|
x86: fix: make PCI ECS for AMD CPUs hotplug capable
Until now, PCI ECS setup was performed at boot time only and for cpus
that are enabled then. This patch fixes this and adds cpu hotplug.
Tests sequence (check if ECS bit is set when bringing cpu online again):
# ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr
00000008 00404010
# ( perl -e 'sysseek(STDOUT, 0xC001001F, 0); print pack "l*", 8, 0x00400010' ) > /dev/cpu/1/msr
# ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr
00000008 00400010
# echo 0 > /sys/devices/system/cpu/cpu1/online
# echo 1 > /sys/devices/system/cpu/cpu1/online
# ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr
00000008 00404010
Reported-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-08-23 02:23:38 +08:00
|
|
|
#include <linux/cpu.h>
|
2008-12-27 21:02:28 +08:00
|
|
|
#include <asm/pci_x86.h>
|
2008-06-13 02:19:23 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_X86_64
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
#include <asm/pci-direct.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/mpspec.h>
|
|
|
|
#include <linux/cpumask.h>
|
2008-07-03 04:50:26 +08:00
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This discovers the pcibus <-> node mapping on AMD K8.
|
2008-02-19 19:21:20 +08:00
|
|
|
* It also collects the peer root bus resources for I/O ports and MMIO.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
|
|
|
|
#define BUS_NR 256
|
|
|
|
|
2008-07-03 04:50:26 +08:00
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
/* Node recorded for each PCI bus number, indexed by bus number. */
static int mp_bus_to_node[BUS_NR];

/*
 * Record @node as the node of PCI bus @busnum.
 * Bus numbers outside [0, BUS_NR) are ignored.
 */
void set_mp_bus_to_node(int busnum, int node)
{
	if (busnum < 0 || busnum >= BUS_NR)
		return;

	mp_bus_to_node[busnum] = node;
}
|
|
|
|
|
|
|
|
int get_mp_bus_to_node(int busnum)
|
|
|
|
{
|
|
|
|
int node = -1;
|
|
|
|
|
|
|
|
if (busnum < 0 || busnum > (BUS_NR - 1))
|
|
|
|
return node;
|
|
|
|
|
|
|
|
node = mp_bus_to_node[busnum];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* let numa_node_id to decide it later in dma_alloc_pages
|
|
|
|
* if there is no ram on that node
|
|
|
|
*/
|
|
|
|
if (node != -1 && !node_online(node))
|
|
|
|
node = -1;
|
|
|
|
|
|
|
|
return node;
|
|
|
|
}
|
2008-07-03 04:50:25 +08:00
|
|
|
|
2008-07-03 04:50:26 +08:00
|
|
|
#else /* CONFIG_X86_32 */
|
|
|
|
|
|
|
|
/* Node recorded for each PCI bus number, stored as one byte per bus. */
static unsigned char mp_bus_to_node[BUS_NR];

/*
 * Record @node (truncated to unsigned char) as the node of PCI bus
 * @busnum.  Bus numbers outside [0, BUS_NR) are ignored.
 */
void set_mp_bus_to_node(int busnum, int node)
{
	if (busnum < 0 || busnum >= BUS_NR)
		return;

	mp_bus_to_node[busnum] = (unsigned char) node;
}
|
|
|
|
|
|
|
|
int get_mp_bus_to_node(int busnum)
|
|
|
|
{
|
|
|
|
int node;
|
|
|
|
|
|
|
|
if (busnum < 0 || busnum > (BUS_NR - 1))
|
|
|
|
return 0;
|
|
|
|
node = mp_bus_to_node[busnum];
|
|
|
|
return node;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* CONFIG_X86_32 */
|
|
|
|
|
|
|
|
#endif /* CONFIG_NUMA */
|
|
|
|
|
|
|
|
#ifdef CONFIG_X86_64
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
|
2008-07-03 04:50:25 +08:00
|
|
|
/*
|
|
|
|
* sub bus (transparent) will use entries from 3 to store extra from root,
|
|
|
|
* so need to make sure there are enough slots there; increase PCI_BUS_NUM_RESOURCES?
|
|
|
|
*/
|
|
|
|
#define RES_NUM 16
|
|
|
|
struct pci_root_info {
|
|
|
|
char name[12];
|
|
|
|
unsigned int res_num;
|
|
|
|
struct resource res[RES_NUM];
|
|
|
|
int bus_min;
|
|
|
|
int bus_max;
|
|
|
|
int node;
|
|
|
|
int link;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* 4 at this time, it may become to 32 */
|
|
|
|
#define PCI_ROOT_NR 4
|
|
|
|
static int pci_root_num;
|
|
|
|
static struct pci_root_info pci_root_info[PCI_ROOT_NR];
|
|
|
|
|
2009-04-19 01:11:25 +08:00
|
|
|
void x86_pci_root_bus_res_quirks(struct pci_bus *b)
|
2008-02-19 19:21:20 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int j;
|
|
|
|
struct pci_root_info *info;
|
|
|
|
|
2009-04-21 09:35:40 +08:00
|
|
|
/* don't go for it if _CRS is used */
|
|
|
|
if (pci_probe & PCI_USE__CRS)
|
|
|
|
return;
|
|
|
|
|
2008-04-12 06:14:52 +08:00
|
|
|
/* if only one root bus, don't need to anything */
|
|
|
|
if (pci_root_num < 2)
|
2008-02-19 19:21:20 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < pci_root_num; i++) {
|
|
|
|
if (pci_root_info[i].bus_min == b->number)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i == pci_root_num)
|
|
|
|
return;
|
|
|
|
|
|
|
|
info = &pci_root_info[i];
|
|
|
|
for (j = 0; j < info->res_num; j++) {
|
|
|
|
struct resource *res;
|
|
|
|
struct resource *root;
|
|
|
|
|
|
|
|
res = &info->res[j];
|
|
|
|
b->resource[j] = res;
|
|
|
|
if (res->flags & IORESOURCE_IO)
|
|
|
|
root = &ioport_resource;
|
|
|
|
else
|
|
|
|
root = &iomem_resource;
|
|
|
|
insert_resource(root, res);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Number of slots in a range table handed to update_range(). */
#define RANGE_NUM 16

/* One inclusive address range; a slot with end == 0 is unused. */
struct res_range {
	size_t start;
	size_t end;
};
|
|
|
|
|
|
|
|
/*
 * Remove the inclusive interval [start, end] from every used slot of
 * @range (RANGE_NUM slots, end == 0 marks an unused slot).
 *
 * A slot that is completely covered is cleared, a slot that is
 * overlapped on one side is trimmed, and a slot that strictly contains
 * the interval is split in two, the upper part going into a spare slot.
 * When no spare slot is left, an error is logged and only the lower
 * part is kept.
 */
static void __init update_range(struct res_range *range, size_t start,
				size_t end)
{
	int idx;

	for (idx = 0; idx < RANGE_NUM; idx++) {
		struct res_range *cur = &range[idx];
		int spare;

		/* unused slot */
		if (!cur->end)
			continue;

		if (start <= cur->start) {
			if (end >= cur->end) {
				/* slot is covered completely: drop it */
				cur->start = 0;
				cur->end = 0;
			} else if (cur->start < end + 1) {
				/* interval eats the head of the slot */
				cur->start = end + 1;
			}
			continue;
		}

		/* from here on the interval starts inside or after the slot */
		if (end >= cur->end) {
			/* interval eats the tail of the slot, if it reaches it */
			if (cur->end > start - 1)
				cur->end = start - 1;
			continue;
		}

		/* interval lies strictly inside: find the new spare */
		for (spare = 0; spare < RANGE_NUM; spare++) {
			if (range[spare].end == 0)
				break;
		}

		if (spare < RANGE_NUM) {
			range[spare].end = cur->end;
			range[spare].start = end + 1;
		} else {
			printk(KERN_ERR "run of slot in ranges\n");
		}
		cur->end = start - 1;
	}
}
|
|
|
|
|
|
|
|
/*
 * Record the inclusive window [start, end] of type @flags for the root
 * bus described by @info.
 *
 * @info:  root bus record whose resource table is updated
 * @start: first address of the window
 * @end:   last address of the window
 * @flags: resource flags; only entries with identical flags are merged
 * @merge: when non-zero, first try to grow an existing entry that
 *         overlaps or directly adjoins the window; when zero, always
 *         append a new entry
 *
 * The window is dropped when it is inverted (start > end) or when it
 * needs a new entry and all RES_NUM entries are already in use.
 */
static void __init update_res(struct pci_root_info *info, size_t start,
			      size_t end, unsigned long flags, int merge)
{
	int i;
	struct resource *res;

	/*
	 * An inverted window describes no addresses at all.  Callers can
	 * produce one, e.g. by clamping the end of an io port window to
	 * 0xffff while its start lies above that; never record it.
	 */
	if (start > end)
		return;

	if (!merge)
		goto addit;

	/* try to merge it with old one */
	for (i = 0; i < info->res_num; i++) {
		size_t final_start, final_end;
		size_t common_start, common_end;

		res = &info->res[i];
		if (res->flags != flags)
			continue;

		/* skip entries that neither overlap nor adjoin the window */
		common_start = max((size_t)res->start, start);
		common_end = min((size_t)res->end, end);
		if (common_start > common_end + 1)
			continue;

		final_start = min((size_t)res->start, start);
		final_end = max((size_t)res->end, end);

		res->start = final_start;
		res->end = final_end;
		return;
	}

addit:

	/* need to add that */
	if (info->res_num >= RES_NUM)
		return;

	res = &info->res[info->res_num];
	res->name = info->name;
	res->flags = flags;
	res->start = start;
	res->end = end;
	res->child = NULL;
	info->res_num++;
}
|
|
|
|
|
|
|
|
struct pci_hostbridge_probe {
|
|
|
|
u32 bus;
|
|
|
|
u32 slot;
|
|
|
|
u32 vendor;
|
|
|
|
u32 device;
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct pci_hostbridge_probe pci_probes[] __initdata = {
|
|
|
|
{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1100 },
|
|
|
|
{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 },
|
|
|
|
{ 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
|
|
|
|
{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 },
|
|
|
|
};
|
|
|
|
|
2008-03-06 17:15:31 +08:00
|
|
|
static u64 __initdata fam10h_mmconf_start;
|
|
|
|
static u64 __initdata fam10h_mmconf_end;
|
|
|
|
static void __init get_pci_mmcfg_amd_fam10h_range(void)
|
|
|
|
{
|
|
|
|
u32 address;
|
|
|
|
u64 base, msr;
|
|
|
|
unsigned segn_busn_bits;
|
|
|
|
|
|
|
|
/* assume all cpus from fam10h have mmconf */
|
|
|
|
if (boot_cpu_data.x86 < 0x10)
|
|
|
|
return;
|
|
|
|
|
|
|
|
address = MSR_FAM10H_MMIO_CONF_BASE;
|
|
|
|
rdmsrl(address, msr);
|
|
|
|
|
|
|
|
/* mmconfig is not enable */
|
|
|
|
if (!(msr & FAM10H_MMIO_CONF_ENABLE))
|
|
|
|
return;
|
|
|
|
|
|
|
|
base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
|
|
|
|
|
|
|
|
segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
|
|
|
|
FAM10H_MMIO_CONF_BUSRANGE_MASK;
|
|
|
|
|
|
|
|
fam10h_mmconf_start = base;
|
|
|
|
fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/**
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
* early_fill_mp_bus_to_node()
|
|
|
|
* called before pcibios_scan_root and pci_scan_bus
|
2005-04-17 06:20:36 +08:00
|
|
|
* fills the mp_bus_to_cpumask array based according to the LDT Bus Number
|
|
|
|
* Registers found in the K8 northbridge
|
|
|
|
*/
|
2008-02-19 19:21:20 +08:00
|
|
|
static int __init early_fill_mp_bus_info(void)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2008-02-19 19:21:20 +08:00
|
|
|
int i;
|
|
|
|
int j;
|
|
|
|
unsigned bus;
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
unsigned slot;
|
2008-02-19 19:21:20 +08:00
|
|
|
int found;
|
2008-02-19 19:15:08 +08:00
|
|
|
int node;
|
2008-02-19 19:21:20 +08:00
|
|
|
int link;
|
|
|
|
int def_node;
|
|
|
|
int def_link;
|
|
|
|
struct pci_root_info *info;
|
|
|
|
u32 reg;
|
|
|
|
struct resource *res;
|
|
|
|
size_t start;
|
|
|
|
size_t end;
|
|
|
|
struct res_range range[RANGE_NUM];
|
|
|
|
u64 val;
|
|
|
|
u32 address;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
#ifdef CONFIG_NUMA
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
for (i = 0; i < BUS_NR; i++)
|
|
|
|
mp_bus_to_node[i] = -1;
|
2008-02-19 19:21:20 +08:00
|
|
|
#endif
|
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-02-19 19:20:09 +08:00
|
|
|
|
|
|
|
if (!early_pci_allowed())
|
|
|
|
return -1;
|
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
found = 0;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
|
|
|
|
u32 id;
|
|
|
|
u16 device;
|
|
|
|
u16 vendor;
|
2008-02-19 19:15:08 +08:00
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
bus = pci_probes[i].bus;
|
|
|
|
slot = pci_probes[i].slot;
|
|
|
|
id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID);
|
2008-02-19 19:15:08 +08:00
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
vendor = id & 0xffff;
|
|
|
|
device = (id>>16) & 0xffff;
|
|
|
|
if (pci_probes[i].vendor == vendor &&
|
|
|
|
pci_probes[i].device == device) {
|
|
|
|
found = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!found)
|
|
|
|
return 0;
|
2008-02-19 19:15:08 +08:00
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
pci_root_num = 0;
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
int min_bus;
|
|
|
|
int max_bus;
|
|
|
|
reg = read_pci_config(bus, slot, 1, 0xe0 + (i << 2));
|
2008-02-19 19:15:08 +08:00
|
|
|
|
|
|
|
/* Check if that register is enabled for bus range */
|
2008-02-19 19:21:20 +08:00
|
|
|
if ((reg & 7) != 3)
|
2008-02-19 19:15:08 +08:00
|
|
|
continue;
|
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
min_bus = (reg >> 16) & 0xff;
|
|
|
|
max_bus = (reg >> 24) & 0xff;
|
|
|
|
node = (reg >> 4) & 0x07;
|
|
|
|
#ifdef CONFIG_NUMA
|
2008-02-19 19:15:08 +08:00
|
|
|
for (j = min_bus; j <= max_bus; j++)
|
|
|
|
mp_bus_to_node[j] = (unsigned char) node;
|
2008-02-19 19:21:20 +08:00
|
|
|
#endif
|
|
|
|
link = (reg >> 8) & 0x03;
|
|
|
|
|
|
|
|
info = &pci_root_info[pci_root_num];
|
|
|
|
info->bus_min = min_bus;
|
|
|
|
info->bus_max = max_bus;
|
|
|
|
info->node = node;
|
|
|
|
info->link = link;
|
|
|
|
sprintf(info->name, "PCI Bus #%02x", min_bus);
|
|
|
|
pci_root_num++;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
/* get the default node and link for left over res */
|
|
|
|
reg = read_pci_config(bus, slot, 0, 0x60);
|
|
|
|
def_node = (reg >> 8) & 0x07;
|
|
|
|
reg = read_pci_config(bus, slot, 0, 0x64);
|
|
|
|
def_link = (reg >> 8) & 0x03;
|
|
|
|
|
|
|
|
memset(range, 0, sizeof(range));
|
|
|
|
range[0].end = 0xffff;
|
|
|
|
/* io port resource */
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3));
|
|
|
|
if (!(reg & 3))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
start = reg & 0xfff000;
|
|
|
|
reg = read_pci_config(bus, slot, 1, 0xc4 + (i << 3));
|
|
|
|
node = reg & 0x07;
|
|
|
|
link = (reg >> 4) & 0x03;
|
|
|
|
end = (reg & 0xfff000) | 0xfff;
|
|
|
|
|
|
|
|
/* find the position */
|
|
|
|
for (j = 0; j < pci_root_num; j++) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
if (info->node == node && info->link == link)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j == pci_root_num)
|
|
|
|
continue; /* not found */
|
|
|
|
|
|
|
|
info = &pci_root_info[j];
|
2008-03-06 17:15:31 +08:00
|
|
|
printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n",
|
|
|
|
node, link, (u64)start, (u64)end);
|
2008-04-13 16:41:58 +08:00
|
|
|
|
|
|
|
/* kernel only handle 16 bit only */
|
|
|
|
if (end > 0xffff)
|
|
|
|
end = 0xffff;
|
|
|
|
update_res(info, start, end, IORESOURCE_IO, 1);
|
2008-02-19 19:21:20 +08:00
|
|
|
update_range(range, start, end);
|
|
|
|
}
|
|
|
|
/* add left over io port range to def node/link, [0, 0xffff] */
|
|
|
|
/* find the position */
|
|
|
|
for (j = 0; j < pci_root_num; j++) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
if (info->node == def_node && info->link == def_link)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j < pci_root_num) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
for (i = 0; i < RANGE_NUM; i++) {
|
|
|
|
if (!range[i].end)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
update_res(info, range[i].start, range[i].end,
|
|
|
|
IORESOURCE_IO, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
memset(range, 0, sizeof(range));
|
|
|
|
/* 0xfd00000000-0xffffffffff for HT */
|
2008-03-06 17:15:31 +08:00
|
|
|
range[0].end = (0xfdULL<<32) - 1;
|
2008-02-19 19:21:20 +08:00
|
|
|
|
|
|
|
/* need to take out [0, TOM) for RAM*/
|
|
|
|
address = MSR_K8_TOP_MEM1;
|
|
|
|
rdmsrl(address, val);
|
2008-05-13 08:40:39 +08:00
|
|
|
end = (val & 0xffffff800000ULL);
|
2008-02-19 19:21:20 +08:00
|
|
|
printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20);
|
|
|
|
if (end < (1ULL<<32))
|
|
|
|
update_range(range, 0, end - 1);
|
|
|
|
|
2008-03-06 17:15:31 +08:00
|
|
|
/* get mmconfig */
|
|
|
|
get_pci_mmcfg_amd_fam10h_range();
|
|
|
|
/* need to take out mmconf range */
|
|
|
|
if (fam10h_mmconf_end) {
|
|
|
|
printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end);
|
|
|
|
update_range(range, fam10h_mmconf_start, fam10h_mmconf_end);
|
|
|
|
}
|
|
|
|
|
2008-02-19 19:21:20 +08:00
|
|
|
/* mmio resource */
|
|
|
|
for (i = 0; i < 8; i++) {
|
|
|
|
reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3));
|
|
|
|
if (!(reg & 3))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
start = reg & 0xffffff00; /* 39:16 on 31:8*/
|
|
|
|
start <<= 8;
|
|
|
|
reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3));
|
|
|
|
node = reg & 0x07;
|
|
|
|
link = (reg >> 4) & 0x03;
|
|
|
|
end = (reg & 0xffffff00);
|
|
|
|
end <<= 8;
|
|
|
|
end |= 0xffff;
|
|
|
|
|
|
|
|
/* find the position */
|
|
|
|
for (j = 0; j < pci_root_num; j++) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
if (info->node == node && info->link == link)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j == pci_root_num)
|
|
|
|
continue; /* not found */
|
|
|
|
|
|
|
|
info = &pci_root_info[j];
|
2008-03-06 17:15:31 +08:00
|
|
|
|
|
|
|
printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]",
|
|
|
|
node, link, (u64)start, (u64)end);
|
|
|
|
/*
|
|
|
|
* some sick allocation would have range overlap with fam10h
|
|
|
|
* mmconf range, so need to update start and end.
|
|
|
|
*/
|
|
|
|
if (fam10h_mmconf_end) {
|
|
|
|
int changed = 0;
|
|
|
|
u64 endx = 0;
|
|
|
|
if (start >= fam10h_mmconf_start &&
|
|
|
|
start <= fam10h_mmconf_end) {
|
|
|
|
start = fam10h_mmconf_end + 1;
|
|
|
|
changed = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (end >= fam10h_mmconf_start &&
|
|
|
|
end <= fam10h_mmconf_end) {
|
|
|
|
end = fam10h_mmconf_start - 1;
|
|
|
|
changed = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (start < fam10h_mmconf_start &&
|
|
|
|
end > fam10h_mmconf_end) {
|
|
|
|
/* we got a hole */
|
|
|
|
endx = fam10h_mmconf_start - 1;
|
|
|
|
update_res(info, start, endx, IORESOURCE_MEM, 0);
|
|
|
|
update_range(range, start, endx);
|
|
|
|
printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx);
|
|
|
|
start = fam10h_mmconf_end + 1;
|
|
|
|
changed = 1;
|
|
|
|
}
|
|
|
|
if (changed) {
|
|
|
|
if (start <= end) {
|
|
|
|
printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end);
|
|
|
|
} else {
|
|
|
|
printk(KERN_CONT "%s\n", endx?"":" ==> none");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-04-13 16:41:58 +08:00
|
|
|
update_res(info, start, end, IORESOURCE_MEM, 1);
|
2008-02-19 19:21:20 +08:00
|
|
|
update_range(range, start, end);
|
2008-03-06 17:15:31 +08:00
|
|
|
printk(KERN_CONT "\n");
|
2008-02-19 19:21:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* need to take out [4G, TOM2) for RAM*/
|
|
|
|
/* SYS_CFG */
|
|
|
|
address = MSR_K8_SYSCFG;
|
|
|
|
rdmsrl(address, val);
|
|
|
|
/* TOP_MEM2 is enabled? */
|
|
|
|
if (val & (1<<21)) {
|
|
|
|
/* TOP_MEM2 */
|
|
|
|
address = MSR_K8_TOP_MEM2;
|
|
|
|
rdmsrl(address, val);
|
2008-05-13 08:40:39 +08:00
|
|
|
end = (val & 0xffffff800000ULL);
|
2008-02-19 19:21:20 +08:00
|
|
|
printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20);
|
|
|
|
update_range(range, 1ULL<<32, end - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* add left over mmio range to def node/link ?
|
|
|
|
* that is tricky, just record range in from start_min to 4G
|
|
|
|
*/
|
|
|
|
for (j = 0; j < pci_root_num; j++) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
if (info->node == def_node && info->link == def_link)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j < pci_root_num) {
|
|
|
|
info = &pci_root_info[j];
|
|
|
|
|
|
|
|
for (i = 0; i < RANGE_NUM; i++) {
|
|
|
|
if (!range[i].end)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
update_res(info, range[i].start, range[i].end,
|
|
|
|
IORESOURCE_MEM, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < pci_root_num; i++) {
|
|
|
|
int res_num;
|
|
|
|
int busnum;
|
|
|
|
|
|
|
|
info = &pci_root_info[i];
|
|
|
|
res_num = info->res_num;
|
|
|
|
busnum = info->bus_min;
|
|
|
|
printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n",
|
|
|
|
info->bus_min, info->bus_max, info->node, info->link);
|
|
|
|
for (j = 0; j < res_num; j++) {
|
|
|
|
res = &info->res[j];
|
|
|
|
printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n",
|
|
|
|
busnum, j,
|
|
|
|
(res->flags & IORESOURCE_IO)?"io port":"mmio",
|
|
|
|
res->start, res->end);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-08-23 02:23:37 +08:00
|
|
|
#else /* !CONFIG_X86_64 */
|
2008-06-13 02:19:23 +08:00
|
|
|
|
2008-08-23 02:23:37 +08:00
|
|
|
/* Nothing to discover on 32-bit builds; always reports success. */
static int __init early_fill_mp_bus_info(void)
{
	return 0;
}
|
|
|
|
|
|
|
|
#endif /* !CONFIG_X86_64 */
|
2008-06-13 02:19:23 +08:00
|
|
|
|
|
|
|
/* common 32/64 bit code */
|
|
|
|
|
|
|
|
#define ENABLE_CF8_EXT_CFG (1ULL << 46)
|
|
|
|
|
x86: fix: make PCI ECS for AMD CPUs hotplug capable
Until now, PCI ECS setup was performed at boot time only and for cpus
that are enabled then. This patch fixes this and adds cpu hotplug.
Tests sequence (check if ECS bit is set when bringing cpu online again):
# ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr
00000008 00404010
# ( perl -e 'sysseek(STDOUT, 0xC001001F, 0); print pack "l*", 8, 0x00400010' ) > /dev/cpu/1/msr
# ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr
00000008 00400010
# echo 0 > /sys/devices/system/cpu/cpu1/online
# echo 1 > /sys/devices/system/cpu/cpu1/online
# ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr
00000008 00404010
Reported-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-08-23 02:23:38 +08:00
|
|
|
static void enable_pci_io_ecs(void *unused)
|
2008-06-13 02:19:23 +08:00
|
|
|
{
|
|
|
|
u64 reg;
|
|
|
|
rdmsrl(MSR_AMD64_NB_CFG, reg);
|
|
|
|
if (!(reg & ENABLE_CF8_EXT_CFG)) {
|
|
|
|
reg |= ENABLE_CF8_EXT_CFG;
|
|
|
|
wrmsrl(MSR_AMD64_NB_CFG, reg);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
x86: fix: make PCI ECS for AMD CPUs hotplug capable
Until now, PCI ECS setup was performed at boot time only and for cpus
that are enabled then. This patch fixes this and adds cpu hotplug.
Test sequence (check that the ECS bit is set again after bringing the cpu back online):
# ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr
00000008 00404010
# ( perl -e 'sysseek(STDOUT, 0xC001001F, 0); print pack "l*", 8, 0x00400010' ) > /dev/cpu/1/msr
# ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr
00000008 00400010
# echo 0 > /sys/devices/system/cpu/cpu1/online
# echo 1 > /sys/devices/system/cpu/cpu1/online
# ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr
00000008 00404010
Reported-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-08-23 02:23:38 +08:00
|
|
|
/*
 * CPU notifier callback.
 *
 * For CPU_ONLINE and CPU_ONLINE_FROZEN, requests that enable_pci_io_ecs()
 * be run on the cpu encoded in @hcpu. Every other action is ignored.
 *
 * @self:   the notifier block this callback belongs to (not used here).
 * @action: notifier event code.
 * @hcpu:   cpu number, passed as a pointer-sized integer.
 *
 * Always returns NOTIFY_OK.
 */
static int __cpuinit amd_cpu_notify(struct notifier_block *self,
				    unsigned long action, void *hcpu)
{
	int cpu = (long)hcpu;

	/*
	 * NOTE(review): the final 0 is presumably the "wait" flag, i.e. the
	 * call does not block until the target cpu has run the function --
	 * confirm against smp_call_function_single().
	 */
	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
		smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0);

	return NOTIFY_OK;
}
|
|
|
|
|
|
|
|
/* Notifier block that routes cpu events to amd_cpu_notify(). */
static struct notifier_block __cpuinitdata amd_cpu_notifier = {
	.notifier_call	= amd_cpu_notify,
};
|
|
|
|
|
|
|
|
/*
 * Enable IO ECS on every online cpu and advertise it in pci_probe.
 *
 * Does nothing when boot_cpu_data.x86 is below 0x10. Otherwise it
 * registers amd_cpu_notifier, invokes amd_cpu_notify() with CPU_ONLINE
 * for each cpu that is currently online, and sets PCI_HAS_IO_ECS in
 * pci_probe.
 *
 * Always returns 0.
 */
static int __init pci_io_ecs_init(void)
{
	int online_cpu;

	/* assume all cpus from fam10h have IO ECS */
	if (boot_cpu_data.x86 < 0x10)
		return 0;

	register_cpu_notifier(&amd_cpu_notifier);

	/* Reuse the notifier callback to configure the cpus already up. */
	for_each_online_cpu(online_cpu) {
		amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
			       (void *)(long)online_cpu);
	}

	pci_probe |= PCI_HAS_IO_ECS;

	return 0;
}
|
|
|
|
|
2008-08-23 02:23:37 +08:00
|
|
|
/*
 * Initcall entry point for the AMD-specific PCI setup in this file.
 *
 * When the boot cpu vendor is X86_VENDOR_AMD, runs
 * early_fill_mp_bus_info() followed by pci_io_ecs_init(); their return
 * values are not inspected. On any other vendor nothing is done.
 *
 * Always returns 0.
 */
static int __init amd_postcore_init(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		early_fill_mp_bus_info();
		pci_io_ecs_init();
	}

	return 0;
}
|
|
|
|
|
|
|
|
/* Register amd_postcore_init() as a postcore initcall. */
postcore_initcall(amd_postcore_init);
|