From 3504e47ffca5ed3f9e2cc7d37b428fbf1e00ad1b Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 10 Mar 2011 11:54:16 -0700 Subject: [PATCH 01/27] PCI: Enable ASPM state clearing regardless of policy Commit 2f671e2d allowed us to clear ASPM state when the FADT tells us it isn't supported, but we don't put this into effect if the aspm_policy is set to POLICY_POWERSAVE. Enable the state to be cleared regardless of policy. Signed-off-by: Alex Williamson Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aspm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index eee09f756ec9..3eb667b24787 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -608,7 +608,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev) * the BIOS's expectation, we'll do so once pci_enable_device() is * called. */ - if (aspm_policy != POLICY_POWERSAVE) { + if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) { pcie_config_aspm_path(link); pcie_set_clkpm(link, policy_to_clkpm_state(link)); } From 83d74e036b94ffbf871667eede5ef02993709452 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 5 Mar 2011 21:48:44 +0100 Subject: [PATCH 02/27] PCI/PM: Add kerneldoc description of pci_pm_reset() The pci_pm_reset() function is not a very nice interface due to its limitations and conditional behavior (e.g. it doesn't affect devices in low-power states), but it cannot be simply dropped, because existing device drivers may depend on it. However, its behavior and limitations should be well documented, so add an appropriate kerneldoc comment to it. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2472e7177b4b..44d1c7c3876b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2479,6 +2479,21 @@ clear: return 0; } +/** + * pci_pm_reset - Put device into PCI_D3 and back into PCI_D0. + * @dev: Device to reset. + * @probe: If set, only check if the device can be reset this way. + * + * If @dev supports native PCI PM and its PCI_PM_CTRL_NO_SOFT_RESET flag is + * unset, it will be reinitialized internally when going from PCI_D3hot to + * PCI_D0. If that's the case and the device is not in a low-power state + * already, force it into PCI_D3hot and back to PCI_D0, causing it to be reset. + * + * NOTE: This causes the caller to sleep for twice the device power transition + * cooldown period, which for the D0->D3hot and D3hot->D0 transitions is 10 ms + * by devault (i.e. unless the @dev's d3_delay field has a different value). + * Moreover, only devices in D0 can be reset by this function. + */ static int pci_pm_reset(struct pci_dev *dev, int probe) { u16 csr; From 0e8ede5351b53610363215f750e576ca1db1d0cd Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Feb 2011 13:12:11 +0100 Subject: [PATCH 03/27] x86/PCI: Convert release_resource to release_region/release_mem_region Request_region should be used with release_region, not release_resource. The local variables region and region2 are dropped and the calls to release_resource are replaced with calls to release_region, using the first two arguments of the corresponding calls to request_region. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression x,E; @@ ( *x = request_region(...) | *x = request_mem_region(...) ) ... when != release_region(x) when != x = E * release_resource(x); // Signed-off-by: Julia Lawall Signed-off-by: Jesse Barnes --- arch/x86/pci/direct.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index bd33620b0071..e6fd8473fb7b 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -280,12 +280,9 @@ void __init pci_direct_init(int type) int __init pci_direct_probe(void) { - struct resource *region, *region2; - if ((pci_probe & PCI_PROBE_CONF1) == 0) goto type2; - region = request_region(0xCF8, 8, "PCI conf1"); - if (!region) + if (!request_region(0xCF8, 8, "PCI conf1")) goto type2; if (pci_check_type1()) { @@ -293,16 +290,14 @@ int __init pci_direct_probe(void) port_cf9_safe = true; return 1; } - release_resource(region); + release_region(0xCF8, 8); type2: if ((pci_probe & PCI_PROBE_CONF2) == 0) return 0; - region = request_region(0xCF8, 4, "PCI conf2"); - if (!region) + if (!request_region(0xCF8, 4, "PCI conf2")) return 0; - region2 = request_region(0xC000, 0x1000, "PCI conf2"); - if (!region2) + if (!request_region(0xC000, 0x1000, "PCI conf2")) goto fail2; if (pci_check_type2()) { @@ -311,8 +306,8 @@ int __init pci_direct_probe(void) return 2; } - release_resource(region2); + release_region(0xC000, 0x1000); fail2: - release_resource(region); + release_region(0xCF8, 4); return 0; } From 40294d8f14384780a61a2dea8c92a231176ae301 Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Mon, 4 Apr 2011 17:12:59 +0800 Subject: [PATCH 04/27] PCI: Fix uninitialized variable bug in AER injection code If it was preempted, and the variable aer_mask_override is changed after the spin_unlock_irqrestore it will write an uninitialized variable by the pci_write_config_dword() function. Signed-off-by: Wanlong Gao Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aer_inject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c index f62079ff06dd..95489cd9a555 100644 --- a/drivers/pci/pcie/aer/aer_inject.c +++ b/drivers/pci/pcie/aer/aer_inject.c @@ -326,7 +326,7 @@ static int aer_inject(struct aer_error_inj *einj) unsigned long flags; unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn); int pos_cap_err, rp_pos_cap_err; - u32 sever, cor_mask, uncor_mask, cor_mask_orig, uncor_mask_orig; + u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0; int ret = 0; dev = pci_get_domain_bus_and_slot((int)einj->domain, einj->bus, devfn); From 63c4408074cbcc070ac17fc10e524800eb9bd0b0 Mon Sep 17 00:00:00 2001 From: Hemant Pedanekar Date: Tue, 5 Apr 2011 12:32:50 +0530 Subject: [PATCH 05/27] PCI: Add quirk for setting valid class for TI816X Endpoint TI816X (common name for DM816x/C6A816x/AM389x family) devices configured to boot as PCIe Endpoint have class code = 0. This makes kernel PCI bus code to skip allocating BARs to these devices resulting into following type of error when trying to enable them: "Device 0000:01:00.0 not available because of resource collisions" The device cannot be operated because of the above issue. This patch adds a ID specific (TI VENDOR ID and 816X DEVICE ID based) 'early' fixup quirk to replace class code with PCI_CLASS_MULTIMEDIA_VIDEO as class. Signed-off-by: Hemant Pedanekar Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 5129ed6d8fa7..4b2bbe813fce 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2784,6 +2784,16 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, vtd_mask_spec_errors); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors); #endif +static void __devinit fixup_ti816x_class(struct pci_dev* dev) +{ + /* TI 816x devices do not have class code set when in PCIe boot mode */ + if (dev->class == PCI_CLASS_NOT_DEFINED) { + dev_info(&dev->dev, "Setting PCI class for 816x PCIe device\n"); + dev->class = PCI_CLASS_MULTIMEDIA_VIDEO; + } +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_TI, 0xb800, fixup_ti816x_class); + static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { From 5491ff511d31ed06e9572f1e84e3494be66b6e8c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 12 Apr 2011 10:20:48 -0700 Subject: [PATCH 06/27] x86/PCI: Remove dma32_reserve_bootmem This workaround holds a dma32 buffer at early boot to prevent later bootmem allocations from stealing it in the case of large RAM configs. Now that x86 is using memblock, and the nobootmem wrapper does top-down allocation, it's no longer necessary, so remove it. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci.h | 2 -- arch/x86/kernel/pci-dma.c | 64 -------------------------------------- arch/x86/kernel/setup.c | 1 - 3 files changed, 67 deletions(-) diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 676129229630..d498943b906c 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -135,8 +135,6 @@ void default_teardown_msi_irqs(struct pci_dev *dev); #include "pci_64.h" #endif -void dma32_reserve_bootmem(void); - /* implement the pci_ DMA API in terms of the generic device dma_ one */ #include diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 9ea999a4dcc1..b49d00da2aed 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -68,74 +68,10 @@ int dma_set_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_mask); -#if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA) -static __initdata void *dma32_bootmem_ptr; -static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); - -static int __init parse_dma32_size_opt(char *p) -{ - if (!p) - return -EINVAL; - dma32_bootmem_size = memparse(p, &p); - return 0; -} -early_param("dma32_size", parse_dma32_size_opt); - -void __init dma32_reserve_bootmem(void) -{ - unsigned long size, align; - if (max_pfn <= MAX_DMA32_PFN) - return; - - /* - * check aperture_64.c allocate_aperture() for reason about - * using 512M as goal - */ - align = 64ULL<<20; - size = roundup(dma32_bootmem_size, align); - dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, - 512ULL<<20); - /* - * Kmemleak should not scan this block as it may not be mapped via the - * kernel direct mapping. - */ - kmemleak_ignore(dma32_bootmem_ptr); - if (dma32_bootmem_ptr) - dma32_bootmem_size = size; - else - dma32_bootmem_size = 0; -} -static void __init dma32_free_bootmem(void) -{ - - if (max_pfn <= MAX_DMA32_PFN) - return; - - if (!dma32_bootmem_ptr) - return; - - free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size); - - dma32_bootmem_ptr = NULL; - dma32_bootmem_size = 0; -} -#else -void __init dma32_reserve_bootmem(void) -{ -} -static void __init dma32_free_bootmem(void) -{ -} - -#endif - void __init pci_iommu_alloc(void) { struct iommu_table_entry *p; - /* free the range so iommu could get some range less than 4G */ - dma32_free_bootmem(); - sort_iommu_table(__iommu_table, __iommu_table_end); check_iommu_entries(__iommu_table, __iommu_table_end); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4be9b398470e..cab4f24e2177 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -966,7 +966,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(); memblock_find_dma_reserve(); - dma32_reserve_bootmem(); #ifdef CONFIG_KVM_CLOCK kvmclock_init(); From 5d9c0a795fa3769db681d174b4c15c6e6e4a9b9a Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 15 Apr 2011 10:03:53 +0200 Subject: [PATCH 07/27] PCI: Fix typo in ich7 quirk comment Signed-off-by: Jean Delvare Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 4b2bbe813fce..227ec4f73f11 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -681,7 +681,7 @@ static void __devinit ich7_lpc_generic_decode(struct pci_dev *dev, unsigned reg, /* ICH7-10 has the same common LPC generic IO decode registers */ static void __devinit quirk_ich7_lpc(struct pci_dev *dev) { - /* We share the common ACPI/DPIO decode with ICH6 */ + /* We share the common ACPI/GPIO decode with ICH6 */ ich6_lpc_acpi_gpio(dev); /* And have 4 ICH7+ generic decodes */ From b6d95bb63ce6cea7c1a344aa46bbd6e253a3f6ce Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 15 Apr 2011 10:24:07 +0200 Subject: [PATCH 08/27] PCI: Use ICH6_GPIO_EN in ich6_lpc_acpi_gpio We were just lucky that ICH4_GPIO_EN and ICH6_GPIO_EN happen to have the same value. Signed-off-by: Jean Delvare Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 227ec4f73f11..4fc1b0daedaf 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -606,7 +606,7 @@ static void __devinit ich6_lpc_acpi_gpio(struct pci_dev *dev) } pci_read_config_byte(dev, ICH6_GPIO_CNTL, &enable); - if (enable & ICH4_GPIO_EN) { + if (enable & ICH6_GPIO_EN) { pci_read_config_dword(dev, ICH6_GPIOBASE, ®ion); region &= PCI_BASE_ADDRESS_IO_MASK; if (region >= PCIBIOS_MIN_IO) From d97ecd819137118b4686a753415f93215a6edacf Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Sun, 17 Apr 2011 08:22:21 -0700 Subject: [PATCH 09/27] PCI: check pci_vpd_pci22_wait() return pci_vpd_pci22_write() calls pci_vpd_pci22_wait() after writing PCI_VPD_DATA and PCI_VPD_ADDR to wait for the VPD operation to complete. The result pci_vpd_pci22_wait() was not checked for error. This change checks for error. Signed-off-by: Greg Thelen Signed-off-by: Jesse Barnes --- drivers/pci/access.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 531bc697d800..0c1f20f570a4 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -324,6 +324,8 @@ static ssize_t pci_vpd_pci22_write(struct pci_dev *dev, loff_t pos, size_t count vpd->busy = true; vpd->flag = 0; ret = pci_vpd_pci22_wait(dev); + if (ret < 0) + break; pos += sizeof(u32); } From 34e3207205ef492451cc5c53694d4772a9728b9f Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Sun, 17 Apr 2011 08:20:32 -0700 Subject: [PATCH 10/27] PCI: handle positive error codes Callers expect pci_user_{read,write}_config_*() to indicate errors by returning negative values. Prior to this change, the indicated routines could return positive error codes (e.g. PCIBIOS_BAD_REGISTER_NUMBER) which callers would mistakenly interpret as success. This change converts any non-zero return from the mentioned routines into unambiguous negative value return codes. Signed-off-by: Greg Thelen Signed-off-by: Jesse Barnes --- drivers/pci/access.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 0c1f20f570a4..fdaa42aac7c6 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -143,33 +143,41 @@ static noinline void pci_wait_ucfg(struct pci_dev *dev) __remove_wait_queue(&pci_ucfg_wait, &wait); } +/* Returns 0 on success, negative values indicate error. */ #define PCI_USER_READ_CONFIG(size,type) \ int pci_user_read_config_##size \ (struct pci_dev *dev, int pos, type *val) \ { \ int ret = 0; \ u32 data = -1; \ - if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ + if (PCI_##size##_BAD) \ + return -EINVAL; \ raw_spin_lock_irq(&pci_lock); \ if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \ ret = dev->bus->ops->read(dev->bus, dev->devfn, \ pos, sizeof(type), &data); \ raw_spin_unlock_irq(&pci_lock); \ *val = (type)data; \ + if (ret > 0) \ + ret = -EINVAL; \ return ret; \ } +/* Returns 0 on success, negative values indicate error. */ #define PCI_USER_WRITE_CONFIG(size,type) \ int pci_user_write_config_##size \ (struct pci_dev *dev, int pos, type val) \ { \ int ret = -EIO; \ - if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ + if (PCI_##size##_BAD) \ + return -EINVAL; \ raw_spin_lock_irq(&pci_lock); \ if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \ ret = dev->bus->ops->write(dev->bus, dev->devfn, \ pos, sizeof(type), val); \ raw_spin_unlock_irq(&pci_lock); \ + if (ret > 0) \ + ret = -EINVAL; \ return ret; \ } @@ -197,6 +205,8 @@ struct pci_vpd_pci22 { * This code has to spin since there is no other notification from the PCI * hardware. Since the VPD is often implemented by serial attachment to an * EEPROM, it may take many milliseconds to complete. + * + * Returns 0 on success, negative values indicate error. */ static int pci_vpd_pci22_wait(struct pci_dev *dev) { @@ -212,7 +222,7 @@ static int pci_vpd_pci22_wait(struct pci_dev *dev) for (;;) { ret = pci_user_read_config_word(dev, vpd->cap + PCI_VPD_ADDR, &status); - if (ret) + if (ret < 0) return ret; if ((status & PCI_VPD_ADDR_F) == vpd->flag) { From c0a86a9bea55d505574120f3e9775e3844276505 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Tue, 19 Apr 2011 16:35:15 -0700 Subject: [PATCH 11/27] x86/PCI: irq and pci_ids patch for Intel Panther Point DeviceIDs This patch adds the LPC Controller DeviceIDs for the Intel Panther Point PCH. Acked-by: Jean Delvare Signed-off-by: Seth Heasley Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 4 +++- include/linux/pci_ids.h | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 8201165bae28..372e9b8989b3 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -602,7 +602,9 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route || (device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN && device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX) || (device >= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN && - device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX)) { + device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX) + || (device >= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN && + device <= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX)) { r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4e2c9150a785..52f4ed4de490 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2480,6 +2480,9 @@ #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_SMBUS 0x1e22 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN 0x1e40 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX 0x1e5f #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 From a246670ddee3132fa71f8993d3989ad8ac04d965 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 30 Apr 2011 00:21:38 +0200 Subject: [PATCH 12/27] PCI/ACPI: Report _OSC control mask returned on failure to get control If an attempt to get _OSC control of the PCIe native features from the BIOS fails, report the resulting mask of control flags the BIOS was willing to grant in the error message. Moreover, if the _OSC support mask is insufficient for requesting control of the PCIe native features or pcie_ports_disabled is set, print a diagnostic message containing the _OSC support mask. This helps to diagnose obscure _OSC-related problems on a number machines. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index f911a2f8cc34..d06078d660ad 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -596,12 +596,18 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) dev_info(root->bus->bridge, "ACPI _OSC control (0x%02x) granted\n", flags); } else { - dev_dbg(root->bus->bridge, - "ACPI _OSC request failed (code %d)\n", status); - printk(KERN_INFO "Unable to assume _OSC PCIe control. " - "Disabling ASPM\n"); + dev_info(root->bus->bridge, + "ACPI _OSC request failed (%s), " + "returned control mask: 0x%02x\n", + acpi_format_exception(status), flags); + pr_info("ACPI _OSC control for PCIe not granted, " + "disabling ASPM\n"); pcie_no_aspm(); } + } else { + dev_info(root->bus->bridge, + "Unable to request _OSC control " + "(_OSC support mask: 0x%02x)\n", flags); } pci_acpi_add_bus_pm_notifier(device, root->bus); From e522a7126c7c144a1dd14c6f217ac31e71082b1d Mon Sep 17 00:00:00 2001 From: "Jordan_Hargrave@Dell.com" Date: Mon, 9 May 2011 15:24:55 -0500 Subject: [PATCH 13/27] PCI: Set PCIE maxpayload for card during hotplug insertion The following patch sets the MaxPayload setting to match the parent reading when inserting a PCIE card into a hotplug slot. On our system, the upstream bridge is set to 256, but when inserting a card, the card setting defaults to 128. As soon as I/O is performed to the card it starts receiving errors since the payload size is too small. Reviewed-by: Kenji Kaneshige Signed-off-by: Jordan Hargrave Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pcihp_slot.c | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c index 80b461c98557..749fdf070319 100644 --- a/drivers/pci/hotplug/pcihp_slot.c +++ b/drivers/pci/hotplug/pcihp_slot.c @@ -158,6 +158,47 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) */ } +/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */ +static int pci_set_payload(struct pci_dev *dev) +{ + int pos, ppos; + u16 pctl, psz; + u16 dctl, dsz, dcap, dmax; + struct pci_dev *parent; + + parent = dev->bus->self; + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return 0; + + /* Read Device MaxPayload capability and setting */ + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl); + pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap); + dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; + dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD); + + /* Read Parent MaxPayload setting */ + ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); + if (!ppos) + return 0; + pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); + psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; + + /* If parent payload > device max payload -> error + * If parent payload > device payload -> set speed + * If parent payload <= device payload -> do nothing + */ + if (psz > dmax) + return -1; + else if (psz > dsz) { + dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz); + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, + (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) + + (psz << 5)); + } + return 0; +} + void pci_configure_slot(struct pci_dev *dev) { struct pci_dev *cdev; @@ -169,6 +210,10 @@ void pci_configure_slot(struct pci_dev *dev) (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) return; + ret = pci_set_payload(dev); + if (ret) + dev_warn(&dev->dev, "could not set device max payload\n"); + memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); if (ret) From 69643e4829c5cd13bafe44a6b9f3eb2086e0f618 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 11 May 2011 17:00:32 +0100 Subject: [PATCH 14/27] PCI hotplug: acpiphp: assume device is in state D0 after powering on a slot. Devices which do not support PCI configuration space based power management may not otherwise be enabled. Signed-off-by: Ian Campbell Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/acpiphp_glue.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 2f67e9bc2f96..a70fa89f76fd 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -827,6 +827,13 @@ static int __ref enable_device(struct acpiphp_slot *slot) acpiphp_set_hpp_values(bus); acpiphp_set_acpi_region(slot); pci_enable_bridges(bus); + + list_for_each_entry(dev, &bus->devices, bus_list) { + /* Assume that newly added devices are powered on already. */ + if (!dev->is_added) + dev->current_state = PCI_D0; + } + pci_bus_add_devices(bus); list_for_each_entry(func, &slot->funcs, sibling) { From b48d4425b602f5f4978299474743dbea130d940d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 19 Oct 2010 13:07:57 -0700 Subject: [PATCH 15/27] PCI: add ID-based ordering enable/disable support Add support to allow drivers to enable/disable ID-based ordering. Where supported, ID-based ordering can significantly improve the latency of individual requests by preventing them from queueing up behind unrelated traffic. Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 53 ++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 13 ++++++++++ include/linux/pci_regs.h | 2 ++ 3 files changed, 68 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 44d1c7c3876b..d0182bed7acc 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1834,6 +1834,59 @@ void pci_enable_ari(struct pci_dev *dev) bridge->ari_enabled = 1; } +/** + * pci_enable_ido - enable ID-based ordering on a device + * @dev: the PCI device + * @type: which types of IDO to enable + * + * Enable ID-based ordering on @dev. @type can contain the bits + * %PCI_EXP_IDO_REQUEST and/or %PCI_EXP_IDO_COMPLETION to indicate + * which types of transactions are allowed to be re-ordered. + */ +void pci_enable_ido(struct pci_dev *dev, unsigned long type) +{ + int pos; + u16 ctrl; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (type & PCI_EXP_IDO_REQUEST) + ctrl |= PCI_EXP_IDO_REQ_EN; + if (type & PCI_EXP_IDO_COMPLETION) + ctrl |= PCI_EXP_IDO_CMP_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_enable_ido); + +/** + * pci_disable_ido - disable ID-based ordering on a device + * @dev: the PCI device + * @type: which types of IDO to disable + */ +void pci_disable_ido(struct pci_dev *dev, unsigned long type) +{ + int pos; + u16 ctrl; + + if (!pci_is_pcie(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (type & PCI_EXP_IDO_REQUEST) + ctrl &= ~PCI_EXP_IDO_REQ_EN; + if (type & PCI_EXP_IDO_COMPLETION) + ctrl &= ~PCI_EXP_IDO_CMP_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_ido); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 96f70d7e058d..551ddcb5f940 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -828,6 +828,11 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, return __pci_enable_wake(dev, state, false, enable); } +#define PCI_EXP_IDO_REQUEST (1<<0) +#define PCI_EXP_IDO_COMPLETION (1<<1) +void pci_enable_ido(struct pci_dev *dev, unsigned long type); +void pci_disable_ido(struct pci_dev *dev, unsigned long type); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); @@ -1207,6 +1212,14 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, return 0; } +static inline void pci_enable_ido(struct pci_dev *dev, unsigned long type) +{ +} + +static inline void pci_disable_ido(struct pci_dev *dev, unsigned long type) +{ +} + static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index be01380f798a..d9acf9b99814 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -510,6 +510,8 @@ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ +#define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ From 48a92a8179b3e677fac07db7bd109e68f020468c Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 10 Jan 2011 12:46:36 -0800 Subject: [PATCH 16/27] PCI: add OBFF enable/disable support OBFF (optimized buffer flush/fill), where supported, can help improve energy efficiency by giving devices information about when interrupts and other activity will have a reduced power impact. It requires support from both the device and system (i.e. not only does the device need to respond to OBFF messages, but the platform must be capable of generating and routing them to the end point). Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 92 ++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 16 +++++++ include/linux/pci_regs.h | 6 +++ 3 files changed, 114 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d0182bed7acc..01e4cab2e5cb 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1887,6 +1887,98 @@ void pci_disable_ido(struct pci_dev *dev, unsigned long type) } EXPORT_SYMBOL(pci_disable_ido); +/** + * pci_enable_obff - enable optimized buffer flush/fill + * @dev: PCI device + * @type: type of signaling to use + * + * Try to enable @type OBFF signaling on @dev. It will try using WAKE# + * signaling if possible, falling back to message signaling only if + * WAKE# isn't supported. @type should indicate whether the PCIe link + * be brought out of L0s or L1 to send the message. It should be either + * %PCI_EXP_OBFF_SIGNAL_ALWAYS or %PCI_OBFF_SIGNAL_L0. + * + * If your device can benefit from receiving all messages, even at the + * power cost of bringing the link back up from a low power state, use + * %PCI_EXP_OBFF_SIGNAL_ALWAYS. Otherwise, use %PCI_OBFF_SIGNAL_L0 (the + * preferred type). + * + * RETURNS: + * Zero on success, appropriate error number on failure. + */ +int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type type) +{ + int pos; + u32 cap; + u16 ctrl; + int ret; + + if (!pci_is_pcie(dev)) + return -ENOTSUPP; + + pos = pci_pcie_cap(dev); + if (!pos) + return -ENOTSUPP; + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap); + if (!(cap & PCI_EXP_OBFF_MASK)) + return -ENOTSUPP; /* no OBFF support at all */ + + /* Make sure the topology supports OBFF as well */ + if (dev->bus) { + ret = pci_enable_obff(dev->bus->self, type); + if (ret) + return ret; + } + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (cap & PCI_EXP_OBFF_WAKE) + ctrl |= PCI_EXP_OBFF_WAKE_EN; + else { + switch (type) { + case PCI_EXP_OBFF_SIGNAL_L0: + if (!(ctrl & PCI_EXP_OBFF_WAKE_EN)) + ctrl |= PCI_EXP_OBFF_MSGA_EN; + break; + case PCI_EXP_OBFF_SIGNAL_ALWAYS: + ctrl &= ~PCI_EXP_OBFF_WAKE_EN; + ctrl |= PCI_EXP_OBFF_MSGB_EN; + break; + default: + WARN(1, "bad OBFF signal type\n"); + return -ENOTSUPP; + } + } + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + + return 0; +} +EXPORT_SYMBOL(pci_enable_obff); + +/** + * pci_disable_obff - disable optimized buffer flush/fill + * @dev: PCI device + * + * Disable OBFF on @dev. + */ +void pci_disable_obff(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + + if (!pci_is_pcie(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl &= ~PCI_EXP_OBFF_WAKE_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_obff); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 551ddcb5f940..45a035cccd93 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -833,6 +833,13 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, void pci_enable_ido(struct pci_dev *dev, unsigned long type); void pci_disable_ido(struct pci_dev *dev, unsigned long type); +enum pci_obff_signal_type { + PCI_EXP_OBFF_SIGNAL_L0, + PCI_EXP_OBFF_SIGNAL_ALWAYS, +}; +int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type); +void pci_disable_obff(struct pci_dev *dev); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); @@ -1220,6 +1227,15 @@ static inline void pci_disable_ido(struct pci_dev *dev, unsigned long type) { } +static inline int pci_enable_obff(struct pci_dev *dev, unsigned long type) +{ + return 0; +} + +static inline void pci_disable_obff(struct pci_dev *dev) +{ +} + static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index d9acf9b99814..aa420261843d 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -508,10 +508,16 @@ #define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_OBFF_MASK 0xc0000 /* OBFF support mechanism */ +#define PCI_EXP_OBFF_MSG 0x40000 /* New message signaling */ +#define PCI_EXP_OBFF_WAKE 0x80000 /* Re-use WAKE# for OBFF */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ #define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ +#define PCI_EXP_OBFF_MSGA_EN 0x2000 /* OBFF enable with Message type A */ +#define PCI_EXP_OBFF_MSGB_EN 0x4000 /* OBFF enable with Message type B */ +#define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ From 51c2e0a7e5bc7ed1384cc68cfb95e702571500c9 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 14 Jan 2011 08:53:04 -0800 Subject: [PATCH 17/27] PCI: add latency tolerance reporting enable/disable support Latency tolerance reporting allows devices to send messages to the root complex indicating their latency tolerance for snooped & unsnooped memory transactions. Add support for enabling & disabling this feature, along with a routine to set the max latencies a device should send upstream. Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 149 +++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 5 ++ include/linux/pci_regs.h | 9 +++ 3 files changed, 163 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 01e4cab2e5cb..53302cbdb94c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1979,6 +1979,155 @@ void pci_disable_obff(struct pci_dev *dev) } EXPORT_SYMBOL(pci_disable_obff); +/** + * pci_ltr_supported - check whether a device supports LTR + * @dev: PCI device + * + * RETURNS: + * True if @dev supports latency tolerance reporting, false otherwise. + */ +bool pci_ltr_supported(struct pci_dev *dev) +{ + int pos; + u32 cap; + + if (!pci_is_pcie(dev)) + return false; + + pos = pci_pcie_cap(dev); + if (!pos) + return false; + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap); + + return cap & PCI_EXP_DEVCAP2_LTR; +} +EXPORT_SYMBOL(pci_ltr_supported); + +/** + * pci_enable_ltr - enable latency tolerance reporting + * @dev: PCI device + * + * Enable LTR on @dev if possible, which means enabling it first on + * upstream ports. + * + * RETURNS: + * Zero on success, errno on failure. + */ +int pci_enable_ltr(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + int ret; + + if (!pci_ltr_supported(dev)) + return -ENOTSUPP; + + pos = pci_pcie_cap(dev); + if (!pos) + return -ENOTSUPP; + + /* Only primary function can enable/disable LTR */ + if (PCI_FUNC(dev->devfn) != 0) + return -EINVAL; + + /* Enable upstream ports first */ + if (dev->bus) { + ret = pci_enable_ltr(dev->bus->self); + if (ret) + return ret; + } + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl |= PCI_EXP_LTR_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + + return 0; +} +EXPORT_SYMBOL(pci_enable_ltr); + +/** + * pci_disable_ltr - disable latency tolerance reporting + * @dev: PCI device + */ +void pci_disable_ltr(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + + if (!pci_ltr_supported(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + /* Only primary function can enable/disable LTR */ + if (PCI_FUNC(dev->devfn) != 0) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl &= ~PCI_EXP_LTR_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_ltr); + +static int __pci_ltr_scale(int *val) +{ + int scale = 0; + + while (*val > 1023) { + *val = (*val + 31) / 32; + scale++; + } + return scale; +} + +/** + * pci_set_ltr - set LTR latency values + * @dev: PCI device + * @snoop_lat_ns: snoop latency in nanoseconds + * @nosnoop_lat_ns: nosnoop latency in nanoseconds + * + * Figure out the scale and set the LTR values accordingly. + */ +int pci_set_ltr(struct pci_dev *dev, int snoop_lat_ns, int nosnoop_lat_ns) +{ + int pos, ret, snoop_scale, nosnoop_scale; + u16 val; + + if (!pci_ltr_supported(dev)) + return -ENOTSUPP; + + snoop_scale = __pci_ltr_scale(&snoop_lat_ns); + nosnoop_scale = __pci_ltr_scale(&nosnoop_lat_ns); + + if (snoop_lat_ns > PCI_LTR_VALUE_MASK || + nosnoop_lat_ns > PCI_LTR_VALUE_MASK) + return -EINVAL; + + if ((snoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT)) || + (nosnoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT))) + return -EINVAL; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR); + if (!pos) + return -ENOTSUPP; + + val = (snoop_scale << PCI_LTR_SCALE_SHIFT) | snoop_lat_ns; + ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_SNOOP_LAT, val); + if (ret != 4) + return -EIO; + + val = (nosnoop_scale << PCI_LTR_SCALE_SHIFT) | nosnoop_lat_ns; + ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_NOSNOOP_LAT, val); + if (ret != 4) + return -EIO; + + return 0; +} +EXPORT_SYMBOL(pci_set_ltr); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 45a035cccd93..df4d69b82144 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -840,6 +840,11 @@ enum pci_obff_signal_type { int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type); void pci_disable_obff(struct pci_dev *dev); +bool pci_ltr_supported(struct pci_dev *dev); +int pci_enable_ltr(struct pci_dev *dev); +void pci_disable_ltr(struct pci_dev *dev); +int pci_set_ltr(struct pci_dev *dev, int snoop_lat_ns, int nosnoop_lat_ns); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index aa420261843d..e8840964aca1 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -508,6 +508,7 @@ #define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCAP2_LTR 0x800 /* Latency tolerance reporting */ #define PCI_EXP_OBFF_MASK 0xc0000 /* OBFF support mechanism */ #define PCI_EXP_OBFF_MSG 0x40000 /* New message signaling */ #define PCI_EXP_OBFF_WAKE 0x80000 /* Re-use WAKE# for OBFF */ @@ -515,6 +516,7 @@ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ #define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ +#define PCI_EXP_LTR_EN 0x400 /* Latency tolerance reporting */ #define PCI_EXP_OBFF_MSGA_EN 0x2000 /* OBFF enable with Message type A */ #define PCI_EXP_OBFF_MSGB_EN 0x4000 /* OBFF enable with Message type B */ #define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ @@ -535,6 +537,7 @@ #define PCI_EXT_CAP_ID_ARI 14 #define PCI_EXT_CAP_ID_ATS 15 #define PCI_EXT_CAP_ID_SRIOV 16 +#define PCI_EXT_CAP_ID_LTR 24 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -691,6 +694,12 @@ #define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ #define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ +#define PCI_LTR_MAX_SNOOP_LAT 0x4 +#define PCI_LTR_MAX_NOSNOOP_LAT 0x6 +#define PCI_LTR_VALUE_MASK 0x000003ff +#define PCI_LTR_SCALE_MASK 0x00001c00 +#define PCI_LTR_SCALE_SHIFT 10 + /* Access Control Service */ #define PCI_ACS_CAP 0x04 /* ACS Capability Register */ #define PCI_ACS_SV 0x01 /* Source Validation */ From a3170c1f924ce2565c4e160b9b095e65c03b2dc6 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 23 Feb 2011 10:08:10 +0000 Subject: [PATCH 18/27] x86/PCI: derive pcibios_last_bus from ACPI MCFG On various newer Intel systems the PCI bus(ses) the non-core devices live on aren't getting announced by ACPI except through the bus range covered by mmconfig. At least the i7core-edac driver depends on these devices getting detected. Mauro, could you check whether with this change the Xeon 55xx hack in that driver can go away altogether, and with it the bogus exporting of pcibios_scan_specific_bus()? Signed-off-by: Jan Beulich Cc: Mauro Carvalho Chehab Cc: Aristeu Sergio Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index e282886616a0..750c346ef50a 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -606,6 +606,16 @@ static void __init __pci_mmcfg_init(int early) if (list_empty(&pci_mmcfg_list)) return; + if (pcibios_last_bus < 0) { + const struct pci_mmcfg_region *cfg; + + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (cfg->segment) + break; + pcibios_last_bus = cfg->end_bus; + } + } + if (pci_mmcfg_arch_init()) pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; else { From 9f728f53dd70396f3183d2f0861022259471824b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 12 May 2011 17:11:47 -0700 Subject: [PATCH 19/27] PCI/e1000e: Add and use pci_disable_link_state_locked() Need to use it in _e1000e_disable_aspm. This routine is used for error recovery, where the pci_bus_sem is already held, and we don't want pci_disable_link_state to try to take it again. So add a locked variant for use in cases like this. Found lock up: [ 2374.654557] kworker/32:1 D ffff881027f6b0f0 0 6075 2 0x00000000 [ 2374.654816] ffff88503f099a68 0000000000000046 ffff88503f098000 0000000000004000 [ 2374.654837] 00000000001d1ec0 ffff88503f099fd8 00000000001d1ec0 ffff88503f099fd8 [ 2374.654860] 0000000000004000 00000000001d1ec0 ffff88503dcc8000 ffff88503f090000 [ 2374.654880] Call Trace: [ 2374.654898] [] ? __lock_acquired+0x3a/0x224 [ 2374.654914] [] ? _raw_spin_unlock_irq+0x30/0x36 [ 2374.654925] [] ? trace_hardirqs_on_caller+0x1f/0x178 [ 2374.654936] [] rwsem_down_failed_common+0xd3/0x103 [ 2374.654945] [] ? __lock_contended+0x3a/0x2a2 [ 2374.654955] [] rwsem_down_read_failed+0x12/0x14 [ 2374.654967] [] call_rwsem_down_read_failed+0x14/0x30 [ 2374.654981] [] ? pci_disable_link_state+0x5f/0xf5 [ 2374.654990] [] ? down_read+0x7e/0x91 [ 2374.654999] [] ? pci_disable_link_state+0x5f/0xf5 [ 2374.655008] [] pci_disable_link_state+0x5f/0xf5 [ 2374.655024] [] e1000e_disable_aspm+0x55/0x5a [ 2374.655037] [] e1000_io_slot_reset+0x59/0xea [ 2374.655048] [] ? report_mmio_enabled+0x5d/0x5d [ 2374.655057] [] report_slot_reset+0x2e/0x5d [ 2374.655072] [] pci_walk_bus+0x8a/0xb7 [ 2374.655081] [] ? report_mmio_enabled+0x5d/0x5d [ 2374.655091] [] broadcast_error_message+0xa4/0xb2 [ 2374.655101] [] ? pci_bus_read_config_dword+0x72/0x80 [ 2374.655110] [] do_recovery+0x9e/0xf9 [ 2374.655120] [] handle_error_source+0x4c/0x51 [ 2374.655129] [] aer_isr_one_error+0x1e9/0x21a [ 2374.655138] [] aer_isr+0xc7/0xcc [ 2374.655147] [] ? aer_isr_one_error+0x21a/0x21a [ 2374.655159] [] process_one_work+0x237/0x3ec [ 2374.655168] [] ? process_one_work+0x1a8/0x3ec [ 2374.655178] [] worker_thread+0x17c/0x240 [ 2374.655186] [] ? trace_hardirqs_on+0xd/0xf [ 2374.655196] [] ? manage_workers+0xab/0xab [ 2374.655209] [] kthread+0xa0/0xa8 [ 2374.655223] [] kernel_thread_helper+0x4/0x10 [ 2374.655232] [] ? retint_restore_args+0xe/0xe [ 2374.655243] [] ? __init_kthread_worker+0x5b/0x5b [ 2374.655252] [] ? gs_change+0xb/0xb when aer happens, pci_walk_bus already have down_read(&pci_bus_sem)... then report_slot_reset ==> e1000_io_slot_reset ==> e1000e_disable_aspm ==> pci_disable_link_state... We can not use pci_disable_link_state, and it will try to hold pci_bus_sem again. Try to have __pci_disable_link_state that will not need to hold pci_bus_sem. -v2: change name to pci_disable_link_state_locked() according to Jesse. [jbarnes: make sure new function is exported for modules] Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/net/e1000e/netdev.c | 2 +- drivers/pci/pcie/aspm.c | 19 ++++++++++++++++--- include/linux/pci-aspm.h | 1 + 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 506a0a0043b3..5fb43f098f17 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -5347,7 +5347,7 @@ static void e1000_complete_shutdown(struct pci_dev *pdev, bool sleep, #ifdef CONFIG_PCIEASPM static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) { - pci_disable_link_state(pdev, state); + pci_disable_link_state_locked(pdev, state); } #else static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 3eb667b24787..6892601fc76f 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -734,7 +734,7 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev) * pci_disable_link_state - disable pci device's link state, so the link will * never enter specific states */ -void pci_disable_link_state(struct pci_dev *pdev, int state) +static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) { struct pci_dev *parent = pdev->bus->self; struct pcie_link_state *link; @@ -747,7 +747,8 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) if (!parent || !parent->link_state) return; - down_read(&pci_bus_sem); + if (sem) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); link = parent->link_state; if (state & PCIE_LINK_STATE_L0S) @@ -761,7 +762,19 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) pcie_set_clkpm(link, 0); } mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (sem) + up_read(&pci_bus_sem); +} + +void pci_disable_link_state_locked(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, false); +} +EXPORT_SYMBOL(pci_disable_link_state_locked); + +void pci_disable_link_state(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, true); } EXPORT_SYMBOL(pci_disable_link_state); diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h index 67cb3ae38016..7cea7b6c1413 100644 --- a/include/linux/pci-aspm.h +++ b/include/linux/pci-aspm.h @@ -28,6 +28,7 @@ extern void pcie_aspm_exit_link_state(struct pci_dev *pdev); extern void pcie_aspm_pm_state_change(struct pci_dev *pdev); extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev); extern void pci_disable_link_state(struct pci_dev *pdev, int state); +extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state); extern void pcie_clear_aspm(void); extern void pcie_no_aspm(void); #else From 24a4742f0be6226eb0106fbb17caf4d711d1ad43 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 10 May 2011 10:02:11 -0600 Subject: [PATCH 20/27] PCI: Track the size of each saved capability data area This will allow us to store and load it later. Signed-off-by: Alex Williamson Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 12 +++++++----- include/linux/pci.h | 11 ++++++++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 53302cbdb94c..d6e5b8ea9194 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -830,7 +830,7 @@ static int pci_save_pcie_state(struct pci_dev *dev) dev_err(&dev->dev, "buffer not found in %s\n", __func__); return -ENOMEM; } - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); @@ -863,7 +863,7 @@ static void pci_restore_pcie_state(struct pci_dev *dev) pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!save_state || pos <= 0) return; - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); @@ -899,7 +899,8 @@ static int pci_save_pcix_state(struct pci_dev *dev) return -ENOMEM; } - pci_read_config_word(dev, pos + PCI_X_CMD, (u16 *)save_state->data); + pci_read_config_word(dev, pos + PCI_X_CMD, + (u16 *)save_state->cap.data); return 0; } @@ -914,7 +915,7 @@ static void pci_restore_pcix_state(struct pci_dev *dev) pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); if (!save_state || pos <= 0) return; - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]); } @@ -1771,7 +1772,8 @@ static int pci_add_cap_save_buffer( if (!save_state) return -ENOMEM; - save_state->cap_nr = cap; + save_state->cap.cap_nr = cap; + save_state->cap.size = size; pci_add_saved_cap(dev, save_state); return 0; diff --git a/include/linux/pci.h b/include/linux/pci.h index df4d69b82144..61ef8f2f9b19 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -214,10 +214,15 @@ enum pci_bus_speed { PCI_SPEED_UNKNOWN = 0xff, }; +struct pci_cap_saved_data { + char cap_nr; + unsigned int size; + u32 data[0]; +}; + struct pci_cap_saved_state { struct hlist_node next; - char cap_nr; - u32 data[0]; + struct pci_cap_saved_data cap; }; struct pcie_link_state; @@ -366,7 +371,7 @@ static inline struct pci_cap_saved_state *pci_find_saved_cap( struct hlist_node *pos; hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) { - if (tmp->cap_nr == cap) + if (tmp->cap.cap_nr == cap) return tmp; } return NULL; From ffbdd3f7931fb7cb7e36d00d16303ec433be5145 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 10 May 2011 10:02:27 -0600 Subject: [PATCH 21/27] PCI: Add interfaces to store and load the device saved state For KVM device assignment, we'd like to save off the state of a device prior to passing it to the guest and restore it later. We also want to allow pci_reset_funciton() to be called while the device is owned by the guest. This however overwrites and invalidates the struct pci_dev buffers, so we can't just manually call save and restore. Add generic interfaces for the saved state to be stored and reloaded back into struct pci_dev at a later time. Signed-off-by: Alex Williamson Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 98 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 4 ++ 2 files changed, 102 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d6e5b8ea9194..22c9b27fdd8d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -976,6 +976,104 @@ void pci_restore_state(struct pci_dev *dev) dev->state_saved = false; } +struct pci_saved_state { + u32 config_space[16]; + struct pci_cap_saved_data cap[0]; +}; + +/** + * pci_store_saved_state - Allocate and return an opaque struct containing + * the device saved state. + * @dev: PCI device that we're dealing with + * + * Rerturn NULL if no state or error. + */ +struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) +{ + struct pci_saved_state *state; + struct pci_cap_saved_state *tmp; + struct pci_cap_saved_data *cap; + struct hlist_node *pos; + size_t size; + + if (!dev->state_saved) + return NULL; + + size = sizeof(*state) + sizeof(struct pci_cap_saved_data); + + hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) + size += sizeof(struct pci_cap_saved_data) + tmp->cap.size; + + state = kzalloc(size, GFP_KERNEL); + if (!state) + return NULL; + + memcpy(state->config_space, dev->saved_config_space, + sizeof(state->config_space)); + + cap = state->cap; + hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) { + size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size; + memcpy(cap, &tmp->cap, len); + cap = (struct pci_cap_saved_data *)((u8 *)cap + len); + } + /* Empty cap_save terminates list */ + + return state; +} +EXPORT_SYMBOL_GPL(pci_store_saved_state); + +/** + * pci_load_saved_state - Reload the provided save state into struct pci_dev. + * @dev: PCI device that we're dealing with + * @state: Saved state returned from pci_store_saved_state() + */ +int pci_load_saved_state(struct pci_dev *dev, struct pci_saved_state *state) +{ + struct pci_cap_saved_data *cap; + + dev->state_saved = false; + + if (!state) + return 0; + + memcpy(dev->saved_config_space, state->config_space, + sizeof(state->config_space)); + + cap = state->cap; + while (cap->size) { + struct pci_cap_saved_state *tmp; + + tmp = pci_find_saved_cap(dev, cap->cap_nr); + if (!tmp || tmp->cap.size != cap->size) + return -EINVAL; + + memcpy(tmp->cap.data, cap->data, tmp->cap.size); + cap = (struct pci_cap_saved_data *)((u8 *)cap + + sizeof(struct pci_cap_saved_data) + cap->size); + } + + dev->state_saved = true; + return 0; +} +EXPORT_SYMBOL_GPL(pci_load_saved_state); + +/** + * pci_load_and_free_saved_state - Reload the save state pointed to by state, + * and free the memory allocated for it. + * @dev: PCI device that we're dealing with + * @state: Pointer to saved state returned from pci_store_saved_state() + */ +int pci_load_and_free_saved_state(struct pci_dev *dev, + struct pci_saved_state **state) +{ + int ret = pci_load_saved_state(dev, *state); + kfree(*state); + *state = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(pci_load_and_free_saved_state); + static int do_pci_enable_device(struct pci_dev *dev, int bars) { int err; diff --git a/include/linux/pci.h b/include/linux/pci.h index 61ef8f2f9b19..4604d1d5514d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -812,6 +812,10 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); void pci_restore_state(struct pci_dev *dev); +struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev); +int pci_load_saved_state(struct pci_dev *dev, struct pci_saved_state *state); +int pci_load_and_free_saved_state(struct pci_dev *dev, + struct pci_saved_state **state); int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); From f8fcfd775523347afe460dc3a0f45d0479e784a2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 10 May 2011 10:02:39 -0600 Subject: [PATCH 22/27] KVM: Use pci_store/load_saved_state() around VM device usage Store the device saved state so that we can reload the device back to the original state when it's unassigned. This has the benefit that the state survives across pci_reset_function() calls via the PCI sysfs reset interface while the VM is using the device. Signed-off-by: Alex Williamson Acked-by: Avi Kivity Signed-off-by: Jesse Barnes --- include/linux/kvm_host.h | 1 + virt/kvm/assigned-dev.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ab428552af8e..9272db03a3e5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -513,6 +513,7 @@ struct kvm_assigned_dev_kernel { struct kvm *kvm; spinlock_t intx_lock; char irq_name[32]; + struct pci_saved_state *pci_saved_state; }; struct kvm_irq_mask_notifier { diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index ae72ae604c89..6cc4b97ec458 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -197,8 +197,13 @@ static void kvm_free_assigned_device(struct kvm *kvm, { kvm_free_assigned_irq(kvm, assigned_dev); - __pci_reset_function(assigned_dev->dev); - pci_restore_state(assigned_dev->dev); + pci_reset_function(assigned_dev->dev); + if (pci_load_and_free_saved_state(assigned_dev->dev, + &assigned_dev->pci_saved_state)) + printk(KERN_INFO "%s: Couldn't reload %s saved state\n", + __func__, dev_name(&assigned_dev->dev->dev)); + else + pci_restore_state(assigned_dev->dev); pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); @@ -516,7 +521,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, pci_reset_function(dev); pci_save_state(dev); - + match->pci_saved_state = pci_store_saved_state(dev); + if (!match->pci_saved_state) + printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", + __func__, dev_name(&dev->dev)); match->assigned_dev_id = assigned_dev->assigned_dev_id; match->host_segnr = assigned_dev->segnr; match->host_busnr = assigned_dev->busnr; @@ -546,7 +554,9 @@ out: mutex_unlock(&kvm->lock); return r; out_list_del: - pci_restore_state(dev); + if (pci_load_and_free_saved_state(dev, &match->pci_saved_state)) + printk(KERN_INFO "%s: Couldn't reload %s saved state\n", + __func__, dev_name(&dev->dev)); list_del(&match->list); pci_release_regions(dev); out_disable: From da7822e5ad71ec9b745b412639f1e5e0ba795a20 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 12 May 2011 17:11:37 -0700 Subject: [PATCH 23/27] PCI: update bridge resources to get more big ranges when allocating space (again) With Ram's fixes, this should be safe to do again. So let's give it another try. BIOS separates IO ranges between several IOHs, and on some slots, BIOS assigns resources to a bridge, but stops assigning resources to the device under that bridge, because the device needs a big resource. So: 1. allocate resources and record the failed device resources 2. clear the BIOS assigned resources of the parent bridge of failing device 3. go back and call pci assign unassigned 4. if it still fails, go up the tree, clear more bridges. and try again Now Ram's allocate requested resource already got into mainline. could put this one again. Reviewed-by: Ram Pai Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 125 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 8 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index ebf51ad1b714..7a65db400253 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -991,30 +991,139 @@ static void pci_bus_dump_resources(struct pci_bus *bus) } } +static int __init pci_bus_get_depth(struct pci_bus *bus) +{ + int depth = 0; + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + int ret; + struct pci_bus *b = dev->subordinate; + if (!b) + continue; + + ret = pci_bus_get_depth(b); + if (ret + 1 > depth) + depth = ret + 1; + } + + return depth; +} +static int __init pci_get_max_depth(void) +{ + int depth = 0; + struct pci_bus *bus; + + list_for_each_entry(bus, &pci_root_buses, node) { + int ret; + + ret = pci_bus_get_depth(bus); + if (ret > depth) + depth = ret; + } + + return depth; +} + +/* + * first try will not touch pci bridge res + * second and later try will clear small leaf bridge res + * will stop till to the max deepth if can not find good one + */ void __init pci_assign_unassigned_resources(void) { struct pci_bus *bus; struct resource_list_x add_list; /* list of resources that want additional resources */ + int tried_times = 0; + enum release_type rel_type = leaf_only; + struct resource_list_x head, *list; + unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM | + IORESOURCE_PREFETCH; + unsigned long failed_type; + int max_depth = pci_get_max_depth(); + int pci_try_num; + + + head.next = NULL; add_list.next = NULL; + + pci_try_num = max_depth + 1; + printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n", + max_depth, pci_try_num); + +again: /* Depth first, calculate sizes and alignments of all subordinate buses. */ - list_for_each_entry(bus, &pci_root_buses, node) { + list_for_each_entry(bus, &pci_root_buses, node) __pci_bus_size_bridges(bus, &add_list); - } /* Depth last, allocate resources and update the hardware. */ - list_for_each_entry(bus, &pci_root_buses, node) { - __pci_bus_assign_resources(bus, &add_list, NULL); - pci_enable_bridges(bus); - } + list_for_each_entry(bus, &pci_root_buses, node) + __pci_bus_assign_resources(bus, &add_list, &head); BUG_ON(add_list.next); + tried_times++; + + /* any device complain? */ + if (!head.next) + goto enable_and_dump; + failed_type = 0; + for (list = head.next; list;) { + failed_type |= list->flags; + list = list->next; + } + /* + * io port are tight, don't try extra + * or if reach the limit, don't want to try more + */ + failed_type &= type_mask; + if ((failed_type == IORESOURCE_IO) || (tried_times >= pci_try_num)) { + free_list(resource_list_x, &head); + goto enable_and_dump; + } + + printk(KERN_DEBUG "PCI: No. %d try to assign unassigned res\n", + tried_times + 1); + + /* third times and later will not check if it is leaf */ + if ((tried_times + 1) > 2) + rel_type = whole_subtree; + + /* + * Try to release leaf bridge's resources that doesn't fit resource of + * child device under that bridge + */ + for (list = head.next; list;) { + bus = list->dev->bus; + pci_bus_release_bridge_resources(bus, list->flags & type_mask, + rel_type); + list = list->next; + } + /* restore size and flags */ + for (list = head.next; list;) { + struct resource *res = list->res; + + res->start = list->start; + res->end = list->end; + res->flags = list->flags; + if (list->dev->subordinate) + res->flags = 0; + + list = list->next; + } + free_list(resource_list_x, &head); + + goto again; + +enable_and_dump: + /* Depth last, update the hardware. */ + list_for_each_entry(bus, &pci_root_buses, node) + pci_enable_bridges(bus); /* dump the resource on buses */ - list_for_each_entry(bus, &pci_root_buses, node) { + list_for_each_entry(bus, &pci_root_buses, node) pci_bus_dump_resources(bus); - } } void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge) From b9d320fcb6259baffaeaf93a5fce252cd09333d6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 12 May 2011 17:11:39 -0700 Subject: [PATCH 24/27] PCI: add rescan to /sys/.../pci_bus/.../ After remove the device from /sys, we have to rescan all or find out the bridge and access /sys../device/rescan there. this patch add /sys/.../pci_bus/.../rescan. So user can rescan more easy. that is more clean and easy to understand. like after remove 0000:c4:00.0, you can rescan 0000:c4 directly. -v2: According to Jesse, use function instead of exposing attr, so could hide #ifdef in header file. also add code to remove rescan file in remove path. -v3: GregKH pointed out that we should use dev_attrs to avoid racing. So add pcibus_attrs and make it to be member of pcibus_attrs. -v4: Change name to pcibus_dev_attrs according to GregKH Acked-by: Greg Kroah-Hartman Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- Documentation/ABI/testing/sysfs-bus-pci | 9 +++++++++ drivers/pci/pci-sysfs.c | 26 +++++++++++++++++++++++++ drivers/pci/pci.h | 1 + drivers/pci/probe.c | 1 + 4 files changed, 37 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index 36bf454ba855..349ecf26ce10 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -74,6 +74,15 @@ Description: hot-remove the PCI device and any of its children. Depends on CONFIG_HOTPLUG. +What: /sys/bus/pci/devices/.../pci_bus/.../rescan +Date: May 2011 +Contact: Linux PCI developers +Description: + Writing a non-zero value to this attribute will + force a rescan of the bus and all child buses, + and re-discover devices removed earlier from this + part of the device tree. Depends on CONFIG_HOTPLUG. + What: /sys/bus/pci/devices/.../rescan Date: January 2009 Contact: Linux PCI developers diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index f8deb3e380a2..c690abc0e5b8 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -318,6 +318,25 @@ remove_store(struct device *dev, struct device_attribute *dummy, count = ret; return count; } + +static ssize_t +dev_bus_rescan_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + struct pci_bus *bus = to_pci_bus(dev); + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (val) { + mutex_lock(&pci_remove_rescan_mutex); + pci_rescan_bus(bus); + mutex_unlock(&pci_remove_rescan_mutex); + } + return count; +} + #endif struct device_attribute pci_dev_attrs[] = { @@ -347,6 +366,13 @@ struct device_attribute pci_dev_attrs[] = { __ATTR_NULL, }; +struct device_attribute pcibus_dev_attrs[] = { +#ifdef CONFIG_HOTPLUG + __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_bus_rescan_store), +#endif + __ATTR_NULL, +}; + static ssize_t boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index a6ec200fe5ee..dcf640ad8df6 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -156,6 +156,7 @@ static inline int pci_no_d1d2(struct pci_dev *dev) } extern struct device_attribute pci_dev_attrs[]; +extern struct device_attribute pcibus_dev_attrs[]; extern struct device_attribute dev_attr_cpuaffinity; extern struct device_attribute dev_attr_cpulistaffinity; #ifdef CONFIG_HOTPLUG diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 44cbbbaa499d..c471295cd4b9 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -95,6 +95,7 @@ static void release_pcibus_dev(struct device *dev) static struct class pcibus_class = { .name = "pci_bus", .dev_release = &release_pcibus_dev, + .dev_attrs = pcibus_dev_attrs, }; static int __init pcibus_class_init(void) From dc2c2c9dd513dec6c17df04e8abff795e20a5271 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 12 May 2011 17:11:40 -0700 Subject: [PATCH 25/27] PCI/sysfs: move bus cpuaffinity to class dev_attrs Requested by Greg KH to fix a race condition in the creating of PCI bus cpuaffinity files. Acked-by: Greg Kroah-Hartman Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/pci/bus.c | 6 ------ drivers/pci/pci-sysfs.c | 36 +++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 2 -- drivers/pci/probe.c | 42 ----------------------------------------- drivers/pci/remove.c | 2 -- 5 files changed, 36 insertions(+), 52 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 69546e9213dd..1e2ad92a4752 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -163,12 +163,6 @@ int pci_bus_add_child(struct pci_bus *bus) bus->is_added = 1; - retval = device_create_file(&bus->dev, &dev_attr_cpuaffinity); - if (retval) - return retval; - - retval = device_create_file(&bus->dev, &dev_attr_cpulistaffinity); - /* Create legacy_io and legacy_mem files for this bus */ pci_create_legacy_files(bus); diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index c690abc0e5b8..7bcf12adced7 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -108,6 +108,40 @@ static ssize_t local_cpulist_show(struct device *dev, return len; } +/* + * PCI Bus Class Devices + */ +static ssize_t pci_bus_show_cpuaffinity(struct device *dev, + int type, + struct device_attribute *attr, + char *buf) +{ + int ret; + const struct cpumask *cpumask; + + cpumask = cpumask_of_pcibus(to_pci_bus(dev)); + ret = type ? + cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask) : + cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask); + buf[ret++] = '\n'; + buf[ret] = '\0'; + return ret; +} + +static inline ssize_t pci_bus_show_cpumaskaffinity(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return pci_bus_show_cpuaffinity(dev, 0, attr, buf); +} + +static inline ssize_t pci_bus_show_cpulistaffinity(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return pci_bus_show_cpuaffinity(dev, 1, attr, buf); +} + /* show resources */ static ssize_t resource_show(struct device * dev, struct device_attribute *attr, char * buf) @@ -370,6 +404,8 @@ struct device_attribute pcibus_dev_attrs[] = { #ifdef CONFIG_HOTPLUG __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_bus_rescan_store), #endif + __ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpumaskaffinity, NULL), + __ATTR(cpulistaffinity, S_IRUGO, pci_bus_show_cpulistaffinity, NULL), __ATTR_NULL, }; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index dcf640ad8df6..4ee9e8a2607f 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -157,8 +157,6 @@ static inline int pci_no_d1d2(struct pci_dev *dev) } extern struct device_attribute pci_dev_attrs[]; extern struct device_attribute pcibus_dev_attrs[]; -extern struct device_attribute dev_attr_cpuaffinity; -extern struct device_attribute dev_attr_cpulistaffinity; #ifdef CONFIG_HOTPLUG extern struct bus_attribute pci_bus_attrs[]; #else diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c471295cd4b9..48849ffdd672 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -42,43 +42,6 @@ int no_pci_devices(void) } EXPORT_SYMBOL(no_pci_devices); -/* - * PCI Bus Class Devices - */ -static ssize_t pci_bus_show_cpuaffinity(struct device *dev, - int type, - struct device_attribute *attr, - char *buf) -{ - int ret; - const struct cpumask *cpumask; - - cpumask = cpumask_of_pcibus(to_pci_bus(dev)); - ret = type? - cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask) : - cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask); - buf[ret++] = '\n'; - buf[ret] = '\0'; - return ret; -} - -static ssize_t inline pci_bus_show_cpumaskaffinity(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return pci_bus_show_cpuaffinity(dev, 0, attr, buf); -} - -static ssize_t inline pci_bus_show_cpulistaffinity(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return pci_bus_show_cpuaffinity(dev, 1, attr, buf); -} - -DEVICE_ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpumaskaffinity, NULL); -DEVICE_ATTR(cpulistaffinity, S_IRUGO, pci_bus_show_cpulistaffinity, NULL); - /* * PCI Bus Class */ @@ -1456,9 +1419,6 @@ struct pci_bus * pci_create_bus(struct device *parent, error = device_register(&b->dev); if (error) goto class_dev_reg_err; - error = device_create_file(&b->dev, &dev_attr_cpuaffinity); - if (error) - goto dev_create_file_err; /* Create legacy_io and legacy_mem files for this bus */ pci_create_legacy_files(b); @@ -1469,8 +1429,6 @@ struct pci_bus * pci_create_bus(struct device *parent, return b; -dev_create_file_err: - device_unregister(&b->dev); class_dev_reg_err: device_unregister(dev); dev_reg_err: diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 176615e7231f..7f87beed35ac 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -73,8 +73,6 @@ void pci_remove_bus(struct pci_bus *pci_bus) return; pci_remove_legacy_files(pci_bus); - device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity); - device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity); device_unregister(&pci_bus->dev); } EXPORT_SYMBOL(pci_remove_bus); From cbfddd20937ed890ce7027fa08a7c84e977128cf Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Fri, 20 May 2011 13:36:01 +0800 Subject: [PATCH 26/27] PCI: remove unused AER functions In the commit 28eb5f2, aer_osc_setup is removed but corresponding definiton information in the aerdrv.h is missed. Acked-by: Rafael J. Wysocki Signed-off-by: Chen Gong Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 3eb77080366a..94a7598eb262 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -114,15 +114,6 @@ extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); extern void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info); extern irqreturn_t aer_irq(int irq, void *context); -#ifdef CONFIG_ACPI -extern int aer_osc_setup(struct pcie_device *pciedev); -#else -static inline int aer_osc_setup(struct pcie_device *pciedev) -{ - return 0; -} -#endif - #ifdef CONFIG_ACPI_APEI extern int pcie_aer_get_firmware_first(struct pci_dev *pci_dev); #else From 9251bac97d47fdaea406ea0595c2d0aa50022f12 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 15 May 2011 18:13:46 +0200 Subject: [PATCH 27/27] PCI: Don't use dmi_name_in_vendors in quirk Don't use the costly dmi_name_in_vendors() when we know the string we are looking for can only be in the DMI board name field. This is more robust and, more importantly, much faster. Signed-off-by: Jean Delvare Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 4fc1b0daedaf..e8a140669f90 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2349,8 +2349,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE, */ static void __devinit nvenet_msi_disable(struct pci_dev *dev) { - if (dmi_name_in_vendors("P5N32-SLI PREMIUM") || - dmi_name_in_vendors("P5N32-E SLI")) { + const char *board_name = dmi_get_system_info(DMI_BOARD_NAME); + + if (board_name && + (strstr(board_name, "P5N32-SLI PREMIUM") || + strstr(board_name, "P5N32-E SLI"))) { dev_info(&dev->dev, "Disabling msi for MCP55 NIC on P5N32-SLI\n"); dev->no_msi = 1;