From 99d440242c08c8a95576c0bc89278f8912f65bda Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 13 Aug 2014 02:22:37 -0400 Subject: [PATCH 1/5] trace, RAS: Replace bare numbers with #defines for PCIe AER error strings Replace bare numbers like "BIT(0)" with the existing #defines, e.g., PCI_ERR_COR_RCVR, to improve maintainability. This way grep will find more uses of the #defines. No functional change. [bhelgaas: changelog] Signed-off-by: Chen, Gong Signed-off-by: Bjorn Helgaas --- include/ras/ras_event.h | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 47da53c27ffa..0f2cca4ccbf0 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -8,6 +8,7 @@ #include <linux/tracepoint.h> #include <linux/edac.h> #include <linux/ktime.h> +#include <linux/pci.h> #include <linux/aer.h> #include <linux/cper.h> @@ -174,24 +175,24 @@ TRACE_EVENT(mc_event, */ #define aer_correctable_errors \ - {BIT(0), "Receiver Error"}, \ - {BIT(6), "Bad TLP"}, \ - {BIT(7), "Bad DLLP"}, \ - {BIT(8), "RELAY_NUM Rollover"}, \ - {BIT(12), "Replay Timer Timeout"}, \ - {BIT(13), "Advisory Non-Fatal"} + {PCI_ERR_COR_RCVR, "Receiver Error"}, \ + {PCI_ERR_COR_BAD_TLP, "Bad TLP"}, \ + {PCI_ERR_COR_BAD_DLLP, "Bad DLLP"}, \ + {PCI_ERR_COR_REP_ROLL, "RELAY_NUM Rollover"}, \ + {PCI_ERR_COR_REP_TIMER, "Replay Timer Timeout"},\ + {PCI_ERR_COR_ADV_NFAT, "Advisory Non-Fatal"} #define aer_uncorrectable_errors \ - {BIT(4), "Data Link Protocol"}, \ - {BIT(12), "Poisoned TLP"}, \ - {BIT(13), "Flow Control Protocol"}, \ - {BIT(14), "Completion Timeout"}, \ - {BIT(15), "Completer Abort"}, \ - {BIT(16), "Unexpected Completion"}, \ - {BIT(17), "Receiver Overflow"}, \ - {BIT(18), "Malformed TLP"}, \ - {BIT(19), "ECRC"}, \ - {BIT(20), "Unsupported Request"} + {PCI_ERR_UNC_DLP, "Data Link Protocol"}, \ + {PCI_ERR_UNC_POISON_TLP,"Poisoned TLP"}, \ + {PCI_ERR_UNC_FCP, "Flow Control Protocol"}, \ + {PCI_ERR_UNC_COMP_TIME, "Completion Timeout"}, \ + {PCI_ERR_UNC_COMP_ABORT,"Completer Abort"}, \ + 
{PCI_ERR_UNC_UNX_COMP, "Unexpected Completion"}, \ + {PCI_ERR_UNC_RX_OVER, "Receiver Overflow"}, \ + {PCI_ERR_UNC_MALF_TLP, "Malformed TLP"}, \ + {PCI_ERR_UNC_ECRC, "ECRC"}, \ + {PCI_ERR_UNC_UNSUP, "Unsupported Request"} TRACE_EVENT(aer_event, TP_PROTO(const char *dev_name, From cb9a684acb3d79a5431ac78832148817debf3a06 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 13 Aug 2014 02:22:39 -0400 Subject: [PATCH 2/5] trace, RAS: Add additional PCIe AER error strings Add all AER error bits defined in PCIe r3.0. [bhelgaas: changelog] Signed-off-by: Chen, Gong Signed-off-by: Bjorn Helgaas --- include/ras/ras_event.h | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 0f2cca4ccbf0..0f04a9755d1e 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -174,25 +174,34 @@ TRACE_EVENT(mc_event, * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED */ -#define aer_correctable_errors \ - {PCI_ERR_COR_RCVR, "Receiver Error"}, \ - {PCI_ERR_COR_BAD_TLP, "Bad TLP"}, \ - {PCI_ERR_COR_BAD_DLLP, "Bad DLLP"}, \ - {PCI_ERR_COR_REP_ROLL, "RELAY_NUM Rollover"}, \ - {PCI_ERR_COR_REP_TIMER, "Replay Timer Timeout"},\ - {PCI_ERR_COR_ADV_NFAT, "Advisory Non-Fatal"} +#define aer_correctable_errors \ + {PCI_ERR_COR_RCVR, "Receiver Error"}, \ + {PCI_ERR_COR_BAD_TLP, "Bad TLP"}, \ + {PCI_ERR_COR_BAD_DLLP, "Bad DLLP"}, \ + {PCI_ERR_COR_REP_ROLL, "RELAY_NUM Rollover"}, \ + {PCI_ERR_COR_REP_TIMER, "Replay Timer Timeout"}, \ + {PCI_ERR_COR_ADV_NFAT, "Advisory Non-Fatal Error"}, \ + {PCI_ERR_COR_INTERNAL, "Corrected Internal Error"}, \ + {PCI_ERR_COR_LOG_OVER, "Header Log Overflow"} -#define aer_uncorrectable_errors \ - {PCI_ERR_UNC_DLP, "Data Link Protocol"}, \ +#define aer_uncorrectable_errors \ + {PCI_ERR_UNC_TRAIN, "Undefined"}, \ + {PCI_ERR_UNC_DLP, "Data Link Protocol Error"}, \ + {PCI_ERR_UNC_SURPDN, "Surprise Down Error"}, \ {PCI_ERR_UNC_POISON_TLP,"Poisoned 
TLP"}, \ - {PCI_ERR_UNC_FCP, "Flow Control Protocol"}, \ + {PCI_ERR_UNC_FCP, "Flow Control Protocol Error"}, \ {PCI_ERR_UNC_COMP_TIME, "Completion Timeout"}, \ {PCI_ERR_UNC_COMP_ABORT,"Completer Abort"}, \ {PCI_ERR_UNC_UNX_COMP, "Unexpected Completion"}, \ {PCI_ERR_UNC_RX_OVER, "Receiver Overflow"}, \ {PCI_ERR_UNC_MALF_TLP, "Malformed TLP"}, \ - {PCI_ERR_UNC_ECRC, "ECRC"}, \ - {PCI_ERR_UNC_UNSUP, "Unsupported Request"} + {PCI_ERR_UNC_ECRC, "ECRC Error"}, \ + {PCI_ERR_UNC_UNSUP, "Unsupported Request Error"}, \ + {PCI_ERR_UNC_ACSV, "ACS Violation"}, \ + {PCI_ERR_UNC_INTN, "Uncorrectable Internal Error"},\ + {PCI_ERR_UNC_MCBTLP, "MC Blocked TLP"}, \ + {PCI_ERR_UNC_ATOMEG, "AtomicOp Egress Blocked"}, \ + {PCI_ERR_UNC_TLPPRE, "TLP Prefix Blocked Error"} TRACE_EVENT(aer_event, TP_PROTO(const char *dev_name, From d179111767aa2a1d594023ce65abf9c81bfbb0cf Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Thu, 25 Sep 2014 09:36:43 -0600 Subject: [PATCH 3/5] PCI/AER: Add additional PCIe AER error strings Add strings for all AER error bits defined in PCIe r3.0. 
[bhelgaas: changelog, drop designated initializer change] Signed-off-by: Chen, Gong Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer/aerdrv_errprint.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 35d06e177917..c6849d9e86ce 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -89,15 +89,17 @@ static const char *aer_correctable_error_string[] = { NULL, "Replay Timer Timeout", /* Bit Position 12 */ "Advisory Non-Fatal", /* Bit Position 13 */ + "Corrected Internal Error", /* Bit Position 14 */ + "Header Log Overflow", /* Bit Position 15 */ }; static const char *aer_uncorrectable_error_string[] = { - NULL, + "Undefined", /* Bit Position 0 */ NULL, NULL, NULL, "Data Link Protocol", /* Bit Position 4 */ - NULL, + "Surprise Down Error", /* Bit Position 5 */ NULL, NULL, NULL, @@ -113,6 +115,11 @@ static const char *aer_uncorrectable_error_string[] = { "Malformed TLP", /* Bit Position 18 */ "ECRC", /* Bit Position 19 */ "Unsupported Request", /* Bit Position 20 */ + "ACS Violation", /* Bit Position 21 */ + "Uncorrectable Internal Error", /* Bit Position 22 */ + "MC Blocked TLP", /* Bit Position 23 */ + "AtomicOp Egress Blocked", /* Bit Position 24 */ + "TLP Prefix Blocked Error", /* Bit Position 25 */ }; static const char *aer_agent_string[] = { From 846fc70986a65563a19ae86928c3acf34f12296d Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 13 Aug 2014 02:22:40 -0400 Subject: [PATCH 4/5] PCI/AER: Rename PCI_ERR_UNC_TRAIN to PCI_ERR_UNC_UND In PCIe r1.0, sec 5.10.2, bit 0 of the Uncorrectable Error Status, Mask, and Severity Registers was for "Training Error." In PCIe r1.1, sec 7.10.2, bit 0 was redefined to be "Undefined." Rename PCI_ERR_UNC_TRAIN to PCI_ERR_UNC_UND to reflect this change. No functional change. 
[bhelgaas: changelog] Signed-off-by: Chen, Gong Signed-off-by: Bjorn Helgaas --- drivers/vfio/pci/vfio_pci_config.c | 2 +- include/ras/ras_event.h | 2 +- include/uapi/linux/pci_regs.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index e50790e91f76..1de3f94aa7de 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -727,7 +727,7 @@ static int __init init_pci_ext_cap_err_perm(struct perm_bits *perm) p_setd(perm, 0, ALL_VIRT, NO_WRITE); /* Writable bits mask */ - mask = PCI_ERR_UNC_TRAIN | /* Training */ + mask = PCI_ERR_UNC_UND | /* Undefined */ PCI_ERR_UNC_DLP | /* Data Link Protocol */ PCI_ERR_UNC_SURPDN | /* Surprise Down */ PCI_ERR_UNC_POISON_TLP | /* Poisoned TLP */ diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 0f04a9755d1e..79abb9c71772 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -185,7 +185,7 @@ TRACE_EVENT(mc_event, {PCI_ERR_COR_LOG_OVER, "Header Log Overflow"} #define aer_uncorrectable_errors \ - {PCI_ERR_UNC_TRAIN, "Undefined"}, \ + {PCI_ERR_UNC_UND, "Undefined"}, \ {PCI_ERR_UNC_DLP, "Data Link Protocol Error"}, \ {PCI_ERR_UNC_SURPDN, "Surprise Down Error"}, \ {PCI_ERR_UNC_POISON_TLP,"Poisoned TLP"}, \ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 30db069bce62..99e3182f2c96 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -630,7 +630,7 @@ /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ -#define PCI_ERR_UNC_TRAIN 0x00000001 /* Training */ +#define PCI_ERR_UNC_UND 0x00000001 /* Undefined */ #define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ #define PCI_ERR_UNC_SURPDN 0x00000020 /* Surprise Down */ #define PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */ From 100ebb2c48eaddd6a7ce9602d5d4c37f0a3c9232 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: 
Fri, 26 Sep 2014 17:07:59 -0600 Subject: [PATCH 5/5] PCI: Add ACS quirk for Intel 10G NICs Intel has verified there is no peer-to-peer between functions for the below selection of 82598, 82599, and X520 10G NICs. These NICs lack an ACS capability, so we're not able to determine this isolation without the help of quirks. Generalize the Solarflare quirk and add these Intel 10G NICs. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Acked-by: John Ronciak --- drivers/pci/quirks.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 81777c27e14c..322d577503bd 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3615,14 +3615,16 @@ static int pci_quirk_intel_pch_acs(struct pci_dev *dev, u16 acs_flags) return acs_flags & ~flags ? 0 : 1; } -static int pci_quirk_solarflare_acs(struct pci_dev *dev, u16 acs_flags) +static int pci_quirk_mf_endpoint_acs(struct pci_dev *dev, u16 acs_flags) { /* * SV, TB, and UF are not relevant to multifunction endpoints. * - * Solarflare indicates that peer-to-peer between functions is not - * possible, therefore RR, CR, and DT are not implemented. Mask - * these out as if they were clear in the ACS capabilities register. + * Multifunction devices are only required to implement RR, CR, and DT + * in their ACS capability if they support peer-to-peer transactions. + * Devices matching this quirk have been verified by the vendor to not + * perform peer-to-peer with other functions, allowing us to mask out + * these bits as if they were unimplemented in the ACS capability. 
*/ acs_flags &= ~(PCI_ACS_SV | PCI_ACS_TB | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF | PCI_ACS_DT); @@ -3641,8 +3643,28 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_ATI, 0x439d, pci_quirk_amd_sb_acs }, { PCI_VENDOR_ID_ATI, 0x4384, pci_quirk_amd_sb_acs }, { PCI_VENDOR_ID_ATI, 0x4399, pci_quirk_amd_sb_acs }, - { PCI_VENDOR_ID_SOLARFLARE, 0x0903, pci_quirk_solarflare_acs }, - { PCI_VENDOR_ID_SOLARFLARE, 0x0923, pci_quirk_solarflare_acs }, + { PCI_VENDOR_ID_SOLARFLARE, 0x0903, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_SOLARFLARE, 0x0923, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x10C6, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x10DB, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x10DD, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x10E1, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x10F1, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x10F7, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x10F8, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x10F9, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x10FA, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x10FB, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x10FC, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x1507, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x1514, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x151C, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x1529, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x152A, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x154D, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x154F, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x1551, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_INTEL, 0x1558, pci_quirk_mf_endpoint_acs }, { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_intel_pch_acs }, { 0 } };