PCI/AER: Add sysfs attributes for rootport cumulative stats

Add sysfs attributes for rootport statistics (that are cumulative of all
the ERR_* messages seen on this PCI hierarchy).

Signed-off-by: Rajat Jain <rajatja@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
This commit is contained in:
Rajat Jain 2018-06-21 16:48:29 -07:00 committed by Bjorn Helgaas
parent 81aa5206f9
commit 12833017e5
2 changed files with 75 additions and 0 deletions

View File

@ -92,3 +92,31 @@ AtomicOp Egress Blocked 0
TLP Prefix Blocked Error 0 TLP Prefix Blocked Error 0
TOTAL_ERR_NONFATAL 0 TOTAL_ERR_NONFATAL 0
------------------------------------------------------------------------- -------------------------------------------------------------------------
============================
PCIe Rootport AER statistics
============================
These attributes show up under only the rootports (or root complex event
collectors) that are AER capable. These indicate the number of error messages as
"reported to" the rootport. Please note that the rootports also transmit
(internally) the ERR_* messages for errors seen by the internal rootport PCI
device, so these counters include them and are thus cumulative of all the error
messages on the PCI hierarchy originating at that root port.
Where: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_cor
Date: July 2018
Kernel Version: 4.19.0
Contact: linux-pci@vger.kernel.org, rajatja@google.com
Description: Total number of ERR_COR messages reported to rootport.
Where: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_fatal
Date: July 2018
Kernel Version: 4.19.0
Contact: linux-pci@vger.kernel.org, rajatja@google.com
Description: Total number of ERR_FATAL messages reported to rootport.
Where: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_nonfatal
Date: July 2018
Kernel Version: 4.19.0
Contact: linux-pci@vger.kernel.org, rajatja@google.com
Description: Total number of ERR_NONFATAL messages reported to rootport.

View File

@ -577,10 +577,30 @@ aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
aer_uncorrectable_error_string, "ERR_NONFATAL", aer_uncorrectable_error_string, "ERR_NONFATAL",
dev_total_nonfatal_errs); dev_total_nonfatal_errs);
#define aer_stats_rootport_attr(name, field) \
static ssize_t \
name##_show(struct device *dev, struct device_attribute *attr, \
char *buf) \
{ \
struct pci_dev *pdev = to_pci_dev(dev); \
return sprintf(buf, "%llu\n", pdev->aer_stats->field); \
} \
static DEVICE_ATTR_RO(name)
aer_stats_rootport_attr(aer_rootport_total_err_cor,
rootport_total_cor_errs);
aer_stats_rootport_attr(aer_rootport_total_err_fatal,
rootport_total_fatal_errs);
aer_stats_rootport_attr(aer_rootport_total_err_nonfatal,
rootport_total_nonfatal_errs);
static struct attribute *aer_stats_attrs[] __ro_after_init = { static struct attribute *aer_stats_attrs[] __ro_after_init = {
&dev_attr_aer_dev_correctable.attr, &dev_attr_aer_dev_correctable.attr,
&dev_attr_aer_dev_fatal.attr, &dev_attr_aer_dev_fatal.attr,
&dev_attr_aer_dev_nonfatal.attr, &dev_attr_aer_dev_nonfatal.attr,
&dev_attr_aer_rootport_total_err_cor.attr,
&dev_attr_aer_rootport_total_err_fatal.attr,
&dev_attr_aer_rootport_total_err_nonfatal.attr,
NULL NULL
}; };
@ -593,6 +613,12 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
if (!pdev->aer_stats) if (!pdev->aer_stats)
return 0; return 0;
if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
a == &dev_attr_aer_rootport_total_err_fatal.attr ||
a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
return 0;
return a->mode; return a->mode;
} }
@ -635,6 +661,25 @@ static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
counter[i]++; counter[i]++;
} }
static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
struct aer_err_source *e_src)
{
struct aer_stats *aer_stats = pdev->aer_stats;
if (!aer_stats)
return;
if (e_src->status & PCI_ERR_ROOT_COR_RCV)
aer_stats->rootport_total_cor_errs++;
if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
aer_stats->rootport_total_fatal_errs++;
else
aer_stats->rootport_total_nonfatal_errs++;
}
}
static void __print_tlp_header(struct pci_dev *dev, static void __print_tlp_header(struct pci_dev *dev,
struct aer_header_log_regs *t) struct aer_header_log_regs *t)
{ {
@ -1085,6 +1130,8 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
struct pci_dev *pdev = rpc->rpd; struct pci_dev *pdev = rpc->rpd;
struct aer_err_info *e_info = &rpc->e_info; struct aer_err_info *e_info = &rpc->e_info;
pci_rootport_aer_stats_incr(pdev, e_src);
/* /*
* There is a possibility that both correctable error and * There is a possibility that both correctable error and
* uncorrectable error being logged. Report correctable error first. * uncorrectable error being logged. Report correctable error first.