From 41003396f932d7f027725c7acebb6a7caa41dc3e Mon Sep 17 00:00:00 2001 From: Sergey Temerkhanov Date: Fri, 24 Mar 2017 22:28:37 +0000 Subject: [PATCH 01/17] EDAC, thunderx: Add Cavium ThunderX EDAC driver Add support for Cavium ThunderX EDAC capable on-chip peripherals, namely the DRAM controller (LMC), cache coherent processor interconnect (CCPI) and level 2 cache blocks (L2C-TAD, L2C-MCI, L2C-CBC) Signed-off-by: Sergey Temerkhanov Cc: David.Daney@cavium.com Cc: Jan.Glauber@cavium.com Cc: linux-edac Link: http://lkml.kernel.org/r/20170324222837.60583-1-s.temerkhanov@gmail.com Signed-off-by: Borislav Petkov --- MAINTAINERS | 1 + drivers/edac/Kconfig | 11 + drivers/edac/Makefile | 1 + drivers/edac/thunderx_edac.c | 2183 ++++++++++++++++++++++++++++++++++ 4 files changed, 2196 insertions(+) create mode 100644 drivers/edac/thunderx_edac.c diff --git a/MAINTAINERS b/MAINTAINERS index 572fe4252ac4..af89e5d0912b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4695,6 +4695,7 @@ L: linux-edac@vger.kernel.org L: linux-mips@linux-mips.org S: Supported F: drivers/edac/octeon_edac* +F: drivers/edac/thunderx_edac* EDAC-E752X M: Mark Gross diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 4773f2867234..7c68e6f955c7 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -392,6 +392,17 @@ config EDAC_OCTEON_PCI Support for error detection and correction on the Cavium Octeon family of SOCs. +config EDAC_THUNDERX + tristate "Cavium ThunderX EDAC" + depends on EDAC_MM_EDAC + depends on ARM64 + depends on PCI + help + Support for error detection and correction on the + Cavium ThunderX memory controllers (LMC), Cache + Coherent Processor Interconnect (CCPI) and L2 cache + blocks (TAD, CBC, MCI). 
+
 config EDAC_ALTERA
 	bool "Altera SOCFPGA ECC"
 	depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 587107e90996..52d735f29073 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_EDAC_OCTEON_PC)		+= octeon_edac-pc.o
 obj-$(CONFIG_EDAC_OCTEON_L2C)		+= octeon_edac-l2c.o
 obj-$(CONFIG_EDAC_OCTEON_LMC)		+= octeon_edac-lmc.o
 obj-$(CONFIG_EDAC_OCTEON_PCI)		+= octeon_edac-pci.o
+obj-$(CONFIG_EDAC_THUNDERX)		+= thunderx_edac.o
 obj-$(CONFIG_EDAC_ALTERA)		+= altera_edac.o
 obj-$(CONFIG_EDAC_SYNOPSYS)		+= synopsys_edac.o
 
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
new file mode 100644
index 000000000000..b5fe6894df5e
--- /dev/null
+++ b/drivers/edac/thunderx_edac.c
@@ -0,0 +1,2183 @@
+/*
+ * Cavium ThunderX memory controller kernel module
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/string.h>
+#include <linux/stop_machine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/atomic.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+
+#include <asm/page.h>
+
+#include "edac_module.h"
+
+#define phys_to_pfn(phys)	(PFN_DOWN(phys))
+
+#define THUNDERX_NODE		GENMASK(45, 44)
+
+enum {
+	ERR_CORRECTED	= 1,
+	ERR_UNCORRECTED	= 2,
+	ERR_UNKNOWN	= 3,
+};
+
+#define MAX_SYNDROME_REGS 4
+
+struct error_syndrome {
+	u64 reg[MAX_SYNDROME_REGS];
+};
+
+/* One decodable error bit (or bit group) of an interrupt register. */
+struct error_descr {
+	int	type;	/* ERR_CORRECTED / ERR_UNCORRECTED; 0 ends a table */
+	u64	mask;	/* register bits that signal this error */
+	char	*descr;	/* human-readable description */
+};
+
+/*
+ * decode_register() - render the error bits set in a register as text
+ * @str:	output buffer; NUL-terminated on return whenever @size != 0
+ * @size:	capacity of @str in bytes
+ * @descr:	table of known error bits, terminated by an all-zero entry
+ * @reg:	raw register value to decode
+ *
+ * Emits one "\n\t<Corrected|Uncorrected>, <description>" fragment for every
+ * table entry whose mask intersects @reg.  Fragments that do not fit are
+ * truncated.
+ */
+static void decode_register(char *str, size_t size,
+			    const struct error_descr *descr,
+			    const uint64_t reg)
+{
+	int ret;
+
+	if (!size)
+		return;
+
+	/*
+	 * Callers concatenate the result onto another string, so it has to
+	 * be a valid (empty) string even if no table entry matches @reg.
+	 */
+	str[0] = '\0';
+
+	while (descr->type && descr->mask && descr->descr) {
+		if (reg & descr->mask) {
+			/*
+			 * scnprintf() returns the number of characters really
+			 * stored, so @size can never wrap below zero.
+			 */
+			ret = scnprintf(str, size, "\n\t%s, %s",
+					descr->type == ERR_CORRECTED ?
+					"Corrected" : "Uncorrected",
+					descr->descr);
+			str += ret;
+			size -= ret;
+		}
+		descr++;
+	}
+}
+
+/* Return the @width-bit wide field of @data that starts at bit @pos. */
+static unsigned long get_bits(unsigned long data, int pos, int width)
+{
+	return (data >> pos) & ((1UL << width) - 1);
+}
+
+#define L2C_CTL			0x87E080800000
+#define L2C_CTL_DISIDXALIAS	BIT(0)
+
+#define PCI_DEVICE_ID_THUNDER_LMC 0xa022
+
+#define LMC_FADR		0x20
+#define LMC_FADR_FDIMM(x)	(((x) >> 37) & 0x1)
+#define LMC_FADR_FBUNK(x)	(((x) >> 36) & 0x1)
+#define LMC_FADR_FBANK(x)	(((x) >> 32) & 0xf)
+#define LMC_FADR_FROW(x)	(((x) >> 14) & 0xffff)
+#define LMC_FADR_FCOL(x)	(((x) >> 0) & 0x1fff)
+
+#define LMC_NXM_FADR		0x28
+#define LMC_ECC_SYND		0x38
+
+#define LMC_ECC_PARITY_TEST	0x108
+
+#define LMC_INT_W1S		0x150
+
+#define LMC_INT_ENA_W1C		0x158
+#define LMC_INT_ENA_W1S		0x160
+
+#define LMC_CONFIG		0x188
+
+#define LMC_CONFIG_BG2		BIT(62)
+#define LMC_CONFIG_RANK_ENA	BIT(42)
+#define LMC_CONFIG_PBANK_LSB(x)	(((x) >> 5) & 0xF)
+#define LMC_CONFIG_ROW_LSB(x)	(((x) >> 2) & 0x7)
+
+#define LMC_CONTROL		0x190
+#define LMC_CONTROL_XOR_BANK	BIT(16)
+
+#define LMC_INT			0x1F0
+
+#define LMC_INT_DDR_ERR		BIT(11)
+#define LMC_INT_DED_ERR		(0xFUL << 5)
+#define LMC_INT_SEC_ERR		(0xFUL << 1)
+#define LMC_INT_NXM_WR_MASK	BIT(0)
+
+#define LMC_DDR_PLL_CTL		0x258
+#define LMC_DDR_PLL_CTL_DDR4	BIT(29)
+
+#define LMC_FADR_SCRAMBLED	0x330
+
+#define LMC_INT_UE		(LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \
+				 LMC_INT_NXM_WR_MASK)
+
+#define LMC_INT_CE		(LMC_INT_SEC_ERR)
+
+static const struct error_descr lmc_errors[] = {
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = LMC_INT_SEC_ERR,
+		.descr = "Single-bit ECC error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = LMC_INT_DDR_ERR,
+		.descr = "DDR chip error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = LMC_INT_DED_ERR,
+		.descr = "Double-bit ECC error",
+	},
+	{
+		.type = ERR_UNCORRECTED,
+		.mask = LMC_INT_NXM_WR_MASK,
+		.descr = "Non-existent memory write",
+	},
+	{0, 0, NULL},
+};
+
+#define LMC_INT_EN_DDR_ERROR_ALERT_ENA	BIT(5)
+#define LMC_INT_EN_DLCRAM_DED_ERR	BIT(4)
+#define LMC_INT_EN_DLCRAM_SEC_ERR	BIT(3)
+#define LMC_INT_INTR_DED_ENA		BIT(2)
+#define LMC_INT_INTR_SEC_ENA		BIT(1)
+#define LMC_INT_INTR_NXM_WR_ENA		BIT(0)
+
+#define LMC_INT_ENA_ALL			GENMASK(5, 0)
+
+#define LMC_DDR_PLL_CTL		0x258
+#define LMC_DDR_PLL_CTL_DDR4	BIT(29)
+
+#define LMC_CONTROL		0x190
+#define LMC_CONTROL_RDIMM	BIT(0)
+
+#define LMC_SCRAM_FADR		0x330
+
+#define LMC_CHAR_MASK0		0x228
+#define LMC_CHAR_MASK2		0x238
+
+/* Depth of the hard-IRQ -> threaded-IRQ context rings; power of two. */
+#define RING_ENTRIES	8
+
+/* Name, permissions and file operations of one debugfs node. */
+struct debugfs_entry {
+	const char *name;
+	umode_t mode;
+	const struct file_operations fops;
+};
+
+/* Register snapshot taken in the LMC hard IRQ handler. */
+struct lmc_err_ctx {
+	u64 reg_int;
+	u64 reg_fadr;
+	u64 reg_nxm_fadr;
+	u64 reg_scram_fadr;
+	u64 reg_ecc_synd;
+};
+
+/* Per-controller private data, stored in mem_ctl_info::pvt_info. */
+struct thunderx_lmc {
+	void __iomem *regs;
+	struct pci_dev *pdev;
+	struct msix_entry msix_ent;
+
+	/* Set by the hard IRQ handler; polled by the ECC injection path. */
+	atomic_t ecc_int;
+
+	/* Injection parameters, written through debugfs. */
+	u64 mask0;
+	u64 mask2;
+	u64 parity_test;
+	u64 node;
+
+	/* Address-decoding parameters computed in thunderx_lmc_probe(). */
+	int xbits;
+	int bank_width;
+	int pbank_lsb;
+	int dimm_lsb;
+	int rank_lsb;
+	int bank_lsb;
+	int row_lsb;
+	int col_hi_lsb;
+
+	int xor_bank;
+	int l2c_alias;
+
+	/* Scratch page used while injecting an ECC error. */
+	struct page *mem;
+
+	/* Ring filled by the hard IRQ handler, drained by the IRQ thread. */
+	struct lmc_err_ctx err_ctx[RING_ENTRIES];
+	unsigned long ring_head;
+	unsigned long ring_tail;
+};
+
+/* Map a free-running ring counter onto a slot; @size is a power of two. */
+#define ring_pos(pos, size) ((pos) & ((size) - 1))
+
+#define DEBUGFS_STRUCT(_name, _mode, _write, _read)	\
+static struct debugfs_entry debugfs_##_name = {	\
+	.name = __stringify(_name),	\
+	.mode = VERIFY_OCTAL_PERMISSIONS(_mode),	\
+	.fops = {	\
+		.open = simple_open,	\
+		.write = _write,	\
+		.read  = _read,	\
+		.llseek = generic_file_llseek,	\
+	},	\
+}
+
+/*
+ * Generate debugfs read/write handlers plus the debugfs_entry for a u64
+ * member @_field of struct thunderx_<_type>.
+ *
+ * The read handler hands out only the characters that were formatted, so no
+ * uninitialised stack bytes reach user space.
+ */
+#define DEBUGFS_FIELD_ATTR(_type, _field)	\
+static ssize_t thunderx_##_type##_##_field##_read(struct file *file,	\
+						  char __user *data,	\
+						  size_t count, loff_t *ppos) \
+{	\
+	struct thunderx_##_type *pdata = file->private_data;	\
+	char buf[20];	\
+	int len;	\
+	\
+	len = scnprintf(buf, sizeof(buf), "0x%016llx", pdata->_field);	\
+	return simple_read_from_buffer(data, count, ppos, buf, len);	\
+}	\
+	\
+static ssize_t thunderx_##_type##_##_field##_write(struct file *file,	\
+						   const char __user *data, \
+						   size_t count, loff_t *ppos) \
+{	\
+	struct thunderx_##_type *pdata = file->private_data;	\
+	int res;	\
+	\
+	res = kstrtoull_from_user(data, count, 0, &pdata->_field);	\
+	\
+	return res ? res : count;	\
+}	\
+	\
+DEBUGFS_STRUCT(_field, 0600,	\
+	       thunderx_##_type##_##_field##_write,	\
+	       thunderx_##_type##_##_field##_read)
+
+/*
+ * Generate debugfs read/write handlers plus the debugfs_entry for the
+ * 64-bit device register at offset @_reg of thunderx_<_type>::regs.
+ */
+#define DEBUGFS_REG_ATTR(_type, _name, _reg)	\
+static ssize_t thunderx_##_type##_##_name##_read(struct file *file,	\
+						 char __user *data,	\
+						 size_t count, loff_t *ppos) \
+{	\
+	struct thunderx_##_type *pdata = file->private_data;	\
+	char buf[20];	\
+	int len;	\
+	\
+	len = scnprintf(buf, sizeof(buf), "0x%016llx",	\
+			readq(pdata->regs + _reg));	\
+	return simple_read_from_buffer(data, count, ppos, buf, len);	\
+}	\
+	\
+static ssize_t thunderx_##_type##_##_name##_write(struct file *file,	\
+						  const char __user *data, \
+						  size_t count, loff_t *ppos) \
+{	\
+	struct thunderx_##_type *pdata = file->private_data;	\
+	u64 val;	\
+	int res;	\
+	\
+	res = kstrtoull_from_user(data, count, 0, &val);	\
+	\
+	if (!res) {	\
+		writeq(val, pdata->regs + _reg);	\
+		res = count;	\
+	}	\
+	\
+	return res;	\
+}	\
+	\
+DEBUGFS_STRUCT(_name, 0600,	\
+	       thunderx_##_type##_##_name##_write,	\
+	       thunderx_##_type##_##_name##_read)
+
+#define LMC_DEBUGFS_ENT(_field)	DEBUGFS_FIELD_ATTR(lmc, _field)
+
+/*
+ * debugfs "inject_int": the value written is stored to LMC_INT_W1S, which
+ * raises the corresponding interrupt bits.
+ */
+static ssize_t thunderx_lmc_inject_int_write(struct file *file,
+					     const char __user *data,
+					     size_t count, loff_t *ppos)
+{
+	struct thunderx_lmc *lmc = file->private_data;
+	u64 val;
+	int res;
+
+	res = kstrtoull_from_user(data, count, 0, &val);
+
+	if (!res) {
+		/* Trigger the interrupt */
+		writeq(val, lmc->regs + LMC_INT_W1S);
+		res = count;
+	}
+
+	return res;
+}
+
+/* debugfs "int_w1c": report the current value of LMC_INT. */
+static ssize_t thunderx_lmc_int_read(struct file *file,
+				     char __user *data,
+				     size_t count, loff_t *ppos)
+{
+	struct thunderx_lmc *lmc = file->private_data;
+	char buf[20];
+	u64 lmc_int = readq(lmc->regs + LMC_INT);
+	int len;
+
+	len = scnprintf(buf, sizeof(buf), "0x%016llx", lmc_int);
+	/* Copy only what was formatted, never the unused tail of buf. */
+	return simple_read_from_buffer(data, count, ppos, buf, len);
+}
+
+#define TEST_PATTERN 0xa5
+
+/*
+ * stop_machine() callback doing the actual ECC corruption on lmc->mem.
+ *
+ * Programs the injection registers from the debugfs-provided values, dirties
+ * the scratch page and then pushes it through L1 and L2 so that the data is
+ * written to and later re-fetched from DRAM.
+ */
+static int inject_ecc_fn(void *arg)
+{
+	struct thunderx_lmc *lmc = arg;
+	uintptr_t addr, phys;
+	unsigned int cline_size = cache_line_size();
+	const unsigned int lines = PAGE_SIZE / cline_size;
+	unsigned int i, cl_idx;
+
+	addr = (uintptr_t)page_address(lmc->mem);
+	phys = (uintptr_t)page_to_phys(lmc->mem);
+
+	/* Bits 10:8 of the parity test value are derived from the address. */
+	cl_idx = (phys & 0x7f) >> 4;
+	lmc->parity_test &= ~(7ULL << 8);
+	lmc->parity_test |= (cl_idx << 8);
+
+	writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0);
+	writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2);
+	writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST);
+
+	/* Read the registers back so the writes above have been posted. */
+	readq(lmc->regs + LMC_CHAR_MASK0);
+	readq(lmc->regs + LMC_CHAR_MASK2);
+	readq(lmc->regs + LMC_ECC_PARITY_TEST);
+
+	for (i = 0; i < lines; i++) {
+		/*
+		 * NOTE(review): the pattern is always written to the first
+		 * cache line of the page while the flush below walks every
+		 * line; "addr + i * cline_size" may have been intended.
+		 * Left unchanged pending confirmation on hardware.
+		 */
+		memset((void *)addr, TEST_PATTERN, cline_size);
+		barrier();
+
+		/*
+		 * Flush L1 cachelines to the PoC (L2).
+		 * This will cause cacheline eviction to the L2.
+		 */
+		asm volatile("dc civac, %0\n"
+			     "dsb sy\n"
+			     : : "r"(addr + i * cline_size));
+	}
+
+	for (i = 0; i < lines; i++) {
+		/*
+		 * Flush L2 cachelines to the DRAM.
+		 * This will cause cacheline eviction to the DRAM
+		 * and ECC corruption according to the masks set.
+		 */
+		__asm__ volatile("sys #0,c11,C1,#2, %0\n"
+				 : : "r"(phys + i * cline_size));
+	}
+
+	for (i = 0; i < lines; i++) {
+		/*
+		 * Invalidate L2 cachelines.
+		 * The subsequent load will cause cacheline fetch
+		 * from the DRAM and an error interrupt
+		 */
+		__asm__ volatile("sys #0,c11,C1,#1, %0"
+				 : : "r"(phys + i * cline_size));
+	}
+
+	for (i = 0; i < lines; i++) {
+		/*
+		 * Invalidate L1 cachelines.
+		 * The subsequent load will cause cacheline fetch
+		 * from the L2 and/or DRAM
+		 */
+		asm volatile("dc ivac, %0\n"
+			     "dsb sy\n"
+			     : : "r"(addr + i * cline_size));
+	}
+
+	return 0;
+}
+
+/*
+ * To get an ECC error injected, the following steps are needed:
+ * - Setup the ECC injection by writing the appropriate parameters to the
+ *   files created below the memory controller's debugfs directory:
+ *	echo <bit pattern> > <debugfs dir>/mask0
+ *	echo <bit pattern> > <debugfs dir>/mask2
+ *	echo 0x802 > <debugfs dir>/parity_test
+ * - Do the actual injection:
+ *	echo 1 > <debugfs dir>/inject_ecc
+ *
+ * The written value itself is ignored.  Injection is retried until the
+ * error interrupt has been seen or the retry budget is used up.
+ *
+ * Return: @count, or -ENOMEM if the scratch buffers cannot be allocated.
+ */
+static ssize_t thunderx_lmc_inject_ecc_write(struct file *file,
+					     const char __user *data,
+					     size_t count, loff_t *ppos)
+{
+	struct thunderx_lmc *lmc = file->private_data;
+	unsigned int cline_size = cache_line_size();
+	unsigned int offs, timeout = 100000;
+	void *addr;
+	u8 *tmp;
+
+	atomic_set(&lmc->ecc_int, 0);
+
+	lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
+	if (!lmc->mem)
+		return -ENOMEM;
+
+	/* Heap buffer instead of a variable-length array on the stack. */
+	tmp = kmalloc(cline_size, GFP_KERNEL);
+	if (!tmp) {
+		__free_pages(lmc->mem, 0);
+		return -ENOMEM;
+	}
+
+	addr = page_address(lmc->mem);
+
+	while (!atomic_read(&lmc->ecc_int) && timeout--) {
+		stop_machine(inject_ecc_fn, lmc, NULL);
+
+		for (offs = 0; offs < PAGE_SIZE; offs += cline_size) {
+			/*
+			 * Do a load from the previously rigged location
+			 * This should generate an error interrupt.
+			 */
+			memcpy(tmp, addr + offs, cline_size);
+			asm volatile("dsb ld\n");
+		}
+	}
+
+	kfree(tmp);
+	__free_pages(lmc->mem, 0);
+
+	return count;
+}
+
+LMC_DEBUGFS_ENT(mask0);
+LMC_DEBUGFS_ENT(mask2);
+LMC_DEBUGFS_ENT(parity_test);
+
+DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
+DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
+DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);
+
+static struct debugfs_entry *lmc_dfs_ents[] = {
+	&debugfs_mask0,
+	&debugfs_mask2,
+	&debugfs_parity_test,
+	&debugfs_inject_ecc,
+	&debugfs_inject_int,
+	&debugfs_int_w1c,
+};
+
+/*
+ * Create @num debugfs files described by @attrs below @parent, passing
+ * @data as their private data.
+ *
+ * Return: number of files created (creation stops at the first failure),
+ * 0 when CONFIG_EDAC_DEBUG is disabled, or -ENOENT if @parent is NULL.
+ */
+static int thunderx_create_debugfs_nodes(struct dentry *parent,
+					 struct debugfs_entry *attrs[],
+					 void *data,
+					 size_t num)
+{
+	int i;
+	struct dentry *ent;
+
+	if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
+		return 0;
+
+	if (!parent)
+		return -ENOENT;
+
+	for (i = 0; i < num; i++) {
+		ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode,
+					       parent, data, &attrs[i]->fops);
+
+		if (!ent)
+			break;
+	}
+
+	return i;
+}
+
+/*
+ * Rebuild a physical address from the failing address register value
+ * @faddr, using the decoding parameters that probe stored in @lmc.
+ */
+static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc)
+{
+	phys_addr_t addr = 0;
+	int bank, xbits;
+
+	/*
+	 * NOTE(review): probe extracts the node from address bits 45:44
+	 * (THUNDERX_NODE) but it is placed at bit 40 here - confirm.
+	 */
+	addr |= lmc->node << 40;
+	addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb;
+	addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb;
+	addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb;
+	addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb;
+
+	/*
+	 * NOTE(review): bank is shifted by bank_lsb here and once more when
+	 * it is merged into addr below - confirm this is intended.
+	 */
+	bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb;
+
+	if (lmc->xor_bank)
+		bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width);
+
+	addr |= bank << lmc->bank_lsb;
+
+	xbits = PCI_FUNC(lmc->pdev->devfn);
+
+	if (lmc->l2c_alias)
+		xbits ^= get_bits(addr, 20, lmc->xbits) ^
+			 get_bits(addr, 12, lmc->xbits);
+
+	addr |= xbits << 7;
+
+	return addr;
+}
+
+/*
+ * Count the LMC PCI devices present.  With CONFIG_NUMA only devices whose
+ * numa_node equals @node are counted; otherwise every LMC is counted.
+ */
+static unsigned int thunderx_get_num_lmcs(unsigned int node)
+{
+	unsigned int number = 0;
+	struct pci_dev *pdev = NULL;
+
+	while ((pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
+				      PCI_DEVICE_ID_THUNDER_LMC,
+				      pdev))) {
+#ifdef CONFIG_NUMA
+		if (pdev->dev.numa_node == node)
+			number++;
+#else
+		number++;
+#endif
+	}
+
+	return number;
+}
+
+#define LMC_MESSAGE_SIZE	120
+#define LMC_OTHER_SIZE		(50 * ARRAY_SIZE(lmc_errors))
+
+/*
+ * LMC hard IRQ handler: disarm error injection, snapshot the error
+ * registers into the next ring slot, acknowledge the interrupt and hand
+ * over to the IRQ thread.
+ */
+static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id)
+{
+	struct mem_ctl_info *mci = dev_id;
+	struct thunderx_lmc *lmc = mci->pvt_info;
+
+	unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx));
+	struct lmc_err_ctx *ctx = &lmc->err_ctx[head];
+
+	writeq(0, lmc->regs + LMC_CHAR_MASK0);
+	writeq(0, lmc->regs + LMC_CHAR_MASK2);
+	writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST);
+
+	ctx->reg_int = readq(lmc->regs + LMC_INT);
+	ctx->reg_fadr = readq(lmc->regs + LMC_FADR);
+	ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR);
+	ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR);
+	ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND);
+
+	lmc->ring_head++;
+
+	/* Lets thunderx_lmc_inject_ecc_write() stop retrying. */
+	atomic_set(&lmc->ecc_int, 1);
+
+	/* Clear the interrupt */
+	writeq(ctx->reg_int, lmc->regs + LMC_INT);
+
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * LMC IRQ thread: drain the ring filled by thunderx_lmc_err_isr() and
+ * report every entry to the EDAC core.
+ *
+ * Return: IRQ_HANDLED, or IRQ_NONE if the message buffers cannot be
+ * allocated (queued entries are then left in the ring).
+ */
+static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id)
+{
+	struct mem_ctl_info *mci = dev_id;
+	struct thunderx_lmc *lmc = mci->pvt_info;
+	phys_addr_t phys_addr;
+
+	unsigned long tail;
+	struct lmc_err_ctx *ctx;
+
+	irqreturn_t ret = IRQ_NONE;
+
+	char *msg;
+	char *other;
+
+	msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL);
+	other =  kmalloc(LMC_OTHER_SIZE, GFP_KERNEL);
+
+	if (!msg || !other)
+		goto err_free;
+
+	while (CIRC_CNT(lmc->ring_head, lmc->ring_tail,
+			ARRAY_SIZE(lmc->err_ctx))) {
+		tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx));
+
+		ctx = &lmc->err_ctx[tail];
+
+		dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n",
+			ctx->reg_int);
+		dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n",
+			ctx->reg_fadr);
+		dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n",
+			ctx->reg_nxm_fadr);
+		dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n",
+			ctx->reg_scram_fadr);
+		dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n",
+			ctx->reg_ecc_synd);
+
+		snprintf(msg, LMC_MESSAGE_SIZE,
+			 "DIMM %lld rank %lld bank %lld row %lld col %lld",
+			 LMC_FADR_FDIMM(ctx->reg_scram_fadr),
+			 LMC_FADR_FBUNK(ctx->reg_scram_fadr),
+			 LMC_FADR_FBANK(ctx->reg_scram_fadr),
+			 LMC_FADR_FROW(ctx->reg_scram_fadr),
+			 LMC_FADR_FCOL(ctx->reg_scram_fadr));
+
+		decode_register(other, LMC_OTHER_SIZE, lmc_errors,
+				ctx->reg_int);
+
+		phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc);
+
+		/* An uncorrectable error takes precedence over a corrected one. */
+		if (ctx->reg_int & LMC_INT_UE)
+			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+					     phys_to_pfn(phys_addr),
+					     offset_in_page(phys_addr),
+					     0, -1, -1, -1, msg, other);
+		else if (ctx->reg_int & LMC_INT_CE)
+			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+					     phys_to_pfn(phys_addr),
+					     offset_in_page(phys_addr),
+					     0, -1, -1, -1, msg, other);
+
+		lmc->ring_tail++;
+	}
+
+	ret = IRQ_HANDLED;
+
+err_free:
+	kfree(msg);
+	kfree(other);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM
+/* Legacy PCI suspend hook: save state, disable the device, power it down. */
+static int thunderx_lmc_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+	return 0;
+}
+
+/* Legacy PCI resume hook: back to D0 and restore the saved state. */
+static int thunderx_lmc_resume(struct pci_dev *pdev)
+{
+	pci_set_power_state(pdev, PCI_D0);
+	pci_enable_wake(pdev, PCI_D0, 0);
+	pci_restore_state(pdev);
+
+	return 0;
+}
+#endif
+
+static const struct pci_device_id thunderx_lmc_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
+	{ 0, },
+};
+
+/*
+ * EDAC memory controller index: PCI function number in the low byte, NUMA
+ * node (clamped to 0 when negative) from bit 8 upwards.
+ */
+static inline int pci_dev_to_mc_idx(struct pci_dev *pdev)
+{
+	int node = dev_to_node(&pdev->dev);
+	int ret = PCI_FUNC(pdev->devfn);
+
+	ret += max(node, 0) << 8;
+
+	return ret;
+}
+
+/*
+ * Probe one LMC: map BAR 0, allocate and register the EDAC memory
+ * controller, hook up the MSI-X error interrupt, derive the address
+ * decoding parameters and finally enable error interrupts.
+ *
+ * Return: 0 on success, negative errno otherwise.
+ */
+static int thunderx_lmc_probe(struct pci_dev *pdev,
+			      const struct pci_device_id *id)
+{
+	struct thunderx_lmc *lmc;
+	struct edac_mc_layer layer;
+	struct mem_ctl_info *mci;
+	u64 lmc_control, lmc_ddr_pll_ctl, lmc_config;
+	int ret;
+	u64 lmc_int;
+	void __iomem *l2c_ioaddr;
+
+	layer.type = EDAC_MC_LAYER_SLOT;
+	layer.size = 2;
+	layer.is_virt_csrow = false;
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
+		return ret;
+	}
+
+	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc");
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+		return ret;
+	}
+
+	mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer,
+			    sizeof(struct thunderx_lmc));
+	if (!mci)
+		return -ENOMEM;
+
+	mci->pdev = &pdev->dev;
+	lmc = mci->pvt_info;
+
+	pci_set_drvdata(pdev, mci);
+
+	lmc->regs = pcim_iomap_table(pdev)[0];
+
+	lmc_control = readq(lmc->regs + LMC_CONTROL);
+	lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL);
+	lmc_config = readq(lmc->regs + LMC_CONFIG);
+
+	if (lmc_control & LMC_CONTROL_RDIMM) {
+		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
+					   lmc_ddr_pll_ctl) ?
+				MEM_RDDR4 : MEM_RDDR3;
+	} else {
+		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
+					   lmc_ddr_pll_ctl) ?
+				MEM_DDR4 : MEM_DDR3;
+	}
+
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+
+	mci->mod_name = "thunderx-lmc";
+	mci->mod_ver = "1";
+	mci->ctl_name = "thunderx-lmc";
+	mci->dev_name = dev_name(&pdev->dev);
+	mci->scrub_mode = SCRUB_NONE;
+
+	lmc->pdev = pdev;
+	lmc->msix_ent.entry = 0;
+
+	lmc->ring_head = 0;
+	lmc->ring_tail = 0;
+
+	ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
+		goto err_free;
+	}
+
+	ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector,
+					thunderx_lmc_err_isr,
+					thunderx_lmc_threaded_isr, 0,
+					"[EDAC] ThunderX LMC", mci);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
+		goto err_free;
+	}
+
+	lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0));
+
+	lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1;
+	lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) &&
+			   FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3;
+
+	lmc->pbank_lsb = LMC_CONFIG_PBANK_LSB(lmc_config);
+	lmc->dimm_lsb  = 28 + lmc->pbank_lsb + lmc->xbits;
+	lmc->rank_lsb = lmc->dimm_lsb;
+	lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0;
+	lmc->bank_lsb = 7 + lmc->xbits;
+	lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits;
+
+	lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width;
+
+	lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK;
+
+	l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node),
+			     PAGE_SIZE);
+
+	if (!l2c_ioaddr) {
+		dev_err(&pdev->dev, "Cannot map L2C_CTL\n");
+		/*
+		 * ret is still 0 from the IRQ request above; without an
+		 * error code probe would report success after freeing mci.
+		 */
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS);
+
+	iounmap(l2c_ioaddr);
+
+	ret = edac_mc_add_mc(mci);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret);
+		goto err_free;
+	}
+
+	/* Acknowledge anything already pending, then enable all sources. */
+	lmc_int = readq(lmc->regs + LMC_INT);
+	writeq(lmc_int, lmc->regs + LMC_INT);
+
+	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S);
+
+	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
+		ret = thunderx_create_debugfs_nodes(mci->debugfs,
+						    lmc_dfs_ents,
+						    lmc,
+						    ARRAY_SIZE(lmc_dfs_ents));
+
+		/* Missing debugfs files are not fatal for the driver. */
+		if (ret != ARRAY_SIZE(lmc_dfs_ents)) {
+			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
+				 ret, ret >= 0 ? " created" : "");
+		}
+	}
+
+	return 0;
+
+err_free:
+	pci_set_drvdata(pdev, NULL);
+	edac_mc_free(mci);
+
+	return ret;
+}
+
+/* Disable all LMC error interrupts and unregister the memory controller. */
+static void thunderx_lmc_remove(struct pci_dev *pdev)
+{
+	struct mem_ctl_info *mci = pci_get_drvdata(pdev);
+	struct thunderx_lmc *lmc = mci->pvt_info;
+
+	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C);
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+}
+
+MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl);
+
+static struct pci_driver thunderx_lmc_driver = {
+	.name     = "thunderx_lmc_edac",
+	.probe    = thunderx_lmc_probe,
+	.remove   = thunderx_lmc_remove,
+#ifdef CONFIG_PM
+	.suspend  = thunderx_lmc_suspend,
+	.resume   = thunderx_lmc_resume,
+#endif
+	.id_table = thunderx_lmc_pci_tbl,
+};
+
+/*---------------------- OCX driver ---------------------------------*/
+
+#define PCI_DEVICE_ID_THUNDER_OCX 0xa013
+
+#define OCX_LINK_INTS		3
+#define OCX_INTS		(OCX_LINK_INTS + 1)
+#define OCX_RX_LANES		24
+#define OCX_RX_LANE_STATS	15
+
+#define OCX_COM_INT		0x100
+#define OCX_COM_INT_W1S		0x108
+#define OCX_COM_INT_ENA_W1S	0x110
+#define OCX_COM_INT_ENA_W1C	0x118
+
+#define OCX_COM_IO_BADID		BIT(54)
+#define OCX_COM_MEM_BADID		BIT(53)
+#define OCX_COM_COPR_BADID		BIT(52)
+#define OCX_COM_WIN_REQ_BADID		BIT(51)
+#define OCX_COM_WIN_REQ_TOUT		BIT(50)
+#define OCX_COM_RX_LANE			GENMASK(23, 0)
+
+#define OCX_COM_INT_UE			(0)
+
+#define OCX_COM_INT_CE			(OCX_COM_IO_BADID      | \
+					 OCX_COM_MEM_BADID     | \
+					 OCX_COM_COPR_BADID    | \
+					 OCX_COM_WIN_REQ_BADID | \
+					 OCX_COM_WIN_REQ_TOUT)
+
+static const struct error_descr ocx_com_errors[] = {
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_IO_BADID,
+		.descr = "Invalid IO transaction node ID",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_MEM_BADID,
+		.descr = "Invalid memory transaction node ID",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_COPR_BADID,
+		.descr = "Invalid coprocessor transaction node ID",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_WIN_REQ_BADID,
+		.descr = "Invalid SLI transaction node ID",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_WIN_REQ_TOUT,
+		.descr = "Window/core request timeout",
+	},
+	{0, 0, NULL},
+};
+
+#define OCX_COM_LINKX_INT(x)		(0x120 + (x) * 8)
+#define OCX_COM_LINKX_INT_W1S(x)	(0x140 + (x) * 8)
+#define OCX_COM_LINKX_INT_ENA_W1S(x)	(0x160 + (x) * 8)
+#define OCX_COM_LINKX_INT_ENA_W1C(x)	(0x180 + (x) * 8)
+
+#define OCX_COM_LINK_BAD_WORD			BIT(13)
+#define OCX_COM_LINK_ALIGN_FAIL			BIT(12)
+#define OCX_COM_LINK_ALIGN_DONE			BIT(11)
+#define OCX_COM_LINK_UP				BIT(10)
+#define OCX_COM_LINK_STOP			BIT(9)
+#define OCX_COM_LINK_BLK_ERR			BIT(8)
+#define OCX_COM_LINK_REINIT			BIT(7)
+#define OCX_COM_LINK_LNK_DATA			BIT(6)
+#define OCX_COM_LINK_RXFIFO_DBE			BIT(5)
+#define OCX_COM_LINK_RXFIFO_SBE			BIT(4)
+#define OCX_COM_LINK_TXFIFO_DBE			BIT(3)
+#define OCX_COM_LINK_TXFIFO_SBE			BIT(2)
+#define OCX_COM_LINK_REPLAY_DBE			BIT(1)
+#define OCX_COM_LINK_REPLAY_SBE			BIT(0)
+
+static const struct error_descr ocx_com_link_errors[] = {
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_REPLAY_SBE,
+		.descr = "Replay buffer single-bit error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_TXFIFO_SBE,
+		.descr = "TX FIFO single-bit error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_RXFIFO_SBE,
+		.descr = "RX FIFO single-bit error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_BLK_ERR,
+		.descr = "Block code error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_ALIGN_FAIL,
+		.descr = "Link alignment failure",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_BAD_WORD,
+		.descr = "Bad code word",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = OCX_COM_LINK_REPLAY_DBE,
+		.descr = "Replay buffer double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = OCX_COM_LINK_TXFIFO_DBE,
+		.descr = "TX FIFO double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = OCX_COM_LINK_RXFIFO_DBE,
+		.descr = "RX FIFO double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = OCX_COM_LINK_STOP,
+		.descr = "Link stopped",
+	},
+	{0, 0, NULL},
+};
+
+#define OCX_COM_LINK_INT_UE       (OCX_COM_LINK_REPLAY_DBE | \
+				   OCX_COM_LINK_TXFIFO_DBE | \
+				   OCX_COM_LINK_RXFIFO_DBE | \
+				   OCX_COM_LINK_STOP)
+
+#define OCX_COM_LINK_INT_CE       (OCX_COM_LINK_REPLAY_SBE | \
+				   OCX_COM_LINK_TXFIFO_SBE | \
+				   OCX_COM_LINK_RXFIFO_SBE | \
+				   OCX_COM_LINK_BLK_ERR    | \
+				   OCX_COM_LINK_ALIGN_FAIL | \
+				   OCX_COM_LINK_BAD_WORD)
+
+#define OCX_LNE_INT(x)			(0x8018 + (x) * 0x100)
+#define OCX_LNE_INT_EN(x)		(0x8020 + (x) * 0x100)
+#define OCX_LNE_BAD_CNT(x)		(0x8028 + (x) * 0x100)
+#define OCX_LNE_CFG(x)			(0x8000 + (x) * 0x100)
+#define OCX_LNE_STAT(x, y)		(0x8040 + (x) * 0x100 + (y) * 8)
+
+#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS		BIT(8)
+#define OCX_LNE_CFG_RX_STAT_WRAP_DIS		BIT(2)
+#define OCX_LNE_CFG_RX_STAT_RDCLR		BIT(1)
+#define OCX_LNE_CFG_RX_STAT_ENA			BIT(0)
+
+
+#define OCX_LANE_BAD_64B67B			BIT(8)
+#define OCX_LANE_DSKEW_FIFO_OVFL		BIT(5)
+#define OCX_LANE_SCRM_SYNC_LOSS			BIT(4)
+#define OCX_LANE_UKWN_CNTL_WORD			BIT(3)
+#define OCX_LANE_CRC32_ERR			BIT(2)
+#define OCX_LANE_BDRY_SYNC_LOSS			BIT(1)
+#define OCX_LANE_SERDES_LOCK_LOSS		BIT(0)
+
+#define OCX_COM_LANE_INT_UE       (0)
+#define OCX_COM_LANE_INT_CE       (OCX_LANE_SERDES_LOCK_LOSS | \
+				   OCX_LANE_BDRY_SYNC_LOSS   | \
+				   OCX_LANE_CRC32_ERR        | \
+				   OCX_LANE_UKWN_CNTL_WORD   | \
+				   OCX_LANE_SCRM_SYNC_LOSS   | \
+				   OCX_LANE_DSKEW_FIFO_OVFL  | \
+				   OCX_LANE_BAD_64B67B)
+
+static const struct error_descr ocx_lane_errors[] = {
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_SERDES_LOCK_LOSS,
+		.descr = "RX SerDes lock lost",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_BDRY_SYNC_LOSS,
+		.descr = "RX word boundary lost",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_CRC32_ERR,
+		.descr = "CRC32 error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_UKWN_CNTL_WORD,
+		.descr = "Unknown control word",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_SCRM_SYNC_LOSS,
+		.descr = "Scrambler synchronization lost",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_DSKEW_FIFO_OVFL,
+		.descr = "RX deskew FIFO overflow",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_BAD_64B67B,
+		.descr = "Bad 64B/67B codeword",
+	},
+	{0, 0, NULL},
+};
+
+#define OCX_LNE_INT_ENA_ALL		(GENMASK(9, 8) | GENMASK(6, 0))
+#define OCX_COM_INT_ENA_ALL		(GENMASK(54, 50) | GENMASK(23, 0))
+#define OCX_COM_LINKX_INT_ENA_ALL	(GENMASK(13, 12) | \
+					 GENMASK(9, 7) | GENMASK(5, 0))
+
+#define OCX_TLKX_ECC_CTL(x)		(0x10018 + (x) * 0x2000)
+#define OCX_RLKX_ECC_CTL(x)		(0x18018 + (x) * 0x2000)
+
+/* Register snapshot taken in the OCX common hard IRQ handler. */
+struct ocx_com_err_ctx {
+	u64 reg_com_int;
+	u64 reg_lane_int[OCX_RX_LANES];
+	u64 reg_lane_stat11[OCX_RX_LANES];
+};
+
+/* Register snapshot taken in the OCX link hard IRQ handler. */
+struct ocx_link_err_ctx {
+	u64 reg_com_link_int;
+	int link;
+};
+
+/* Per-device private data, stored in edac_device_ctl_info::pvt_info. */
+struct thunderx_ocx {
+	void __iomem *regs;
+	int com_link;
+	struct pci_dev *pdev;
+	struct edac_device_ctl_info *edac_dev;
+
+	struct dentry *debugfs;
+	struct msix_entry msix_ent[OCX_INTS];
+
+	/* Rings filled by the hard IRQ handlers, drained by the IRQ threads. */
+	struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES];
+	struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES];
+
+	unsigned long com_ring_head;
+	unsigned long com_ring_tail;
+
+	unsigned long link_ring_head;
+	unsigned long link_ring_tail;
+};
+
+#define OCX_MESSAGE_SIZE	SZ_1K
+#define OCX_OTHER_SIZE		(50 * ARRAY_SIZE(ocx_com_link_errors))
+
+/*
+ * OCX common hard IRQ handler: snapshot OCX_COM_INT and the per-lane
+ * interrupt/statistics registers into the next ring slot, acknowledge them
+ * and wake thunderx_ocx_com_threaded_isr().
+ */
+static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+						msix_ent[msix->entry]);
+
+	int lane;
+	unsigned long head = ring_pos(ocx->com_ring_head,
+				      ARRAY_SIZE(ocx->com_err_ctx));
+	struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head];
+
+	ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT);
+
+	for (lane = 0; lane < OCX_RX_LANES; lane++) {
+		ctx->reg_lane_int[lane] =
+			readq(ocx->regs + OCX_LNE_INT(lane));
+		ctx->reg_lane_stat11[lane] =
+			readq(ocx->regs + OCX_LNE_STAT(lane, 11));
+
+		writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane));
+	}
+
+	writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT);
+
+	ocx->com_ring_head++;
+
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * OCX common IRQ thread: drain the ring filled by thunderx_ocx_com_isr(),
+ * build a textual report for each entry and pass it to the EDAC core.
+ *
+ * Return: IRQ_HANDLED, or IRQ_NONE if the message buffers cannot be
+ * allocated.
+ */
+static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+						msix_ent[msix->entry]);
+
+	irqreturn_t ret = IRQ_NONE;
+
+	unsigned long tail;
+	struct ocx_com_err_ctx *ctx;
+	int lane;
+	char *msg;
+	char *other;
+
+	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
+	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
+
+	if (!msg || !other)
+		goto err_free;
+
+	while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail,
+			ARRAY_SIZE(ocx->com_err_ctx))) {
+		tail = ring_pos(ocx->com_ring_tail,
+				ARRAY_SIZE(ocx->com_err_ctx));
+		ctx = &ocx->com_err_ctx[tail];
+
+		snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx",
+			 ocx->edac_dev->ctl_name, ctx->reg_com_int);
+
+		decode_register(other, OCX_OTHER_SIZE,
+				ocx_com_errors, ctx->reg_com_int);
+
+		/*
+		 * strlcat() takes the total size of msg; strncat() would
+		 * treat it as the number of bytes that may be appended.
+		 */
+		strlcat(msg, other, OCX_MESSAGE_SIZE);
+
+		for (lane = 0; lane < OCX_RX_LANES; lane++)
+			if (ctx->reg_com_int & BIT(lane)) {
+				snprintf(other, OCX_OTHER_SIZE,
+					 "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx",
+					 lane, ctx->reg_lane_int[lane],
+					 lane, ctx->reg_lane_stat11[lane]);
+
+				strlcat(msg, other, OCX_MESSAGE_SIZE);
+
+				decode_register(other, OCX_OTHER_SIZE,
+						ocx_lane_errors,
+						ctx->reg_lane_int[lane]);
+				strlcat(msg, other, OCX_MESSAGE_SIZE);
+			}
+
+		if (ctx->reg_com_int & OCX_COM_INT_UE)
+			edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
+		else if (ctx->reg_com_int & OCX_COM_INT_CE)
+			edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
+
+		ocx->com_ring_tail++;
+	}
+
+	ret = IRQ_HANDLED;
+
+err_free:
+	kfree(other);
+	kfree(msg);
+
+	return ret;
+}
+
+/*
+ * OCX link hard IRQ handler: record which link fired (the MSI-X entry
+ * number) together with its interrupt register, acknowledge it and wake
+ * thunderx_ocx_lnk_threaded_isr().
+ */
+static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+						msix_ent[msix->entry]);
+	unsigned long head = ring_pos(ocx->link_ring_head,
+				      ARRAY_SIZE(ocx->link_err_ctx));
+	struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head];
+
+	ctx->link = msix->entry;
+	ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link));
+
+	writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link));
+
+	ocx->link_ring_head++;
+
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * OCX link IRQ thread: drain the ring filled by thunderx_ocx_lnk_isr() and
+ * report every entry to the EDAC core.
+ *
+ * Return: IRQ_HANDLED, or IRQ_NONE if the message buffers cannot be
+ * allocated.
+ */
+static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+						msix_ent[msix->entry]);
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long tail;
+	struct ocx_link_err_ctx *ctx;
+
+	char *msg;
+	char *other;
+
+	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
+	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
+
+	if (!msg || !other)
+		goto err_free;
+
+	while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail,
+			ARRAY_SIZE(ocx->link_err_ctx))) {
+		/* Consume from the tail; the head is the producer's slot. */
+		tail = ring_pos(ocx->link_ring_tail,
+				ARRAY_SIZE(ocx->link_err_ctx));
+
+		ctx = &ocx->link_err_ctx[tail];
+
+		snprintf(msg, OCX_MESSAGE_SIZE,
+			 "%s: OCX_COM_LINK_INT[%d]: %016llx",
+			 ocx->edac_dev->ctl_name,
+			 ctx->link, ctx->reg_com_link_int);
+
+		decode_register(other, OCX_OTHER_SIZE,
+				ocx_com_link_errors, ctx->reg_com_link_int);
+
+		/* Bounded by the total size of msg, unlike strncat(). */
+		strlcat(msg, other, OCX_MESSAGE_SIZE);
+
+		if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE)
+			edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
+		else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE)
+			edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
+
+		ocx->link_ring_tail++;
+	}
+
+	ret = IRQ_HANDLED;
+err_free:
+	kfree(other);
+	kfree(msg);
+
+	return ret;
+}
+
+#define OCX_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(ocx, _name, _reg)
+
+OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0));
+OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1));
+OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2));
+
+OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0));
+OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1));
+OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2));
+
+OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0));
+OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1)); +OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2)); + +OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0)); +OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1)); +OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2)); +OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3)); +OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4)); +OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5)); +OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6)); +OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7)); + +OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8)); +OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9)); +OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10)); +OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11)); +OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12)); +OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13)); +OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14)); +OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15)); + +OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16)); +OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17)); +OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18)); +OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19)); +OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20)); +OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21)); +OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22)); +OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23)); + +OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S); + +struct debugfs_entry *ocx_dfs_ents[] = { + &debugfs_tlk0_ecc_ctl, + &debugfs_tlk1_ecc_ctl, + &debugfs_tlk2_ecc_ctl, + + &debugfs_rlk0_ecc_ctl, + &debugfs_rlk1_ecc_ctl, + &debugfs_rlk2_ecc_ctl, + + &debugfs_com_link0_int, + &debugfs_com_link1_int, + &debugfs_com_link2_int, + + &debugfs_lne00_badcnt, + &debugfs_lne01_badcnt, + &debugfs_lne02_badcnt, + &debugfs_lne03_badcnt, + &debugfs_lne04_badcnt, + &debugfs_lne05_badcnt, + &debugfs_lne06_badcnt, + &debugfs_lne07_badcnt, + &debugfs_lne08_badcnt, + &debugfs_lne09_badcnt, + 
&debugfs_lne10_badcnt, + &debugfs_lne11_badcnt, + &debugfs_lne12_badcnt, + &debugfs_lne13_badcnt, + &debugfs_lne14_badcnt, + &debugfs_lne15_badcnt, + &debugfs_lne16_badcnt, + &debugfs_lne17_badcnt, + &debugfs_lne18_badcnt, + &debugfs_lne19_badcnt, + &debugfs_lne20_badcnt, + &debugfs_lne21_badcnt, + &debugfs_lne22_badcnt, + &debugfs_lne23_badcnt, + + &debugfs_com_int, +}; + +static const struct pci_device_id thunderx_ocx_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) }, + { 0, }, +}; + +static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx) +{ + int lane, stat, cfg; + + for (lane = 0; lane < OCX_RX_LANES; lane++) { + cfg = readq(ocx->regs + OCX_LNE_CFG(lane)); + cfg |= OCX_LNE_CFG_RX_STAT_RDCLR; + cfg &= ~OCX_LNE_CFG_RX_STAT_ENA; + writeq(cfg, ocx->regs + OCX_LNE_CFG(lane)); + + for (stat = 0; stat < OCX_RX_LANE_STATS; stat++) + readq(ocx->regs + OCX_LNE_STAT(lane, stat)); + } +} + +static int thunderx_ocx_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_ocx *ocx; + struct edac_device_ctl_info *edac_dev; + char name[32]; + int idx; + int i; + int ret; + u64 reg; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + idx = edac_device_alloc_index(); + snprintf(name, sizeof(name), "OCX%d", idx); + edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx), + name, 1, "CCPI", 1, + 0, NULL, 0, idx); + if (!edac_dev) { + dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret); + return -ENOMEM; + } + ocx = edac_dev->pvt_info; + ocx->edac_dev = edac_dev; + ocx->com_ring_head = 0; + ocx->com_ring_tail = 0; + ocx->link_ring_head = 0; + ocx->link_ring_tail = 0; + + ocx->regs = pcim_iomap_table(pdev)[0]; + if (!ocx->regs) { + dev_err(&pdev->dev, "Cannot map PCI 
resources: %d\n", ret); + ret = -ENODEV; + goto err_free; + } + + ocx->pdev = pdev; + + for (i = 0; i < OCX_INTS; i++) { + ocx->msix_ent[i].entry = i; + ocx->msix_ent[i].vector = 0; + } + + ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + for (i = 0; i < OCX_INTS; i++) { + ret = devm_request_threaded_irq(&pdev->dev, + ocx->msix_ent[i].vector, + (i == 3) ? + thunderx_ocx_com_isr : + thunderx_ocx_lnk_isr, + (i == 3) ? + thunderx_ocx_com_threaded_isr : + thunderx_ocx_lnk_threaded_isr, + 0, "[EDAC] ThunderX OCX", + &ocx->msix_ent[i]); + if (ret) + goto err_free; + } + + edac_dev->dev = &pdev->dev; + edac_dev->dev_name = dev_name(&pdev->dev); + edac_dev->mod_name = "thunderx-ocx"; + edac_dev->ctl_name = "thunderx-ocx"; + + ret = edac_device_add_device(edac_dev); + if (ret) { + dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); + goto err_free; + } + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); + + ret = thunderx_create_debugfs_nodes(ocx->debugfs, + ocx_dfs_ents, + ocx, + ARRAY_SIZE(ocx_dfs_ents)); + if (ret != ARRAY_SIZE(ocx_dfs_ents)) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? 
" created" : ""); + } + } + + pci_set_drvdata(pdev, edac_dev); + + thunderx_ocx_clearstats(ocx); + + for (i = 0; i < OCX_RX_LANES; i++) { + writeq(OCX_LNE_INT_ENA_ALL, + ocx->regs + OCX_LNE_INT_EN(i)); + + reg = readq(ocx->regs + OCX_LNE_INT(i)); + writeq(reg, ocx->regs + OCX_LNE_INT(i)); + + } + + for (i = 0; i < OCX_LINK_INTS; i++) { + reg = readq(ocx->regs + OCX_COM_LINKX_INT(i)); + writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i)); + + writeq(OCX_COM_LINKX_INT_ENA_ALL, + ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i)); + } + + reg = readq(ocx->regs + OCX_COM_INT); + writeq(reg, ocx->regs + OCX_COM_INT); + + writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S); + + return 0; +err_free: + edac_device_free_ctl_info(edac_dev); + + return ret; +} + +static void thunderx_ocx_remove(struct pci_dev *pdev) +{ + struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); + struct thunderx_ocx *ocx = edac_dev->pvt_info; + int i; + + writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C); + + for (i = 0; i < OCX_INTS; i++) { + writeq(OCX_COM_LINKX_INT_ENA_ALL, + ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i)); + } + + edac_debugfs_remove_recursive(ocx->debugfs); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(edac_dev); +} + +MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl); + +static struct pci_driver thunderx_ocx_driver = { + .name = "thunderx_ocx_edac", + .probe = thunderx_ocx_probe, + .remove = thunderx_ocx_remove, + .id_table = thunderx_ocx_pci_tbl, +}; + +/*---------------------- L2C driver ---------------------------------*/ + +#define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e +#define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f +#define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030 + +#define L2C_TAD_INT_W1C 0x40000 +#define L2C_TAD_INT_W1S 0x40008 + +#define L2C_TAD_INT_ENA_W1C 0x40020 +#define L2C_TAD_INT_ENA_W1S 0x40028 + + +#define L2C_TAD_INT_L2DDBE BIT(1) +#define L2C_TAD_INT_SBFSBE BIT(2) +#define L2C_TAD_INT_SBFDBE BIT(3) +#define L2C_TAD_INT_FBFSBE BIT(4) 
+#define L2C_TAD_INT_FBFDBE BIT(5) +#define L2C_TAD_INT_TAGDBE BIT(9) +#define L2C_TAD_INT_RDDISLMC BIT(15) +#define L2C_TAD_INT_WRDISLMC BIT(16) +#define L2C_TAD_INT_LFBTO BIT(17) +#define L2C_TAD_INT_GSYNCTO BIT(18) +#define L2C_TAD_INT_RTGSBE BIT(32) +#define L2C_TAD_INT_RTGDBE BIT(33) +#define L2C_TAD_INT_RDDISOCI BIT(34) +#define L2C_TAD_INT_WRDISOCI BIT(35) + +#define L2C_TAD_INT_ECC (L2C_TAD_INT_L2DDBE | \ + L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \ + L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE) + +#define L2C_TAD_INT_CE (L2C_TAD_INT_SBFSBE | \ + L2C_TAD_INT_FBFSBE) + +#define L2C_TAD_INT_UE (L2C_TAD_INT_L2DDBE | \ + L2C_TAD_INT_SBFDBE | \ + L2C_TAD_INT_FBFDBE | \ + L2C_TAD_INT_TAGDBE | \ + L2C_TAD_INT_RTGDBE | \ + L2C_TAD_INT_WRDISOCI | \ + L2C_TAD_INT_RDDISOCI | \ + L2C_TAD_INT_WRDISLMC | \ + L2C_TAD_INT_RDDISLMC | \ + L2C_TAD_INT_LFBTO | \ + L2C_TAD_INT_GSYNCTO) + +static const struct error_descr l2_tad_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_TAD_INT_SBFSBE, + .descr = "SBF single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = L2C_TAD_INT_FBFSBE, + .descr = "FBF single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_L2DDBE, + .descr = "L2D double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_SBFDBE, + .descr = "SBF double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_FBFDBE, + .descr = "FBF double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_TAGDBE, + .descr = "TAG double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RTGDBE, + .descr = "RTG double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_WRDISOCI, + .descr = "Write to a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RDDISOCI, + .descr = "Read from a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_WRDISLMC, + .descr = "Write to a disabled LMC", + }, + { + .type = ERR_UNCORRECTED, + 
.mask = L2C_TAD_INT_RDDISLMC, + .descr = "Read from a disabled LMC", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_LFBTO, + .descr = "LFB entry timeout", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_GSYNCTO, + .descr = "Global sync CCPI timeout", + }, + {0, 0, NULL}, +}; + +#define L2C_TAD_INT_TAG (L2C_TAD_INT_TAGDBE) + +#define L2C_TAD_INT_RTG (L2C_TAD_INT_RTGDBE) + +#define L2C_TAD_INT_NXM (0) + +#define L2C_TAD_INT_DISLMC (L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC) + +#define L2C_TAD_INT_DISOCI (L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI) + +#define L2C_TAD_INT_ENA_ALL (L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \ + L2C_TAD_INT_RTG | L2C_TAD_INT_NXM | \ + L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \ + L2C_TAD_INT_LFBTO) + +#define L2C_TAD_TIMETWO 0x50000 +#define L2C_TAD_TIMEOUT 0x50100 +#define L2C_TAD_ERR 0x60000 +#define L2C_TAD_TQD_ERR 0x60100 +#define L2C_TAD_TTG_ERR 0x60200 + + +#define L2C_CBC_INT_W1C 0x60000 + +#define L2C_CBC_INT_RSDSBE BIT(0) +#define L2C_CBC_INT_RSDDBE BIT(1) + +#define L2C_CBC_INT_RSD (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE) + +#define L2C_CBC_INT_MIBSBE BIT(4) +#define L2C_CBC_INT_MIBDBE BIT(5) + +#define L2C_CBC_INT_MIB (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE) + +#define L2C_CBC_INT_IORDDISOCI BIT(6) +#define L2C_CBC_INT_IOWRDISOCI BIT(7) + +#define L2C_CBC_INT_IODISOCI (L2C_CBC_INT_IORDDISOCI | \ + L2C_CBC_INT_IOWRDISOCI) + +#define L2C_CBC_INT_CE (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE) +#define L2C_CBC_INT_UE (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE) + + +static const struct error_descr l2_cbc_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_CBC_INT_RSDSBE, + .descr = "RSD single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = L2C_CBC_INT_MIBSBE, + .descr = "MIB single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_RSDDBE, + .descr = "RSD double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_MIBDBE, + .descr = "MIB double-bit error", + }, + { + 
.type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_IORDDISOCI, + .descr = "Read from a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_IOWRDISOCI, + .descr = "Write to a disabled CCPI", + }, + {0, 0, NULL}, +}; + +#define L2C_CBC_INT_W1S 0x60008 +#define L2C_CBC_INT_ENA_W1C 0x60020 + +#define L2C_CBC_INT_ENA_ALL (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \ + L2C_CBC_INT_IODISOCI) + +#define L2C_CBC_INT_ENA_W1S 0x60028 + +#define L2C_CBC_IODISOCIERR 0x80008 +#define L2C_CBC_IOCERR 0x80010 +#define L2C_CBC_RSDERR 0x80018 +#define L2C_CBC_MIBERR 0x80020 + + +#define L2C_MCI_INT_W1C 0x0 + +#define L2C_MCI_INT_VBFSBE BIT(0) +#define L2C_MCI_INT_VBFDBE BIT(1) + +static const struct error_descr l2_mci_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_MCI_INT_VBFSBE, + .descr = "VBF single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_MCI_INT_VBFDBE, + .descr = "VBF double-bit error", + }, + {0, 0, NULL}, +}; + +#define L2C_MCI_INT_W1S 0x8 +#define L2C_MCI_INT_ENA_W1C 0x20 + +#define L2C_MCI_INT_ENA_ALL (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE) + +#define L2C_MCI_INT_ENA_W1S 0x28 + +#define L2C_MCI_ERR 0x10000 + +#define L2C_MESSAGE_SIZE SZ_1K +#define L2C_OTHER_SIZE (50 * ARRAY_SIZE(l2_tad_errors)) + +struct l2c_err_ctx { + char *reg_ext_name; + u64 reg_int; + u64 reg_ext; +}; + +struct thunderx_l2c { + void __iomem *regs; + struct pci_dev *pdev; + struct edac_device_ctl_info *edac_dev; + + struct dentry *debugfs; + + int index; + + struct msix_entry msix_ent; + + struct l2c_err_ctx err_ctx[RING_ENTRIES]; + unsigned long ring_head; + unsigned long ring_tail; +}; + +static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx)); + struct l2c_err_ctx *ctx = &tad->err_ctx[head]; + + ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C); + + if 
(ctx->reg_int & L2C_TAD_INT_ECC) { + ctx->reg_ext_name = "TQD_ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR); + } else if (ctx->reg_int & L2C_TAD_INT_TAG) { + ctx->reg_ext_name = "TTG_ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR); + } else if (ctx->reg_int & L2C_TAD_INT_NXM) { + ctx->reg_ext_name = "ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR); + } else if (ctx->reg_int & L2C_TAD_INT_LFBTO) { + ctx->reg_ext_name = "TIMEOUT"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT); + } else if (ctx->reg_int & L2C_TAD_INT_DISOCI) { + ctx->reg_ext_name = "ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR); + } + + writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C); + + tad->ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx)); + struct l2c_err_ctx *ctx = &cbc->err_ctx[head]; + + ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C); + + if (ctx->reg_int & L2C_CBC_INT_RSD) { + ctx->reg_ext_name = "RSDERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR); + } else if (ctx->reg_int & L2C_CBC_INT_MIB) { + ctx->reg_ext_name = "MIBERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR); + } else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) { + ctx->reg_ext_name = "IODISOCIERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR); + } + + writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C); + + cbc->ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx)); + struct l2c_err_ctx *ctx = &mci->err_ctx[head]; + + ctx->reg_int = readq(mci->regs + 
L2C_MCI_INT_W1C); + ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR); + + writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C); + + ctx->reg_ext_name = "ERR"; + + mci->ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx)); + struct l2c_err_ctx *ctx = &l2c->err_ctx[tail]; + irqreturn_t ret = IRQ_NONE; + + u64 mask_ue, mask_ce; + const struct error_descr *l2_errors; + char *reg_int_name; + + char *msg; + char *other; + + msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + switch (l2c->pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + reg_int_name = "L2C_TAD_INT"; + mask_ue = L2C_TAD_INT_UE; + mask_ce = L2C_TAD_INT_CE; + l2_errors = l2_tad_errors; + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + reg_int_name = "L2C_CBC_INT"; + mask_ue = L2C_CBC_INT_UE; + mask_ce = L2C_CBC_INT_CE; + l2_errors = l2_cbc_errors; + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + reg_int_name = "L2C_MCI_INT"; + mask_ue = L2C_MCI_INT_VBFDBE; + mask_ce = L2C_MCI_INT_VBFSBE; + l2_errors = l2_mci_errors; + break; + default: + dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n", + l2c->pdev->device); + return IRQ_NONE; + } + + while (CIRC_CNT(l2c->ring_head, l2c->ring_tail, + ARRAY_SIZE(l2c->err_ctx))) { + snprintf(msg, L2C_MESSAGE_SIZE, + "%s: %s: %016llx, %s: %016llx", + l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int, + ctx->reg_ext_name, ctx->reg_ext); + + decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int); + + strncat(msg, other, L2C_MESSAGE_SIZE); + + if (ctx->reg_int & mask_ue) + edac_device_handle_ue(l2c->edac_dev, 0, 0, msg); + else if (ctx->reg_int & mask_ce) + edac_device_handle_ce(l2c->edac_dev, 0, 0, msg); + + l2c->ring_tail++; 
+ } + + return IRQ_HANDLED; + +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +#define L2C_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(l2c, _name, _reg) + +L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S); + +struct debugfs_entry *l2c_tad_dfs_ents[] = { + &debugfs_tad_int, +}; + +L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S); + +struct debugfs_entry *l2c_cbc_dfs_ents[] = { + &debugfs_cbc_int, +}; + +L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S); + +struct debugfs_entry *l2c_mci_dfs_ents[] = { + &debugfs_mci_int, +}; + +static const struct pci_device_id thunderx_l2c_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), }, + { 0, }, +}; + +static int thunderx_l2c_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_l2c *l2c; + struct edac_device_ctl_info *edac_dev; + struct debugfs_entry **l2c_devattr; + size_t dfs_entries; + irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL; + char name[32]; + const char *fmt; + u64 reg_en_offs, reg_en_mask; + int idx; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + switch (pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + thunderx_l2c_isr = thunderx_l2c_tad_isr; + l2c_devattr = l2c_tad_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents); + fmt = "L2C-TAD%d"; + reg_en_offs = L2C_TAD_INT_ENA_W1S; + reg_en_mask = L2C_TAD_INT_ENA_ALL; + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + thunderx_l2c_isr = thunderx_l2c_cbc_isr; + l2c_devattr = l2c_cbc_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents); + fmt = "L2C-CBC%d"; + reg_en_offs = L2C_CBC_INT_ENA_W1S; + reg_en_mask = 
L2C_CBC_INT_ENA_ALL; + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + thunderx_l2c_isr = thunderx_l2c_mci_isr; + l2c_devattr = l2c_mci_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents); + fmt = "L2C-MCI%d"; + reg_en_offs = L2C_MCI_INT_ENA_W1S; + reg_en_mask = L2C_MCI_INT_ENA_ALL; + break; + default: + //Should never ever get here + dev_err(&pdev->dev, "Unsupported PCI device: %04x\n", + pdev->device); + return -EINVAL; + } + + idx = edac_device_alloc_index(); + snprintf(name, sizeof(name), fmt, idx); + + edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c), + name, 1, "L2C", 1, 0, + NULL, 0, idx); + if (!edac_dev) { + dev_err(&pdev->dev, "Cannot allocate EDAC device\n"); + return -ENOMEM; + } + + l2c = edac_dev->pvt_info; + l2c->edac_dev = edac_dev; + + l2c->regs = pcim_iomap_table(pdev)[0]; + if (!l2c->regs) { + dev_err(&pdev->dev, "Cannot map PCI resources\n"); + ret = -ENODEV; + goto err_free; + } + + l2c->pdev = pdev; + + l2c->ring_head = 0; + l2c->ring_tail = 0; + + l2c->msix_ent.entry = 0; + l2c->msix_ent.vector = 0; + + ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector, + thunderx_l2c_isr, + thunderx_l2c_threaded_isr, + 0, "[EDAC] ThunderX L2C", + &l2c->msix_ent); + if (ret) + goto err_free; + + edac_dev->dev = &pdev->dev; + edac_dev->dev_name = dev_name(&pdev->dev); + edac_dev->mod_name = "thunderx-l2c"; + edac_dev->ctl_name = "thunderx-l2c"; + + ret = edac_device_add_device(edac_dev); + if (ret) { + dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); + goto err_free; + } + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); + + thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr, + l2c, dfs_entries); + + if (ret != dfs_entries) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? 
" created" : ""); + } + } + + pci_set_drvdata(pdev, edac_dev); + + writeq(reg_en_mask, l2c->regs + reg_en_offs); + + return 0; + +err_free: + edac_device_free_ctl_info(edac_dev); + + return ret; +} + +static void thunderx_l2c_remove(struct pci_dev *pdev) +{ + struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); + struct thunderx_l2c *l2c = edac_dev->pvt_info; + + switch (pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C); + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C); + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C); + break; + } + + edac_debugfs_remove_recursive(l2c->debugfs); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(edac_dev); +} + +MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl); + +static struct pci_driver thunderx_l2c_driver = { + .name = "thunderx_l2c_edac", + .probe = thunderx_l2c_probe, + .remove = thunderx_l2c_remove, + .id_table = thunderx_l2c_pci_tbl, +}; + +static int __init thunderx_edac_init(void) +{ + int rc = 0; + + rc = pci_register_driver(&thunderx_lmc_driver); + if (rc) + return rc; + + rc = pci_register_driver(&thunderx_ocx_driver); + if (rc) + goto err_lmc; + + rc = pci_register_driver(&thunderx_l2c_driver); + if (rc) + goto err_ocx; + + return rc; +err_ocx: + pci_unregister_driver(&thunderx_ocx_driver); +err_lmc: + pci_unregister_driver(&thunderx_lmc_driver); + + return rc; +} + +static void __exit thunderx_edac_exit(void) +{ + pci_unregister_driver(&thunderx_l2c_driver); + pci_unregister_driver(&thunderx_ocx_driver); + pci_unregister_driver(&thunderx_lmc_driver); + +} + +module_init(thunderx_edac_init); +module_exit(thunderx_edac_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Cavium, Inc."); +MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX"); From 621c4fe3cca5a0bc421916696e0c5e764d0a510b Mon Sep 17 00:00:00 
2001 From: Jan Glauber Date: Wed, 5 Apr 2017 12:27:39 +0200 Subject: [PATCH 02/17] EDAC, thunderx: Fix L2C MCI interrupt disable Fix a typo that disabled the MCI interrupts using the wrong bitmask. Signed-off-by: Jan Glauber Cc: David Daney Cc: Ralf Baechle Cc: Sergey Temerkhanov Cc: linux-edac Link: http://lkml.kernel.org/r/20170405102739.6301-1-jglauber@cavium.com Signed-off-by: Borislav Petkov --- drivers/edac/thunderx_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c index b5fe6894df5e..f39f4bb7b0f9 100644 --- a/drivers/edac/thunderx_edac.c +++ b/drivers/edac/thunderx_edac.c @@ -2123,7 +2123,7 @@ static void thunderx_l2c_remove(struct pci_dev *pdev) writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C); break; case PCI_DEVICE_ID_THUNDER_L2C_MCI: - writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C); + writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C); break; } From 25b223ddfe2a557307c05fe673e09d94ae950877 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Wed, 5 Apr 2017 13:01:02 -0500 Subject: [PATCH 03/17] EDAC, altera: Fix peripheral warnings for Cyclone5 The peripherals' RAS functionality only exist on the Arria10 SoCFPGA. The Cyclone5 initialization generates EDAC warnings when the peripherals aren't found in the device tree. Fix by checking for Arria10 in the init functions. 
Signed-off-by: Thor Thayer Cc: linux-edac Link: http://lkml.kernel.org/r/1491415262-5018-1-git-send-email-thor.thayer@linux.intel.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index c5a5b91f37f0..7717b094fabb 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1023,13 +1023,23 @@ out: return ret; } +static int socfpga_is_a10(void) +{ + return of_machine_is_compatible("altr,socfpga-arria10"); +} + static int validate_parent_available(struct device_node *np); static const struct of_device_id altr_edac_a10_device_of_match[]; static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) { int irq; - struct device_node *child, *np = of_find_compatible_node(NULL, NULL, - "altr,socfpga-a10-ecc-manager"); + struct device_node *child, *np; + + if (!socfpga_is_a10()) + return -ENODEV; + + np = of_find_compatible_node(NULL, NULL, + "altr,socfpga-a10-ecc-manager"); if (!np) { edac_printk(KERN_ERR, EDAC_DEVICE, "ECC Manager not found\n"); return -ENODEV; @@ -1545,8 +1555,12 @@ static const struct edac_device_prv_data a10_sdmmceccb_data = { static int __init socfpga_init_sdmmc_ecc(void) { int rc = -ENODEV; - struct device_node *child = of_find_compatible_node(NULL, NULL, - "altr,socfpga-sdmmc-ecc"); + struct device_node *child; + + if (!socfpga_is_a10()) + return -ENODEV; + + child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); if (!child) { edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n"); return -ENODEV; From 3d2d8c0f84ca518da9210b3f855643efd1629e07 Mon Sep 17 00:00:00 2001 From: Sergey Temerkhanov Date: Thu, 6 Apr 2017 11:37:55 +0000 Subject: [PATCH 04/17] EDAC, thunderx: Change LMC index calculation Shift the node number by 3 bits instead of 8 allowing proper functioning with default EDAC_MAX_MCS. 
Signed-off-by: Sergey Temerkhanov Cc: David Daney Cc: Jan.Glauber@cavium.com Cc: linux-edac Link: http://lkml.kernel.org/r/20170406113755.17082-1-s.temerkhanov@gmail.com Signed-off-by: Borislav Petkov --- drivers/edac/thunderx_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c index f39f4bb7b0f9..955f73cdd5fd 100644 --- a/drivers/edac/thunderx_edac.c +++ b/drivers/edac/thunderx_edac.c @@ -670,7 +670,7 @@ static inline int pci_dev_to_mc_idx(struct pci_dev *pdev) int node = dev_to_node(&pdev->dev); int ret = PCI_FUNC(pdev->devfn); - ret += max(node, 0) << 8; + ret += max(node, 0) << 3; return ret; } From 5195c206fda39a10d946505d95ec653e2b83a8ab Mon Sep 17 00:00:00 2001 From: Sergey Temerkhanov Date: Thu, 6 Apr 2017 11:38:34 +0000 Subject: [PATCH 05/17] EDAC, thunderx: Remove unused code Remove unused code reserved for upcoming CPUs. Reported-by: Dan Carpenter Signed-off-by: Sergey Temerkhanov Cc: David Daney Cc: Jan.Glauber@cavium.com Cc: linux-edac Link: http://lkml.kernel.org/r/20170406113834.17153-1-s.temerkhanov@gmail.com Signed-off-by: Borislav Petkov --- drivers/edac/thunderx_edac.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c index 955f73cdd5fd..86d585cb6d32 100644 --- a/drivers/edac/thunderx_edac.c +++ b/drivers/edac/thunderx_edac.c @@ -864,8 +864,6 @@ static struct pci_driver thunderx_lmc_driver = { #define OCX_COM_WIN_REQ_TOUT BIT(50) #define OCX_COM_RX_LANE GENMASK(23, 0) -#define OCX_COM_INT_UE (0) - #define OCX_COM_INT_CE (OCX_COM_IO_BADID | \ OCX_COM_MEM_BADID | \ OCX_COM_COPR_BADID | \ @@ -1175,9 +1173,7 @@ static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id) strncat(msg, other, OCX_MESSAGE_SIZE); } - if (ctx->reg_com_int & OCX_COM_INT_UE) - edac_device_handle_ue(ocx->edac_dev, 0, 0, msg); - else if (ctx->reg_com_int & OCX_COM_INT_CE) + if (ctx->reg_com_int & 
OCX_COM_INT_CE) edac_device_handle_ce(ocx->edac_dev, 0, 0, msg); ocx->com_ring_tail++; @@ -1645,14 +1641,12 @@ static const struct error_descr l2_tad_errors[] = { #define L2C_TAD_INT_RTG (L2C_TAD_INT_RTGDBE) -#define L2C_TAD_INT_NXM (0) - #define L2C_TAD_INT_DISLMC (L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC) #define L2C_TAD_INT_DISOCI (L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI) #define L2C_TAD_INT_ENA_ALL (L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \ - L2C_TAD_INT_RTG | L2C_TAD_INT_NXM | \ + L2C_TAD_INT_RTG | \ L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \ L2C_TAD_INT_LFBTO) @@ -1803,9 +1797,6 @@ static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id) } else if (ctx->reg_int & L2C_TAD_INT_TAG) { ctx->reg_ext_name = "TTG_ERR"; ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR); - } else if (ctx->reg_int & L2C_TAD_INT_NXM) { - ctx->reg_ext_name = "ERR"; - ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR); } else if (ctx->reg_int & L2C_TAD_INT_LFBTO) { ctx->reg_ext_name = "TIMEOUT"; ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT); From 76f6a26ce94f1af2a04fcb44fe74386ec524931f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 3 Feb 2017 14:28:38 +0100 Subject: [PATCH 06/17] EDAC, highbank: Align Makefile directives ... like the rest of the file. 
Signed-off-by: Borislav Petkov --- drivers/edac/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 52d735f29073..76517ebc0a7f 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -67,8 +67,8 @@ obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o obj-$(CONFIG_EDAC_TILE) += tile_edac.o -obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o -obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o +obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o +obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o From db47d5f856467ce0dd3af7e20a33df3d901266df Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 25 Jan 2017 20:30:29 +0100 Subject: [PATCH 07/17] x86/nmi, EDAC: Get rid of DRAM error reporting thru PCI SERR NMI Apparently, some machines used to report DRAM errors through a PCI SERR NMI. This is why we have a call into EDAC in the NMI handler. See c0d121720220 ("drivers/edac: add new nmi rescan"). From looking at the patch above, that's two drivers: e752x_edac.c and e7xxx_edac.c. Now, I wanna say those are old machines which are probably decommissioned already. Tony says that "[t]the newest CPU supported by either of those drivers is the Xeon E7520 (a.k.a. "Nehalem") released in Q1'2010. Possibly some folks are still using these ... but people that hold onto h/w for 7 years generally cling to old s/w too ... so I'd guess it unlikely that we will get complaints for breaking these in upstream." 
So even if there is a small number still in use, we did load EDAC with edac_op_state == EDAC_OPSTATE_POLL by default (we still do, in fact) which means a default EDAC setup without any parameters supplied on the command line or otherwise would never even log the error in the NMI handler because we're polling by default: inline int edac_handler_set(void) { if (edac_op_state == EDAC_OPSTATE_POLL) return 0; return atomic_read(&edac_handlers); } So, long story short, I'd like to get rid of that nastiness called edac_stub.c and confine all the EDAC drivers solely to drivers/edac/. If we ever have to do stuff like that again, it should be notifiers we're using and not some insanity like this one. Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Cc: Tony Luck --- arch/x86/kernel/nmi.c | 11 ----------- drivers/edac/edac_stub.c | 22 ---------------------- include/linux/edac.h | 2 -- 3 files changed, 35 deletions(-) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index f088ea4c66e7..f0c4c890f71b 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -224,17 +224,6 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", reason, smp_processor_id()); - /* - * On some machines, PCI SERR line is used to report memory - * errors. EDAC makes use of it. 
- */ -#if defined(CONFIG_EDAC) - if (edac_handler_set()) { - edac_atomic_assert_error(); - return; - } -#endif - if (panic_on_unrecovered_nmi) nmi_panic(regs, "NMI: Not continuing"); diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index 952e411f01f2..d1389e4b7989 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -44,25 +44,3 @@ static int __init edac_report_setup(char *str) return 0; } __setup("edac_report=", edac_report_setup); - -/* - * called to determine if there is an EDAC driver interested in - * knowing an event (such as NMI) occurred - */ -int edac_handler_set(void) -{ - if (edac_op_state == EDAC_OPSTATE_POLL) - return 0; - - return atomic_read(&edac_handlers); -} -EXPORT_SYMBOL_GPL(edac_handler_set); - -/* - * handler for NMI type of interrupts to assert error - */ -void edac_atomic_assert_error(void) -{ - edac_err_assert++; -} -EXPORT_SYMBOL_GPL(edac_atomic_assert_error); diff --git a/include/linux/edac.h b/include/linux/edac.h index 5b6adf964248..bf8daabf3d51 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -31,8 +31,6 @@ extern int edac_op_state; extern int edac_err_assert; extern atomic_t edac_handlers; -extern int edac_handler_set(void); -extern void edac_atomic_assert_error(void); extern struct bus_type *edac_get_sysfs_subsys(void); enum { From 97bb6c17ad5a0892beb45070dfe8c7d6d0e5326e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 26 Jan 2017 16:49:59 +0100 Subject: [PATCH 08/17] EDAC: Get rid of edac_handlers Use mc_devices list instead to check whether we have EDAC driver instances successfully registered with EDAC core. 
Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc.c | 6 ++---- drivers/edac/edac_stub.c | 3 --- include/linux/edac.h | 1 - 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index e5573c56b15e..824d31193b69 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -601,7 +601,6 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) } list_add_tail_rcu(&mci->link, insert_before); - atomic_inc(&edac_handlers); return 0; fail0: @@ -619,7 +618,6 @@ fail1: static int del_mc_from_global_list(struct mem_ctl_info *mci) { - int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -628,7 +626,7 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci) synchronize_rcu(); INIT_LIST_HEAD(&mci->link); - return handlers; + return list_empty(&mc_devices); } struct mem_ctl_info *edac_mc_find(int idx) @@ -763,7 +761,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) /* mark MCI offline: */ mci->op_state = OP_OFFLINE; - if (!del_mc_from_global_list(mci)) + if (del_mc_from_global_list(mci)) edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index d1389e4b7989..cfb250fa38ce 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -20,9 +20,6 @@ int edac_op_state = EDAC_OPSTATE_INVAL; EXPORT_SYMBOL_GPL(edac_op_state); -atomic_t edac_handlers = ATOMIC_INIT(0); -EXPORT_SYMBOL_GPL(edac_handlers); - int edac_err_assert = 0; EXPORT_SYMBOL_GPL(edac_err_assert); diff --git a/include/linux/edac.h b/include/linux/edac.h index bf8daabf3d51..9fd6fe53ab2a 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -29,7 +29,6 @@ struct device; extern int edac_op_state; extern int edac_err_assert; -extern atomic_t edac_handlers; extern struct bus_type *edac_get_sysfs_subsys(void); From d3116a0837261405e0febb8043fe7040c8ebccb4 Mon Sep 
17 00:00:00 2001 From: Borislav Petkov Date: Thu, 26 Jan 2017 18:25:11 +0100 Subject: [PATCH 09/17] EDAC: Remove edac_err_assert ... and the glue around it. It is not needed anymore. Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc.c | 18 +----------------- drivers/edac/edac_stub.c | 3 --- include/linux/edac.h | 1 - 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 824d31193b69..482b6aea1ce7 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -504,22 +504,6 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev) } EXPORT_SYMBOL_GPL(find_mci_by_dev); -/* - * handler for EDAC to check if NMI type handler has asserted interrupt - */ -static int edac_mc_assert_error_check_and_clear(void) -{ - int old_state; - - if (edac_op_state == EDAC_OPSTATE_POLL) - return 1; - - old_state = edac_err_assert; - edac_err_assert = 0; - - return old_state; -} - /* * edac_mc_workq_function * performs the operation scheduled by a workq request @@ -536,7 +520,7 @@ static void edac_mc_workq_function(struct work_struct *work_req) return; } - if (edac_mc_assert_error_check_and_clear()) + if (edac_op_state == EDAC_OPSTATE_POLL) mci->edac_check(mci); mutex_unlock(&mem_ctls_mutex); diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index cfb250fa38ce..f02d21d8130f 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -20,9 +20,6 @@ int edac_op_state = EDAC_OPSTATE_INVAL; EXPORT_SYMBOL_GPL(edac_op_state); -int edac_err_assert = 0; -EXPORT_SYMBOL_GPL(edac_err_assert); - int edac_report_status = EDAC_REPORTING_ENABLED; EXPORT_SYMBOL_GPL(edac_report_status); diff --git a/include/linux/edac.h b/include/linux/edac.h index 9fd6fe53ab2a..c55e93975079 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -28,7 +28,6 @@ struct device; #define EDAC_OPSTATE_INT 2 extern int edac_op_state; -extern int edac_err_assert; extern struct bus_type *edac_get_sysfs_subsys(void); From 
8c22b4fece594e0602a6204876842b5a94df6568 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 26 Jan 2017 22:18:12 +0100 Subject: [PATCH 10/17] EDAC: Move edac_op_state to edac_mc.c ... as part of moving stuff away from edac_stub.c Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc.c | 3 +++ drivers/edac/edac_stub.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 482b6aea1ce7..681387d20fb9 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -40,6 +40,9 @@ #define edac_atomic_scrub(va, size) do { } while (0) #endif +int edac_op_state = EDAC_OPSTATE_INVAL; +EXPORT_SYMBOL_GPL(edac_op_state); + /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index f02d21d8130f..aa31cbd17cd2 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -17,9 +17,6 @@ #include #include -int edac_op_state = EDAC_OPSTATE_INVAL; -EXPORT_SYMBOL_GPL(edac_op_state); - int edac_report_status = EDAC_REPORTING_ENABLED; EXPORT_SYMBOL_GPL(edac_report_status); From 9613916b02bfc7567ac2c5178469185e8912ddff Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 3 Feb 2017 14:17:43 +0100 Subject: [PATCH 11/17] ACPI/extlog: Add EDAC dependency We are calling EDAC functions - make the proper dependencies explicit. 
Signed-off-by: Borislav Petkov Cc: linux-acpi@vger.kernel.org --- drivers/acpi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 83e5f7e1a20d..a71874df3410 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -469,7 +469,7 @@ config ACPI_WATCHDOG config ACPI_EXTLOG tristate "Extended Error Log support" - depends on X86_MCE && X86_LOCAL_APIC + depends on X86_MCE && X86_LOCAL_APIC && EDAC select UEFI_CPER select RAS default n From be1d162948f5bb0ced260e60208e7dc06cd45cab Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 3 Feb 2017 17:15:53 +0100 Subject: [PATCH 12/17] EDAC: Issue tracepoint only when it is defined ... and this happens only when CONFIG_RAS is enabled. Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 681387d20fb9..735546ea6ebe 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -1180,10 +1180,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, /* Report the error via the trace interface */ grain_bits = fls_long(e->grain) + 1; - trace_mc_event(type, e->msg, e->label, e->error_count, - mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, - (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page, - grain_bits, e->syndrome, e->other_detail); + + if (IS_ENABLED(CONFIG_RAS)) + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, + e->low_layer, + (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); edac_raw_mc_handle_error(type, mci, e); } From e3c4ff6d8c949fa9a9ea1bd005bf1967efe09d5d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 3 Feb 2017 18:18:05 +0100 Subject: [PATCH 13/17] EDAC: Remove EDAC_MM_EDAC Move all the EDAC core functionality behind CONFIG_EDAC and get rid of that 
indirection. Update defconfigs which had it. While at it, fix dependencies such that EDAC depends on RAS for the tracepoints. Signed-off-by: Borislav Petkov Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: Chris Metcalf Cc: linux-edac@vger.kernel.org --- arch/arm/configs/multi_v7_defconfig | 1 - arch/arm/configs/pxa_defconfig | 3 +- arch/powerpc/configs/85xx-hw.config | 3 +- arch/powerpc/configs/85xx/ge_imp3a_defconfig | 1 - .../configs/85xx/xes_mpc85xx_defconfig | 1 - arch/powerpc/configs/cell_defconfig | 1 - arch/powerpc/configs/pasemi_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/configs/ppc64e_defconfig | 1 - arch/powerpc/configs/ppc6xx_defconfig | 3 +- arch/tile/configs/tilegx_defconfig | 1 - arch/tile/configs/tilepro_defconfig | 1 - drivers/acpi/Kconfig | 1 - drivers/edac/Kconfig | 101 ++++++++---------- drivers/edac/Makefile | 3 +- drivers/edac/edac_stub.c | 2 +- 16 files changed, 48 insertions(+), 77 deletions(-) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index a94126fb02c2..6aa7be191f1a 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -748,7 +748,6 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_LEDS_TRIGGER_TRANSIENT=y CONFIG_LEDS_TRIGGER_CAMERA=y CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_EDAC_HIGHBANK_MC=y CONFIG_EDAC_HIGHBANK_L2=y CONFIG_RTC_CLASS=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 2aac99fd1c41..1318f61589dc 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -635,8 +635,7 @@ CONFIG_LEDS_TRIGGER_GPIO=m CONFIG_LEDS_TRIGGER_DEFAULT_ON=m CONFIG_LEDS_TRIGGER_TRANSIENT=m CONFIG_LEDS_TRIGGER_CAMERA=m -CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=m +CONFIG_EDAC=m CONFIG_RTC_CLASS=y CONFIG_RTC_DEBUG=y CONFIG_RTC_DRV_DS1307=m diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config index 528ff0e714e6..c03d0fb16665 100644 
--- a/arch/powerpc/configs/85xx-hw.config +++ b/arch/powerpc/configs/85xx-hw.config @@ -16,9 +16,8 @@ CONFIG_DAVICOM_PHY=y CONFIG_DMADEVICES=y CONFIG_E1000E=y CONFIG_E1000=y -CONFIG_EDAC_MM_EDAC=y -CONFIG_EDAC_MPC85XX=y CONFIG_EDAC=y +CONFIG_EDAC_MPC85XX=y CONFIG_EEPROM_AT24=y CONFIG_EEPROM_LEGACY=y CONFIG_FB_FSL_DIU=y diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index c79283be5680..a917f7afb4f9 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -155,7 +155,6 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y CONFIG_USB_OHCI_HCD_PPC_OF_LE=y CONFIG_USB_STORAGE=y CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_EDAC_MPC85XX=y CONFIG_RTC_CLASS=y # CONFIG_RTC_INTF_PROC is not set diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index dbd961de251e..72900b84d3e0 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -116,7 +116,6 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_CMOS=y diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 2d7fcbe047ac..aa564599e368 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -179,7 +179,6 @@ CONFIG_INFINIBAND_MTHCA=m CONFIG_INFINIBAND_IPOIB=m CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_EDAC_CELL=y CONFIG_UIO=m CONFIG_EXT2_FS=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 5553c5ce4274..fe43ff47bd2f 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -142,7 +142,6 @@ CONFIG_USB_UHCI_HCD=y CONFIG_USB_SL811_HCD=y CONFIG_USB_STORAGE=y CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y 
CONFIG_EDAC_PASEMI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 4f1288b04303..f2e03f032041 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -262,7 +262,6 @@ CONFIG_INFINIBAND_IPOIB_CM=y CONFIG_INFINIBAND_SRP=m CONFIG_INFINIBAND_ISER=m CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_EDAC_PASEMI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 11a3473f9e2e..6340e6c53c54 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -173,7 +173,6 @@ CONFIG_INFINIBAND_MTHCA=m CONFIG_INFINIBAND_IPOIB=m CONFIG_INFINIBAND_ISER=m CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_FS_DAX=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 1d2d69dd6409..18d0d60dadbf 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -988,8 +988,7 @@ CONFIG_LEDS_TRIGGER_BACKLIGHT=m CONFIG_LEDS_TRIGGER_DEFAULT_ON=m CONFIG_ACCESSIBILITY=y CONFIG_A11Y_BRAILLE_CONSOLE=y -CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=m +CONFIG_EDAC=m CONFIG_RTC_CLASS=y # CONFIG_RTC_HCTOSYS is not set CONFIG_RTC_DRV_DS1307=m diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index fd122ef45b00..0d925fa0f0c1 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -249,7 +249,6 @@ CONFIG_USB_EHCI_HCD=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_TILE=y CONFIG_EXT2_FS=y diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index eb6a55944191..149d8e8eacb8 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -358,7 +358,6 @@ 
CONFIG_WATCHDOG_NOWAYOUT=y # CONFIG_VGA_ARB is not set # CONFIG_USB_SUPPORT is not set CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_TILE=y CONFIG_EXT2_FS=y diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index a71874df3410..a20cfcbee694 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -471,7 +471,6 @@ config ACPI_EXTLOG tristate "Extended Error Log support" depends on X86_MCE && X86_LOCAL_APIC && EDAC select UEFI_CPER - select RAS default n help Certain usages such as Predictive Failure Analysis (PFA) require diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 7c68e6f955c7..1ac18c989fb3 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -10,8 +10,8 @@ config EDAC_SUPPORT bool menuconfig EDAC - bool "EDAC (Error Detection And Correction) reporting" - depends on HAS_IOMEM && EDAC_SUPPORT + tristate "EDAC (Error Detection And Correction) reporting" + depends on HAS_IOMEM && EDAC_SUPPORT && RAS help EDAC is designed to report errors in the core system. These are low-level errors that are reported in the CPU or @@ -62,20 +62,9 @@ config EDAC_DECODE_MCE which occur really early upon boot, before the module infrastructure has been initialized. -config EDAC_MM_EDAC - tristate "Main Memory EDAC (Error Detection And Correction) reporting" - select RAS - help - Some systems are able to detect and correct errors in main - memory. EDAC can report statistics on memory error - detection and correction (EDAC - or commonly referred to ECC - errors). EDAC will also try to decode where these errors - occurred so that a particular failing memory module can be - replaced. If unsure, select 'Y'. - config EDAC_GHES bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" - depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) + depends on ACPI_APEI_GHES && (EDAC=y) default y help Not all machines support hardware-driven error report. 
Some of those @@ -98,7 +87,7 @@ config EDAC_GHES config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" - depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE + depends on AMD_NB && EDAC_DECODE_MCE help Support for error detection and correction of DRAM ECC errors on the AMD64 families (>= K8) of memory controllers. @@ -124,28 +113,28 @@ config EDAC_AMD64_ERROR_INJECTION config EDAC_AMD76X tristate "AMD 76x (760, 762, 768)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the AMD 76x series of chipsets used with the Athlon processor. config EDAC_E7XXX tristate "Intel e7xxx (e7205, e7500, e7501, e7505)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel E7205, E7500, E7501 and E7505 server chipsets. config EDAC_E752X tristate "Intel e752x (e7520, e7525, e7320) and 3100" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel E7520, E7525, E7320 server chipsets. config EDAC_I82443BXGX tristate "Intel 82443BX/GX (440BX/GX)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 depends on BROKEN help Support for error detection and correction on the Intel @@ -153,56 +142,56 @@ config EDAC_I82443BXGX config EDAC_I82875P tristate "Intel 82875p (D82875P, E7210)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel DP82785P and E7210 server chipsets. config EDAC_I82975X tristate "Intel 82975x (D82975x)" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel DP82975x server chipsets. config EDAC_I3000 tristate "Intel 3000/3010" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel 3000 and 3010 server chipsets. 
config EDAC_I3200 tristate "Intel 3200" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel 3200 and 3210 server chipsets. config EDAC_IE31200 tristate "Intel e312xx" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel E3-1200 based DRAM controllers. config EDAC_X38 tristate "Intel X38" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel X38 server chipsets. config EDAC_I5400 tristate "Intel 5400 (Seaburg) chipsets" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction the Intel i5400 MCH chipset (Seaburg). config EDAC_I7CORE tristate "Intel i7 Core (Nehalem) processors" - depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL + depends on PCI && X86 && X86_MCE_INTEL help Support for error detection and correction the Intel i7 Core (Nehalem) Integrated Memory Controller that exists on @@ -211,58 +200,56 @@ config EDAC_I7CORE config EDAC_I82860 tristate "Intel 82860" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel 82860 chipset. config EDAC_R82600 tristate "Radisys 82600 embedded chipset" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Radisys 82600 embedded chipset. config EDAC_I5000 tristate "Intel Greencreek/Blackford chipset" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel Greekcreek/Blackford chipsets. config EDAC_I5100 tristate "Intel San Clemente MCH" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel San Clemente MCH. 
config EDAC_I7300 tristate "Intel Clarksboro MCH" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel Clarksboro MCH (Intel 7300 chipset). config EDAC_SBRIDGE tristate "Intel Sandy-Bridge/Ivy-Bridge/Haswell Integrated MC" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on PCI_MMCONFIG + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG help Support for error detection and correction the Intel Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers. config EDAC_SKX tristate "Intel Skylake server Integrated MC" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on PCI_MMCONFIG + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG help Support for error detection and correction the Intel Skylake server Integrated Memory Controllers. config EDAC_PND2 tristate "Intel Pondicherry2" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL + depends on PCI && X86_64 && X86_MCE_INTEL help Support for error detection and correction on the Intel Pondicherry2 Integrated Memory Controller. This SoC IP is @@ -271,36 +258,35 @@ config EDAC_PND2 config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" - depends on EDAC_MM_EDAC && FSL_SOC + depends on FSL_SOC help Support for error detection and correction on the Freescale MPC8349, MPC8560, MPC8540, MPC8548, T4240 config EDAC_LAYERSCAPE tristate "Freescale Layerscape DDR" - depends on EDAC_MM_EDAC && ARCH_LAYERSCAPE + depends on ARCH_LAYERSCAPE help Support for error detection and correction on Freescale memory controllers on Layerscape SoCs. config EDAC_MV64X60 tristate "Marvell MV64x60" - depends on EDAC_MM_EDAC && MV64X60 + depends on MV64X60 help Support for error detection and correction on the Marvell MV64360 and MV64460 chipsets. 
config EDAC_PASEMI tristate "PA Semi PWRficient" - depends on EDAC_MM_EDAC && PCI - depends on PPC_PASEMI + depends on PPC_PASEMI && PCI help Support for error detection and correction on PA Semi PWRficient. config EDAC_CELL tristate "Cell Broadband Engine memory controller" - depends on EDAC_MM_EDAC && PPC_CELL_COMMON + depends on PPC_CELL_COMMON help Support for error detection and correction on the Cell Broadband Engine internal memory controller @@ -308,7 +294,7 @@ config EDAC_CELL config EDAC_PPC4XX tristate "PPC4xx IBM DDR2 Memory Controller" - depends on EDAC_MM_EDAC && 4xx + depends on 4xx help This enables support for EDAC on the ECC memory used with the IBM DDR2 memory controller found in various @@ -317,7 +303,7 @@ config EDAC_PPC4XX config EDAC_AMD8131 tristate "AMD8131 HyperTransport PCI-X Tunnel" - depends on EDAC_MM_EDAC && PCI && PPC_MAPLE + depends on PCI && PPC_MAPLE help Support for error detection and correction on the AMD8131 HyperTransport PCI-X Tunnel chip. @@ -326,7 +312,7 @@ config EDAC_AMD8131 config EDAC_AMD8111 tristate "AMD8111 HyperTransport I/O Hub" - depends on EDAC_MM_EDAC && PCI && PPC_MAPLE + depends on PCI && PPC_MAPLE help Support for error detection and correction on the AMD8111 HyperTransport I/O Hub chip. 
@@ -335,7 +321,7 @@ config EDAC_AMD8111 config EDAC_CPC925 tristate "IBM CPC925 Memory Controller (PPC970FX)" - depends on EDAC_MM_EDAC && PPC64 + depends on PPC64 help Support for error detection and correction on the IBM CPC925 Bridge and Memory Controller, which is @@ -344,7 +330,7 @@ config EDAC_CPC925 config EDAC_TILE tristate "Tilera Memory Controller" - depends on EDAC_MM_EDAC && TILE + depends on TILE default y help Support for error detection and correction on the @@ -352,49 +338,48 @@ config EDAC_TILE config EDAC_HIGHBANK_MC tristate "Highbank Memory Controller" - depends on EDAC_MM_EDAC && ARCH_HIGHBANK + depends on ARCH_HIGHBANK help Support for error detection and correction on the Calxeda Highbank memory controller. config EDAC_HIGHBANK_L2 tristate "Highbank L2 Cache" - depends on EDAC_MM_EDAC && ARCH_HIGHBANK + depends on ARCH_HIGHBANK help Support for error detection and correction on the Calxeda Highbank memory controller. config EDAC_OCTEON_PC tristate "Cavium Octeon Primary Caches" - depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + depends on CPU_CAVIUM_OCTEON help Support for error detection and correction on the primary caches of the cnMIPS cores of Cavium Octeon family SOCs. config EDAC_OCTEON_L2C tristate "Cavium Octeon Secondary Caches (L2C)" - depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + depends on CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_LMC tristate "Cavium Octeon DRAM Memory Controller (LMC)" - depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + depends on CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_PCI tristate "Cavium Octeon PCI Controller" - depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC + depends on PCI && CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. 
config EDAC_THUNDERX tristate "Cavium ThunderX EDAC" - depends on EDAC_MM_EDAC depends on ARM64 depends on PCI help @@ -405,7 +390,7 @@ config EDAC_THUNDERX config EDAC_ALTERA bool "Altera SOCFPGA ECC" - depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA + depends on EDAC=y && ARCH_SOCFPGA help Support for error detection and correction on the Altera SOCs. This must be selected for SDRAM ECC. @@ -471,14 +456,14 @@ config EDAC_ALTERA_SDMMC config EDAC_SYNOPSYS tristate "Synopsys DDR Memory Controller" - depends on EDAC_MM_EDAC && ARCH_ZYNQ + depends on ARCH_ZYNQ help Support for error detection and correction on the Synopsys DDR memory controller. config EDAC_XGENE tristate "APM X-Gene SoC" - depends on EDAC_MM_EDAC && (ARM64 || COMPILE_TEST) + depends on (ARM64 || COMPILE_TEST) help Support for error detection and correction on the APM X-Gene family of SOCs. diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 76517ebc0a7f..a8fb734cb28d 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -6,8 +6,7 @@ # GNU General Public License. # -obj-$(CONFIG_EDAC) := edac_stub.o -obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o +obj-$(CONFIG_EDAC) := edac_stub.o edac_core.o edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o wq.o diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index aa31cbd17cd2..6aacc569401e 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -20,7 +20,7 @@ int edac_report_status = EDAC_REPORTING_ENABLED; EXPORT_SYMBOL_GPL(edac_report_status); -static int __init edac_report_setup(char *str) +static int __init __maybe_unused edac_report_setup(char *str) { if (!str) return -EINVAL; From a06b85ff07147cf03c4d5e17db9ac0644bd27861 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 4 Feb 2017 16:32:27 +0100 Subject: [PATCH 14/17] EDAC: Update Kconfig help text Remove the old URLs. 
Signed-off-by: Borislav Petkov --- drivers/edac/Kconfig | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 1ac18c989fb3..27337ef9874b 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -13,23 +13,13 @@ menuconfig EDAC tristate "EDAC (Error Detection And Correction) reporting" depends on HAS_IOMEM && EDAC_SUPPORT && RAS help - EDAC is designed to report errors in the core system. - These are low-level errors that are reported in the CPU or - supporting chipset or other subsystems: + EDAC is a subsystem along with hardware-specific drivers designed to + report hardware errors. These are low-level errors that are reported + in the CPU or supporting chipset or other subsystems: memory errors, cache errors, PCI errors, thermal throttling, etc.. If unsure, select 'Y'. - If this code is reporting problems on your system, please - see the EDAC project web pages for more information at: - - - - and: - - - - There is also a mailing list for the EDAC project, which can - be found via the sourceforge page. + The mailing list for the EDAC project is linux-edac@vger.kernel.org. if EDAC From fee27d7d97886515a60cce38b4152b7f5b5a21fc Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 4 Feb 2017 17:42:03 +0100 Subject: [PATCH 15/17] EDAC: Delete edac_stub.c Move the remaining functionality to edac_mc.c. Convert "edac_report=" to a module parameter. Signed-off-by: Borislav Petkov --- drivers/edac/Makefile | 2 +- drivers/edac/edac_mc.c | 61 ++++++++++++++++++++++++++++++++++++++++ drivers/edac/edac_stub.c | 37 ------------------------ include/linux/edac.h | 26 ++--------------- 4 files changed, 65 insertions(+), 61 deletions(-) delete mode 100644 drivers/edac/edac_stub.c diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index a8fb734cb28d..0fd9ffa63299 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -6,7 +6,7 @@ # GNU General Public License. 
# -obj-$(CONFIG_EDAC) := edac_stub.o edac_core.o +obj-$(CONFIG_EDAC) := edac_core.o edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o wq.o diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 735546ea6ebe..536b65aa6fac 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -43,6 +43,8 @@ int edac_op_state = EDAC_OPSTATE_INVAL; EXPORT_SYMBOL_GPL(edac_op_state); +static int edac_report = EDAC_REPORTING_ENABLED; + /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); @@ -55,6 +57,65 @@ static void const *edac_mc_owner; static struct bus_type mc_bus[EDAC_MAX_MCS]; +int get_edac_report_status(void) +{ + return edac_report; +} +EXPORT_SYMBOL_GPL(get_edac_report_status); + +void set_edac_report_status(int new) +{ + if (new == EDAC_REPORTING_ENABLED || + new == EDAC_REPORTING_DISABLED || + new == EDAC_REPORTING_FORCE) + edac_report = new; +} +EXPORT_SYMBOL_GPL(set_edac_report_status); + +static int edac_report_set(const char *str, const struct kernel_param *kp) +{ + if (!str) + return -EINVAL; + + if (!strncmp(str, "on", 2)) + edac_report = EDAC_REPORTING_ENABLED; + else if (!strncmp(str, "off", 3)) + edac_report = EDAC_REPORTING_DISABLED; + else if (!strncmp(str, "force", 5)) + edac_report = EDAC_REPORTING_FORCE; + + return 0; +} + +static int edac_report_get(char *buffer, const struct kernel_param *kp) +{ + int ret = 0; + + switch (edac_report) { + case EDAC_REPORTING_ENABLED: + ret = sprintf(buffer, "on"); + break; + case EDAC_REPORTING_DISABLED: + ret = sprintf(buffer, "off"); + break; + case EDAC_REPORTING_FORCE: + ret = sprintf(buffer, "force"); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct kernel_param_ops edac_report_ops = { + .set = edac_report_set, + .get = edac_report_get, +}; + +module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644); + unsigned 
edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c deleted file mode 100644 index 6aacc569401e..000000000000 --- a/drivers/edac/edac_stub.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * common EDAC components that must be in kernel - * - * Author: Dave Jiang - * - * 2007 (c) MontaVista Software, Inc. - * 2010 (c) Advanced Micro Devices Inc. - * Borislav Petkov - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - * - */ -#include -#include -#include -#include - -int edac_report_status = EDAC_REPORTING_ENABLED; -EXPORT_SYMBOL_GPL(edac_report_status); - -static int __init __maybe_unused edac_report_setup(char *str) -{ - if (!str) - return -EINVAL; - - if (!strncmp(str, "on", 2)) - set_edac_report_status(EDAC_REPORTING_ENABLED); - else if (!strncmp(str, "off", 3)) - set_edac_report_status(EDAC_REPORTING_DISABLED); - else if (!strncmp(str, "force", 5)) - set_edac_report_status(EDAC_REPORTING_FORCE); - - return 0; -} -__setup("edac_report=", edac_report_setup); diff --git a/include/linux/edac.h b/include/linux/edac.h index c55e93975079..faf87e1eca21 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -29,7 +29,9 @@ struct device; extern int edac_op_state; -extern struct bus_type *edac_get_sysfs_subsys(void); +struct bus_type *edac_get_sysfs_subsys(void); +int get_edac_report_status(void); +void set_edac_report_status(int new); enum { EDAC_REPORTING_ENABLED, @@ -37,28 +39,6 @@ enum { EDAC_REPORTING_FORCE }; -extern int edac_report_status; -#ifdef CONFIG_EDAC -static inline int get_edac_report_status(void) -{ - return edac_report_status; -} - -static inline void set_edac_report_status(int new) -{ - edac_report_status = new; -} -#else -static inline int get_edac_report_status(void) -{ - return EDAC_REPORTING_DISABLED; -} - -static inline 
void set_edac_report_status(int new) -{ -} -#endif - static inline void opstate_init(void) { switch (edac_op_state) { From bffc7dece92edd0b6445b76a378e2fa9e324c7ed Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 4 Feb 2017 18:10:14 +0100 Subject: [PATCH 16/17] EDAC: Rename report status accessors Change them to have the edac_ prefix. No functionality change. Signed-off-by: Borislav Petkov --- drivers/acpi/acpi_extlog.c | 8 ++++---- drivers/edac/edac_mc.c | 8 ++++---- drivers/edac/pnd2_edac.c | 2 +- drivers/edac/sb_edac.c | 4 ++-- drivers/edac/skx_edac.c | 2 +- include/linux/edac.h | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index a15270a806fc..502ea4dc2080 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -229,7 +229,7 @@ static int __init extlog_init(void) if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) return -ENODEV; - if (get_edac_report_status() == EDAC_REPORTING_FORCE) { + if (edac_get_report_status() == EDAC_REPORTING_FORCE) { pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); return -EPERM; } @@ -285,8 +285,8 @@ static int __init extlog_init(void) * eMCA event report method has higher priority than EDAC method, * unless EDAC event report method is mandatory. 
*/ - old_edac_report_status = get_edac_report_status(); - set_edac_report_status(EDAC_REPORTING_DISABLED); + old_edac_report_status = edac_get_report_status(); + edac_set_report_status(EDAC_REPORTING_DISABLED); mce_register_decode_chain(&extlog_mce_dec); /* enable OS to be involved to take over management from BIOS */ ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; @@ -308,7 +308,7 @@ err: static void __exit extlog_exit(void) { - set_edac_report_status(old_edac_report_status); + edac_set_report_status(old_edac_report_status); mce_unregister_decode_chain(&extlog_mce_dec); ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; if (extlog_l1_addr) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 536b65aa6fac..480072139b7a 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -57,20 +57,20 @@ static void const *edac_mc_owner; static struct bus_type mc_bus[EDAC_MAX_MCS]; -int get_edac_report_status(void) +int edac_get_report_status(void) { return edac_report; } -EXPORT_SYMBOL_GPL(get_edac_report_status); +EXPORT_SYMBOL_GPL(edac_get_report_status); -void set_edac_report_status(int new) +void edac_set_report_status(int new) { if (new == EDAC_REPORTING_ENABLED || new == EDAC_REPORTING_DISABLED || new == EDAC_REPORTING_FORCE) edac_report = new; } -EXPORT_SYMBOL_GPL(set_edac_report_status); +EXPORT_SYMBOL_GPL(edac_set_report_status); static int edac_report_set(const char *str, const struct kernel_param *kp) { diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 928e0dba41fc..1cad5a9af8d0 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1349,7 +1349,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo struct dram_addr daddr; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; mci = pnd2_mci; diff --git a/drivers/edac/sb_edac.c 
b/drivers/edac/sb_edac.c index a65ea44e3b0b..ea21cb651b3c 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3075,7 +3075,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, struct sbridge_pvt *pvt; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; mci = get_mci_for_node_id(mce->socketid); @@ -3441,7 +3441,7 @@ static int __init sbridge_init(void) if (rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; } diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index 1159dba4671f..64bef6c9cfb4 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -971,7 +971,7 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, struct mem_ctl_info *mci; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; /* ignore unless this is memory related with an address */ diff --git a/include/linux/edac.h b/include/linux/edac.h index faf87e1eca21..8ae0f45fafd6 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -30,8 +30,8 @@ struct device; extern int edac_op_state; struct bus_type *edac_get_sysfs_subsys(void); -int get_edac_report_status(void); -void set_edac_report_status(int new); +int edac_get_report_status(void); +void edac_set_report_status(int new); enum { EDAC_REPORTING_ENABLED, From f8d5549df25e3961d6bd2ae36d3e0b08614660d9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 26 Apr 2017 12:22:08 +0200 Subject: [PATCH 17/17] EDAC, ghes: Do not enable it by default Leave it to the user to decide whether to enable this or not. 
Otherwise, platform-specific drivers won't initialize (currently, EDAC supports only a single platform driver loaded). Signed-off-by: Borislav Petkov --- drivers/edac/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 27337ef9874b..96afb2aeed18 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -55,7 +55,6 @@ config EDAC_DECODE_MCE config EDAC_GHES bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" depends on ACPI_APEI_GHES && (EDAC=y) - default y help Not all machines support hardware-driven error report. Some of those provide a BIOS-driven error report mechanism via ACPI, using the