Various fixes and support for new CPUs
- Clean up error messages from thunderx_edac - Add MODULE_DEVICE_TABLE to ti_edac so it will autoload - Use %pR to print resources in aspeed_edac - Add Yazen Ghannam as MAINTAINER for AMD edac drivers - Fix Ice Lake and Sapphire Rapids drivers to report correct "near" or "far" device for errors in 2LM configurations - Add support of on package high bandwidth memory in Sapphire Rapids - New CPU support for three CPUs supporting in-band ECC (IOT SKUs for ICL-NNPI, Tiger Lake and Alder Lake) - Don't even try to load Intel EDAC drivers when running as a guest - Fix Kconfig dependency on X86_MCE_INTEL for EDAC_IGEN6 -----BEGIN PGP SIGNATURE----- iIoEABYIADIWIQQW3WBGcnu5yJnSXn0kTJLX0iGMLAUCYNujCRQcdG9ueS5sdWNr QGludGVsLmNvbQAKCRAkTJLX0iGMLCazAPsHoGc9ymStw0hL06Lw71Va/3VyqiUZ ha1OTWXCcV42UgD/QlwhKkHC3UkEI1dTEAI6McXcC88s8+yXQ8fKAPeQyQw= =iTWK -----END PGP SIGNATURE----- Merge tag 'edac_updates_for_v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras Pull EDAC updates from Tony Luck: "Various fixes and support for new CPUs: - Clean up error messages from thunderx_edac - Add MODULE_DEVICE_TABLE to ti_edac so it will autoload - Use %pR to print resources in aspeed_edac - Add Yazen Ghannam as MAINTAINER for AMD edac drivers - Fix Ice Lake and Sapphire Rapids drivers to report correct "near" or "far" device for errors in 2LM configurations - Add support of on package high bandwidth memory in Sapphire Rapids - New CPU support for three CPUs supporting in-band ECC (IOT SKUs for ICL-NNPI, Tiger Lake and Alder Lake) - Don't even try to load Intel EDAC drivers when running as a guest - Fix Kconfig dependency on X86_MCE_INTEL for EDAC_IGEN6" * tag 'edac_updates_for_v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: EDAC/igen6: fix core dependency EDAC/Intel: Do not load EDAC driver when running as a guest EDAC/igen6: Add Intel Alder Lake SoC support EDAC/igen6: Add Intel Tiger Lake SoC support EDAC/igen6: Add Intel ICL-NNPI SoC support EDAC/i10nm: Add support 
for high bandwidth memory EDAC/i10nm: Add detection of memory levels for ICX/SPR servers EDAC/skx_common: Add new ADXL components for 2-level memory MAINTAINERS: Make Yazen Ghannam maintainer for EDAC-AMD64 EDAC/aspeed: Use proper format string for printing resource EDAC/ti: Add missing MODULE_DEVICE_TABLE EDAC/thunderx: Remove irrelevant variable from error messages
This commit is contained in:
commit
4b5e35ce07
|
@ -6467,10 +6467,11 @@ F: Documentation/filesystems/ecryptfs.rst
|
|||
F: fs/ecryptfs/
|
||||
|
||||
EDAC-AMD64
|
||||
M: Borislav Petkov <bp@alien8.de>
|
||||
M: Yazen Ghannam <yazen.ghannam@amd.com>
|
||||
L: linux-edac@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Supported
|
||||
F: drivers/edac/amd64_edac*
|
||||
F: drivers/edac/mce_amd*
|
||||
|
||||
EDAC-ARMADA
|
||||
M: Jan Luebbe <jlu@pengutronix.de>
|
||||
|
|
|
@ -270,7 +270,8 @@ config EDAC_PND2
|
|||
|
||||
config EDAC_IGEN6
|
||||
tristate "Intel client SoC Integrated MC"
|
||||
depends on PCI && X86_64 && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
depends on PCI && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
depends on X86_64 && X86_MCE_INTEL
|
||||
help
|
||||
Support for error detection and correction on the Intel
|
||||
client SoC Integrated Memory Controller using In-Band ECC IP.
|
||||
|
|
|
@ -254,8 +254,8 @@ static int init_csrows(struct mem_ctl_info *mci)
|
|||
return rc;
|
||||
}
|
||||
|
||||
dev_dbg(mci->pdev, "dt: /memory node resources: first page r.start=0x%x, resource_size=0x%x, PAGE_SHIFT macro=0x%x\n",
|
||||
r.start, resource_size(&r), PAGE_SHIFT);
|
||||
dev_dbg(mci->pdev, "dt: /memory node resources: first page %pR, PAGE_SHIFT macro=0x%x\n",
|
||||
&r, PAGE_SHIFT);
|
||||
|
||||
csrow->first_page = r.start >> PAGE_SHIFT;
|
||||
nr_pages = resource_size(&r) >> PAGE_SHIFT;
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#include "edac_module.h"
|
||||
#include "skx_common.h"
|
||||
|
||||
#define I10NM_REVISION "v0.0.4"
|
||||
#define I10NM_REVISION "v0.0.5"
|
||||
#define EDAC_MOD_STR "i10nm_edac"
|
||||
|
||||
/* Debug macros */
|
||||
|
@ -24,19 +24,39 @@
|
|||
pci_read_config_dword((d)->uracu, 0xd0, &(reg))
|
||||
#define I10NM_GET_IMC_BAR(d, i, reg) \
|
||||
pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
|
||||
#define I10NM_GET_SAD(d, offset, i, reg)\
|
||||
pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg))
|
||||
#define I10NM_GET_HBM_IMC_BAR(d, reg) \
|
||||
pci_read_config_dword((d)->uracu, 0xd4, &(reg))
|
||||
#define I10NM_GET_CAPID3_CFG(d, reg) \
|
||||
pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg))
|
||||
#define I10NM_GET_DIMMMTR(m, i, j) \
|
||||
readl((m)->mbase + 0x2080c + (i) * (m)->chan_mmio_sz + (j) * 4)
|
||||
readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \
|
||||
(i) * (m)->chan_mmio_sz + (j) * 4)
|
||||
#define I10NM_GET_MCDDRTCFG(m, i, j) \
|
||||
readl((m)->mbase + 0x20970 + (i) * (m)->chan_mmio_sz + (j) * 4)
|
||||
readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
|
||||
(i) * (m)->chan_mmio_sz + (j) * 4)
|
||||
#define I10NM_GET_MCMTR(m, i) \
|
||||
readl((m)->mbase + 0x20ef8 + (i) * (m)->chan_mmio_sz)
|
||||
readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \
|
||||
(i) * (m)->chan_mmio_sz)
|
||||
#define I10NM_GET_AMAP(m, i) \
|
||||
readl((m)->mbase + 0x20814 + (i) * (m)->chan_mmio_sz)
|
||||
readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \
|
||||
(i) * (m)->chan_mmio_sz)
|
||||
|
||||
#define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23)
|
||||
#define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12)
|
||||
#define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \
|
||||
GET_BITFIELD(reg, 0, 10) + 1) << 12)
|
||||
#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \
|
||||
((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)
|
||||
|
||||
#define I10NM_HBM_IMC_MMIO_SIZE 0x9000
|
||||
#define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30)
|
||||
#define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29)
|
||||
|
||||
#define I10NM_MAX_SAD 16
|
||||
#define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
|
||||
#define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5)
|
||||
|
||||
static struct list_head *i10nm_edac_list;
|
||||
|
||||
|
@ -63,7 +83,32 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
|
|||
return pdev;
|
||||
}
|
||||
|
||||
static int i10nm_get_all_munits(void)
|
||||
static bool i10nm_check_2lm(struct res_config *cfg)
|
||||
{
|
||||
struct skx_dev *d;
|
||||
u32 reg;
|
||||
int i;
|
||||
|
||||
list_for_each_entry(d, i10nm_edac_list, list) {
|
||||
d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1],
|
||||
PCI_SLOT(cfg->sad_all_devfn),
|
||||
PCI_FUNC(cfg->sad_all_devfn));
|
||||
if (!d->sad_all)
|
||||
continue;
|
||||
|
||||
for (i = 0; i < I10NM_MAX_SAD; i++) {
|
||||
I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg);
|
||||
if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) {
|
||||
edac_dbg(2, "2-level memory configuration.\n");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int i10nm_get_ddr_munits(void)
|
||||
{
|
||||
struct pci_dev *mdev;
|
||||
void __iomem *mbase;
|
||||
|
@ -91,7 +136,7 @@ static int i10nm_get_all_munits(void)
|
|||
edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
|
||||
j++, base, reg);
|
||||
|
||||
for (i = 0; i < I10NM_NUM_IMC; i++) {
|
||||
for (i = 0; i < I10NM_NUM_DDR_IMC; i++) {
|
||||
mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
|
||||
12 + i, 0);
|
||||
if (i == 0 && !mdev) {
|
||||
|
@ -127,11 +172,97 @@ static int i10nm_get_all_munits(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool i10nm_check_hbm_imc(struct skx_dev *d)
|
||||
{
|
||||
u32 reg;
|
||||
|
||||
if (I10NM_GET_CAPID3_CFG(d, reg)) {
|
||||
i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return I10NM_IS_HBM_PRESENT(reg) != 0;
|
||||
}
|
||||
|
||||
static int i10nm_get_hbm_munits(void)
|
||||
{
|
||||
struct pci_dev *mdev;
|
||||
void __iomem *mbase;
|
||||
u32 reg, off, mcmtr;
|
||||
struct skx_dev *d;
|
||||
int i, lmc;
|
||||
u64 base;
|
||||
|
||||
list_for_each_entry(d, i10nm_edac_list, list) {
|
||||
d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3);
|
||||
if (!d->pcu_cr3)
|
||||
return -ENODEV;
|
||||
|
||||
if (!i10nm_check_hbm_imc(d)) {
|
||||
i10nm_printk(KERN_DEBUG, "No hbm memory\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (I10NM_GET_SCK_BAR(d, reg)) {
|
||||
i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
base = I10NM_GET_SCK_MMIO_BASE(reg);
|
||||
|
||||
if (I10NM_GET_HBM_IMC_BAR(d, reg)) {
|
||||
i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);
|
||||
|
||||
lmc = I10NM_NUM_DDR_IMC;
|
||||
|
||||
for (i = 0; i < I10NM_NUM_HBM_IMC; i++) {
|
||||
mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
|
||||
12 + i / 4, 1 + i % 4);
|
||||
if (i == 0 && !mdev) {
|
||||
i10nm_printk(KERN_ERR, "No hbm mc found\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
if (!mdev)
|
||||
continue;
|
||||
|
||||
d->imc[lmc].mdev = mdev;
|
||||
off = i * I10NM_HBM_IMC_MMIO_SIZE;
|
||||
|
||||
edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
|
||||
lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE);
|
||||
|
||||
mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE);
|
||||
if (!mbase) {
|
||||
i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n",
|
||||
base + off);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
d->imc[lmc].mbase = mbase;
|
||||
d->imc[lmc].hbm_mc = true;
|
||||
|
||||
mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0);
|
||||
if (!I10NM_IS_HBM_IMC(mcmtr)) {
|
||||
i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
lmc++;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct res_config i10nm_cfg0 = {
|
||||
.type = I10NM,
|
||||
.decs_did = 0x3452,
|
||||
.busno_cfg_offset = 0xcc,
|
||||
.ddr_chan_mmio_sz = 0x4000,
|
||||
.sad_all_devfn = PCI_DEVFN(29, 0),
|
||||
.sad_all_offset = 0x108,
|
||||
};
|
||||
|
||||
static struct res_config i10nm_cfg1 = {
|
||||
|
@ -139,6 +270,8 @@ static struct res_config i10nm_cfg1 = {
|
|||
.decs_did = 0x3452,
|
||||
.busno_cfg_offset = 0xd0,
|
||||
.ddr_chan_mmio_sz = 0x4000,
|
||||
.sad_all_devfn = PCI_DEVFN(29, 0),
|
||||
.sad_all_offset = 0x108,
|
||||
};
|
||||
|
||||
static struct res_config spr_cfg = {
|
||||
|
@ -146,7 +279,10 @@ static struct res_config spr_cfg = {
|
|||
.decs_did = 0x3252,
|
||||
.busno_cfg_offset = 0xd0,
|
||||
.ddr_chan_mmio_sz = 0x8000,
|
||||
.hbm_chan_mmio_sz = 0x4000,
|
||||
.support_ddr5 = true,
|
||||
.sad_all_devfn = PCI_DEVFN(10, 0),
|
||||
.sad_all_offset = 0x300,
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id i10nm_cpuids[] = {
|
||||
|
@ -179,13 +315,13 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
|
|||
struct dimm_info *dimm;
|
||||
int i, j, ndimms;
|
||||
|
||||
for (i = 0; i < I10NM_NUM_CHANNELS; i++) {
|
||||
for (i = 0; i < imc->num_channels; i++) {
|
||||
if (!imc->mbase)
|
||||
continue;
|
||||
|
||||
ndimms = 0;
|
||||
amap = I10NM_GET_AMAP(imc, i);
|
||||
for (j = 0; j < I10NM_NUM_DIMMS; j++) {
|
||||
for (j = 0; j < imc->num_dimms; j++) {
|
||||
dimm = edac_get_dimm(mci, i, j, 0);
|
||||
mtr = I10NM_GET_DIMMMTR(imc, i, j);
|
||||
mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
|
||||
|
@ -278,6 +414,9 @@ static int __init i10nm_init(void)
|
|||
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
|
||||
return -EBUSY;
|
||||
|
||||
if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
|
||||
return -ENODEV;
|
||||
|
||||
id = x86_match_cpu(i10nm_cpuids);
|
||||
if (!id)
|
||||
return -ENODEV;
|
||||
|
@ -296,8 +435,11 @@ static int __init i10nm_init(void)
|
|||
return -ENODEV;
|
||||
}
|
||||
|
||||
rc = i10nm_get_all_munits();
|
||||
if (rc < 0)
|
||||
skx_set_mem_cfg(i10nm_check_2lm(cfg));
|
||||
|
||||
rc = i10nm_get_ddr_munits();
|
||||
|
||||
if (i10nm_get_hbm_munits() && rc)
|
||||
goto fail;
|
||||
|
||||
list_for_each_entry(d, i10nm_edac_list, list) {
|
||||
|
@ -318,7 +460,15 @@ static int __init i10nm_init(void)
|
|||
d->imc[i].lmc = i;
|
||||
d->imc[i].src_id = src_id;
|
||||
d->imc[i].node_id = node_id;
|
||||
d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
|
||||
if (d->imc[i].hbm_mc) {
|
||||
d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
|
||||
d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS;
|
||||
d->imc[i].num_dimms = I10NM_NUM_HBM_DIMMS;
|
||||
} else {
|
||||
d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
|
||||
d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS;
|
||||
d->imc[i].num_dimms = I10NM_NUM_DDR_DIMMS;
|
||||
}
|
||||
|
||||
rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
|
||||
"Intel_10nm Socket", EDAC_MOD_STR,
|
||||
|
|
|
@ -22,11 +22,12 @@
|
|||
#include <linux/io.h>
|
||||
#include <asm/mach_traps.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#include "edac_mc.h"
|
||||
#include "edac_module.h"
|
||||
|
||||
#define IGEN6_REVISION "v2.4"
|
||||
#define IGEN6_REVISION "v2.5"
|
||||
|
||||
#define EDAC_MOD_STR "igen6_edac"
|
||||
#define IGEN6_NMI_NAME "igen6_ibecc"
|
||||
|
@ -40,7 +41,7 @@
|
|||
|
||||
#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))
|
||||
|
||||
#define NUM_IMC 1 /* Max memory controllers */
|
||||
#define NUM_IMC 2 /* Max memory controllers */
|
||||
#define NUM_CHANNELS 2 /* Max channels */
|
||||
#define NUM_DIMMS 2 /* Max DIMMs per channel */
|
||||
|
||||
|
@ -54,6 +55,10 @@
|
|||
#define CAPID_C_OFFSET 0xec
|
||||
#define CAPID_C_IBECC BIT(15)
|
||||
|
||||
/* Capability register E */
|
||||
#define CAPID_E_OFFSET 0xf0
|
||||
#define CAPID_E_IBECC BIT(12)
|
||||
|
||||
/* Error Status */
|
||||
#define ERRSTS_OFFSET 0xc8
|
||||
#define ERRSTS_CE BIT_ULL(6)
|
||||
|
@ -70,7 +75,7 @@
|
|||
#define IBECC_ACTIVATE_EN BIT(0)
|
||||
|
||||
/* IBECC error log */
|
||||
#define ECC_ERROR_LOG_OFFSET (IBECC_BASE + 0x170)
|
||||
#define ECC_ERROR_LOG_OFFSET (IBECC_BASE + res_cfg->ibecc_error_log_offset)
|
||||
#define ECC_ERROR_LOG_CE BIT_ULL(62)
|
||||
#define ECC_ERROR_LOG_UE BIT_ULL(63)
|
||||
#define ECC_ERROR_LOG_ADDR_SHIFT 5
|
||||
|
@ -84,39 +89,54 @@
|
|||
#define MCHBAR_SIZE 0x10000
|
||||
|
||||
/* Parameters for the channel decode stage */
|
||||
#define MAD_INTER_CHANNEL_OFFSET 0x5000
|
||||
#define IMC_BASE (res_cfg->imc_base)
|
||||
#define MAD_INTER_CHANNEL_OFFSET IMC_BASE
|
||||
#define MAD_INTER_CHANNEL_DDR_TYPE(v) GET_BITFIELD(v, 0, 2)
|
||||
#define MAD_INTER_CHANNEL_ECHM(v) GET_BITFIELD(v, 3, 3)
|
||||
#define MAD_INTER_CHANNEL_CH_L_MAP(v) GET_BITFIELD(v, 4, 4)
|
||||
#define MAD_INTER_CHANNEL_CH_S_SIZE(v) ((u64)GET_BITFIELD(v, 12, 19) << 29)
|
||||
|
||||
/* Parameters for DRAM decode stage */
|
||||
#define MAD_INTRA_CH0_OFFSET 0x5004
|
||||
#define MAD_INTRA_CH0_OFFSET (IMC_BASE + 4)
|
||||
#define MAD_INTRA_CH_DIMM_L_MAP(v) GET_BITFIELD(v, 0, 0)
|
||||
|
||||
/* DIMM characteristics */
|
||||
#define MAD_DIMM_CH0_OFFSET 0x500c
|
||||
#define MAD_DIMM_CH0_OFFSET (IMC_BASE + 0xc)
|
||||
#define MAD_DIMM_CH_DIMM_L_SIZE(v) ((u64)GET_BITFIELD(v, 0, 6) << 29)
|
||||
#define MAD_DIMM_CH_DLW(v) GET_BITFIELD(v, 7, 8)
|
||||
#define MAD_DIMM_CH_DIMM_S_SIZE(v) ((u64)GET_BITFIELD(v, 16, 22) << 29)
|
||||
#define MAD_DIMM_CH_DSW(v) GET_BITFIELD(v, 24, 25)
|
||||
|
||||
/* Hash for memory controller selection */
|
||||
#define MAD_MC_HASH_OFFSET (IMC_BASE + 0x1b8)
|
||||
#define MAC_MC_HASH_LSB(v) GET_BITFIELD(v, 1, 3)
|
||||
|
||||
/* Hash for channel selection */
|
||||
#define CHANNEL_HASH_OFFSET 0X5024
|
||||
#define CHANNEL_HASH_OFFSET (IMC_BASE + 0x24)
|
||||
/* Hash for enhanced channel selection */
|
||||
#define CHANNEL_EHASH_OFFSET 0X5028
|
||||
#define CHANNEL_EHASH_OFFSET (IMC_BASE + 0x28)
|
||||
#define CHANNEL_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
|
||||
#define CHANNEL_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
|
||||
#define CHANNEL_HASH_MODE(v) GET_BITFIELD(v, 28, 28)
|
||||
|
||||
/* Parameters for memory slice decode stage */
|
||||
#define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
|
||||
#define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
|
||||
|
||||
static struct res_config {
|
||||
bool machine_check;
|
||||
int num_imc;
|
||||
u32 imc_base;
|
||||
u32 cmf_base;
|
||||
u32 cmf_size;
|
||||
u32 ms_hash_offset;
|
||||
u32 ibecc_base;
|
||||
u32 ibecc_error_log_offset;
|
||||
bool (*ibecc_available)(struct pci_dev *pdev);
|
||||
/* Convert error address logged in IBECC to system physical address */
|
||||
u64 (*err_addr_to_sys_addr)(u64 eaddr);
|
||||
u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
|
||||
/* Convert error address logged in IBECC to integrated memory controller address */
|
||||
u64 (*err_addr_to_imc_addr)(u64 eaddr);
|
||||
u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
|
||||
} *res_cfg;
|
||||
|
||||
struct igen6_imc {
|
||||
|
@ -125,6 +145,7 @@ struct igen6_imc {
|
|||
struct pci_dev *pdev;
|
||||
struct device dev;
|
||||
void __iomem *window;
|
||||
u64 size;
|
||||
u64 ch_s_size;
|
||||
int ch_l_map;
|
||||
u64 dimm_s_size[NUM_CHANNELS];
|
||||
|
@ -134,6 +155,9 @@ struct igen6_imc {
|
|||
|
||||
static struct igen6_pvt {
|
||||
struct igen6_imc imc[NUM_IMC];
|
||||
u64 ms_hash;
|
||||
u64 ms_s_size;
|
||||
int ms_l_map;
|
||||
} *igen6_pvt;
|
||||
|
||||
/* The top of low usable DRAM */
|
||||
|
@ -183,6 +207,21 @@ static struct work_struct ecclog_work;
|
|||
#define DID_EHL_SKU14 0x4534
|
||||
#define DID_EHL_SKU15 0x4536
|
||||
|
||||
/* Compute die IDs for ICL-NNPI with IBECC */
|
||||
#define DID_ICL_SKU8 0x4581
|
||||
#define DID_ICL_SKU10 0x4585
|
||||
#define DID_ICL_SKU11 0x4589
|
||||
#define DID_ICL_SKU12 0x458d
|
||||
|
||||
/* Compute die IDs for Tiger Lake with IBECC */
|
||||
#define DID_TGL_SKU 0x9a14
|
||||
|
||||
/* Compute die IDs for Alder Lake with IBECC */
|
||||
#define DID_ADL_SKU1 0x4601
|
||||
#define DID_ADL_SKU2 0x4602
|
||||
#define DID_ADL_SKU3 0x4621
|
||||
#define DID_ADL_SKU4 0x4641
|
||||
|
||||
static bool ehl_ibecc_available(struct pci_dev *pdev)
|
||||
{
|
||||
u32 v;
|
||||
|
@ -193,12 +232,12 @@ static bool ehl_ibecc_available(struct pci_dev *pdev)
|
|||
return !!(CAPID_C_IBECC & v);
|
||||
}
|
||||
|
||||
static u64 ehl_err_addr_to_sys_addr(u64 eaddr)
|
||||
static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
|
||||
{
|
||||
return eaddr;
|
||||
}
|
||||
|
||||
static u64 ehl_err_addr_to_imc_addr(u64 eaddr)
|
||||
static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
|
||||
{
|
||||
if (eaddr < igen6_tolud)
|
||||
return eaddr;
|
||||
|
@ -212,12 +251,156 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr)
|
|||
return eaddr;
|
||||
}
|
||||
|
||||
static bool icl_ibecc_available(struct pci_dev *pdev)
|
||||
{
|
||||
u32 v;
|
||||
|
||||
if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
|
||||
return false;
|
||||
|
||||
return !(CAPID_C_IBECC & v) &&
|
||||
(boot_cpu_data.x86_stepping >= 1);
|
||||
}
|
||||
|
||||
static bool tgl_ibecc_available(struct pci_dev *pdev)
|
||||
{
|
||||
u32 v;
|
||||
|
||||
if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
|
||||
return false;
|
||||
|
||||
return !(CAPID_E_IBECC & v);
|
||||
}
|
||||
|
||||
/*
 * Translate a memory address into a system physical address.
 *
 * Addresses below igen6_tolud map 1:1. Above that, the part beyond
 * igen6_tolud is relocated to start at 4 GiB when igen6_tom is at most
 * 4 GiB, or to start at igen6_tom when the address itself is below 4 GiB.
 * Everything else maps 1:1.
 */
static u64 mem_addr_to_sys_addr(u64 maddr)
{
	u64 beyond_tolud;

	if (maddr < igen6_tolud)
		return maddr;

	beyond_tolud = maddr - igen6_tolud;

	if (igen6_tom <= _4GB)
		return beyond_tolud + _4GB;

	if (maddr < _4GB)
		return beyond_tolud + igen6_tom;

	return maddr;
}
|
||||
|
||||
/*
 * Compute the memory-slice hash of @addr.
 *
 * Starting from @hash_init, XOR in bits 6..19 of (@addr & @mask) one bit at
 * a time, then XOR in bit @intlv_bit of the unmasked @addr. Only bit 0 of
 * the accumulator is ever toggled, so any higher bits of @hash_init pass
 * through unchanged.
 */
static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
{
	u64 masked = addr & mask;
	u64 acc = hash_init;
	int bit = 6;

	while (bit < 20) {
		acc ^= (masked >> bit) & 1;
		bit++;
	}

	return acc ^ ((addr >> intlv_bit) & 1);
}
|
||||
|
||||
/*
 * Convert an IBECC-logged error address of memory controller @mc into a
 * memory address.
 *
 * Addresses at or beyond the smaller slice size (igen6_pvt->ms_s_size) are
 * simply offset by that size. Below it, a zero bit is opened up at the
 * interleave position (the LSB field of the cached MEM_SLICE_HASH value
 * plus 6) and filled with the slice hash computed by mem_slice_hash(),
 * seeded with @mc.
 */
static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
{
	u64 small_slice = igen6_pvt->ms_s_size;
	u64 upper, lower, maddr, hash;
	u32 hash_reg;
	int bit;

	if (eaddr >= small_slice)
		return eaddr + small_slice;

	hash_reg = igen6_pvt->ms_hash;
	bit = MEM_SLICE_HASH_LSB_MASK_BIT(hash_reg) + 6;

	/* Shift everything from the interleave bit upward by one place. */
	upper = GET_BITFIELD(eaddr, bit, 63) << (bit + 1);
	lower = GET_BITFIELD(eaddr, 0, bit - 1);
	maddr = upper | lower;

	hash = mem_slice_hash(maddr, MEM_SLICE_HASH_MASK(hash_reg), mc, bit);

	return maddr | (hash << bit);
}
|
||||
|
||||
/*
 * Convert an IBECC-logged error address of memory controller @mc into a
 * system physical address: error address -> memory address -> system
 * address.
 */
static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(tgl_err_addr_to_mem_addr(eaddr, mc));
}
|
||||
|
||||
/*
 * Convert an IBECC-logged error address into a memory-controller address.
 * This variant returns @eaddr unchanged; @mc is unused.
 */
static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 imc_addr = eaddr;

	return imc_addr;
}
|
||||
|
||||
/*
 * Convert an IBECC-logged error address into a system physical address.
 * The error address is passed straight to mem_addr_to_sys_addr(); @mc is
 * unused.
 */
static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	u64 sys_addr = mem_addr_to_sys_addr(eaddr);

	return sys_addr;
}
|
||||
|
||||
/*
 * Convert an IBECC-logged error address into an address local to memory
 * controller @mc.
 *
 * Addresses at or beyond twice the smaller slice size have that size
 * subtracted. Below it, the interleave bit (the LSB field of the MC hash
 * register read from the controller's window, plus 6) is squeezed out of
 * the address.
 */
static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	struct igen6_imc *imc = &igen6_pvt->imc[mc];
	u64 small_slice = igen6_pvt->ms_s_size;
	u64 upper, lower;
	u32 hash_reg;
	int bit;

	if (eaddr >= 2 * small_slice)
		return eaddr - small_slice;

	hash_reg = readl(imc->window + MAD_MC_HASH_OFFSET);
	bit = MAC_MC_HASH_LSB(hash_reg) + 6;

	/* Drop the interleave bit and close the gap. */
	upper = GET_BITFIELD(eaddr, bit + 1, 63) << bit;
	lower = GET_BITFIELD(eaddr, 0, bit - 1);

	return upper | lower;
}
|
||||
|
||||
static struct res_config ehl_cfg = {
|
||||
.num_imc = 1,
|
||||
.ibecc_base = 0xdc00,
|
||||
.ibecc_available = ehl_ibecc_available,
|
||||
.err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
|
||||
.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
|
||||
.num_imc = 1,
|
||||
.imc_base = 0x5000,
|
||||
.ibecc_base = 0xdc00,
|
||||
.ibecc_available = ehl_ibecc_available,
|
||||
.ibecc_error_log_offset = 0x170,
|
||||
.err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
|
||||
.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
|
||||
};
|
||||
|
||||
static struct res_config icl_cfg = {
|
||||
.num_imc = 1,
|
||||
.imc_base = 0x5000,
|
||||
.ibecc_base = 0xd800,
|
||||
.ibecc_error_log_offset = 0x170,
|
||||
.ibecc_available = icl_ibecc_available,
|
||||
.err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
|
||||
.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
|
||||
};
|
||||
|
||||
static struct res_config tgl_cfg = {
|
||||
.machine_check = true,
|
||||
.num_imc = 2,
|
||||
.imc_base = 0x5000,
|
||||
.cmf_base = 0x11000,
|
||||
.cmf_size = 0x800,
|
||||
.ms_hash_offset = 0xac,
|
||||
.ibecc_base = 0xd400,
|
||||
.ibecc_error_log_offset = 0x170,
|
||||
.ibecc_available = tgl_ibecc_available,
|
||||
.err_addr_to_sys_addr = tgl_err_addr_to_sys_addr,
|
||||
.err_addr_to_imc_addr = tgl_err_addr_to_imc_addr,
|
||||
};
|
||||
|
||||
static struct res_config adl_cfg = {
|
||||
.machine_check = true,
|
||||
.num_imc = 2,
|
||||
.imc_base = 0xd800,
|
||||
.ibecc_base = 0xd400,
|
||||
.ibecc_error_log_offset = 0x68,
|
||||
.ibecc_available = tgl_ibecc_available,
|
||||
.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
|
||||
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
|
||||
};
|
||||
|
||||
static const struct pci_device_id igen6_pci_tbl[] = {
|
||||
|
@ -232,6 +415,15 @@ static const struct pci_device_id igen6_pci_tbl[] = {
|
|||
{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
|
||||
{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
|
||||
{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
|
||||
{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
|
||||
{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
|
||||
{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
|
||||
{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
|
||||
{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
|
||||
{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
|
||||
{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
|
||||
{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
|
||||
{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
|
||||
{ },
|
||||
};
|
||||
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
|
||||
|
@ -490,8 +682,8 @@ static void ecclog_work_cb(struct work_struct *work)
|
|||
eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
|
||||
ECC_ERROR_LOG_ADDR_SHIFT;
|
||||
res.mc = node->mc;
|
||||
res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr);
|
||||
res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr);
|
||||
res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
|
||||
res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);
|
||||
|
||||
mci = igen6_pvt->imc[res.mc].mci;
|
||||
|
||||
|
@ -540,6 +732,57 @@ static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
|
|||
return NMI_HANDLED;
|
||||
}
|
||||
|
||||
static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
|
||||
void *data)
|
||||
{
|
||||
struct mce *mce = (struct mce *)data;
|
||||
char *type;
|
||||
|
||||
if (mce->kflags & MCE_HANDLED_CEC)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
/*
|
||||
* Ignore unless this is a memory related error.
|
||||
* We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
|
||||
* since this bit isn't set on some CPU (e.g., Tiger Lake UP3).
|
||||
*/
|
||||
if ((mce->status & 0xefff) >> 7 != 1)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
if (mce->mcgstatus & MCG_STATUS_MCIP)
|
||||
type = "Exception";
|
||||
else
|
||||
type = "Event";
|
||||
|
||||
edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
|
||||
mce->extcpu, type, mce->mcgstatus,
|
||||
mce->bank, mce->status);
|
||||
edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
|
||||
edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
|
||||
edac_dbg(0, "MISC 0x%llx\n", mce->misc);
|
||||
edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
|
||||
mce->cpuvendor, mce->cpuid, mce->time,
|
||||
mce->socketid, mce->apicid);
|
||||
/*
|
||||
* We just use the Machine Check for the memory error notification.
|
||||
* Each memory controller is associated with an IBECC instance.
|
||||
* Directly read and clear the error information(error address and
|
||||
* error type) on all the IBECC instances so that we know on which
|
||||
* memory controller the memory error(s) occurred.
|
||||
*/
|
||||
if (!ecclog_handler())
|
||||
return NOTIFY_DONE;
|
||||
|
||||
mce->kflags |= MCE_HANDLED_EDAC;
|
||||
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static struct notifier_block ecclog_mce_dec = {
|
||||
.notifier_call = ecclog_mce_handler,
|
||||
.priority = MCE_PRIO_EDAC,
|
||||
};
|
||||
|
||||
static bool igen6_check_ecc(struct igen6_imc *imc)
|
||||
{
|
||||
u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);
|
||||
|
@ -573,6 +816,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci)
|
|||
imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
|
||||
imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
|
||||
imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
|
||||
imc->size += imc->dimm_s_size[i];
|
||||
imc->size += imc->dimm_l_size[i];
|
||||
ndimms = 0;
|
||||
|
||||
for (j = 0; j < NUM_DIMMS; j++) {
|
||||
|
@ -608,6 +853,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci)
|
|||
}
|
||||
}
|
||||
|
||||
edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -857,6 +1104,80 @@ static void igen6_unregister_mcis(void)
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Record the memory-slice parameters used for address translation on
 * configurations with two memory controllers.
 *
 * The smaller of the two controller sizes becomes igen6_pvt->ms_s_size and
 * igen6_pvt->ms_l_map is set to the index of the other controller (a tie
 * picks controller 1 as the smaller). If res_cfg describes a CMF window
 * (cmf_size != 0), the window at @mchbar + cmf_base is mapped just long
 * enough to cache the 64-bit MEM_SLICE_HASH register in igen6_pvt->ms_hash.
 *
 * Return: 0 on success, -ENODEV if the CMF window cannot be mapped.
 */
static int igen6_mem_slice_setup(u64 mchbar)
{
	struct igen6_imc *mc0 = &igen6_pvt->imc[0];
	struct igen6_imc *mc1 = &igen6_pvt->imc[1];
	u64 cmf_phys = mchbar + res_cfg->cmf_base;
	u32 cmf_len = res_cfg->cmf_size;
	void __iomem *cmf;

	edac_dbg(2, "\n");

	if (mc0->size < mc1->size) {
		igen6_pvt->ms_s_size = mc0->size;
		igen6_pvt->ms_l_map = 1;
	} else {
		igen6_pvt->ms_s_size = mc1->size;
		igen6_pvt->ms_l_map = 0;
	}

	edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
		 igen6_pvt->ms_s_size >> 20, igen6_pvt->ms_l_map);

	/* No CMF window described: nothing more to read. */
	if (cmf_len == 0)
		return 0;

	cmf = ioremap(cmf_phys, cmf_len);
	if (cmf == NULL) {
		igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", cmf_phys);
		return -ENODEV;
	}

	igen6_pvt->ms_hash = readq(cmf + res_cfg->ms_hash_offset);
	edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", igen6_pvt->ms_hash);

	iounmap(cmf);

	return 0;
}
|
||||
|
||||
static int register_err_handler(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (res_cfg->machine_check) {
|
||||
mce_register_decode_chain(&ecclog_mce_dec);
|
||||
return 0;
|
||||
}
|
||||
|
||||
rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
|
||||
0, IGEN6_NMI_NAME);
|
||||
if (rc) {
|
||||
igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
|
||||
return rc;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void unregister_err_handler(void)
|
||||
{
|
||||
if (res_cfg->machine_check) {
|
||||
mce_unregister_decode_chain(&ecclog_mce_dec);
|
||||
return;
|
||||
}
|
||||
|
||||
unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
|
||||
}
|
||||
|
||||
static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
{
|
||||
u64 mchbar;
|
||||
|
@ -880,6 +1201,12 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
|||
goto fail2;
|
||||
}
|
||||
|
||||
if (res_cfg->num_imc > 1) {
|
||||
rc = igen6_mem_slice_setup(mchbar);
|
||||
if (rc)
|
||||
goto fail2;
|
||||
}
|
||||
|
||||
ecclog_pool = ecclog_gen_pool_create();
|
||||
if (!ecclog_pool) {
|
||||
rc = -ENOMEM;
|
||||
|
@ -892,12 +1219,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
|||
/* Check if any pending errors before registering the NMI handler */
|
||||
ecclog_handler();
|
||||
|
||||
rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
|
||||
0, IGEN6_NMI_NAME);
|
||||
if (rc) {
|
||||
igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
|
||||
rc = register_err_handler();
|
||||
if (rc)
|
||||
goto fail3;
|
||||
}
|
||||
|
||||
/* Enable error reporting */
|
||||
rc = errcmd_enable_error_reporting(true);
|
||||
|
@ -925,7 +1249,7 @@ static void igen6_remove(struct pci_dev *pdev)
|
|||
|
||||
igen6_debug_teardown();
|
||||
errcmd_enable_error_reporting(false);
|
||||
unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
|
||||
unregister_err_handler();
|
||||
irq_work_sync(&ecclog_irq_work);
|
||||
flush_work(&ecclog_work);
|
||||
gen_pool_destroy(ecclog_pool);
|
||||
|
|
|
@ -1554,6 +1554,9 @@ static int __init pnd2_init(void)
|
|||
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
|
||||
return -EBUSY;
|
||||
|
||||
if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
|
||||
return -ENODEV;
|
||||
|
||||
id = x86_match_cpu(pnd2_cpuids);
|
||||
if (!id)
|
||||
return -ENODEV;
|
||||
|
|
|
@ -3510,6 +3510,9 @@ static int __init sbridge_init(void)
|
|||
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
|
||||
return -EBUSY;
|
||||
|
||||
if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
|
||||
return -ENODEV;
|
||||
|
||||
id = x86_match_cpu(sbridge_cpuids);
|
||||
if (!id)
|
||||
return -ENODEV;
|
||||
|
|
|
@ -656,6 +656,9 @@ static int __init skx_init(void)
|
|||
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
|
||||
return -EBUSY;
|
||||
|
||||
if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
|
||||
return -ENODEV;
|
||||
|
||||
id = x86_match_cpu(skx_cpuids);
|
||||
if (!id)
|
||||
return -ENODEV;
|
||||
|
|
|
@ -23,10 +23,13 @@
|
|||
#include "skx_common.h"
|
||||
|
||||
static const char * const component_names[] = {
|
||||
[INDEX_SOCKET] = "ProcessorSocketId",
|
||||
[INDEX_MEMCTRL] = "MemoryControllerId",
|
||||
[INDEX_CHANNEL] = "ChannelId",
|
||||
[INDEX_DIMM] = "DimmSlotId",
|
||||
[INDEX_SOCKET] = "ProcessorSocketId",
|
||||
[INDEX_MEMCTRL] = "MemoryControllerId",
|
||||
[INDEX_CHANNEL] = "ChannelId",
|
||||
[INDEX_DIMM] = "DimmSlotId",
|
||||
[INDEX_NM_MEMCTRL] = "NmMemoryControllerId",
|
||||
[INDEX_NM_CHANNEL] = "NmChannelId",
|
||||
[INDEX_NM_DIMM] = "NmDimmSlotId",
|
||||
};
|
||||
|
||||
static int component_indices[ARRAY_SIZE(component_names)];
|
||||
|
@ -34,12 +37,14 @@ static int adxl_component_count;
|
|||
static const char * const *adxl_component_names;
|
||||
static u64 *adxl_values;
|
||||
static char *adxl_msg;
|
||||
static unsigned long adxl_nm_bitmap;
|
||||
|
||||
static char skx_msg[MSG_SIZE];
|
||||
static skx_decode_f skx_decode;
|
||||
static skx_show_retry_log_f skx_show_retry_rd_err_log;
|
||||
static u64 skx_tolm, skx_tohm;
|
||||
static LIST_HEAD(dev_edac_list);
|
||||
static bool skx_mem_cfg_2lm;
|
||||
|
||||
int __init skx_adxl_get(void)
|
||||
{
|
||||
|
@ -56,14 +61,25 @@ int __init skx_adxl_get(void)
|
|||
for (j = 0; names[j]; j++) {
|
||||
if (!strcmp(component_names[i], names[j])) {
|
||||
component_indices[i] = j;
|
||||
|
||||
if (i >= INDEX_NM_FIRST)
|
||||
adxl_nm_bitmap |= 1 << i;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!names[j])
|
||||
if (!names[j] && i < INDEX_NM_FIRST)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (skx_mem_cfg_2lm) {
|
||||
if (!adxl_nm_bitmap)
|
||||
skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n");
|
||||
else
|
||||
edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap);
|
||||
}
|
||||
|
||||
adxl_component_names = names;
|
||||
while (*names++)
|
||||
adxl_component_count++;
|
||||
|
@ -99,7 +115,7 @@ void __exit skx_adxl_put(void)
|
|||
kfree(adxl_msg);
|
||||
}
|
||||
|
||||
static bool skx_adxl_decode(struct decoded_addr *res)
|
||||
static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem)
|
||||
{
|
||||
struct skx_dev *d;
|
||||
int i, len = 0;
|
||||
|
@ -116,11 +132,20 @@ static bool skx_adxl_decode(struct decoded_addr *res)
|
|||
}
|
||||
|
||||
res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
|
||||
res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
|
||||
res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
|
||||
res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
|
||||
if (error_in_1st_level_mem) {
|
||||
res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
|
||||
(int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1;
|
||||
res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ?
|
||||
(int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1;
|
||||
res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ?
|
||||
(int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1;
|
||||
} else {
|
||||
res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
|
||||
res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
|
||||
res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
|
||||
}
|
||||
|
||||
if (res->imc > NUM_IMC - 1) {
|
||||
if (res->imc > NUM_IMC - 1 || res->imc < 0) {
|
||||
skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
|
||||
return false;
|
||||
}
|
||||
|
@ -151,6 +176,11 @@ static bool skx_adxl_decode(struct decoded_addr *res)
|
|||
return true;
|
||||
}
|
||||
|
||||
void skx_set_mem_cfg(bool mem_cfg_2lm)
|
||||
{
|
||||
skx_mem_cfg_2lm = mem_cfg_2lm;
|
||||
}
|
||||
|
||||
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
|
||||
{
|
||||
skx_decode = decode;
|
||||
|
@ -313,9 +343,9 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
|
|||
|
||||
ranks = numrank(mtr);
|
||||
rows = numrow(mtr);
|
||||
cols = numcol(mtr);
|
||||
cols = imc->hbm_mc ? 6 : numcol(mtr);
|
||||
|
||||
if (cfg->support_ddr5 && (amap & 0x8)) {
|
||||
if (cfg->support_ddr5 && ((amap & 0x8) || imc->hbm_mc)) {
|
||||
banks = 32;
|
||||
mtype = MEM_DDR5;
|
||||
} else {
|
||||
|
@ -344,8 +374,13 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
|
|||
dimm->dtype = get_width(mtr);
|
||||
dimm->mtype = mtype;
|
||||
dimm->edac_mode = EDAC_SECDED; /* likely better than this */
|
||||
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
|
||||
imc->src_id, imc->lmc, chan, dimmno);
|
||||
|
||||
if (imc->hbm_mc)
|
||||
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u",
|
||||
imc->src_id, imc->lmc, chan);
|
||||
else
|
||||
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
|
||||
imc->src_id, imc->lmc, chan, dimmno);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -578,6 +613,21 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
|
|||
optype, skx_msg);
|
||||
}
|
||||
|
||||
static bool skx_error_in_1st_level_mem(const struct mce *m)
|
||||
{
|
||||
u32 errcode;
|
||||
|
||||
if (!skx_mem_cfg_2lm)
|
||||
return false;
|
||||
|
||||
errcode = GET_BITFIELD(m->status, 0, 15);
|
||||
|
||||
if ((errcode & 0xef80) != 0x280)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
|
||||
void *data)
|
||||
{
|
||||
|
@ -597,7 +647,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
|
|||
res.addr = mce->addr;
|
||||
|
||||
if (adxl_component_count) {
|
||||
if (!skx_adxl_decode(&res))
|
||||
if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))
|
||||
return NOTIFY_DONE;
|
||||
} else if (!skx_decode || !skx_decode(&res)) {
|
||||
return NOTIFY_DONE;
|
||||
|
@ -658,6 +708,8 @@ void skx_remove(void)
|
|||
}
|
||||
if (d->util_all)
|
||||
pci_dev_put(d->util_all);
|
||||
if (d->pcu_cr3)
|
||||
pci_dev_put(d->pcu_cr3);
|
||||
if (d->sad_all)
|
||||
pci_dev_put(d->sad_all);
|
||||
if (d->uracu)
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
#ifndef _SKX_COMM_EDAC_H
|
||||
#define _SKX_COMM_EDAC_H
|
||||
|
||||
#include <linux/bits.h>
|
||||
|
||||
#define MSG_SIZE 1024
|
||||
|
||||
/*
|
||||
|
@ -30,9 +32,17 @@
|
|||
#define SKX_NUM_CHANNELS 3 /* Channels per memory controller */
|
||||
#define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */
|
||||
|
||||
#define I10NM_NUM_IMC 4
|
||||
#define I10NM_NUM_CHANNELS 2
|
||||
#define I10NM_NUM_DIMMS 2
|
||||
#define I10NM_NUM_DDR_IMC 4
|
||||
#define I10NM_NUM_DDR_CHANNELS 2
|
||||
#define I10NM_NUM_DDR_DIMMS 2
|
||||
|
||||
#define I10NM_NUM_HBM_IMC 16
|
||||
#define I10NM_NUM_HBM_CHANNELS 2
|
||||
#define I10NM_NUM_HBM_DIMMS 1
|
||||
|
||||
#define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
|
||||
#define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
|
||||
#define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)
|
||||
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
#define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
|
||||
|
@ -54,12 +64,16 @@ struct skx_dev {
|
|||
struct pci_dev *sad_all;
|
||||
struct pci_dev *util_all;
|
||||
struct pci_dev *uracu; /* for i10nm CPU */
|
||||
struct pci_dev *pcu_cr3; /* for HBM memory detection */
|
||||
u32 mcroute;
|
||||
struct skx_imc {
|
||||
struct mem_ctl_info *mci;
|
||||
struct pci_dev *mdev; /* for i10nm CPU */
|
||||
void __iomem *mbase; /* for i10nm CPU */
|
||||
int chan_mmio_sz; /* for i10nm CPU */
|
||||
int num_channels; /* channels per memory controller */
|
||||
int num_dimms; /* dimms per channel */
|
||||
bool hbm_mc;
|
||||
u8 mc; /* system wide mc# */
|
||||
u8 lmc; /* socket relative mc# */
|
||||
u8 src_id, node_id;
|
||||
|
@ -92,9 +106,17 @@ enum {
|
|||
INDEX_MEMCTRL,
|
||||
INDEX_CHANNEL,
|
||||
INDEX_DIMM,
|
||||
INDEX_NM_FIRST,
|
||||
INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
|
||||
INDEX_NM_CHANNEL,
|
||||
INDEX_NM_DIMM,
|
||||
INDEX_MAX
|
||||
};
|
||||
|
||||
#define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL)
|
||||
#define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL)
|
||||
#define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM)
|
||||
|
||||
struct decoded_addr {
|
||||
struct skx_dev *dev;
|
||||
u64 addr;
|
||||
|
@ -122,7 +144,12 @@ struct res_config {
|
|||
int busno_cfg_offset;
|
||||
/* Per DDR channel memory-mapped I/O size */
|
||||
int ddr_chan_mmio_sz;
|
||||
/* Per HBM channel memory-mapped I/O size */
|
||||
int hbm_chan_mmio_sz;
|
||||
bool support_ddr5;
|
||||
/* SAD device number and function number */
|
||||
unsigned int sad_all_devfn;
|
||||
int sad_all_offset;
|
||||
};
|
||||
|
||||
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
|
||||
|
@ -133,6 +160,7 @@ typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int le
|
|||
int __init skx_adxl_get(void);
|
||||
void __exit skx_adxl_put(void);
|
||||
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
|
||||
void skx_set_mem_cfg(bool mem_cfg_2lm);
|
||||
|
||||
int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
|
||||
int skx_get_node_id(struct skx_dev *d, u8 *id);
|
||||
|
|
|
@ -1368,7 +1368,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev,
|
|||
name, 1, "CCPI", 1,
|
||||
0, NULL, 0, idx);
|
||||
if (!edac_dev) {
|
||||
dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret);
|
||||
dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
ocx = edac_dev->pvt_info;
|
||||
|
@ -1380,7 +1380,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev,
|
|||
|
||||
ocx->regs = pcim_iomap_table(pdev)[0];
|
||||
if (!ocx->regs) {
|
||||
dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
|
||||
dev_err(&pdev->dev, "Cannot map PCI resources\n");
|
||||
ret = -ENODEV;
|
||||
goto err_free;
|
||||
}
|
||||
|
|
|
@ -197,6 +197,7 @@ static const struct of_device_id ti_edac_of_match[] = {
|
|||
{ .compatible = "ti,emif-dra7xx", .data = (void *)EMIF_TYPE_DRA7 },
|
||||
{},
|
||||
};
|
||||
MODULE_DEVICE_TABLE(of, ti_edac_of_match);
|
||||
|
||||
static int _emif_get_id(struct device_node *node)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue