From 80b8ce89ebb145d91fd2ef1a1e9610aee1613109 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 27 Dec 2010 15:39:12 +0000 Subject: [PATCH 01/21] i7core_edac: fix misuse of logical operation in place of bitop CC: Mauro Carvalho Chehab Signed-off-by: David Sterba Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7core_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index f6cf448d69b4..35a0ff527357 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -731,7 +731,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci) debugf1("\t\t%#x\t%#x\t%#x\n", (value[j] >> 27) & 0x1, (value[j] >> 24) & 0x7, - (value[j] && ((1 << 24) - 1))); + (value[j] & ((1 << 24) - 1))); } return 0; From 224e871f36e1f9edddb0dfecbb84ff9765af3eb4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 17 Mar 2011 17:02:59 -0300 Subject: [PATCH 02/21] i7core_edac: Fix oops when trying to inject errors Error injection needs the pci device 0:0. So, we need to revert this changeset: 79daef2099a02fed35747c23bad22f30441133ea. Tests need to be made to be sure that refcount won't be wrong as noticed before. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7core_edac.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 35a0ff527357..c64ba267902f 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -281,8 +281,7 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = { /* Memory controller */ { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) }, { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) }, - - /* Exists only for RDIMM */ + /* Exists only for RDIMM */ { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 }, { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) }, @@ -303,6 +302,16 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = { { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) }, { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) }, { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) }, + + /* Generic Non-core registers */ + /* + * This is the PCI device on i7core and on Xeon 35xx (8086:2c41) + * On Xeon 55xx, however, it has a different id (8086:2c40). So, + * the probing code needs to test for the other address in case of + * failure of this one + */ + { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) }, + }; static const struct pci_id_descr pci_dev_descr_lynnfield[] = { @@ -319,6 +328,12 @@ static const struct pci_id_descr pci_dev_descr_lynnfield[] = { { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) }, { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) }, { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) }, + + /* + * This is the PCI device has an alternate address on some + * processors like Core i7 860 + */ + { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) }, }; static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = { @@ -346,6 +361,10 @@ static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = { { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) }, { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) }, { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) }, + + /* Generic Non-core registers */ + { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) }, + }; #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } @@ -1324,6 +1343,20 @@ static int i7core_get_onedevice(struct pci_dev **prev, pdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_descr->dev_id, *prev); + /* + * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs + * is at addr 8086:2c40, instead of 8086:2c41. So, we need + * to probe for the alternate address in case of failure + */ + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev); + + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev) + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT, + *prev); + if (!pdev) { if (*prev) { *prev = pdev; From ddeb3547d4823495c6604750c241e5a3810f51a3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 4 Mar 2011 15:11:29 -0300 Subject: [PATCH 03/21] edac: Move edac main structs to include/linux/edac.h As we'll need to use those structs for trace functions, they should be on a more public place. So, move struct mem_ctl_info & friends to edac.h. No functional changes on this patch. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Doug Thompson --- drivers/edac/edac_core.h | 350 +-------------------------------------- include/linux/edac.h | 350 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 351 insertions(+), 349 deletions(-) diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 55b8278bb172..fe90cd4a7ebc 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -34,11 +34,10 @@ #include #include #include +#include -#define EDAC_MC_LABEL_LEN 31 #define EDAC_DEVICE_NAME_LEN 31 #define EDAC_ATTRIB_VALUE_LEN 15 -#define MC_PROC_NAME_MAX_LEN 7 #if PAGE_SHIFT < 20 #define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT)) @@ -101,353 +100,6 @@ extern int edac_debug_level; #define edac_dev_name(dev) (dev)->dev_name -/* memory devices */ -enum dev_type { - DEV_UNKNOWN = 0, - DEV_X1, - DEV_X2, - DEV_X4, - DEV_X8, - DEV_X16, - DEV_X32, /* Do these parts exist? */ - DEV_X64 /* Do these parts exist? */ -}; - -#define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN) -#define DEV_FLAG_X1 BIT(DEV_X1) -#define DEV_FLAG_X2 BIT(DEV_X2) -#define DEV_FLAG_X4 BIT(DEV_X4) -#define DEV_FLAG_X8 BIT(DEV_X8) -#define DEV_FLAG_X16 BIT(DEV_X16) -#define DEV_FLAG_X32 BIT(DEV_X32) -#define DEV_FLAG_X64 BIT(DEV_X64) - -/* memory types */ -enum mem_type { - MEM_EMPTY = 0, /* Empty csrow */ - MEM_RESERVED, /* Reserved csrow type */ - MEM_UNKNOWN, /* Unknown csrow type */ - MEM_FPM, /* Fast page mode */ - MEM_EDO, /* Extended data out */ - MEM_BEDO, /* Burst Extended data out */ - MEM_SDR, /* Single data rate SDRAM */ - MEM_RDR, /* Registered single data rate SDRAM */ - MEM_DDR, /* Double data rate SDRAM */ - MEM_RDDR, /* Registered Double data rate SDRAM */ - MEM_RMBS, /* Rambus DRAM */ - MEM_DDR2, /* DDR2 RAM */ - MEM_FB_DDR2, /* fully buffered DDR2 */ - MEM_RDDR2, /* Registered DDR2 RAM */ - MEM_XDR, /* Rambus XDR */ - MEM_DDR3, /* DDR3 RAM */ - MEM_RDDR3, /* Registered DDR3 RAM */ -}; - -#define MEM_FLAG_EMPTY BIT(MEM_EMPTY) -#define MEM_FLAG_RESERVED BIT(MEM_RESERVED) -#define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN) -#define MEM_FLAG_FPM BIT(MEM_FPM) -#define MEM_FLAG_EDO BIT(MEM_EDO) -#define MEM_FLAG_BEDO BIT(MEM_BEDO) -#define MEM_FLAG_SDR BIT(MEM_SDR) -#define MEM_FLAG_RDR BIT(MEM_RDR) -#define MEM_FLAG_DDR BIT(MEM_DDR) -#define MEM_FLAG_RDDR BIT(MEM_RDDR) -#define MEM_FLAG_RMBS BIT(MEM_RMBS) -#define MEM_FLAG_DDR2 BIT(MEM_DDR2) -#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) -#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) -#define MEM_FLAG_XDR BIT(MEM_XDR) -#define MEM_FLAG_DDR3 BIT(MEM_DDR3) -#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) - -/* chipset Error Detection and Correction capabilities and mode */ -enum edac_type { - EDAC_UNKNOWN = 0, /* Unknown if ECC is available */ - EDAC_NONE, /* Doesn't support ECC */ - EDAC_RESERVED, /* Reserved ECC type */ - EDAC_PARITY, /* Detects parity errors */ - EDAC_EC, /* Error Checking - no correction */ - EDAC_SECDED, /* Single bit error correction, Double detection */ - EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */ - EDAC_S4ECD4ED, /* Chipkill x4 devices */ - EDAC_S8ECD8ED, /* Chipkill x8 devices */ - EDAC_S16ECD16ED, /* Chipkill x16 devices */ -}; - -#define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN) -#define EDAC_FLAG_NONE BIT(EDAC_NONE) -#define EDAC_FLAG_PARITY BIT(EDAC_PARITY) -#define EDAC_FLAG_EC BIT(EDAC_EC) -#define EDAC_FLAG_SECDED BIT(EDAC_SECDED) -#define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED) -#define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED) -#define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED) -#define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED) - -/* scrubbing capabilities */ -enum scrub_type { - SCRUB_UNKNOWN = 0, /* Unknown if scrubber is available */ - SCRUB_NONE, /* No scrubber */ - SCRUB_SW_PROG, /* SW progressive (sequential) scrubbing */ - SCRUB_SW_SRC, /* Software scrub only errors */ - SCRUB_SW_PROG_SRC, /* Progressive software scrub from an error */ - SCRUB_SW_TUNABLE, /* Software scrub frequency is tunable */ - SCRUB_HW_PROG, /* HW progressive (sequential) scrubbing */ - SCRUB_HW_SRC, /* Hardware scrub only errors */ - SCRUB_HW_PROG_SRC, /* Progressive hardware scrub from an error */ - SCRUB_HW_TUNABLE /* Hardware scrub frequency is tunable */ -}; - -#define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG) -#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC) -#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC) -#define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE) -#define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG) -#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC) -#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC) -#define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE) - -/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */ - -/* EDAC internal operation states */ -#define OP_ALLOC 0x100 -#define OP_RUNNING_POLL 0x201 -#define OP_RUNNING_INTERRUPT 0x202 -#define OP_RUNNING_POLL_INTR 0x203 -#define OP_OFFLINE 0x300 - -/* - * There are several things to be aware of that aren't at all obvious: - * - * - * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc.. - * - * These are some of the many terms that are thrown about that don't always - * mean what people think they mean (Inconceivable!). In the interest of - * creating a common ground for discussion, terms and their definitions - * will be established. - * - * Memory devices: The individual chip on a memory stick. These devices - * commonly output 4 and 8 bits each. Grouping several - * of these in parallel provides 64 bits which is common - * for a memory stick. - * - * Memory Stick: A printed circuit board that aggregates multiple - * memory devices in parallel. This is the atomic - * memory component that is purchaseable by Joe consumer - * and loaded into a memory socket. - * - * Socket: A physical connector on the motherboard that accepts - * a single memory stick. - * - * Channel: Set of memory devices on a memory stick that must be - * grouped in parallel with one or more additional - * channels from other memory sticks. This parallel - * grouping of the output from multiple channels are - * necessary for the smallest granularity of memory access. - * Some memory controllers are capable of single channel - - * which means that memory sticks can be loaded - * individually. Other memory controllers are only - * capable of dual channel - which means that memory - * sticks must be loaded as pairs (see "socket set"). - * - * Chip-select row: All of the memory devices that are selected together. - * for a single, minimum grain of memory access. - * This selects all of the parallel memory devices across - * all of the parallel channels. Common chip-select rows - * for single channel are 64 bits, for dual channel 128 - * bits. - * - * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory. - * Motherboards commonly drive two chip-select pins to - * a memory stick. A single-ranked stick, will occupy - * only one of those rows. The other will be unused. - * - * Double-Ranked stick: A double-ranked stick has two chip-select rows which - * access different sets of memory devices. The two - * rows cannot be accessed concurrently. - * - * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick. - * A double-sided stick has two chip-select rows which - * access different sets of memory devices. The two - * rows cannot be accessed concurrently. "Double-sided" - * is irrespective of the memory devices being mounted - * on both sides of the memory stick. - * - * Socket set: All of the memory sticks that are required for - * a single memory access or all of the memory sticks - * spanned by a chip-select row. A single socket set - * has two chip-select rows and if double-sided sticks - * are used these will occupy those chip-select rows. - * - * Bank: This term is avoided because it is unclear when - * needing to distinguish between chip-select rows and - * socket sets. - * - * Controller pages: - * - * Physical pages: - * - * Virtual pages: - * - * - * STRUCTURE ORGANIZATION AND CHOICES - * - * - * - * PS - I enjoyed writing all that about as much as you enjoyed reading it. - */ - -struct channel_info { - int chan_idx; /* channel index */ - u32 ce_count; /* Correctable Errors for this CHANNEL */ - char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ - struct csrow_info *csrow; /* the parent */ -}; - -struct csrow_info { - unsigned long first_page; /* first page number in dimm */ - unsigned long last_page; /* last page number in dimm */ - unsigned long page_mask; /* used for interleaving - - * 0UL for non intlv - */ - u32 nr_pages; /* number of pages in csrow */ - u32 grain; /* granularity of reported error in bytes */ - int csrow_idx; /* the chip-select row */ - enum dev_type dtype; /* memory device type */ - u32 ue_count; /* Uncorrectable Errors for this csrow */ - u32 ce_count; /* Correctable Errors for this csrow */ - enum mem_type mtype; /* memory csrow type */ - enum edac_type edac_mode; /* EDAC mode for this csrow */ - struct mem_ctl_info *mci; /* the parent */ - - struct kobject kobj; /* sysfs kobject for this csrow */ - - /* channel information for this csrow */ - u32 nr_channels; - struct channel_info *channels; -}; - -struct mcidev_sysfs_group { - const char *name; /* group name */ - const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */ -}; - -struct mcidev_sysfs_group_kobj { - struct list_head list; /* list for all instances within a mc */ - - struct kobject kobj; /* kobj for the group */ - - const struct mcidev_sysfs_group *grp; /* group description table */ - struct mem_ctl_info *mci; /* the parent */ -}; - -/* mcidev_sysfs_attribute structure - * used for driver sysfs attributes and in mem_ctl_info - * sysfs top level entries - */ -struct mcidev_sysfs_attribute { - /* It should use either attr or grp */ - struct attribute attr; - const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */ - - /* Ops for show/store values at the attribute - not used on group */ - ssize_t (*show)(struct mem_ctl_info *,char *); - ssize_t (*store)(struct mem_ctl_info *, const char *,size_t); -}; - -/* MEMORY controller information structure - */ -struct mem_ctl_info { - struct list_head link; /* for global list of mem_ctl_info structs */ - - struct module *owner; /* Module owner of this control struct */ - - unsigned long mtype_cap; /* memory types supported by mc */ - unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */ - unsigned long edac_cap; /* configuration capabilities - this is - * closely related to edac_ctl_cap. The - * difference is that the controller may be - * capable of s4ecd4ed which would be listed - * in edac_ctl_cap, but if channels aren't - * capable of s4ecd4ed then the edac_cap would - * not have that capability. - */ - unsigned long scrub_cap; /* chipset scrub capabilities */ - enum scrub_type scrub_mode; /* current scrub mode */ - - /* Translates sdram memory scrub rate given in bytes/sec to the - internal representation and configures whatever else needs - to be configured. - */ - int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw); - - /* Get the current sdram memory scrub rate from the internal - representation and converts it to the closest matching - bandwidth in bytes/sec. - */ - int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci); - - - /* pointer to edac checking routine */ - void (*edac_check) (struct mem_ctl_info * mci); - - /* - * Remaps memory pages: controller pages to physical pages. - * For most MC's, this will be NULL. - */ - /* FIXME - why not send the phys page to begin with? */ - unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci, - unsigned long page); - int mc_idx; - int nr_csrows; - struct csrow_info *csrows; - /* - * FIXME - what about controllers on other busses? - IDs must be - * unique. dev pointer should be sufficiently unique, but - * BUS:SLOT.FUNC numbers may not be unique. - */ - struct device *dev; - const char *mod_name; - const char *mod_ver; - const char *ctl_name; - const char *dev_name; - char proc_name[MC_PROC_NAME_MAX_LEN + 1]; - void *pvt_info; - u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */ - u32 ce_noinfo_count; /* Correctable Errors w/o info */ - u32 ue_count; /* Total Uncorrectable Errors for this MC */ - u32 ce_count; /* Total Correctable Errors for this MC */ - unsigned long start_time; /* mci load start time (in jiffies) */ - - struct completion complete; - - /* edac sysfs device control */ - struct kobject edac_mci_kobj; - - /* list for all grp instances within a mc */ - struct list_head grp_kobj_list; - - /* Additional top controller level attributes, but specified - * by the low level driver. - * - * Set by the low level driver to provide attributes at the - * controller level, same level as 'ue_count' and 'ce_count' above. - * An array of structures, NULL terminated - * - * If attributes are desired, then set to array of attributes - * If no attributes are desired, leave NULL - */ - const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes; - - /* work struct for this MC */ - struct delayed_work work; - - /* the internal state of this controller instance */ - int op_state; -}; - /* * The following are the structures to provide for a generic * or abstract 'edac_device'. This set of structures and the diff --git a/include/linux/edac.h b/include/linux/edac.h index 4a73257b47d0..055b248bdd53 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -42,4 +42,354 @@ static inline void opstate_init(void) return; } +#define EDAC_MC_LABEL_LEN 31 +#define MC_PROC_NAME_MAX_LEN 7 + +/* memory devices */ +enum dev_type { + DEV_UNKNOWN = 0, + DEV_X1, + DEV_X2, + DEV_X4, + DEV_X8, + DEV_X16, + DEV_X32, /* Do these parts exist? */ + DEV_X64 /* Do these parts exist? */ +}; + +#define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN) +#define DEV_FLAG_X1 BIT(DEV_X1) +#define DEV_FLAG_X2 BIT(DEV_X2) +#define DEV_FLAG_X4 BIT(DEV_X4) +#define DEV_FLAG_X8 BIT(DEV_X8) +#define DEV_FLAG_X16 BIT(DEV_X16) +#define DEV_FLAG_X32 BIT(DEV_X32) +#define DEV_FLAG_X64 BIT(DEV_X64) + +/* memory types */ +enum mem_type { + MEM_EMPTY = 0, /* Empty csrow */ + MEM_RESERVED, /* Reserved csrow type */ + MEM_UNKNOWN, /* Unknown csrow type */ + MEM_FPM, /* Fast page mode */ + MEM_EDO, /* Extended data out */ + MEM_BEDO, /* Burst Extended data out */ + MEM_SDR, /* Single data rate SDRAM */ + MEM_RDR, /* Registered single data rate SDRAM */ + MEM_DDR, /* Double data rate SDRAM */ + MEM_RDDR, /* Registered Double data rate SDRAM */ + MEM_RMBS, /* Rambus DRAM */ + MEM_DDR2, /* DDR2 RAM */ + MEM_FB_DDR2, /* fully buffered DDR2 */ + MEM_RDDR2, /* Registered DDR2 RAM */ + MEM_XDR, /* Rambus XDR */ + MEM_DDR3, /* DDR3 RAM */ + MEM_RDDR3, /* Registered DDR3 RAM */ +}; + +#define MEM_FLAG_EMPTY BIT(MEM_EMPTY) +#define MEM_FLAG_RESERVED BIT(MEM_RESERVED) +#define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN) +#define MEM_FLAG_FPM BIT(MEM_FPM) +#define MEM_FLAG_EDO BIT(MEM_EDO) +#define MEM_FLAG_BEDO BIT(MEM_BEDO) +#define MEM_FLAG_SDR BIT(MEM_SDR) +#define MEM_FLAG_RDR BIT(MEM_RDR) +#define MEM_FLAG_DDR BIT(MEM_DDR) +#define MEM_FLAG_RDDR BIT(MEM_RDDR) +#define MEM_FLAG_RMBS BIT(MEM_RMBS) +#define MEM_FLAG_DDR2 BIT(MEM_DDR2) +#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) +#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) +#define MEM_FLAG_XDR BIT(MEM_XDR) +#define MEM_FLAG_DDR3 BIT(MEM_DDR3) +#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) + +/* chipset Error Detection and Correction capabilities and mode */ +enum edac_type { + EDAC_UNKNOWN = 0, /* Unknown if ECC is available */ + EDAC_NONE, /* Doesn't support ECC */ + EDAC_RESERVED, /* Reserved ECC type */ + EDAC_PARITY, /* Detects parity errors */ + EDAC_EC, /* Error Checking - no correction */ + EDAC_SECDED, /* Single bit error correction, Double detection */ + EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */ + EDAC_S4ECD4ED, /* Chipkill x4 devices */ + EDAC_S8ECD8ED, /* Chipkill x8 devices */ + EDAC_S16ECD16ED, /* Chipkill x16 devices */ +}; + +#define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN) +#define EDAC_FLAG_NONE BIT(EDAC_NONE) +#define EDAC_FLAG_PARITY BIT(EDAC_PARITY) +#define EDAC_FLAG_EC BIT(EDAC_EC) +#define EDAC_FLAG_SECDED BIT(EDAC_SECDED) +#define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED) +#define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED) +#define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED) +#define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED) + +/* scrubbing capabilities */ +enum scrub_type { + SCRUB_UNKNOWN = 0, /* Unknown if scrubber is available */ + SCRUB_NONE, /* No scrubber */ + SCRUB_SW_PROG, /* SW progressive (sequential) scrubbing */ + SCRUB_SW_SRC, /* Software scrub only errors */ + SCRUB_SW_PROG_SRC, /* Progressive software scrub from an error */ + SCRUB_SW_TUNABLE, /* Software scrub frequency is tunable */ + SCRUB_HW_PROG, /* HW progressive (sequential) scrubbing */ + SCRUB_HW_SRC, /* Hardware scrub only errors */ + SCRUB_HW_PROG_SRC, /* Progressive hardware scrub from an error */ + SCRUB_HW_TUNABLE /* Hardware scrub frequency is tunable */ +}; + +#define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG) +#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC) +#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC) +#define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE) +#define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG) +#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC) +#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC) +#define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE) + +/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */ + +/* EDAC internal operation states */ +#define OP_ALLOC 0x100 +#define OP_RUNNING_POLL 0x201 +#define OP_RUNNING_INTERRUPT 0x202 +#define OP_RUNNING_POLL_INTR 0x203 +#define OP_OFFLINE 0x300 + +/* + * There are several things to be aware of that aren't at all obvious: + * + * + * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc.. + * + * These are some of the many terms that are thrown about that don't always + * mean what people think they mean (Inconceivable!). In the interest of + * creating a common ground for discussion, terms and their definitions + * will be established. + * + * Memory devices: The individual chip on a memory stick. These devices + * commonly output 4 and 8 bits each. Grouping several + * of these in parallel provides 64 bits which is common + * for a memory stick. + * + * Memory Stick: A printed circuit board that aggregates multiple + * memory devices in parallel. This is the atomic + * memory component that is purchaseable by Joe consumer + * and loaded into a memory socket. + * + * Socket: A physical connector on the motherboard that accepts + * a single memory stick. + * + * Channel: Set of memory devices on a memory stick that must be + * grouped in parallel with one or more additional + * channels from other memory sticks. This parallel + * grouping of the output from multiple channels are + * necessary for the smallest granularity of memory access. + * Some memory controllers are capable of single channel - + * which means that memory sticks can be loaded + * individually. Other memory controllers are only + * capable of dual channel - which means that memory + * sticks must be loaded as pairs (see "socket set"). + * + * Chip-select row: All of the memory devices that are selected together. + * for a single, minimum grain of memory access. + * This selects all of the parallel memory devices across + * all of the parallel channels. Common chip-select rows + * for single channel are 64 bits, for dual channel 128 + * bits. + * + * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory. + * Motherboards commonly drive two chip-select pins to + * a memory stick. A single-ranked stick, will occupy + * only one of those rows. The other will be unused. + * + * Double-Ranked stick: A double-ranked stick has two chip-select rows which + * access different sets of memory devices. The two + * rows cannot be accessed concurrently. + * + * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick. + * A double-sided stick has two chip-select rows which + * access different sets of memory devices. The two + * rows cannot be accessed concurrently. "Double-sided" + * is irrespective of the memory devices being mounted + * on both sides of the memory stick. + * + * Socket set: All of the memory sticks that are required for + * a single memory access or all of the memory sticks + * spanned by a chip-select row. A single socket set + * has two chip-select rows and if double-sided sticks + * are used these will occupy those chip-select rows. + * + * Bank: This term is avoided because it is unclear when + * needing to distinguish between chip-select rows and + * socket sets. + * + * Controller pages: + * + * Physical pages: + * + * Virtual pages: + * + * + * STRUCTURE ORGANIZATION AND CHOICES + * + * + * + * PS - I enjoyed writing all that about as much as you enjoyed reading it. + */ + +struct channel_info { + int chan_idx; /* channel index */ + u32 ce_count; /* Correctable Errors for this CHANNEL */ + char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ + struct csrow_info *csrow; /* the parent */ +}; + +struct csrow_info { + unsigned long first_page; /* first page number in dimm */ + unsigned long last_page; /* last page number in dimm */ + unsigned long page_mask; /* used for interleaving - + * 0UL for non intlv + */ + u32 nr_pages; /* number of pages in csrow */ + u32 grain; /* granularity of reported error in bytes */ + int csrow_idx; /* the chip-select row */ + enum dev_type dtype; /* memory device type */ + u32 ue_count; /* Uncorrectable Errors for this csrow */ + u32 ce_count; /* Correctable Errors for this csrow */ + enum mem_type mtype; /* memory csrow type */ + enum edac_type edac_mode; /* EDAC mode for this csrow */ + struct mem_ctl_info *mci; /* the parent */ + + struct kobject kobj; /* sysfs kobject for this csrow */ + + /* channel information for this csrow */ + u32 nr_channels; + struct channel_info *channels; +}; + +struct mcidev_sysfs_group { + const char *name; /* group name */ + const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */ +}; + +struct mcidev_sysfs_group_kobj { + struct list_head list; /* list for all instances within a mc */ + + struct kobject kobj; /* kobj for the group */ + + const struct mcidev_sysfs_group *grp; /* group description table */ + struct mem_ctl_info *mci; /* the parent */ +}; + +/* mcidev_sysfs_attribute structure + * used for driver sysfs attributes and in mem_ctl_info + * sysfs top level entries + */ +struct mcidev_sysfs_attribute { + /* It should use either attr or grp */ + struct attribute attr; + const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */ + + /* Ops for show/store values at the attribute - not used on group */ + ssize_t (*show)(struct mem_ctl_info *,char *); + ssize_t (*store)(struct mem_ctl_info *, const char *,size_t); +}; + +/* MEMORY controller information structure + */ +struct mem_ctl_info { + struct list_head link; /* for global list of mem_ctl_info structs */ + + struct module *owner; /* Module owner of this control struct */ + + unsigned long mtype_cap; /* memory types supported by mc */ + unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */ + unsigned long edac_cap; /* configuration capabilities - this is + * closely related to edac_ctl_cap. The + * difference is that the controller may be + * capable of s4ecd4ed which would be listed + * in edac_ctl_cap, but if channels aren't + * capable of s4ecd4ed then the edac_cap would + * not have that capability. + */ + unsigned long scrub_cap; /* chipset scrub capabilities */ + enum scrub_type scrub_mode; /* current scrub mode */ + + /* Translates sdram memory scrub rate given in bytes/sec to the + internal representation and configures whatever else needs + to be configured. + */ + int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw); + + /* Get the current sdram memory scrub rate from the internal + representation and converts it to the closest matching + bandwidth in bytes/sec. + */ + int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci); + + + /* pointer to edac checking routine */ + void (*edac_check) (struct mem_ctl_info * mci); + + /* + * Remaps memory pages: controller pages to physical pages. + * For most MC's, this will be NULL. + */ + /* FIXME - why not send the phys page to begin with? */ + unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci, + unsigned long page); + int mc_idx; + int nr_csrows; + struct csrow_info *csrows; + /* + * FIXME - what about controllers on other busses? - IDs must be + * unique. dev pointer should be sufficiently unique, but + * BUS:SLOT.FUNC numbers may not be unique. + */ + struct device *dev; + const char *mod_name; + const char *mod_ver; + const char *ctl_name; + const char *dev_name; + char proc_name[MC_PROC_NAME_MAX_LEN + 1]; + void *pvt_info; + u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */ + u32 ce_noinfo_count; /* Correctable Errors w/o info */ + u32 ue_count; /* Total Uncorrectable Errors for this MC */ + u32 ce_count; /* Total Correctable Errors for this MC */ + unsigned long start_time; /* mci load start time (in jiffies) */ + + struct completion complete; + + /* edac sysfs device control */ + struct kobject edac_mci_kobj; + + /* list for all grp instances within a mc */ + struct list_head grp_kobj_list; + + /* Additional top controller level attributes, but specified + * by the low level driver. + * + * Set by the low level driver to provide attributes at the + * controller level, same level as 'ue_count' and 'ce_count' above. + * An array of structures, NULL terminated + * + * If attributes are desired, then set to array of attributes + * If no attributes are desired, leave NULL + */ + const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes; + + /* work struct for this MC */ + struct delayed_work work; + + /* the internal state of this controller instance */ + int op_state; +}; + #endif From e8b6a1271035e621d1c4b22403c697b5c969728c Mon Sep 17 00:00:00 2001 From: Samuel Gabrielsson Date: Wed, 30 Mar 2011 10:21:23 -0300 Subject: [PATCH 04/21] i7core_edac: Add scrubbing support Add scrubbing support to i7core_edac, tested on intel Xeon L5638. Signed-off-by: Samuel Gabrielsson Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7core_edac.c | 126 +++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index c64ba267902f..0c02486cf57a 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -78,6 +78,8 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices"); /* OFFSETS for Device 0 Function 0 */ #define MC_CFG_CONTROL 0x90 + #define MC_CFG_UNLOCK 0x02 + #define MC_CFG_LOCK 0x00 /* OFFSETS for Device 3 Function 0 */ @@ -98,6 +100,14 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices"); #define DIMM0_COR_ERR(r) ((r) & 0x7fff) /* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */ +#define MC_SSRCONTROL 0x48 + #define SSR_MODE_DISABLE 0x00 + #define SSR_MODE_ENABLE 0x01 + #define SSR_MODE_MASK 0x03 + +#define MC_SCRUB_CONTROL 0x4c + #define STARTSCRUB (1 << 24) + #define MC_COR_ECC_CNT_0 0x80 #define MC_COR_ECC_CNT_1 0x84 #define MC_COR_ECC_CNT_2 0x88 @@ -1901,6 +1911,116 @@ static int i7core_mce_check_error(void *priv, struct mce *mce) return 1; } +/* + * set_sdram_scrub_rate This routine sets byte/sec bandwidth scrub rate + * to hardware according to SCRUBINTERVAL formula + * found in datasheet. + */ +static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw) +{ + struct i7core_pvt *pvt = mci->pvt_info; + struct pci_dev *pdev; + const u32 cache_line_size = 64; + const u32 freq_dclk = 800*1000000; + u32 dw_scrub; + u32 dw_ssr; + + /* Get data from the MC register, function 2 */ + pdev = pvt->pci_mcr[2]; + if (!pdev) + return -ENODEV; + + pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub); + + if (new_bw == 0) { + /* Prepare to disable petrol scrub */ + dw_scrub &= ~STARTSCRUB; + /* Stop the patrol scrub engine */ + write_and_test(pdev, MC_SCRUB_CONTROL, dw_scrub & ~0x00ffffff); + + /* Get current status of scrub rate and set bit to disable */ + pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr); + dw_ssr &= ~SSR_MODE_MASK; + dw_ssr |= SSR_MODE_DISABLE; + } else { + /* + * Translate the desired scrub rate to a register value and + * program the cooresponding register value. + */ + dw_scrub = 0x00ffffff & (cache_line_size * freq_dclk / new_bw); + + /* Start the patrol scrub engine */ + pci_write_config_dword(pdev, MC_SCRUB_CONTROL, + STARTSCRUB | dw_scrub); + + /* Get current status of scrub rate and set bit to enable */ + pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr); + dw_ssr &= ~SSR_MODE_MASK; + dw_ssr |= SSR_MODE_ENABLE; + } + /* Disable or enable scrubbing */ + pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr); + + return new_bw; +} + +/* + * get_sdram_scrub_rate This routine convert current scrub rate value + * into byte/sec bandwidth accourding to + * SCRUBINTERVAL formula found in datasheet. + */ +static int get_sdram_scrub_rate(struct mem_ctl_info *mci) +{ + struct i7core_pvt *pvt = mci->pvt_info; + struct pci_dev *pdev; + const u32 cache_line_size = 64; + const u32 freq_dclk = 800*1000000; + u32 scrubval; + + /* Get data from the MC register, function 2 */ + pdev = pvt->pci_mcr[2]; + if (!pdev) + return -ENODEV; + + /* Get current scrub control data */ + pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval); + + /* Mask highest 8-bits to 0 */ + scrubval &= 0x00ffffff; + if (!scrubval) + return 0; + + /* Calculate scrub rate value into byte/sec bandwidth */ + return 0xffffffff & (cache_line_size * freq_dclk / (u64) scrubval); +} + +static void enable_sdram_scrub_setting(struct mem_ctl_info *mci) +{ + struct i7core_pvt *pvt = mci->pvt_info; + u32 pci_lock; + + /* Unlock writes to pci registers */ + pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock); + pci_lock &= ~0x3; + pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, + pci_lock | MC_CFG_UNLOCK); + + mci->set_sdram_scrub_rate = set_sdram_scrub_rate; + mci->get_sdram_scrub_rate = get_sdram_scrub_rate; +} + +static void disable_sdram_scrub_setting(struct mem_ctl_info *mci) +{ + struct i7core_pvt *pvt = mci->pvt_info; + u32 pci_lock; + + /* Lock writes to pci registers */ + pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock); + pci_lock &= ~0x3; + pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, + pci_lock | MC_CFG_LOCK); +} + static void i7core_pci_ctl_create(struct i7core_pvt *pvt) { pvt->i7core_pci = edac_pci_create_generic_ctl( @@ -1939,6 +2059,9 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", __func__, mci, &i7core_dev->pdev[0]->dev); + /* Disable scrubrate setting */ + disable_sdram_scrub_setting(mci); + /* Disable MCE NMI handler */ edac_mce_unregister(&pvt->edac_mce); @@ -2012,6 +2135,9 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) /* Set the function pointer to an actual operation function */ mci->edac_check = i7core_check_error; + /* Enable scrubrate setting */ + enable_sdram_scrub_setting(mci); + /* add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { debugf0("MC: " __FILE__ From 27100db0e0d381d24b6f3cb1a4f439996e7c00c8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 4 Aug 2011 21:35:27 -0300 Subject: [PATCH 05/21] i7core_edac: Don't enable memory scrubbing for Xeon 35xx Xeon 35xx doesn't mention memory scrub. It seems that only Xeon 55xx and above supports it. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7core_edac.c | 46 ++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 0c02486cf57a..aeb01f42ffef 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -263,7 +263,7 @@ struct i7core_pvt { unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS]; int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS]; - unsigned int is_registered; + bool is_registered, enable_scrub; /* mcelog glue */ struct edac_mce edac_mce; @@ -1487,8 +1487,10 @@ static int mci_bind_devs(struct mem_ctl_info *mci, struct i7core_pvt *pvt = mci->pvt_info; struct pci_dev *pdev; int i, func, slot; + char *family; - pvt->is_registered = 0; + pvt->is_registered = false; + pvt->enable_scrub = false; for (i = 0; i < i7core_dev->n_devs; i++) { pdev = i7core_dev->pdev[i]; if (!pdev) @@ -1504,9 +1506,37 @@ static int mci_bind_devs(struct mem_ctl_info *mci, if (unlikely(func > MAX_CHAN_FUNC)) goto error; pvt->pci_ch[slot - 4][func] = pdev; - } else if (!slot && !func) + } else if (!slot && !func) { pvt->pci_noncore = pdev; - else + + /* Detect the processor family */ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_I7_NONCORE: + family = "Xeon 35xx/ i7core"; + pvt->enable_scrub = false; + break; + case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT: + family = "i7-800/i5-700"; + pvt->enable_scrub = false; + break; + case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE: + family = "Xeon 34xx"; + pvt->enable_scrub = false; + break; + case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT: + family = "Xeon 55xx"; + pvt->enable_scrub = true; + break; + case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2: + family = "Xeon 56xx / i7-900"; + pvt->enable_scrub = true; + break; + default: + family = "unknown"; + pvt->enable_scrub = false; + } + debugf0("Detected a processor type %s\n", family); + } else goto error; debugf0("Associated fn %d.%d, dev = %p, socket %d\n", @@ -1515,7 +1545,7 @@ static int mci_bind_devs(struct mem_ctl_info *mci, if (PCI_SLOT(pdev->devfn) == 3 && PCI_FUNC(pdev->devfn) == 2) - pvt->is_registered = 1; + pvt->is_registered = true; } return 0; @@ -2060,7 +2090,8 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) __func__, mci, &i7core_dev->pdev[0]->dev); /* Disable scrubrate setting */ - disable_sdram_scrub_setting(mci); + if (pvt->enable_scrub) + disable_sdram_scrub_setting(mci); /* Disable MCE NMI handler */ edac_mce_unregister(&pvt->edac_mce); @@ -2136,7 +2167,8 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) mci->edac_check = i7core_check_error; /* Enable scrubrate setting */ - enable_sdram_scrub_setting(mci); + if (pvt->enable_scrub) + enable_sdram_scrub_setting(mci); /* add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { From 5034086b72e4e2d42f0db4b4ebb0fe0129ebdeae Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 22 Jun 2011 05:40:06 -0300 Subject: [PATCH 06/21] EDAC i7core: Use mce socketid for better compatibility mce->socketid and cpu_data(mce->cpu).phys_proc_id are the same, compare with mce_setup (in mce.c): m->cpu = m->extcpu = smp_processor_id(); ... m->socketid = cpu_data(m->extcpu).phys_proc_id; This makes it easier for example for XEN patches to hook into the MCE subsystem. Compile tested on x86_64. Signed-off-by: Thomas Renninger CC: JBeulich@novell.com CC: linux-edac@vger.kernel.org CC: Mauro Carvalho Chehab --- drivers/edac/i7core_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index aeb01f42ffef..764207ed6d44 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1917,7 +1917,7 @@ static int i7core_mce_check_error(void *priv, struct mce *mce) #ifdef CONFIG_SMP /* Only handle if it is the right mc controller */ - if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket) + if (mce->socketid != pvt->i7core_dev->socket) return 0; #endif From f0cb54524366654e72c87e0a1f87c0b3ff36deb3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 18 Jul 2011 11:24:45 -0300 Subject: [PATCH 07/21] x86, MCE: Use notifier chain only for MCE decoding Drop the edac_mce custom hook in favor of the generic notifier mechanism. Also, do not log the error to mcelog if the notified agent was able to decode it. Signed-off-by: Borislav Petkov Acked-by: Ingo Molnar Signed-off-by: Mauro Carvalho Chehab --- arch/x86/kernel/cpu/mcheck/mce.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 08363b042122..019786a6b0b2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -144,23 +144,20 @@ static struct mce_log mcelog = { void mce_log(struct mce *mce) { unsigned next, entry; + int ret = 0; /* Emit the trace record: */ trace_mce_record(mce); + ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); + if (ret == NOTIFY_STOP) + return; + mce->finished = 0; wmb(); for (;;) { entry = rcu_dereference_check_mce(mcelog.next); for (;;) { - /* - * If edac_mce is enabled, it will check the error type - * and will process it, if it is a known error. - * Otherwise, the error will be sent through mcelog - * interface - */ - if (edac_mce_parse(mce)) - return; /* * When the buffer fills up discard new entries. @@ -551,10 +548,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. */ - if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { + if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) mce_log(&m); - atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); - } /* * Clear state for this bank. From 4140c54266365e4267a2dbc5765101bba3b42896 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 18 Jul 2011 11:24:46 -0300 Subject: [PATCH 08/21] i7core_edac: Drop the edac_mce facility Remove edac_mce pieces and use the normal MCE decoder notifier chain by retaining the same functionality with considerably less code. Signed-off-by: Borislav Petkov Signed-off-by: Mauro Carvalho Chehab --- arch/x86/kernel/cpu/mcheck/mce.c | 1 - drivers/edac/Kconfig | 3 -- drivers/edac/Makefile | 1 - drivers/edac/edac_mce.c | 61 -------------------------------- drivers/edac/i7core_edac.c | 51 +++++++++++++------------- include/linux/edac_mce.h | 31 ---------------- 6 files changed, 25 insertions(+), 123 deletions(-) delete mode 100644 drivers/edac/edac_mce.c delete mode 100644 include/linux/edac_mce.h diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 019786a6b0b2..63aad2742d8a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index af1a17d42bd7..f888fb599184 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -71,9 +71,6 @@ config EDAC_MM_EDAC occurred so that a particular failing memory module can be replaced. If unsure, select 'Y'. -config EDAC_MCE - bool - config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h" depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 3e239133e29e..b06a9b11a5f6 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o -obj-$(CONFIG_EDAC_MCE) += edac_mce.o edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o diff --git a/drivers/edac/edac_mce.c b/drivers/edac/edac_mce.c deleted file mode 100644 index 9ccdc5b140e7..000000000000 --- a/drivers/edac/edac_mce.c +++ /dev/null @@ -1,61 +0,0 @@ -/* Provides edac interface to mcelog events - * - * This file may be distributed under the terms of the - * GNU General Public License version 2. - * - * Copyright (c) 2009 by: - * Mauro Carvalho Chehab - * - * Red Hat Inc. http://www.redhat.com - */ - -#include -#include -#include - -int edac_mce_enabled; -EXPORT_SYMBOL_GPL(edac_mce_enabled); - - -/* - * Extension interface - */ - -static LIST_HEAD(edac_mce_list); -static DEFINE_MUTEX(edac_mce_lock); - -int edac_mce_register(struct edac_mce *edac_mce) -{ - mutex_lock(&edac_mce_lock); - list_add_tail(&edac_mce->list, &edac_mce_list); - mutex_unlock(&edac_mce_lock); - return 0; -} -EXPORT_SYMBOL(edac_mce_register); - -void edac_mce_unregister(struct edac_mce *edac_mce) -{ - mutex_lock(&edac_mce_lock); - list_del(&edac_mce->list); - mutex_unlock(&edac_mce_lock); -} -EXPORT_SYMBOL(edac_mce_unregister); - -int edac_mce_parse(struct mce *mce) -{ - struct edac_mce *edac_mce; - - list_for_each_entry(edac_mce, &edac_mce_list, list) { - if (edac_mce->check_error(edac_mce->priv, mce)) - return 1; - } - - /* Nobody queued the error */ - return 0; -} -EXPORT_SYMBOL_GPL(edac_mce_parse); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mauro Carvalho Chehab "); -MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); -MODULE_DESCRIPTION("EDAC Driver for mcelog captured errors"); diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 764207ed6d44..0bddc27362c1 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -33,8 +33,8 @@ #include #include #include -#include #include +#include #include #include "edac_core.h" @@ -265,9 +265,6 @@ struct i7core_pvt { bool is_registered, enable_scrub; - /* mcelog glue */ - struct edac_mce edac_mce; - /* Fifo double buffers */ struct mce mce_entry[MCE_LOG_LEN]; struct mce mce_outentry[MCE_LOG_LEN]; @@ -1899,33 +1896,43 @@ check_ce_error: * WARNING: As this routine should be called at NMI time, extra care should * be taken to avoid deadlocks, and to be as fast as possible. */ -static int i7core_mce_check_error(void *priv, struct mce *mce) +static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, + void *data) { - struct mem_ctl_info *mci = priv; - struct i7core_pvt *pvt = mci->pvt_info; + struct mce *mce = (struct mce *)data; + struct i7core_dev *i7_dev; + struct mem_ctl_info *mci; + struct i7core_pvt *pvt; + + i7_dev = get_i7core_dev(mce->socketid); + if (!i7_dev) + return NOTIFY_BAD; + + mci = i7_dev->mci; + pvt = mci->pvt_info; /* * Just let mcelog handle it if the error is * outside the memory controller */ if (((mce->status & 0xffff) >> 7) != 1) - return 0; + return NOTIFY_DONE; /* Bank 8 registers are the only ones that we know how to handle */ if (mce->bank != 8) - return 0; + return NOTIFY_DONE; #ifdef CONFIG_SMP /* Only handle if it is the right mc controller */ if (mce->socketid != pvt->i7core_dev->socket) - return 0; + return NOTIFY_DONE; #endif smp_rmb(); if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { smp_wmb(); pvt->mce_overrun++; - return 0; + return NOTIFY_DONE; } /* Copy memory error at the ringbuffer */ @@ -1938,9 +1945,13 @@ static int i7core_mce_check_error(void *priv, struct mce *mce) i7core_check_error(mci); /* Advise mcelog that the errors were handled */ - return 1; + return NOTIFY_STOP; } +static struct notifier_block i7_mce_dec = { + .notifier_call = i7core_mce_check_error, +}; + /* * set_sdram_scrub_rate This routine sets byte/sec bandwidth scrub rate * to hardware according to SCRUBINTERVAL formula @@ -2093,8 +2104,7 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) if (pvt->enable_scrub) disable_sdram_scrub_setting(mci); - /* Disable MCE NMI handler */ - edac_mce_unregister(&pvt->edac_mce); + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec); /* Disable EDAC polling */ i7core_pci_ctl_release(pvt); @@ -2193,21 +2203,10 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) /* allocating generic PCI control info */ i7core_pci_ctl_create(pvt); - /* Registers on edac_mce in order to receive memory errors */ - pvt->edac_mce.priv = mci; - pvt->edac_mce.check_error = i7core_mce_check_error; - rc = edac_mce_register(&pvt->edac_mce); - if (unlikely(rc < 0)) { - debugf0("MC: " __FILE__ - ": %s(): failed edac_mce_register()\n", __func__); - goto fail1; - } + atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec); return 0; -fail1: - i7core_pci_ctl_release(pvt); - edac_mc_del_mc(mci->dev); fail0: kfree(mci->ctl_name); edac_mc_free(mci); diff --git a/include/linux/edac_mce.h b/include/linux/edac_mce.h deleted file mode 100644 index f974fc035363..000000000000 --- a/include/linux/edac_mce.h +++ /dev/null @@ -1,31 +0,0 @@ -/* Provides edac interface to mcelog events - * - * This file may be distributed under the terms of the - * GNU General Public License version 2. - * - * Copyright (c) 2009 by: - * Mauro Carvalho Chehab - * - * Red Hat Inc. http://www.redhat.com - */ - -#if defined(CONFIG_EDAC_MCE) || \ - (defined(CONFIG_EDAC_MCE_MODULE) && defined(MODULE)) - -#include -#include - -struct edac_mce { - struct list_head list; - - void *priv; - int (*check_error)(void *priv, struct mce *mce); -}; - -int edac_mce_register(struct edac_mce *edac_mce); -void edac_mce_unregister(struct edac_mce *edac_mce); -int edac_mce_parse(struct mce *mce); - -#else -#define edac_mce_parse(mce) (0) -#endif From 45f74482a05df8aa9253ec0b228648cbad3dd7e8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 1 Nov 2011 10:00:17 -0200 Subject: [PATCH 09/21] MAINTAINERS: remove dropped edac_mce.* from the file Signed-off-by: Mauro Carvalho Chehab --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6185d0513584..1544cc8cf2db 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2409,8 +2409,6 @@ L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i7core_edac.c -F: drivers/edac/edac_mce.c -F: include/linux/edac_mce.h EDAC-I82975X M: Ranganathan Desikan From f9902f24fc2d6bcc836622aa70e77f7f9dfb30a9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 21 Aug 2010 09:42:05 -0300 Subject: [PATCH 10/21] i7core_edac: use edac's own way to print errors Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7core_edac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 0bddc27362c1..304f550ed8d4 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -2068,7 +2068,8 @@ static void i7core_pci_ctl_create(struct i7core_pvt *pvt) &pvt->i7core_dev->pdev[0]->dev, EDAC_MOD_STR); if (unlikely(!pvt->i7core_pci)) - pr_warn("Unable to setup PCI error report via EDAC\n"); + i7core_printk(KERN_WARNING, + "Unable to setup PCI error report via EDAC\n"); } static void i7core_pci_ctl_release(struct i7core_pvt *pvt) From 4055759145b421537207ed0d034041f1774ab41d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 30 Nov 2010 08:14:30 -0200 Subject: [PATCH 11/21] i7core_edac: return -ENODEV if no MC is found Nehalem-EX uses a different memory controller. However, as the memory controller is not visible on some Nehalem/Nehalem-EP, we need to indirectly probe via a X58 PCI device. The same devices are found on (some) Nehalem-EX. So, on those machines, the probe routine needs to return -ENODEV, as the actual Memory Controller registers won't be detected. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7core_edac.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 304f550ed8d4..6ae7795dea86 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -2226,7 +2226,7 @@ fail0: static int __devinit i7core_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - int rc; + int rc, count = 0; struct i7core_dev *i7core_dev; /* get the pci devices we want to reserve for our use */ @@ -2246,12 +2246,28 @@ static int __devinit i7core_probe(struct pci_dev *pdev, goto fail0; list_for_each_entry(i7core_dev, &i7core_edac_list, list) { + count++; rc = i7core_register_mci(i7core_dev); if (unlikely(rc < 0)) goto fail1; } - i7core_printk(KERN_INFO, "Driver loaded.\n"); + /* + * Nehalem-EX uses a different memory controller. However, as the + * memory controller is not visible on some Nehalem/Nehalem-EP, we + * need to indirectly probe via a X58 PCI device. The same devices + * are found on (some) Nehalem-EX. So, on those machines, the + * probe routine needs to return -ENODEV, as the actual Memory + * Controller registers won't be detected. + */ + if (!count) { + rc = -ENODEV; + goto fail1; + } + + i7core_printk(KERN_INFO, + "Driver loaded, %d memory controller(s) found.\n", + count); mutex_unlock(&i7core_edac_lock); return 0; From 168eb34dedf2f9456cc26f513d27de65c64fc608 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 10 Aug 2011 09:43:30 -0300 Subject: [PATCH 12/21] EDAC: Correct Kconfig dependencies Both AMD and Intel i7 EDAC drivers use MCE features and are thus dependent of this functionality present in the kernel. Express this in Kconfig so that randconfig builds don't break. Reported-by: Randy Dunlap Signed-off-by: Borislav Petkov Acked-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/Kconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index f888fb599184..d057d6c40208 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -41,7 +41,7 @@ config EDAC_DEBUG config EDAC_DECODE_MCE tristate "Decode MCEs in human-readable form (only on AMD for now)" - depends on CPU_SUP_AMD && X86_MCE + depends on CPU_SUP_AMD && X86_MCE_AMD default y ---help--- Enable this option if you want to decode Machine Check Exceptions @@ -170,8 +170,7 @@ config EDAC_I5400 config EDAC_I7CORE tristate "Intel i7 Core (Nehalem) processors" - depends on EDAC_MM_EDAC && PCI && X86 - select EDAC_MCE + depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL help Support for error detection and correction the Intel i7 Core (Nehalem) Integrated Memory Controller that exists on From 535e9c78e1a80946283cecc742b687b3a5ff5109 Mon Sep 17 00:00:00 2001 From: Nils Carlson Date: Mon, 8 Aug 2011 06:21:26 -0300 Subject: [PATCH 13/21] i7core_edac: scrubbing fixups Get a more reliable DCLK value from DMI, name the SCRUBINTERVAL mask and guard against potential overflow in the scrub rate computations. Signed-off-by: Nils Carlson --- drivers/edac/i7core_edac.c | 141 ++++++++++++++++++++++++++++++++++--- 1 file changed, 133 insertions(+), 8 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 6ae7795dea86..7cb68decf57d 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,7 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices"); #define MC_SCRUB_CONTROL 0x4c #define STARTSCRUB (1 << 24) + #define SCRUBINTERVAL_MASK 0xffffff #define MC_COR_ECC_CNT_0 0x80 #define MC_COR_ECC_CNT_1 0x84 @@ -275,6 +277,9 @@ struct i7core_pvt { /* Count indicator to show errors not got */ unsigned mce_overrun; + /* DCLK Frequency used for computing scrub rate */ + int dclk_freq; + /* Struct to control EDAC polling */ struct edac_pci_ctl_info *i7core_pci; }; @@ -1952,6 +1957,112 @@ static struct notifier_block i7_mce_dec = { .notifier_call = i7core_mce_check_error, }; +struct memdev_dmi_entry { + u8 type; + u8 length; + u16 handle; + u16 phys_mem_array_handle; + u16 mem_err_info_handle; + u16 total_width; + u16 data_width; + u16 size; + u8 form; + u8 device_set; + u8 device_locator; + u8 bank_locator; + u8 memory_type; + u16 type_detail; + u16 speed; + u8 manufacturer; + u8 serial_number; + u8 asset_tag; + u8 part_number; + u8 attributes; + u32 extended_size; + u16 conf_mem_clk_speed; +} __attribute__((__packed__)); + + +/* + * Decode the DRAM Clock Frequency, be paranoid, make sure that all + * memory devices show the same speed, and if they don't then consider + * all speeds to be invalid. + */ +static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq) +{ + int *dclk_freq = _dclk_freq; + u16 dmi_mem_clk_speed; + + if (*dclk_freq == -1) + return; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) { + struct memdev_dmi_entry *memdev_dmi_entry = + (struct memdev_dmi_entry *)dh; + unsigned long conf_mem_clk_speed_offset = + (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed - + (unsigned long)&memdev_dmi_entry->type; + unsigned long speed_offset = + (unsigned long)&memdev_dmi_entry->speed - + (unsigned long)&memdev_dmi_entry->type; + + /* Check that a DIMM is present */ + if (memdev_dmi_entry->size == 0) + return; + + /* + * Pick the configured speed if it's available, otherwise + * pick the DIMM speed, or we don't have a speed. + */ + if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) { + dmi_mem_clk_speed = + memdev_dmi_entry->conf_mem_clk_speed; + } else if (memdev_dmi_entry->length > speed_offset) { + dmi_mem_clk_speed = memdev_dmi_entry->speed; + } else { + *dclk_freq = -1; + return; + } + + if (*dclk_freq == 0) { + /* First pass, speed was 0 */ + if (dmi_mem_clk_speed > 0) { + /* Set speed if a valid speed is read */ + *dclk_freq = dmi_mem_clk_speed; + } else { + /* Otherwise we don't have a valid speed */ + *dclk_freq = -1; + } + } else if (*dclk_freq > 0 && + *dclk_freq != dmi_mem_clk_speed) { + /* + * If we have a speed, check that all DIMMS are the same + * speed, otherwise set the speed as invalid. + */ + *dclk_freq = -1; + } + } +} + +/* + * The default DCLK frequency is used as a fallback if we + * fail to find anything reliable in the DMI. The value + * is taken straight from the datasheet. + */ +#define DEFAULT_DCLK_FREQ 800 + +static int get_dclk_freq(void) +{ + int dclk_freq = 0; + + dmi_walk(decode_dclk, (void *)&dclk_freq); + + if (dclk_freq < 1) + return DEFAULT_DCLK_FREQ; + + return dclk_freq; +} + /* * set_sdram_scrub_rate This routine sets byte/sec bandwidth scrub rate * to hardware according to SCRUBINTERVAL formula @@ -1961,8 +2072,6 @@ static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw) { struct i7core_pvt *pvt = mci->pvt_info; struct pci_dev *pdev; - const u32 cache_line_size = 64; - const u32 freq_dclk = 800*1000000; u32 dw_scrub; u32 dw_ssr; @@ -1977,18 +2086,28 @@ static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw) /* Prepare to disable petrol scrub */ dw_scrub &= ~STARTSCRUB; /* Stop the patrol scrub engine */ - write_and_test(pdev, MC_SCRUB_CONTROL, dw_scrub & ~0x00ffffff); + write_and_test(pdev, MC_SCRUB_CONTROL, + dw_scrub & ~SCRUBINTERVAL_MASK); /* Get current status of scrub rate and set bit to disable */ pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr); dw_ssr &= ~SSR_MODE_MASK; dw_ssr |= SSR_MODE_DISABLE; } else { + const int cache_line_size = 64; + const u32 freq_dclk_mhz = pvt->dclk_freq; + unsigned long long scrub_interval; /* * Translate the desired scrub rate to a register value and - * program the cooresponding register value. + * program the corresponding register value. */ - dw_scrub = 0x00ffffff & (cache_line_size * freq_dclk / new_bw); + scrub_interval = (unsigned long long)freq_dclk_mhz * + cache_line_size * 1000000 / new_bw; + + if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK) + return -EINVAL; + + dw_scrub = SCRUBINTERVAL_MASK & scrub_interval; /* Start the patrol scrub engine */ pci_write_config_dword(pdev, MC_SCRUB_CONTROL, @@ -2015,7 +2134,8 @@ static int get_sdram_scrub_rate(struct mem_ctl_info *mci) struct i7core_pvt *pvt = mci->pvt_info; struct pci_dev *pdev; const u32 cache_line_size = 64; - const u32 freq_dclk = 800*1000000; + const u32 freq_dclk_mhz = pvt->dclk_freq; + unsigned long long scrub_rate; u32 scrubval; /* Get data from the MC register, function 2 */ @@ -2027,12 +2147,14 @@ static int get_sdram_scrub_rate(struct mem_ctl_info *mci) pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval); /* Mask highest 8-bits to 0 */ - scrubval &= 0x00ffffff; + scrubval &= SCRUBINTERVAL_MASK; if (!scrubval) return 0; /* Calculate scrub rate value into byte/sec bandwidth */ - return 0xffffffff & (cache_line_size * freq_dclk / (u64) scrubval); + scrub_rate = (unsigned long long)freq_dclk_mhz * + 1000000 * cache_line_size / scrubval; + return (int)scrub_rate; } static void enable_sdram_scrub_setting(struct mem_ctl_info *mci) @@ -2204,6 +2326,9 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) /* allocating generic PCI control info */ i7core_pci_ctl_create(pvt); + /* DCLK for scrub rate setting */ + pvt->dclk_freq = get_dclk_freq(); + atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec); return 0; From 4fad8098bc3cc9dfe711b10b07df821ea30e6879 Mon Sep 17 00:00:00 2001 From: Sedat Dilek Date: Wed, 21 Sep 2011 23:44:52 -0300 Subject: [PATCH 14/21] i7core_edac: Fix compilation on 32 bits arch on i386: ERROR: "__udivdi3" [drivers/edac/i7core_edac.ko] undefined!\ In both get_sdram_scrub_rate() and set_sdram_scrub_rate() Reported-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7core_edac.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 7cb68decf57d..51d251c32901 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "edac_core.h" @@ -2102,7 +2103,8 @@ static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw) * program the corresponding register value. */ scrub_interval = (unsigned long long)freq_dclk_mhz * - cache_line_size * 1000000 / new_bw; + cache_line_size * 1000000; + do_div(scrub_interval, new_bw); if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK) return -EINVAL; @@ -2153,7 +2155,8 @@ static int get_sdram_scrub_rate(struct mem_ctl_info *mci) /* Calculate scrub rate value into byte/sec bandwidth */ scrub_rate = (unsigned long long)freq_dclk_mhz * - 1000000 * cache_line_size / scrubval; + 1000000 * cache_line_size; + do_div(scrub_rate, scrubval); return (int)scrub_rate; } From 767ba4a52a6de7ea2875a8af79bf11452a2a0c44 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 16 Sep 2011 13:37:29 -0300 Subject: [PATCH 15/21] i7core_edac: Initialize memory name with cpu, channel, bank Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7core_edac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 51d251c32901..70ad8923f1d7 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -746,6 +746,10 @@ static int get_dimm_config(const struct mem_ctl_info *mci) csr->edac_mode = mode; csr->mtype = mtype; + snprintf(csr->channels[0].label, + sizeof(csr->channels[0].label), + "CPU#%uChannel#%u_DIMM#%u", + pvt->i7core_dev->socket, i, j); csrow++; } From 5f032119d6f1cc48d0d1a28b8014f270ca4c4e47 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 13 Jun 2011 14:09:11 -0300 Subject: [PATCH 16/21] i7300_edac: Fix error cleanup logic The error cleanup logic was broken. Due to that, one error is generated for every error polling. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 51 ++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index a76fe8366b68..6104dba380b6 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -372,7 +372,7 @@ static const char *get_err_from_table(const char *table[], int size, int pos) static void i7300_process_error_global(struct mem_ctl_info *mci) { struct i7300_pvt *pvt; - u32 errnum, value; + u32 errnum, error_reg; unsigned long errors; const char *specific; bool is_fatal; @@ -381,9 +381,9 @@ static void i7300_process_error_global(struct mem_ctl_info *mci) /* read in the 1st FATAL error register */ pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, - FERR_GLOBAL_HI, &value); - if (unlikely(value)) { - errors = value; + FERR_GLOBAL_HI, &error_reg); + if (unlikely(error_reg)) { + errors = error_reg; errnum = find_first_bit(&errors, ARRAY_SIZE(ferr_global_hi_name)); specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum); @@ -391,15 +391,15 @@ static void i7300_process_error_global(struct mem_ctl_info *mci) /* Clear the error bit */ pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, - FERR_GLOBAL_HI, value); + FERR_GLOBAL_HI, error_reg); goto error_global; } pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, - FERR_GLOBAL_LO, &value); - if (unlikely(value)) { - errors = value; + FERR_GLOBAL_LO, &error_reg); + if (unlikely(error_reg)) { + errors = error_reg; errnum = find_first_bit(&errors, ARRAY_SIZE(ferr_global_lo_name)); specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum); @@ -407,7 +407,7 @@ static void i7300_process_error_global(struct mem_ctl_info *mci) /* Clear the error bit */ pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, - FERR_GLOBAL_LO, value); + FERR_GLOBAL_LO, error_reg); goto error_global; } @@ -427,7 +427,7 @@ error_global: static void i7300_process_fbd_error(struct mem_ctl_info *mci) { struct i7300_pvt *pvt; - u32 errnum, value; + u32 errnum, value, error_reg; u16 val16; unsigned branch, channel, bank, rank, cas, ras; u32 syndrome; @@ -440,14 +440,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) /* read in the 1st FATAL error register */ pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, - FERR_FAT_FBD, &value); - if (unlikely(value & FERR_FAT_FBD_ERR_MASK)) { - errors = value & FERR_FAT_FBD_ERR_MASK ; + FERR_FAT_FBD, &error_reg); + if (unlikely(error_reg & FERR_FAT_FBD_ERR_MASK)) { + errors = error_reg & FERR_FAT_FBD_ERR_MASK ; errnum = find_first_bit(&errors, ARRAY_SIZE(ferr_fat_fbd_name)); specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum); + branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0; - branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0; pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, NRECMEMA, &val16); bank = NRECMEMA_BANK(val16); @@ -455,11 +455,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, NRECMEMB, &value); - is_wr = NRECMEMB_IS_WR(value); cas = NRECMEMB_CAS(value); ras = NRECMEMB_RAS(value); + /* Clean the error register */ + pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + FERR_FAT_FBD, error_reg); + snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, "FATAL (Branch=%d DRAM-Bank=%d %s " "RAS=%d CAS=%d Err=0x%lx (%s))", @@ -476,21 +479,17 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) /* read in the 1st NON-FATAL error register */ pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, - FERR_NF_FBD, &value); - if (unlikely(value & FERR_NF_FBD_ERR_MASK)) { - errors = value & FERR_NF_FBD_ERR_MASK; + FERR_NF_FBD, &error_reg); + if (unlikely(error_reg & FERR_NF_FBD_ERR_MASK)) { + errors = error_reg & FERR_NF_FBD_ERR_MASK; errnum = find_first_bit(&errors, ARRAY_SIZE(ferr_nf_fbd_name)); specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum); - - /* Clear the error bit */ - pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, - FERR_GLOBAL_LO, value); + branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0; pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, REDMEMA, &syndrome); - branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0; pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, RECMEMA, &val16); bank = RECMEMA_BANK(val16); @@ -498,18 +497,20 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, RECMEMB, &value); - is_wr = RECMEMB_IS_WR(value); cas = RECMEMB_CAS(value); ras = RECMEMB_RAS(value); pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, REDMEMB, &value); - channel = (branch << 1); if (IS_SECOND_CH(value)) channel++; + /* Clear the error bit */ + pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + FERR_NF_FBD, error_reg); + /* Form out message */ snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, "Corrected error (Branch=%d, Channel %d), " From eebf11a0166f011c5945dd30fd1779afca6c964e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 20 Oct 2011 19:18:01 -0200 Subject: [PATCH 17/21] edac: Add an experimental new driver to support Sandy Bridge CPU's This driver is known to work on mine and Tony's test environments, using software error injection, and a partial hardware/software error injection tool. There's no broader range test yet to double check if the error decoding logic will actually point to the right DIMM, so use it with care. More tests are required to be sure that the driver will work on all different types of memory configurations. If you're willing to risk using it, I suggest you to enable EDAC debugs for your test machines, as the debug logs helps to track what's going inside the driver. Please feed me with bug reports, if you notice that the driver is miss-behaving. Tested-by: Tony Luck Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/sb_edac.c | 1894 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1894 insertions(+) create mode 100644 drivers/edac/sb_edac.c diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c new file mode 100644 index 000000000000..785c2769f05b --- /dev/null +++ b/drivers/edac/sb_edac.c @@ -0,0 +1,1894 @@ +/* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module + * + * This driver supports the memory controllers found on the Intel + * processor family Sandy Bridge. + * + * This file may be distributed under the terms of the + * GNU General Public License version 2 only. + * + * Copyright (c) 2011 by: + * Mauro Carvalho Chehab + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "edac_core.h" + +/* Static vars */ +static LIST_HEAD(sbridge_edac_list); +static DEFINE_MUTEX(sbridge_edac_lock); +static int probed; + +/* + * Alter this version for the module when modifications are made + */ +#define SBRIDGE_REVISION " Ver: 1.0.0 " +#define EDAC_MOD_STR "sbridge_edac" + +/* + * Debug macros + */ +#define sbridge_printk(level, fmt, arg...) \ + edac_printk(level, "sbridge", fmt, ##arg) + +#define sbridge_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##arg) + +/* + * Get a bit field at register value , from bit to bit + */ +#define GET_BITFIELD(v, lo, hi) \ + (((v) & ((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) >> (lo)) + +/* + * sbridge Memory Controller Registers + */ + +/* + * FIXME: For now, let's order by device function, as it makes + * easier for driver's development proccess. This table should be + * moved to pci_id.h when submitted upstream + */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0 0x3ca0 /* 14.0 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA 0x3ca8 /* 15.0 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS 0x3c71 /* 15.1 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0 0x3caa /* 15.2 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1 0x3cab /* 15.3 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2 0x3cac /* 15.4 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3 0x3cad /* 15.5 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO 0x3cb8 /* 17.0 */ + + /* + * Currently, unused, but will be needed in the future + * implementations, as they hold the error counters + */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0 0x3c72 /* 16.2 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1 0x3c73 /* 16.3 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2 0x3c76 /* 16.6 */ +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3 0x3c77 /* 16.7 */ + +/* Devices 12 Function 6, Offsets 0x80 to 0xcc */ +static const u32 dram_rule[] = { + 0x80, 0x88, 0x90, 0x98, 0xa0, + 0xa8, 0xb0, 0xb8, 0xc0, 0xc8, +}; +#define MAX_SAD ARRAY_SIZE(dram_rule) + +#define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff) +#define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3) +#define INTERLEAVE_MODE(reg) GET_BITFIELD(reg, 1, 1) +#define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0) + +static char *get_dram_attr(u32 reg) +{ + switch(DRAM_ATTR(reg)) { + case 0: + return "DRAM"; + case 1: + return "MMCFG"; + case 2: + return "NXM"; + default: + return "unknown"; + } +} + +static const u32 interleave_list[] = { + 0x84, 0x8c, 0x94, 0x9c, 0xa4, + 0xac, 0xb4, 0xbc, 0xc4, 0xcc, +}; +#define MAX_INTERLEAVE ARRAY_SIZE(interleave_list) + +#define SAD_PKG0(reg) GET_BITFIELD(reg, 0, 2) +#define SAD_PKG1(reg) GET_BITFIELD(reg, 3, 5) +#define SAD_PKG2(reg) GET_BITFIELD(reg, 8, 10) +#define SAD_PKG3(reg) GET_BITFIELD(reg, 11, 13) +#define SAD_PKG4(reg) GET_BITFIELD(reg, 16, 18) +#define SAD_PKG5(reg) GET_BITFIELD(reg, 19, 21) +#define SAD_PKG6(reg) GET_BITFIELD(reg, 24, 26) +#define SAD_PKG7(reg) GET_BITFIELD(reg, 27, 29) + +static inline int sad_pkg(u32 reg, int interleave) +{ + switch (interleave) { + case 0: + return SAD_PKG0(reg); + case 1: + return SAD_PKG1(reg); + case 2: + return SAD_PKG2(reg); + case 3: + return SAD_PKG3(reg); + case 4: + return SAD_PKG4(reg); + case 5: + return SAD_PKG5(reg); + case 6: + return SAD_PKG6(reg); + case 7: + return SAD_PKG7(reg); + default: + return -EINVAL; + } +} + +/* Devices 12 Function 7 */ + +#define TOLM 0x80 +#define TOHM 0x84 + +#define GET_TOLM(reg) ((GET_BITFIELD(reg, 0, 3) << 28) | 0x3ffffff) +#define GET_TOHM(reg) ((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff) + +/* Device 13 Function 6 */ + +#define SAD_TARGET 0xf0 + +#define SOURCE_ID(reg) GET_BITFIELD(reg, 9, 11) + +#define SAD_CONTROL 0xf4 + +#define NODE_ID(reg) GET_BITFIELD(reg, 0, 2) + +/* Device 14 function 0 */ + +static const u32 tad_dram_rule[] = { + 0x40, 0x44, 0x48, 0x4c, + 0x50, 0x54, 0x58, 0x5c, + 0x60, 0x64, 0x68, 0x6c, +}; +#define MAX_TAD ARRAY_SIZE(tad_dram_rule) + +#define TAD_LIMIT(reg) ((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff) +#define TAD_SOCK(reg) GET_BITFIELD(reg, 10, 11) +#define TAD_CH(reg) GET_BITFIELD(reg, 8, 9) +#define TAD_TGT3(reg) GET_BITFIELD(reg, 6, 7) +#define TAD_TGT2(reg) GET_BITFIELD(reg, 4, 5) +#define TAD_TGT1(reg) GET_BITFIELD(reg, 2, 3) +#define TAD_TGT0(reg) GET_BITFIELD(reg, 0, 1) + +/* Device 15, function 0 */ + +#define MCMTR 0x7c + +#define IS_ECC_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 2, 2) +#define IS_LOCKSTEP_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 1, 1) +#define IS_CLOSE_PG(mcmtr) GET_BITFIELD(mcmtr, 0, 0) + +/* Device 15, function 1 */ + +#define RASENABLES 0xac +#define IS_MIRROR_ENABLED(reg) GET_BITFIELD(reg, 0, 0) + +/* Device 15, functions 2-5 */ + +static const int mtr_regs[] = { + 0x80, 0x84, 0x88, +}; + +#define RANK_DISABLE(mtr) GET_BITFIELD(mtr, 16, 19) +#define IS_DIMM_PRESENT(mtr) GET_BITFIELD(mtr, 14, 14) +#define RANK_CNT_BITS(mtr) GET_BITFIELD(mtr, 12, 13) +#define RANK_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 2, 4) +#define COL_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 0, 1) + +static const u32 tad_ch_nilv_offset[] = { + 0x90, 0x94, 0x98, 0x9c, + 0xa0, 0xa4, 0xa8, 0xac, + 0xb0, 0xb4, 0xb8, 0xbc, +}; +#define CHN_IDX_OFFSET(reg) GET_BITFIELD(reg, 28, 29) +#define TAD_OFFSET(reg) (GET_BITFIELD(reg, 6, 25) << 26) + +static const u32 rir_way_limit[] = { + 0x108, 0x10c, 0x110, 0x114, 0x118, +}; +#define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit) + +#define IS_RIR_VALID(reg) GET_BITFIELD(reg, 31, 31) +#define RIR_WAY(reg) GET_BITFIELD(reg, 28, 29) +#define RIR_LIMIT(reg) ((GET_BITFIELD(reg, 1, 10) << 29)| 0x1fffffff) + +#define MAX_RIR_WAY 8 + +static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = { + { 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c }, + { 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c }, + { 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c }, + { 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c }, + { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc }, +}; + +#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19) +#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14) + +/* Device 16, functions 2-7 */ + +/* + * FIXME: Implement the error count reads directly + */ + +static const u32 correrrcnt[] = { + 0x104, 0x108, 0x10c, 0x110, +}; + +#define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31) +#define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30) +#define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15) +#define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14) + +static const u32 correrrthrsld[] = { + 0x11c, 0x120, 0x124, 0x128, +}; + +#define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30) +#define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14) + + +/* Device 17, function 0 */ + +#define RANK_CFG_A 0x0328 + +#define IS_RDIMM_ENABLED(reg) GET_BITFIELD(reg, 11, 11) + +/* + * sbridge structs + */ + +#define NUM_CHANNELS 4 +#define MAX_DIMMS 3 /* Max DIMMS per channel */ + +struct sbridge_info { + u32 mcmtr; +}; + +struct sbridge_channel { + u32 ranks; + u32 dimms; +}; + +struct pci_id_descr { + int dev; + int func; + int dev_id; + int optional; +}; + +struct pci_id_table { + const struct pci_id_descr *descr; + int n_devs; +}; + +struct sbridge_dev { + struct list_head list; + u8 bus, mc; + u8 node_id, source_id; + struct pci_dev **pdev; + int n_devs; + struct mem_ctl_info *mci; +}; + +struct sbridge_pvt { + struct pci_dev *pci_ta, *pci_ddrio, *pci_ras; + struct pci_dev *pci_sad0, *pci_sad1, *pci_ha0; + struct pci_dev *pci_br; + struct pci_dev *pci_tad[NUM_CHANNELS]; + + struct sbridge_dev *sbridge_dev; + + struct sbridge_info info; + struct sbridge_channel channel[NUM_CHANNELS]; + + int csrow_map[NUM_CHANNELS][MAX_DIMMS]; + + /* Memory type detection */ + bool is_mirrored, is_lockstep, is_close_pg; + + /* mcelog glue */ + struct edac_mce edac_mce; + + /* Fifo double buffers */ + struct mce mce_entry[MCE_LOG_LEN]; + struct mce mce_outentry[MCE_LOG_LEN]; + + /* Fifo in/out counters */ + unsigned mce_in, mce_out; + + /* Count indicator to show errors not got */ + unsigned mce_overrun; + + /* Memory description */ + u64 tolm, tohm; +}; + +#define PCI_DESCR(device, function, device_id) \ + .dev = (device), \ + .func = (function), \ + .dev_id = (device_id) + +static const struct pci_id_descr pci_dev_descr_sbridge[] = { + /* Processor Home Agent */ + { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0) }, + + /* Memory controller */ + { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA) }, + { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS) }, + { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0) }, + { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1) }, + { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2) }, + { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3) }, + { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO) }, + + /* System Address Decoder */ + { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0) }, + { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1) }, + + /* Broadcast Registers */ + { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR) }, +}; + +#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } +static const struct pci_id_table pci_dev_descr_sbridge_table[] = { + PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge), + {0,} /* 0 terminated list. */ +}; + +/* + * pci_device_id table for which devices we are looking for + */ +static const struct pci_device_id sbridge_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)}, + {0,} /* 0 terminated list. */ +}; + + +/**************************************************************************** + Anciliary status routines + ****************************************************************************/ + +static inline int numrank(u32 mtr) +{ + int ranks = (1 << RANK_CNT_BITS(mtr)); + + if (ranks > 4) { + debugf0("Invalid number of ranks: %d (max = 4) raw value = %x (%04x)", + ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr); + return -EINVAL; + } + + return ranks; +} + +static inline int numrow(u32 mtr) +{ + int rows = (RANK_WIDTH_BITS(mtr) + 12); + + if (rows < 13 || rows > 18) { + debugf0("Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)", + rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr); + return -EINVAL; + } + + return 1 << rows; +} + +static inline int numcol(u32 mtr) +{ + int cols = (COL_WIDTH_BITS(mtr) + 10); + + if (cols > 12) { + debugf0("Invalid number of cols: %d (max = 4) raw value = %x (%04x)", + cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr); + return -EINVAL; + } + + return 1 << cols; +} + +static struct sbridge_dev *get_sbridge_dev(u8 bus) +{ + struct sbridge_dev *sbridge_dev; + + list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { + if (sbridge_dev->bus == bus) + return sbridge_dev; + } + + return NULL; +} + +static struct sbridge_dev *alloc_sbridge_dev(u8 bus, + const struct pci_id_table *table) +{ + struct sbridge_dev *sbridge_dev; + + sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL); + if (!sbridge_dev) + return NULL; + + sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs, + GFP_KERNEL); + if (!sbridge_dev->pdev) { + kfree(sbridge_dev); + return NULL; + } + + sbridge_dev->bus = bus; + sbridge_dev->n_devs = table->n_devs; + list_add_tail(&sbridge_dev->list, &sbridge_edac_list); + + return sbridge_dev; +} + +static void free_sbridge_dev(struct sbridge_dev *sbridge_dev) +{ + list_del(&sbridge_dev->list); + kfree(sbridge_dev->pdev); + kfree(sbridge_dev); +} + +/**************************************************************************** + Memory check routines + ****************************************************************************/ +static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot, + unsigned func) +{ + struct sbridge_dev *sbridge_dev = get_sbridge_dev(bus); + int i; + + if (!sbridge_dev) + return NULL; + + for (i = 0; i < sbridge_dev->n_devs; i++) { + if (!sbridge_dev->pdev[i]) + continue; + + if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot && + PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) { + debugf1("Associated %02x.%02x.%d with %p\n", + bus, slot, func, sbridge_dev->pdev[i]); + return sbridge_dev->pdev[i]; + } + } + + return NULL; +} + +/** + * sbridge_get_active_channels() - gets the number of channels and csrows + * bus: Device bus + * @channels: Number of channels that will be returned + * @csrows: Number of csrows found + * + * Since EDAC core needs to know in advance the number of available channels + * and csrows, in order to allocate memory for csrows/channels, it is needed + * to run two similar steps. At the first step, implemented on this function, + * it checks the number of csrows/channels present at one socket, identified + * by the associated PCI bus. + * this is used in order to properly allocate the size of mci components. + * Note: one csrow is one dimm. + */ +static int sbridge_get_active_channels(const u8 bus, unsigned *channels, + unsigned *csrows) +{ + struct pci_dev *pdev = NULL; + int i, j; + u32 mcmtr; + + *channels = 0; + *csrows = 0; + + pdev = get_pdev_slot_func(bus, 15, 0); + if (!pdev) { + sbridge_printk(KERN_ERR, "Couldn't find PCI device " + "%2x.%02d.%d!!!\n", + bus, 15, 0); + return -ENODEV; + } + + pci_read_config_dword(pdev, MCMTR, &mcmtr); + if (!IS_ECC_ENABLED(mcmtr)) { + sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n"); + return -ENODEV; + } + + for (i = 0; i < NUM_CHANNELS; i++) { + u32 mtr; + + /* Device 15 functions 2 - 5 */ + pdev = get_pdev_slot_func(bus, 15, 2 + i); + if (!pdev) { + sbridge_printk(KERN_ERR, "Couldn't find PCI device " + "%2x.%02d.%d!!!\n", + bus, 15, 2 + i); + return -ENODEV; + } + (*channels)++; + + for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { + pci_read_config_dword(pdev, mtr_regs[j], &mtr); + debugf1("Bus#%02x channel #%d MTR%d = %x\n", bus, i, j, mtr); + if (IS_DIMM_PRESENT(mtr)) + (*csrows)++; + } + } + + debugf0("Number of active channels: %d, number of active dimms: %d\n", + *channels, *csrows); + + return 0; +} + +static int get_dimm_config(const struct mem_ctl_info *mci) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + struct csrow_info *csr; + int i, j, banks, ranks, rows, cols, size, npages; + int csrow = 0; + unsigned long last_page = 0; + u32 reg; + enum edac_type mode; + + pci_read_config_dword(pvt->pci_br, SAD_TARGET, ®); + pvt->sbridge_dev->source_id = SOURCE_ID(reg); + + pci_read_config_dword(pvt->pci_br, SAD_CONTROL, ®); + pvt->sbridge_dev->node_id = NODE_ID(reg); + debugf0("mc#%d: Node ID: %d, source ID: %d\n", + pvt->sbridge_dev->mc, + pvt->sbridge_dev->node_id, + pvt->sbridge_dev->source_id); + + pci_read_config_dword(pvt->pci_ras, RASENABLES, ®); + if (IS_MIRROR_ENABLED(reg)) { + debugf0("Memory mirror is enabled\n"); + pvt->is_mirrored = true; + } else { + debugf0("Memory mirror is disabled\n"); + pvt->is_mirrored = false; + } + + pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr); + if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) { + debugf0("Lockstep is enabled\n"); + mode = EDAC_S8ECD8ED; + pvt->is_lockstep = true; + } else { + debugf0("Lockstep is disabled\n"); + mode = EDAC_S4ECD4ED; + pvt->is_lockstep = false; + } + if (IS_CLOSE_PG(pvt->info.mcmtr)) { + debugf0("address map is on closed page mode\n"); + pvt->is_close_pg = true; + } else { + debugf0("address map is on open page mode\n"); + pvt->is_close_pg = false; + } + + pci_read_config_dword(pvt->pci_ta, RANK_CFG_A, ®); + if (IS_RDIMM_ENABLED(reg)) { + /* FIXME: Can also be LRDIMM */ + debugf0("Memory is registered\n"); + mode = MEM_RDDR3; + } else { + debugf0("Memory is unregistered\n"); + mode = MEM_DDR3; + } + + /* On all supported DDR3 DIMM types, there are 8 banks available */ + banks = 8; + + for (i = 0; i < NUM_CHANNELS; i++) { + u32 mtr; + + for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { + pci_read_config_dword(pvt->pci_tad[i], + mtr_regs[j], &mtr); + debugf4("Channel #%d MTR%d = %x\n", i, j, mtr); + if (IS_DIMM_PRESENT(mtr)) { + pvt->channel[i].dimms++; + + ranks = numrank(mtr); + rows = numrow(mtr); + cols = numcol(mtr); + + /* DDR3 has 8 I/O banks */ + size = (rows * cols * banks * ranks) >> (20 - 3); + npages = MiB_TO_PAGES(size); + + debugf0("mc#%d: channel %d, dimm %d, %d Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + pvt->sbridge_dev->mc, i, j, + size, npages, + banks, ranks, rows, cols); + csr = &mci->csrows[csrow]; + + csr->first_page = last_page; + csr->last_page = last_page + npages - 1; + csr->page_mask = 0UL; /* Unused */ + csr->nr_pages = npages; + csr->grain = 32; + csr->csrow_idx = csrow; + csr->dtype = (banks == 8) ? DEV_X8 : DEV_X4; + csr->ce_count = 0; + csr->ue_count = 0; + csr->mtype = mode; + csr->edac_mode = mode; + csr->nr_channels = 1; + csr->channels[0].chan_idx = i; + csr->channels[0].ce_count = 0; + pvt->csrow_map[i][j] = csrow; + snprintf(csr->channels[0].label, + sizeof(csr->channels[0].label), + "CPU_SrcID#%u_Channel#%u_DIMM#%u", + pvt->sbridge_dev->source_id, i, j); + last_page += npages; + csrow++; + } + } + } + + return 0; +} + +static void get_memory_layout(const struct mem_ctl_info *mci) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + int i, j, k, n_sads, n_tads, sad_interl; + u32 reg; + u64 limit, prv = 0; + u64 tmp_mb; + u32 rir_way; + + /* + * Step 1) Get TOLM/TOHM ranges + */ + + /* Address range is 32:28 */ + pci_read_config_dword(pvt->pci_sad1, TOLM, + ®); + pvt->tolm = GET_TOLM(reg); + tmp_mb = (1 + pvt->tolm) >> 20; + + debugf0("TOLM: %Lu.%03Lu GB (0x%016Lx)\n", + tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tolm); + + /* Address range is already 45:25 */ + pci_read_config_dword(pvt->pci_sad1, TOHM, + ®); + pvt->tohm = GET_TOHM(reg); + tmp_mb = (1 + pvt->tohm) >> 20; + + debugf0("TOHM: %Lu.%03Lu GB (0x%016Lx)", + tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tohm); + + /* + * Step 2) Get SAD range and SAD Interleave list + * TAD registers contain the interleave wayness. However, it + * seems simpler to just discover it indirectly, with the + * algorithm bellow. + */ + prv = 0; + for (n_sads = 0; n_sads < MAX_SAD; n_sads++) { + /* SAD_LIMIT Address range is 45:26 */ + pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads], + ®); + limit = SAD_LIMIT(reg); + + if (!DRAM_RULE_ENABLE(reg)) + continue; + + if (limit <= prv) + break; + + tmp_mb = (limit + 1) >> 20; + debugf0("SAD#%d %s up to %Lu.%03Lu GB (0x%016Lx) %s reg=0x%08x\n", + n_sads, + get_dram_attr(reg), + tmp_mb / 1000, tmp_mb % 1000, + ((u64)tmp_mb) << 20L, + INTERLEAVE_MODE(reg) ? "Interleave: 8:6" : "Interleave: [8:6]XOR[18:16]", + reg); + prv = limit; + + pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads], + ®); + sad_interl = sad_pkg(reg, 0); + for (j = 0; j < 8; j++) { + if (j > 0 && sad_interl == sad_pkg(reg, j)) + break; + + debugf0("SAD#%d, interleave #%d: %d\n", + n_sads, j, sad_pkg(reg, j)); + } + } + + /* + * Step 3) Get TAD range + */ + prv = 0; + for (n_tads = 0; n_tads < MAX_TAD; n_tads++) { + pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads], + ®); + limit = TAD_LIMIT(reg); + if (limit <= prv) + break; + tmp_mb = (limit + 1) >> 20; + + debugf0("TAD#%d: up to %Lu.%03Lu GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", + n_tads, tmp_mb / 1000, tmp_mb % 1000, + ((u64)tmp_mb) << 20L, + (u32)TAD_SOCK(reg), + (u32)TAD_CH(reg), + (u32)TAD_TGT0(reg), + (u32)TAD_TGT1(reg), + (u32)TAD_TGT2(reg), + (u32)TAD_TGT3(reg), + reg); + prv = tmp_mb; + } + + /* + * Step 4) Get TAD offsets, per each channel + */ + for (i = 0; i < NUM_CHANNELS; i++) { + if (!pvt->channel[i].dimms) + continue; + for (j = 0; j < n_tads; j++) { + pci_read_config_dword(pvt->pci_tad[i], + tad_ch_nilv_offset[j], + ®); + tmp_mb = TAD_OFFSET(reg) >> 20; + debugf0("TAD CH#%d, offset #%d: %Lu.%03Lu GB (0x%016Lx), reg=0x%08x\n", + i, j, + tmp_mb / 1000, tmp_mb % 1000, + ((u64)tmp_mb) << 20L, + reg); + } + } + + /* + * Step 6) Get RIR Wayness/Limit, per each channel + */ + for (i = 0; i < NUM_CHANNELS; i++) { + if (!pvt->channel[i].dimms) + continue; + for (j = 0; j < MAX_RIR_RANGES; j++) { + pci_read_config_dword(pvt->pci_tad[i], + rir_way_limit[j], + ®); + + if (!IS_RIR_VALID(reg)) + continue; + + tmp_mb = RIR_LIMIT(reg) >> 20; + rir_way = 1 << RIR_WAY(reg); + debugf0("CH#%d RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d, reg=0x%08x\n", + i, j, + tmp_mb / 1000, tmp_mb % 1000, + ((u64)tmp_mb) << 20L, + rir_way, + reg); + + for (k = 0; k < rir_way; k++) { + pci_read_config_dword(pvt->pci_tad[i], + rir_offset[j][k], + ®); + tmp_mb = RIR_OFFSET(reg) << 6; + + debugf0("CH#%d RIR#%d INTL#%d, offset %Lu.%03Lu GB (0x%016Lx), tgt: %d, reg=0x%08x\n", + i, j, k, + tmp_mb / 1000, tmp_mb % 1000, + ((u64)tmp_mb) << 20L, + (u32)RIR_RNK_TGT(reg), + reg); + } + } + } +} + +struct mem_ctl_info *get_mci_for_node_id(u8 node_id) +{ + struct sbridge_dev *sbridge_dev; + + list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { + if (sbridge_dev->node_id == node_id) + return sbridge_dev->mci; + } + return NULL; +} + +static int get_memory_error_data(struct mem_ctl_info *mci, + u64 addr, + u8 *socket, + long *channel_mask, + u8 *rank, + char *area_type) +{ + struct mem_ctl_info *new_mci; + struct sbridge_pvt *pvt = mci->pvt_info; + char msg[256]; + int n_rir, n_sads, n_tads, sad_way, sck_xch; + int sad_interl, idx, base_ch; + int interleave_mode; + unsigned sad_interleave[MAX_INTERLEAVE]; + u32 reg; + u8 ch_way,sck_way; + u32 tad_offset; + u32 rir_way; + u64 ch_addr, offset, limit, prv = 0; + + + /* + * Step 0) Check if the address is at special memory ranges + * The check bellow is probably enough to fill all cases where + * the error is not inside a memory, except for the legacy + * range (e. g. VGA addresses). It is unlikely, however, that the + * memory controller would generate an error on that range. + */ + if ((addr > (u64) pvt->tolm) && (addr < (1L << 32))) { + sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr); + edac_mc_handle_ce_no_info(mci, msg); + return -EINVAL; + } + if (addr >= (u64)pvt->tohm) { + sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr); + edac_mc_handle_ce_no_info(mci, msg); + return -EINVAL; + } + + /* + * Step 1) Get socket + */ + for (n_sads = 0; n_sads < MAX_SAD; n_sads++) { + pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads], + ®); + + if (!DRAM_RULE_ENABLE(reg)) + continue; + + limit = SAD_LIMIT(reg); + if (limit <= prv) { + sprintf(msg, "Can't discover the memory socket"); + edac_mc_handle_ce_no_info(mci, msg); + return -EINVAL; + } + if (addr <= limit) + break; + prv = limit; + } + if (n_sads == MAX_SAD) { + sprintf(msg, "Can't discover the memory socket"); + edac_mc_handle_ce_no_info(mci, msg); + return -EINVAL; + } + area_type = get_dram_attr(reg); + interleave_mode = INTERLEAVE_MODE(reg); + + pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads], + ®); + sad_interl = sad_pkg(reg, 0); + for (sad_way = 0; sad_way < 8; sad_way++) { + if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way)) + break; + sad_interleave[sad_way] = sad_pkg(reg, sad_way); + debugf0("SAD interleave #%d: %d\n", + sad_way, sad_interleave[sad_way]); + } + debugf0("mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n", + pvt->sbridge_dev->mc, + n_sads, + addr, + limit, + sad_way + 7, + INTERLEAVE_MODE(reg) ? "" : "XOR[18:16]"); + if (interleave_mode) + idx = ((addr >> 6) ^ (addr >> 16)) & 7; + else + idx = (addr >> 6) & 7; + switch (sad_way) { + case 1: + idx = 0; + break; + case 2: + idx = idx & 1; + break; + case 4: + idx = idx & 3; + break; + case 8: + break; + default: + sprintf(msg, "Can't discover socket interleave"); + edac_mc_handle_ce_no_info(mci, msg); + return -EINVAL; + } + *socket = sad_interleave[idx]; + debugf0("SAD interleave index: %d (wayness %d) = CPU socket %d\n", + idx, sad_way, *socket); + + /* + * Move to the proper node structure, in order to access the + * right PCI registers + */ + new_mci = get_mci_for_node_id(*socket); + if (!new_mci) { + sprintf(msg, "Struct for socket #%u wasn't initialized", + *socket); + edac_mc_handle_ce_no_info(mci, msg); + return -EINVAL; + } + mci = new_mci; + pvt = mci->pvt_info; + + /* + * Step 2) Get memory channel + */ + prv = 0; + for (n_tads = 0; n_tads < MAX_TAD; n_tads++) { + pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads], + ®); + limit = TAD_LIMIT(reg); + if (limit <= prv) { + sprintf(msg, "Can't discover the memory channel"); + edac_mc_handle_ce_no_info(mci, msg); + return -EINVAL; + } + if (addr <= limit) + break; + prv = limit; + } + ch_way = TAD_CH(reg) + 1; + sck_way = TAD_SOCK(reg) + 1; + /* + * FIXME: Is it right to always use channel 0 for offsets? + */ + pci_read_config_dword(pvt->pci_tad[0], + tad_ch_nilv_offset[n_tads], + &tad_offset); + + if (ch_way == 3) + idx = addr >> 6; + else + idx = addr >> (6 + sck_way); + idx = idx % ch_way; + + /* + * FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ??? + */ + switch (idx) { + case 0: + base_ch = TAD_TGT0(reg); + break; + case 1: + base_ch = TAD_TGT1(reg); + break; + case 2: + base_ch = TAD_TGT2(reg); + break; + case 3: + base_ch = TAD_TGT3(reg); + break; + default: + sprintf(msg, "Can't discover the TAD target"); + edac_mc_handle_ce_no_info(mci, msg); + return -EINVAL; + } + *channel_mask = 1 << base_ch; + + if (pvt->is_mirrored) { + *channel_mask |= 1 << ((base_ch + 2) % 4); + switch(ch_way) { + case 2: + case 4: + sck_xch = 1 << sck_way * (ch_way >> 1); + break; + default: + sprintf(msg, "Invalid mirror set. Can't decode addr"); + edac_mc_handle_ce_no_info(mci, msg); + return -EINVAL; + } + } else + sck_xch = (1 << sck_way) * ch_way; + + if (pvt->is_lockstep) + *channel_mask |= 1 << ((base_ch + 1) % 4); + + offset = TAD_OFFSET(tad_offset); + + debugf0("TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n", + n_tads, + addr, + limit, + (u32)TAD_SOCK(reg), + ch_way, + offset, + idx, + base_ch, + *channel_mask); + + /* Calculate channel address */ + /* Remove the TAD offset */ + + if (offset > addr) { + sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!", + offset, addr); + edac_mc_handle_ce_no_info(mci, msg); + return -EINVAL; + } + addr -= offset; + /* Store the low bits [0:6] of the addr */ + ch_addr = addr & 0x7f; + /* Remove socket wayness and remove 6 bits */ + addr >>= 6; + addr /= sck_xch; +#if 0 + /* Divide by channel way */ + addr = addr / ch_way; +#endif + /* Recover the last 6 bits */ + ch_addr |= addr << 6; + + /* + * Step 3) Decode rank + */ + for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) { + pci_read_config_dword(pvt->pci_tad[base_ch], + rir_way_limit[n_rir], + ®); + + if (!IS_RIR_VALID(reg)) + continue; + + limit = RIR_LIMIT(reg); + + debugf0("RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d\n", + n_rir, + (limit >> 20) / 1000, (limit >> 20) % 1000, + limit, + 1 << RIR_WAY(reg)); + if (ch_addr <= limit) + break; + } + if (n_rir == MAX_RIR_RANGES) { + sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx", + ch_addr); + edac_mc_handle_ce_no_info(mci, msg); + return -EINVAL; + } + rir_way = RIR_WAY(reg); + if (pvt->is_close_pg) + idx = (ch_addr >> 6); + else + idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */ + idx %= 1 << rir_way; + + pci_read_config_dword(pvt->pci_tad[base_ch], + rir_offset[n_rir][idx], + ®); + *rank = RIR_RNK_TGT(reg); + + debugf0("RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", + n_rir, + ch_addr, + limit, + rir_way, + idx); + + return 0; +} + +/**************************************************************************** + Device initialization routines: put/get, init/exit + ****************************************************************************/ + +/* + * sbridge_put_all_devices 'put' all the devices that we have + * reserved via 'get' + */ +static void sbridge_put_devices(struct sbridge_dev *sbridge_dev) +{ + int i; + + debugf0(__FILE__ ": %s()\n", __func__); + for (i = 0; i < sbridge_dev->n_devs; i++) { + struct pci_dev *pdev = sbridge_dev->pdev[i]; + if (!pdev) + continue; + debugf0("Removing dev %02x:%02x.%d\n", + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + pci_dev_put(pdev); + } +} + +static void sbridge_put_all_devices(void) +{ + struct sbridge_dev *sbridge_dev, *tmp; + + list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) { + sbridge_put_devices(sbridge_dev); + free_sbridge_dev(sbridge_dev); + } +} + +/* + * sbridge_get_all_devices Find and perform 'get' operation on the MCH's + * device/functions we want to reference for this driver + * + * Need to 'get' device 16 func 1 and func 2 + */ +static int sbridge_get_onedevice(struct pci_dev **prev, + u8 *num_mc, + const struct pci_id_table *table, + const unsigned devno) +{ + struct sbridge_dev *sbridge_dev; + const struct pci_id_descr *dev_descr = &table->descr[devno]; + + struct pci_dev *pdev = NULL; + u8 bus = 0; + + sbridge_printk(KERN_INFO, + "Seeking for: dev %02x.%d PCI ID %04x:%04x\n", + dev_descr->dev, dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + dev_descr->dev_id, *prev); + + if (!pdev) { + if (*prev) { + *prev = pdev; + return 0; + } + + if (dev_descr->optional) + return 0; + + if (devno == 0) + return -ENODEV; + + sbridge_printk(KERN_INFO, + "Device not found: dev %02x.%d PCI ID %04x:%04x\n", + dev_descr->dev, dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + + /* End of list, leave */ + return -ENODEV; + } + bus = pdev->bus->number; + + sbridge_dev = get_sbridge_dev(bus); + if (!sbridge_dev) { + sbridge_dev = alloc_sbridge_dev(bus, table); + if (!sbridge_dev) { + pci_dev_put(pdev); + return -ENOMEM; + } + (*num_mc)++; + } + + if (sbridge_dev->pdev[devno]) { + sbridge_printk(KERN_ERR, + "Duplicated device for " + "dev %02x:%d.%d PCI ID %04x:%04x\n", + bus, dev_descr->dev, dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + pci_dev_put(pdev); + return -ENODEV; + } + + sbridge_dev->pdev[devno] = pdev; + + /* Sanity check */ + if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev || + PCI_FUNC(pdev->devfn) != dev_descr->func)) { + sbridge_printk(KERN_ERR, + "Device PCI ID %04x:%04x " + "has dev %02x:%d.%d instead of dev %02x:%02x.%d\n", + PCI_VENDOR_ID_INTEL, dev_descr->dev_id, + bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + bus, dev_descr->dev, dev_descr->func); + return -ENODEV; + } + + /* Be sure that the device is enabled */ + if (unlikely(pci_enable_device(pdev) < 0)) { + sbridge_printk(KERN_ERR, + "Couldn't enable " + "dev %02x:%d.%d PCI ID %04x:%04x\n", + bus, dev_descr->dev, dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + return -ENODEV; + } + + debugf0("Detected dev %02x:%d.%d PCI ID %04x:%04x\n", + bus, dev_descr->dev, + dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + + /* + * As stated on drivers/pci/search.c, the reference count for + * @from is always decremented if it is not %NULL. So, as we need + * to get all devices up to null, we need to do a get for the device + */ + pci_dev_get(pdev); + + *prev = pdev; + + return 0; +} + +static int sbridge_get_all_devices(u8 *num_mc) +{ + int i, rc; + struct pci_dev *pdev = NULL; + const struct pci_id_table *table = pci_dev_descr_sbridge_table; + + while (table && table->descr) { + for (i = 0; i < table->n_devs; i++) { + pdev = NULL; + do { + rc = sbridge_get_onedevice(&pdev, num_mc, + table, i); + if (rc < 0) { + if (i == 0) { + i = table->n_devs; + break; + } + sbridge_put_all_devices(); + return -ENODEV; + } + } while (pdev); + } + table++; + } + + return 0; +} + +static int mci_bind_devs(struct mem_ctl_info *mci, + struct sbridge_dev *sbridge_dev) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + struct pci_dev *pdev; + int i, func, slot; + + for (i = 0; i < sbridge_dev->n_devs; i++) { + pdev = sbridge_dev->pdev[i]; + if (!pdev) + continue; + slot = PCI_SLOT(pdev->devfn); + func = PCI_FUNC(pdev->devfn); + switch (slot) { + case 12: + switch (func) { + case 6: + pvt->pci_sad0 = pdev; + break; + case 7: + pvt->pci_sad1 = pdev; + break; + default: + goto error; + } + break; + case 13: + switch (func) { + case 6: + pvt->pci_br = pdev; + break; + default: + goto error; + } + break; + case 14: + switch (func) { + case 0: + pvt->pci_ha0 = pdev; + break; + default: + goto error; + } + break; + case 15: + switch (func) { + case 0: + pvt->pci_ta = pdev; + break; + case 1: + pvt->pci_ras = pdev; + break; + case 2: + case 3: + case 4: + case 5: + pvt->pci_tad[func - 2] = pdev; + break; + default: + goto error; + } + break; + case 17: + switch (func) { + case 0: + pvt->pci_ddrio = pdev; + break; + default: + goto error; + } + break; + default: + goto error; + } + + debugf0("Associated PCI %02x.%02d.%d with dev = %p\n", + sbridge_dev->bus, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + pdev); + } + + /* Check if everything were registered */ + if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 || + !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta || + !pvt->pci_ddrio) + goto enodev; + + for (i = 0; i < NUM_CHANNELS; i++) { + if (!pvt->pci_tad[i]) + goto enodev; + } + return 0; + +enodev: + sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); + return -ENODEV; + +error: + sbridge_printk(KERN_ERR, "Device %d, function %d " + "is out of the expected range\n", + slot, func); + return -EINVAL; +} + +/**************************************************************************** + Error check routines + ****************************************************************************/ + +/* + * While Sandy Bridge has error count registers, SMI BIOS read values from + * and resets the counters. So, they are not reliable for the OS to read + * from them. So, we have no option but to just trust on whatever MCE is + * telling us about the errors. + */ +static void sbridge_mce_output_error(struct mem_ctl_info *mci, + const struct mce *m) +{ + struct mem_ctl_info *new_mci; + struct sbridge_pvt *pvt = mci->pvt_info; + char *type, *optype, *msg, *recoverable_msg; + bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); + bool overflow = GET_BITFIELD(m->status, 62, 62); + bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); + bool recoverable = GET_BITFIELD(m->status, 56, 56); + u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); + u32 mscod = GET_BITFIELD(m->status, 16, 31); + u32 errcode = GET_BITFIELD(m->status, 0, 15); + u32 channel = GET_BITFIELD(m->status, 0, 3); + u32 optypenum = GET_BITFIELD(m->status, 4, 6); + long channel_mask, first_channel; + u8 rank, socket; + int csrow, rc, dimm; + char *area_type = "Unknown"; + + if (ripv) + type = "NON_FATAL"; + else + type = "FATAL"; + + /* + * According with Table 15-9 of the Intel Archictecture spec vol 3A, + * memory errors should fit in this mask: + * 000f 0000 1mmm cccc (binary) + * where: + * f = Correction Report Filtering Bit. If 1, subsequent errors + * won't be shown + * mmm = error type + * cccc = channel + * If the mask doesn't match, report an error to the parsing logic + */ + if (! ((errcode & 0xef80) == 0x80)) { + optype = "Can't parse: it is not a mem"; + } else { + switch (optypenum) { + case 0: + optype = "generic undef request"; + break; + case 1: + optype = "memory read"; + break; + case 2: + optype = "memory write"; + break; + case 3: + optype = "addr/cmd"; + break; + case 4: + optype = "memory scrubbing"; + break; + default: + optype = "reserved"; + break; + } + } + + rc = get_memory_error_data(mci, m->addr, &socket, + &channel_mask, &rank, area_type); + if (rc < 0) + return; + new_mci = get_mci_for_node_id(socket); + if (!new_mci) { + edac_mc_handle_ce_no_info(mci, "Error: socket got corrupted!"); + return; + } + mci = new_mci; + pvt = mci->pvt_info; + + first_channel = find_first_bit(&channel_mask, NUM_CHANNELS); + + if (rank < 4) + dimm = 0; + else if (rank < 8) + dimm = 1; + else + dimm = 2; + + csrow = pvt->csrow_map[first_channel][dimm]; + + if (uncorrected_error && recoverable) + recoverable_msg = " recoverable"; + else + recoverable_msg = ""; + + /* + * FIXME: What should we do with "channel" information on mcelog? + * Probably, we can just discard it, as the channel information + * comes from the get_memory_error_data() address decoding + */ + msg = kasprintf(GFP_ATOMIC, + "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), " + "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n", + core_err_cnt, + area_type, + optype, + type, + recoverable_msg, + overflow ? "OVERFLOW" : "", + m->cpu, + mscod, errcode, + channel, /* 1111b means not specified */ + (long long) m->addr, + socket, + first_channel, /* This is the real channel on SB */ + channel_mask, + rank); + + debugf0("%s", msg); + + /* Call the helper to output message */ + if (uncorrected_error) + edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg); + else + edac_mc_handle_fbd_ce(mci, csrow, 0, msg); + + kfree(msg); +} + +/* + * sbridge_check_error Retrieve and process errors reported by the + * hardware. Called by the Core module. + */ +static void sbridge_check_error(struct mem_ctl_info *mci) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + int i; + unsigned count = 0; + struct mce *m; + + /* + * MCE first step: Copy all mce errors into a temporary buffer + * We use a double buffering here, to reduce the risk of + * loosing an error. + */ + smp_rmb(); + count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in) + % MCE_LOG_LEN; + if (!count) + return; + + m = pvt->mce_outentry; + if (pvt->mce_in + count > MCE_LOG_LEN) { + unsigned l = MCE_LOG_LEN - pvt->mce_in; + + memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l); + smp_wmb(); + pvt->mce_in = 0; + count -= l; + m += l; + } + memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count); + smp_wmb(); + pvt->mce_in += count; + + smp_rmb(); + if (pvt->mce_overrun) { + sbridge_printk(KERN_ERR, "Lost %d memory errors\n", + pvt->mce_overrun); + smp_wmb(); + pvt->mce_overrun = 0; + } + + /* + * MCE second step: parse errors and display + */ + for (i = 0; i < count; i++) + sbridge_mce_output_error(mci, &pvt->mce_outentry[i]); +} + +/* + * sbridge_mce_check_error Replicates mcelog routine to get errors + * This routine simply queues mcelog errors, and + * return. The error itself should be handled later + * by sbridge_check_error. + * WARNING: As this routine should be called at NMI time, extra care should + * be taken to avoid deadlocks, and to be as fast as possible. + */ +static int sbridge_mce_check_error(void *priv, struct mce *mce) +{ + struct mem_ctl_info *mci = priv; + struct sbridge_pvt *pvt = mci->pvt_info; + + /* + * Just let mcelog handle it if the error is + * outside the memory controller. A memory error + * is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0. + * bit 12 has an special meaning. + */ + if ((mce->status & 0xefff) >> 7 != 1) + return 0; + + printk("sbridge: HANDLING MCE MEMORY ERROR\n"); + + printk("CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", + mce->extcpu, mce->mcgstatus, mce->bank, mce->status); + printk("TSC %llx ", mce->tsc); + printk("ADDR %llx ", mce->addr); + printk("MISC %llx ", mce->misc); + + printk("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + mce->cpuvendor, mce->cpuid, mce->time, + mce->socketid, mce->apicid); + +#ifdef CONFIG_SMP + /* Only handle if it is the right mc controller */ + if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc) + return 0; +#endif + + smp_rmb(); + if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { + smp_wmb(); + pvt->mce_overrun++; + return 0; + } + + /* Copy memory error at the ringbuffer */ + memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce)); + smp_wmb(); + pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN; + + /* Handle fatal errors immediately */ + if (mce->mcgstatus & 1) + sbridge_check_error(mci); + + /* Advice mcelog that the error were handled */ + return 1; +} + +/**************************************************************************** + EDAC register/unregister logic + ****************************************************************************/ + +static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) +{ + struct mem_ctl_info *mci = sbridge_dev->mci; + struct sbridge_pvt *pvt; + + if (unlikely(!mci || !mci->pvt_info)) { + debugf0("MC: " __FILE__ ": %s(): dev = %p\n", + __func__, &sbridge_dev->pdev[0]->dev); + + sbridge_printk(KERN_ERR, "Couldn't find mci handler\n"); + return; + } + + pvt = mci->pvt_info; + + debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", + __func__, mci, &sbridge_dev->pdev[0]->dev); + + /* Disable MCE NMI handler */ + edac_mce_unregister(&pvt->edac_mce); + + /* Remove MC sysfs nodes */ + edac_mc_del_mc(mci->dev); + + debugf1("%s: free mci struct\n", mci->ctl_name); + kfree(mci->ctl_name); + edac_mc_free(mci); + sbridge_dev->mci = NULL; +} + +static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) +{ + struct mem_ctl_info *mci; + struct sbridge_pvt *pvt; + int rc, channels, csrows; + + /* Check the number of active and not disabled channels */ + rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows); + if (unlikely(rc < 0)) + return rc; + + /* allocate a new MC control structure */ + mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc); + if (unlikely(!mci)) + return -ENOMEM; + + debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", + __func__, mci, &sbridge_dev->pdev[0]->dev); + + pvt = mci->pvt_info; + memset(pvt, 0, sizeof(*pvt)); + + /* Associate sbridge_dev and mci for future usage */ + pvt->sbridge_dev = sbridge_dev; + sbridge_dev->mci = mci; + + mci->mtype_cap = MEM_FLAG_DDR3; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "sbridge_edac.c"; + mci->mod_ver = SBRIDGE_REVISION; + mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx); + mci->dev_name = pci_name(sbridge_dev->pdev[0]); + mci->ctl_page_to_phys = NULL; + + /* Set the function pointer to an actual operation function */ + mci->edac_check = sbridge_check_error; + + /* Store pci devices at mci for faster access */ + rc = mci_bind_devs(mci, sbridge_dev); + if (unlikely(rc < 0)) + goto fail0; + + /* Get dimm basic config and the memory layout */ + get_dimm_config(mci); + get_memory_layout(mci); + + /* record ptr to the generic device */ + mci->dev = &sbridge_dev->pdev[0]->dev; + + /* add this new MC control structure to EDAC's list of MCs */ + if (unlikely(edac_mc_add_mc(mci))) { + debugf0("MC: " __FILE__ + ": %s(): failed edac_mc_add_mc()\n", __func__); + rc = -EINVAL; + goto fail0; + } + + /* Registers on edac_mce in order to receive memory errors */ + pvt->edac_mce.priv = mci; + pvt->edac_mce.check_error = sbridge_mce_check_error; + rc = edac_mce_register(&pvt->edac_mce); + if (unlikely(rc < 0)) { + debugf0("MC: " __FILE__ + ": %s(): failed edac_mce_register()\n", __func__); + goto fail1; + } + + return 0; +fail1: + edac_mc_del_mc(mci->dev); + +fail0: + kfree(mci->ctl_name); + edac_mc_free(mci); + sbridge_dev->mci = NULL; + return rc; +} + +/* + * sbridge_probe Probe for ONE instance of device to see if it is + * present. + * return: + * 0 for FOUND a device + * < 0 for error code + */ + +static int __devinit sbridge_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int rc; + u8 mc, num_mc = 0; + struct sbridge_dev *sbridge_dev; + + /* get the pci devices we want to reserve for our use */ + mutex_lock(&sbridge_edac_lock); + + /* + * All memory controllers are allocated at the first pass. + */ + if (unlikely(probed >= 1)) { + mutex_unlock(&sbridge_edac_lock); + return -ENODEV; + } + probed++; + + rc = sbridge_get_all_devices(&num_mc); + if (unlikely(rc < 0)) + goto fail0; + mc = 0; + + list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { + debugf0("Registering MC#%d (%d of %d)\n", mc, mc + 1, num_mc); + sbridge_dev->mc = mc++; + rc = sbridge_register_mci(sbridge_dev); + if (unlikely(rc < 0)) + goto fail1; + } + + sbridge_printk(KERN_INFO, "Driver loaded.\n"); + + mutex_unlock(&sbridge_edac_lock); + return 0; + +fail1: + list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) + sbridge_unregister_mci(sbridge_dev); + + sbridge_put_all_devices(); +fail0: + mutex_unlock(&sbridge_edac_lock); + return rc; +} + +/* + * sbridge_remove destructor for one instance of device + * + */ +static void __devexit sbridge_remove(struct pci_dev *pdev) +{ + struct sbridge_dev *sbridge_dev; + + debugf0(__FILE__ ": %s()\n", __func__); + + /* + * we have a trouble here: pdev value for removal will be wrong, since + * it will point to the X58 register used to detect that the machine + * is a Nehalem or upper design. However, due to the way several PCI + * devices are grouped together to provide MC functionality, we need + * to use a different method for releasing the devices + */ + + mutex_lock(&sbridge_edac_lock); + + if (unlikely(!probed)) { + mutex_unlock(&sbridge_edac_lock); + return; + } + + list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) + sbridge_unregister_mci(sbridge_dev); + + /* Release PCI resources */ + sbridge_put_all_devices(); + + probed--; + + mutex_unlock(&sbridge_edac_lock); +} + +MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl); + +/* + * sbridge_driver pci_driver structure for this module + * + */ +static struct pci_driver sbridge_driver = { + .name = "sbridge_edac", + .probe = sbridge_probe, + .remove = __devexit_p(sbridge_remove), + .id_table = sbridge_pci_tbl, +}; + +/* + * sbridge_init Module entry function + * Try to initialize this module for its devices + */ +static int __init sbridge_init(void) +{ + int pci_rc; + + debugf2("MC: " __FILE__ ": %s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + pci_rc = pci_register_driver(&sbridge_driver); + + if (pci_rc >= 0) + return 0; + + sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", + pci_rc); + + return pci_rc; +} + +/* + * sbridge_exit() Module exit function + * Unregister the driver + */ +static void __exit sbridge_exit(void) +{ + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&sbridge_driver); +} + +module_init(sbridge_init); +module_exit(sbridge_exit); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mauro Carvalho Chehab "); +MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); +MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge memory controllers - " + SBRIDGE_REVISION); From 3d78c9af78e080db1961fd30ede1720d6e6e5999 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 20 Oct 2011 19:33:46 -0200 Subject: [PATCH 18/21] edac: sb_edac: Add it to the building system Some changes on it were required due to changeset cd90cc84c6bf0, that changed the glue with the MCE logic. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/Kconfig | 7 ++++++ drivers/edac/Makefile | 1 + drivers/edac/sb_edac.c | 48 ++++++++++++++++++++---------------------- 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index d057d6c40208..15e453577213 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -212,6 +212,13 @@ config EDAC_I7300 Support for error detection and correction the Intel Clarksboro MCH (Intel 7300 chipset). +config EDAC_SBRIDGE + tristate "Intel Sandy-Bridge Integrated MC" + depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL + help + Support for error detection and correction the Intel + Sandy Bridge Integrated Memory Controller. + config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index b06a9b11a5f6..196a63dd37c5 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_EDAC_I5100) += i5100_edac.o obj-$(CONFIG_EDAC_I5400) += i5400_edac.o obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o +obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 785c2769f05b..8118f12cb8c7 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -18,10 +18,10 @@ #include #include #include -#include #include #include #include +#include #include "edac_core.h" @@ -318,9 +318,6 @@ struct sbridge_pvt { /* Memory type detection */ bool is_mirrored, is_lockstep, is_close_pg; - /* mcelog glue */ - struct edac_mce edac_mce; - /* Fifo double buffers */ struct mce mce_entry[MCE_LOG_LEN]; struct mce mce_outentry[MCE_LOG_LEN]; @@ -1578,10 +1575,17 @@ static void sbridge_check_error(struct mem_ctl_info *mci) * WARNING: As this routine should be called at NMI time, extra care should * be taken to avoid deadlocks, and to be as fast as possible. */ -static int sbridge_mce_check_error(void *priv, struct mce *mce) +static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, + void *data) { - struct mem_ctl_info *mci = priv; - struct sbridge_pvt *pvt = mci->pvt_info; + struct mce *mce = (struct mce *)data; + struct mem_ctl_info *mci; + struct sbridge_pvt *pvt; + + mci = get_mci_for_node_id(mce->socketid); + if (!mci) + return NOTIFY_BAD; + pvt = mci->pvt_info; /* * Just let mcelog handle it if the error is @@ -1590,7 +1594,7 @@ static int sbridge_mce_check_error(void *priv, struct mce *mce) * bit 12 has an special meaning. */ if ((mce->status & 0xefff) >> 7 != 1) - return 0; + return NOTIFY_DONE; printk("sbridge: HANDLING MCE MEMORY ERROR\n"); @@ -1607,14 +1611,14 @@ static int sbridge_mce_check_error(void *priv, struct mce *mce) #ifdef CONFIG_SMP /* Only handle if it is the right mc controller */ if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc) - return 0; + return NOTIFY_DONE; #endif smp_rmb(); if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { smp_wmb(); pvt->mce_overrun++; - return 0; + return NOTIFY_DONE; } /* Copy memory error at the ringbuffer */ @@ -1627,9 +1631,13 @@ static int sbridge_mce_check_error(void *priv, struct mce *mce) sbridge_check_error(mci); /* Advice mcelog that the error were handled */ - return 1; + return NOTIFY_STOP; } +static struct notifier_block sbridge_mce_dec = { + .notifier_call = sbridge_mce_check_error, +}; + /**************************************************************************** EDAC register/unregister logic ****************************************************************************/ @@ -1652,8 +1660,8 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", __func__, mci, &sbridge_dev->pdev[0]->dev); - /* Disable MCE NMI handler */ - edac_mce_unregister(&pvt->edac_mce); + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, + &sbridge_mce_dec); /* Remove MC sysfs nodes */ edac_mc_del_mc(mci->dev); @@ -1722,19 +1730,9 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) goto fail0; } - /* Registers on edac_mce in order to receive memory errors */ - pvt->edac_mce.priv = mci; - pvt->edac_mce.check_error = sbridge_mce_check_error; - rc = edac_mce_register(&pvt->edac_mce); - if (unlikely(rc < 0)) { - debugf0("MC: " __FILE__ - ": %s(): failed edac_mce_register()\n", __func__); - goto fail1; - } - + atomic_notifier_chain_register(&x86_mce_decoder_chain, + &sbridge_mce_dec); return 0; -fail1: - edac_mc_del_mc(mci->dev); fail0: kfree(mci->ctl_name); From c6e13b528f5a6c6a6ae260007d4fd14c3dbc5648 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Tue, 18 Oct 2011 11:02:58 -0200 Subject: [PATCH 19/21] EDAC: Fix incorrect edac mode reporting in sb_edac The edac driver for Sandy Bridge was found to be reporting "FPM" for edac_mode, which clearly doesn't make sense. It was found that sb_edac.c:get_dimm_config was reusing a variable for both mem_type and edac_type, and thus was overwriting the value after setting it correctly. This patch fixes that issue. Before the patch: /sys/devices/system/edac/mc/mc0/csrow0/edac_mode:FPM /sys/devices/system/edac/mc/mc0/csrow1/edac_mode:FPM /sys/devices/system/edac/mc/mc0/csrow2/edac_mode:FPM /sys/devices/system/edac/mc/mc0/csrow3/edac_mode:FPM After: /sys/devices/system/edac/mc/mc0/csrow0/edac_mode:S4ECD4ED /sys/devices/system/edac/mc/mc0/csrow1/edac_mode:S4ECD4ED /sys/devices/system/edac/mc/mc0/csrow2/edac_mode:S4ECD4ED /sys/devices/system/edac/mc/mc0/csrow3/edac_mode:S4ECD4ED Signed-off-by: Mark A. Grondona Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/sb_edac.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 8118f12cb8c7..7a402bfbee7d 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -559,6 +559,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci) unsigned long last_page = 0; u32 reg; enum edac_type mode; + enum mem_type mtype; pci_read_config_dword(pvt->pci_br, SAD_TARGET, ®); pvt->sbridge_dev->source_id = SOURCE_ID(reg); @@ -601,10 +602,10 @@ static int get_dimm_config(const struct mem_ctl_info *mci) if (IS_RDIMM_ENABLED(reg)) { /* FIXME: Can also be LRDIMM */ debugf0("Memory is registered\n"); - mode = MEM_RDDR3; + mtype = MEM_RDDR3; } else { debugf0("Memory is unregistered\n"); - mode = MEM_DDR3; + mtype = MEM_DDR3; } /* On all supported DDR3 DIMM types, there are 8 banks available */ @@ -643,7 +644,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci) csr->dtype = (banks == 8) ? DEV_X8 : DEV_X4; csr->ce_count = 0; csr->ue_count = 0; - csr->mtype = mode; + csr->mtype = mtype; csr->edac_mode = mode; csr->nr_channels = 1; csr->channels[0].chan_idx = i; From 124a02c9b44a5faa3373a92388aa00290a76a0b7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 20 Oct 2011 19:41:23 -0200 Subject: [PATCH 20/21] edac: tag sb_edac as EXPERIMENTAL, as it requires more testing Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 15e453577213..203361e4ef94 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -215,6 +215,7 @@ config EDAC_I7300 config EDAC_SBRIDGE tristate "Intel Sandy-Bridge Integrated MC" depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL + depends on EXPERIMENTAL help Support for error detection and correction the Intel Sandy Bridge Integrated Memory Controller. From 4d096ca7e65584dd5845e64c6400f920e694f672 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 1 Nov 2011 10:03:24 -0200 Subject: [PATCH 21/21] MAINTAINERS: add an entry for Edac Sandy Bridge driver Signed-off-by: Mauro Carvalho Chehab --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1544cc8cf2db..14070ab3fc6b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2432,6 +2432,13 @@ W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/r82600_edac.c +EDAC-SBRIDGE +M: Mauro Carvalho Chehab +L: linux-edac@vger.kernel.org +W: bluesmoke.sourceforge.net +S: Maintained +F: drivers/edac/sb_edac.c + EDIROL UA-101/UA-1000 DRIVER M: Clemens Ladisch L: alsa-devel@alsa-project.org (moderated for non-subscribers)