amd64_edac: add module registration routines

Also, link into Kbuild by adding Kconfig and Makefile entries.

Borislav:
- Kconfig/Makefile splitting
- use zero-sized arrays for the sysfs attrs if not enabled
- rename sysfs attrs to more conforming values
- shorten CONFIG_ names
- make multiple structure members assignment vertically aligned
- fix/cleanup comments
- fix function return value patterns
- fix err labels
- fix a memleak bug caught by Ingo
- remove the NUMA dependency and use num_k8_northbridges for initializing
  a driver instance per NB.
- do not copy the pvt contents into the mci struct in
  amd64_init_2nd_stage() and save it in the mci->pvt_info void ptr
  instead.
- cleanup debug calls
- simplify amd64_setup_pci_device()

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
This commit is contained in:
Doug Thompson 2009-04-27 20:01:01 +02:00 committed by Borislav Petkov
parent f9431992b6
commit 7d6034d321
4 changed files with 422 additions and 0 deletions

View File

@ -58,6 +58,32 @@ config EDAC_MM_EDAC
occurred so that a particular failing memory module can be
replaced. If unsure, select 'Y'.
# AMD64 memory controller EDAC driver. Built as a module by default; needs
# the EDAC memory controller core, x86 and PCI.
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
depends on EDAC_MM_EDAC && X86 && PCI
default m
help
Support for error detection and correction on the AMD 64
Families of Memory Controllers (K8, F10h and F11h)
# Optional sysfs error injection interface; only selectable together with
# EDAC_AMD64.
config EDAC_AMD64_ERROR_INJECTION
bool "Sysfs Error Injection facilities"
depends on EDAC_AMD64
help
Recent Opterons (Family 10h and later) provide for Memory Error
Injection into the ECC detection circuits. The amd64_edac module
allows the operator/user to inject Uncorrectable and Correctable
errors into DRAM.
When enabled, in each of the respective memory controller directories
(/sys/devices/system/edac/mc/mcX), there are 3 input files:
- inject_section (0..3, 16-byte section of 64-byte cacheline),
- inject_word (0..8, 16-bit word of 16-byte section),
- inject_ecc_vector (hex ecc vector: select bits of inject word)
In addition, there are two control files, inject_read and inject_write,
which trigger the DRAM ECC Read and Write respectively.
config EDAC_AMD76X
tristate "AMD 76x (760, 762, 768)"

View File

@ -30,6 +30,13 @@ obj-$(CONFIG_EDAC_I3000) += i3000_edac.o
obj-$(CONFIG_EDAC_X38) += x38_edac.o
obj-$(CONFIG_EDAC_I82860) += i82860_edac.o
obj-$(CONFIG_EDAC_R82600) += r82600_edac.o
# amd64_edac_mod is a composite object: the error type tables and the core
# driver are always linked in, the debug and error injection objects only when
# their respective config options are enabled.
amd64_edac_mod-y := amd64_edac_err_types.o amd64_edac.o
amd64_edac_mod-$(CONFIG_EDAC_DEBUG) += amd64_edac_dbg.o
amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o
obj-$(CONFIG_EDAC_AMD64) += amd64_edac_mod.o
obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o
obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac.o
obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o

View File

@ -1,4 +1,5 @@
#include "amd64_edac.h"
#include <asm/k8.h>
/*
 * Generic EDAC PCI control created in amd64_setup_pci_device() and released
 * in amd64_edac_exit(); stays NULL as long as no control has been created.
 */
static struct edac_pci_ctl_info *amd64_ctl_pci;
@ -2978,3 +2979,376 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
return ret;
}
/*
 * Combined sysfs attribute table: room for all debug attributes, all error
 * injection attributes and one terminating entry. It is filled in and
 * installed into the mci by amd64_set_mc_sysfs_attributes().
 */
struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
ARRAY_SIZE(amd64_inj_attrs) +
1];
/* Terminating entry of the table: an attribute whose name is NULL. */
struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } };
/*
 * Assemble the driver-specific sysfs attribute table and hook it into @mci.
 *
 * The debug attributes are copied first, the error injection attributes next
 * and the terminating entry last. Either source array may have zero elements,
 * in which case its copy loop does not execute.
 */
static void amd64_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
{
	unsigned int dst = 0;
	unsigned int src;

	for (src = 0; src < ARRAY_SIZE(amd64_dbg_attrs); src++)
		sysfs_attrs[dst++] = amd64_dbg_attrs[src];

	for (src = 0; src < ARRAY_SIZE(amd64_inj_attrs); src++)
		sysfs_attrs[dst++] = amd64_inj_attrs[src];

	/* dst now indexes the slot right after the last copied attribute */
	sysfs_attrs[dst] = terminator;

	mci->mc_driver_sysfs_attributes = sysfs_attrs;
}
/*
 * Fill in the capability flags, identification strings and callbacks of @mci,
 * using the driver private data already stored in mci->pvt_info.
 */
static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
{
	struct amd64_pvt *pvt = mci->pvt_info;

	mci->mtype_cap = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
	mci->edac_cap = EDAC_FLAG_NONE;

	/* controller capabilities are derived from the NB capability bits */
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	if (pvt->nbcap & K8_NBCAP_SECDED)
		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
	if (pvt->nbcap & K8_NBCAP_CHIPKILL)
		mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;

	mci->edac_cap = amd64_determine_edac_cap(pvt);

	/* identification strings */
	mci->mod_name = EDAC_MOD_STR;
	mci->mod_ver = EDAC_AMD64_VERSION;
	mci->ctl_name = get_amd_family_name(pvt->mc_type_index);
	mci->dev_name = pci_name(pvt->dram_f2_ctl);

	mci->ctl_page_to_phys = NULL;

	/* IMPORTANT: the polling 'check' function lives in this module */
	mci->edac_check = amd64_check;

	/* memory scrubber interface */
	mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
	mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
}
/*
 * Init stuff for this DRAM Controller device.
 *
 * Due to a hardware feature on Fam10h CPUs, the Enable Extended Configuration
 * Space feature MUST be enabled on ALL Processors prior to actually reading
 * from the ECS registers. The module can be loaded on any 'core', and cores
 * don't 'see' the ECS data of the other processors while those are NOT
 * enabled. Our solution is to first enable ECS access in this routine on all
 * processors, gather some data in an amd64_pvt structure and later come back
 * in a finish-setup function to perform the final initialization. See also
 * amd64_init_2nd_stage() for that.
 *
 * Returns 0 on success, in which case the new amd64_pvt is stored in
 * pvt_lookup[] under its node id. Returns -ENOMEM if the allocation fails,
 * -ENODEV if the sibling devices cannot be reserved and -EINVAL if the ECC
 * check reports an error.
 */
static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl,
				    int mc_type_index)
{
	struct amd64_pvt *pvt;
	int ret;

	pvt = kzalloc(sizeof(*pvt), GFP_KERNEL);
	if (!pvt)
		return -ENOMEM;

	pvt->mc_node_id		= get_mc_node_id_from_pdev(dram_f2_ctl);
	pvt->dram_f2_ctl	= dram_f2_ctl;
	pvt->ext_model		= boot_cpu_data.x86_model >> 4;
	pvt->mc_type_index	= mc_type_index;
	pvt->ops		= family_ops(mc_type_index);
	pvt->old_mcgctl		= 0;

	/*
	 * We have the dram_f2_ctl device as an argument, now go reserve its
	 * sibling devices from the PCI system.
	 */
	if (amd64_reserve_mc_sibling_devices(pvt, mc_type_index)) {
		ret = -ENODEV;
		goto err_free;
	}

	if (amd64_check_ecc_enabled(pvt)) {
		ret = -EINVAL;
		goto err_put;
	}

	/*
	 * Key operation here: setup of HW prior to performing ops on it. Some
	 * setup is required to access ECS data. After this is performed, the
	 * 'teardown' function must be called upon error and normal exit paths.
	 */
	if (boot_cpu_data.x86 >= 0x10)
		amd64_setup(pvt);

	/*
	 * Save the pointer to the private data for use in 2nd initialization
	 * stage
	 */
	pvt_lookup[pvt->mc_node_id] = pvt;

	return 0;

err_put:
	amd64_free_mc_sibling_devices(pvt);

err_free:
	kfree(pvt);

	return ret;
}
/*
 * This is the finishing stage of the init code. Needs to be performed after all
 * MCs' hardware have been prepped for accessing extended config space.
 *
 * Returns 0 once the mci has been registered with the EDAC core; the mci is
 * then recorded in mci_lookup[] and the matching pvt_lookup[] slot is cleared.
 * On failure the instance is torn down, its pvt is freed and a negative errno
 * is returned (-ENODEV or -ENOMEM).
 */
static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
{
	int node_id = pvt->mc_node_id;
	struct mem_ctl_info *mci;
	int ret;

	amd64_read_mc_registers(pvt);

	/* the hardware validation hook is optional */
	if (pvt->ops->probe_valid_hardware &&
	    pvt->ops->probe_valid_hardware(pvt)) {
		ret = -ENODEV;
		goto err_exit;
	}

	/*
	 * We need to determine how many memory channels there are. Then use
	 * that information for calculating the size of the dynamic instance
	 * tables in the 'mci' structure
	 */
	pvt->channel_count = pvt->ops->early_channel_count(pvt);
	if (pvt->channel_count < 0) {
		ret = -ENODEV;
		goto err_exit;
	}

	mci = edac_mc_alloc(0, CHIPSELECT_COUNT, pvt->channel_count, node_id);
	if (!mci) {
		ret = -ENOMEM;
		goto err_exit;
	}

	mci->pvt_info = pvt;
	mci->dev = &pvt->dram_f2_ctl->dev;

	amd64_setup_mci_misc_attributes(mci);

	/* a non-zero result from the csrow init resets the EDAC capability */
	if (amd64_init_csrows(mci))
		mci->edac_cap = EDAC_FLAG_NONE;

	amd64_enable_ecc_error_reporting(mci);
	amd64_set_mc_sysfs_attributes(mci);

	if (edac_mc_add_mc(mci)) {
		debugf1("failed edac_mc_add_mc()\n");
		ret = -ENODEV;
		goto err_add_mc;
	}

	/* from here on the instance is tracked through its mci */
	mci_lookup[node_id] = mci;
	pvt_lookup[node_id] = NULL;

	return 0;

err_add_mc:
	edac_mc_free(mci);

err_exit:
	debugf0("failure to init 2nd stage: ret=%d\n", ret);

	amd64_restore_ecc_error_reporting(pvt);

	if (boot_cpu_data.x86 > 0xf)
		amd64_teardown(pvt);

	amd64_free_mc_sibling_devices(pvt);

	kfree(pvt_lookup[pvt->mc_node_id]);
	pvt_lookup[node_id] = NULL;

	return ret;
}
/*
 * PCI probe callback: enable @pdev and run the first init stage for it.
 *
 * The driver_data of the matched table entry is passed on as the MC type
 * index. Returns -EIO if the device cannot be enabled, otherwise the result
 * of amd64_probe_one_instance().
 */
static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
					     const struct pci_device_id *mc_type)
{
	int ret;

	debugf0("(MC node=%d,mc_type='%s')\n",
		get_mc_node_id_from_pdev(pdev),
		get_amd_family_name(mc_type->driver_data));

	if (pci_enable_device(pdev) < 0)
		ret = -EIO;
	else
		ret = amd64_probe_one_instance(pdev, mc_type->driver_data);

	if (ret < 0)
		debugf0("ret=%d\n", ret);

	return ret;
}
/*
 * PCI remove callback: tear down the MC instance that belongs to @pdev.
 *
 * Does nothing if the EDAC core has no mci registered for the device.
 * Otherwise ECC error reporting is restored, the hardware setup is undone on
 * families above 0xf, the sibling devices are released and both the private
 * data and the mci are freed.
 */
static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct amd64_pvt *pvt;

	/* Remove from EDAC CORE tracking list */
	mci = edac_mc_del_mc(&pdev->dev);
	if (!mci)
		return;

	pvt = mci->pvt_info;

	amd64_restore_ecc_error_reporting(pvt);

	if (boot_cpu_data.x86 > 0xf)
		amd64_teardown(pvt);

	amd64_free_mc_sibling_devices(pvt);

	/*
	 * The node id is stored in pvt, so clear the lookup slot and the mci
	 * back pointer while pvt is still valid, and only then free it.
	 */
	mci_lookup[pvt->mc_node_id] = NULL;
	mci->pvt_info = NULL;

	kfree(pvt);

	/* Free the EDAC CORE resources */
	edac_mc_free(mci);
}
/*
 * This table is part of the interface for loading drivers for PCI devices. The
 * PCI core identifies which devices are present on a system during boot, and
 * then consults this table to see whether this driver handles a given device.
 *
 * The .class and .class_mask members are not listed in the initializers and
 * are therefore zero. The .driver_data member carries the family index that
 * the probe callback hands on to amd64_probe_one_instance().
 */
static const struct pci_device_id amd64_pci_table[] __devinitdata = {
	{
		.vendor		= PCI_VENDOR_ID_AMD,
		.device		= PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
		.subvendor	= PCI_ANY_ID,
		.subdevice	= PCI_ANY_ID,
		.driver_data	= K8_CPUS
	},
	{
		.vendor		= PCI_VENDOR_ID_AMD,
		.device		= PCI_DEVICE_ID_AMD_10H_NB_DRAM,
		.subvendor	= PCI_ANY_ID,
		.subdevice	= PCI_ANY_ID,
		.driver_data	= F10_CPUS
	},
	{
		.vendor		= PCI_VENDOR_ID_AMD,
		.device		= PCI_DEVICE_ID_AMD_11H_NB_DRAM,
		.subvendor	= PCI_ANY_ID,
		.subdevice	= PCI_ANY_ID,
		.driver_data	= F11_CPUS
	},

	/* all-zero entry terminates the table */
	{0, }
};
MODULE_DEVICE_TABLE(pci, amd64_pci_table);
/*
 * PCI driver description: ties the device id table to the per-instance
 * probe and remove callbacks.
 */
static struct pci_driver amd64_pci_driver = {
	.name		= EDAC_MOD_STR,
	.id_table	= amd64_pci_table,
	.probe		= amd64_init_one_instance,
	.remove		= __devexit_p(amd64_remove_one_instance),
};
/*
 * Create the generic EDAC PCI control, using the F2 device of the MC instance
 * found in mci_lookup[0].
 *
 * Nothing is done if a control already exists or if mci_lookup[0] is empty.
 * A failure to create the control is only reported with warnings.
 */
static void amd64_setup_pci_device(void)
{
	struct mem_ctl_info *mci = mci_lookup[0];
	struct amd64_pvt *pvt;

	if (amd64_ctl_pci || !mci)
		return;

	pvt = mci->pvt_info;

	amd64_ctl_pci = edac_pci_create_generic_ctl(&pvt->dram_f2_ctl->dev,
						    EDAC_MOD_STR);
	if (amd64_ctl_pci)
		return;

	pr_warning("%s(): Unable to create PCI control\n",
		   __func__);
	pr_warning("%s(): PCI error report via EDAC not set\n",
		   __func__);
}
/*
 * Module init: register the PCI driver, which runs the first init stage for
 * every matching device, then finish the setup of each MC instance.
 *
 * Returns 0 on success. Returns -ENODEV if the northbridges cannot be cached,
 * the error from pci_register_driver() if registration fails, or the error of
 * the first instance whose 2nd stage fails; in the last case the PCI driver is
 * unregistered again.
 */
static int __init amd64_edac_init(void)
{
	int nb, err = -ENODEV;

	edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n");

	opstate_init();

	/*
	 * The PCI driver has not been registered at this point, so there is
	 * nothing to unwind: bail out directly instead of unregistering it.
	 */
	if (cache_k8_northbridges() < 0)
		return err;

	err = pci_register_driver(&amd64_pci_driver);
	if (err)
		return err;

	/*
	 * At this point, the array 'pvt_lookup[]' contains pointers to alloc'd
	 * amd64_pvt structs. These will be used in the 2nd stage init function
	 * to finish initialization of the MC instances.
	 */
	for (nb = 0; nb < num_k8_northbridges; nb++) {
		if (!pvt_lookup[nb])
			continue;

		err = amd64_init_2nd_stage(pvt_lookup[nb]);
		if (err)
			goto err_2nd_stage;
	}

	amd64_setup_pci_device();

	return 0;

err_2nd_stage:
	debugf0("'finish_setup' stage failed\n");

	/*
	 * NOTE(review): pvt_lookup[] entries that have not gone through the
	 * 2nd stage yet are not released on this path -- confirm whether the
	 * remove callback covers them.
	 */
	pci_unregister_driver(&amd64_pci_driver);

	return err;
}
/*
 * Module exit: release the generic EDAC PCI control if one was created, then
 * unregister the PCI driver.
 */
static void __exit amd64_edac_exit(void)
{
	/* the control only exists if amd64_setup_pci_device() succeeded */
	if (amd64_ctl_pci) {
		edac_pci_release_generic_ctl(amd64_ctl_pci);
	}

	pci_unregister_driver(&amd64_pci_driver);
}
/* Module entry and exit points */
module_init(amd64_edac_init);
module_exit(amd64_edac_exit);
/* Module metadata */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
"Dave Peterson, Thayne Harbaugh");
MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
EDAC_AMD64_VERSION);
/* edac_op_state is exposed as a module parameter with mode 0444 */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

View File

@ -577,6 +577,21 @@ extern const char *ii_msgs[4];
extern const char *ext_msgs[32];
extern const char *htlink_msgs[8];
/*
 * Sizes of the optional sysfs attribute arrays. A size is 0 when the
 * corresponding config option is disabled, which makes the array declared
 * below zero-sized.
 */
#ifdef CONFIG_EDAC_DEBUG
#define NUM_DBG_ATTRS 9
#else
#define NUM_DBG_ATTRS 0
#endif
#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
#define NUM_INJ_ATTRS 5
#else
#define NUM_INJ_ATTRS 0
#endif
/* Debug and error injection sysfs attribute arrays; only declared here. */
extern struct mcidev_sysfs_attribute amd64_dbg_attrs[NUM_DBG_ATTRS],
amd64_inj_attrs[NUM_INJ_ATTRS];
/*
* Each of the PCI Device IDs types have their own set of hardware accessor
* functions and per device encoding/decoding logic.