drivers/edac: provide support for tile architecture

Add tile support for the EDAC driver, which provides unified system
error (memory, PCI, etc.) reporting. For now, the TILEPro port
reports memory correctable error (CE) only.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
This commit is contained in:
Chris Metcalf 2011-03-01 13:01:49 -05:00
parent d91c641233
commit 5c77075548
7 changed files with 384 additions and 2 deletions

View File

@ -6082,6 +6082,7 @@ S: Supported
F: arch/tile/
F: drivers/char/hvc_tile.c
F: drivers/net/tile/
F: drivers/edac/tile_edac.c
TLAN NETWORK DRIVER
M: Samuel Chessman <chessman@tux.org>

View File

@ -0,0 +1,29 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#ifndef _ASM_TILE_EDAC_H
#define _ASM_TILE_EDAC_H
/* ECC atomic, DMA, SMP and interrupt safe scrub function */
static inline void atomic_scrub(void *va, u32 size)
{
/*
* These is nothing to be done here because CE is
* corrected by the mshim.
*/
return;
}
#endif /* _ASM_TILE_EDAC_H */

View File

@ -0,0 +1,50 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
/**
* @file drv_mshim_intf.h
* Interface definitions for the Linux EDAC memory controller driver.
*/
#ifndef _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
#define _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
/** Number of memory controllers in the public API. */
#define TILE_MAX_MSHIMS 4
/** Memory info under each memory controller. */
struct mshim_mem_info
{
uint64_t mem_size; /**< Total memory size in bytes. */
uint8_t mem_type; /**< Memory type, DDR2 or DDR3. */
uint8_t mem_ecc; /**< Memory supports ECC. */
};
/**
* DIMM error structure.
* For now, only correctable errors are counted and the mshim doesn't record
* the error PA. HV takes panic upon uncorrectable errors.
*/
struct mshim_mem_error
{
uint32_t sbe_count; /**< Number of single-bit errors. */
};
/** Read this offset to get the memory info per mshim. */
#define MSHIM_MEM_INFO_OFF 0x100
/** Read this offset to check DIMM error. */
#define MSHIM_MEM_ERROR_OFF 0x200
#endif /* _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H */

View File

@ -338,9 +338,10 @@ typedef int HV_Errno;
#define HV_ENOTREADY -812 /**< Device not ready */
#define HV_EIO -813 /**< I/O error */
#define HV_ENOMEM -814 /**< Out of memory */
#define HV_EAGAIN -815 /**< Try again */
#define HV_ERR_MAX -801 /**< Largest HV error code */
#define HV_ERR_MIN -814 /**< Smallest HV error code */
#define HV_ERR_MIN -815 /**< Smallest HV error code */
#ifndef __ASSEMBLER__
@ -867,6 +868,43 @@ typedef struct
*/
HV_PhysAddrRange hv_inquire_physical(int idx);
/** Possible DIMM types. */
typedef enum
{
NO_DIMM = 0, /**< No DIMM */
DDR2 = 1, /**< DDR2 */
DDR3 = 2 /**< DDR3 */
} HV_DIMM_Type;
#ifdef __tilegx__
/** Log2 of minimum DIMM bytes supported by the memory controller. */
#define HV_MSH_MIN_DIMM_SIZE_SHIFT 29
/** Max number of DIMMs contained by one memory controller. */
#define HV_MSH_MAX_DIMMS 8
#else
/** Log2 of minimum DIMM bytes supported by the memory controller. */
#define HV_MSH_MIN_DIMM_SIZE_SHIFT 26
/** Max number of DIMMs contained by one memory controller. */
#define HV_MSH_MAX_DIMMS 2
#endif
/** Number of bits to right-shift to get the DIMM type. */
#define HV_DIMM_TYPE_SHIFT 0
/** Bits to mask to get the DIMM type. */
#define HV_DIMM_TYPE_MASK 0xf
/** Number of bits to right-shift to get the DIMM size. */
#define HV_DIMM_SIZE_SHIFT 4
/** Bits to mask to get the DIMM size. */
#define HV_DIMM_SIZE_MASK 0xf
/** Memory controller information. */
typedef struct
@ -1043,6 +1081,7 @@ int hv_console_write(HV_VirtAddr bytes, int len);
* downcall:
*
* INT_MESSAGE_RCV_DWNCL (hypervisor message available)
* INT_DEV_INTR_DWNCL (device interrupt)
* INT_DMATLB_MISS_DWNCL (DMA TLB miss)
* INT_SNITLB_MISS_DWNCL (SNI TLB miss)
* INT_DMATLB_ACCESS_DWNCL (DMA TLB access violation)

View File

@ -7,7 +7,7 @@
menuconfig EDAC
bool "EDAC (Error Detection And Correction) reporting"
depends on HAS_IOMEM
depends on X86 || PPC
depends on X86 || PPC || TILE
help
EDAC is designed to report errors in the core system.
These are low-level errors that are reported in the CPU or
@ -282,4 +282,12 @@ config EDAC_CPC925
a companion chip to the PowerPC 970 family of
processors.
config EDAC_TILE
tristate "Tilera Memory Controller"
depends on EDAC_MM_EDAC && TILE
default y
help
Support for error detection and correction on the
Tilera memory controller.
endif # EDAC

View File

@ -54,3 +54,4 @@ obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o
obj-$(CONFIG_EDAC_TILE) += tile_edac.o

254
drivers/edac/tile_edac.c Normal file
View File

@ -0,0 +1,254 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
* Tilera-specific EDAC driver.
*
* This source code is derived from the following driver:
*
* Cell MIC driver for ECC counting
*
* Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
*
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/uaccess.h>
#include <linux/edac.h>
#include <hv/hypervisor.h>
#include <hv/drv_mshim_intf.h>
#include "edac_core.h"
#define DRV_NAME "tile-edac"
/* Number of cs_rows needed per memory controller on TILEPro. */
#define TILE_EDAC_NR_CSROWS 1
/* Number of channels per memory controller on TILEPro. */
#define TILE_EDAC_NR_CHANS 1
/* Granularity of reported error in bytes on TILEPro. */
#define TILE_EDAC_ERROR_GRAIN 8
/* TILE processor has multiple independent memory controllers. */
struct platform_device *mshim_pdev[TILE_MAX_MSHIMS];
struct tile_edac_priv {
int hv_devhdl; /* Hypervisor device handle. */
int node; /* Memory controller instance #. */
unsigned int ce_count; /*
* Correctable-error counter
* kept by the driver.
*/
};
static void tile_edac_check(struct mem_ctl_info *mci)
{
struct tile_edac_priv *priv = mci->pvt_info;
struct mshim_mem_error mem_error;
if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_error,
sizeof(struct mshim_mem_error), MSHIM_MEM_ERROR_OFF) !=
sizeof(struct mshim_mem_error)) {
pr_err(DRV_NAME ": MSHIM_MEM_ERROR_OFF pread failure.\n");
return;
}
/* Check if the current error count is different from the saved one. */
if (mem_error.sbe_count != priv->ce_count) {
dev_dbg(mci->dev, "ECC CE err on node %d\n", priv->node);
priv->ce_count = mem_error.sbe_count;
edac_mc_handle_ce(mci, 0, 0, 0, 0, 0, mci->ctl_name);
}
}
/*
* Initialize the 'csrows' table within the mci control structure with the
* addressing of memory.
*/
static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci)
{
struct csrow_info *csrow = &mci->csrows[0];
struct tile_edac_priv *priv = mci->pvt_info;
struct mshim_mem_info mem_info;
if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_info,
sizeof(struct mshim_mem_info), MSHIM_MEM_INFO_OFF) !=
sizeof(struct mshim_mem_info)) {
pr_err(DRV_NAME ": MSHIM_MEM_INFO_OFF pread failure.\n");
return -1;
}
if (mem_info.mem_ecc)
csrow->edac_mode = EDAC_SECDED;
else
csrow->edac_mode = EDAC_NONE;
switch (mem_info.mem_type) {
case DDR2:
csrow->mtype = MEM_DDR2;
break;
case DDR3:
csrow->mtype = MEM_DDR3;
break;
default:
return -1;
}
csrow->first_page = 0;
csrow->nr_pages = mem_info.mem_size >> PAGE_SHIFT;
csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
csrow->grain = TILE_EDAC_ERROR_GRAIN;
csrow->dtype = DEV_UNKNOWN;
return 0;
}
static int __devinit tile_edac_mc_probe(struct platform_device *pdev)
{
char hv_file[32];
int hv_devhdl;
struct mem_ctl_info *mci;
struct tile_edac_priv *priv;
int rc;
sprintf(hv_file, "mshim/%d", pdev->id);
hv_devhdl = hv_dev_open((HV_VirtAddr)hv_file, 0);
if (hv_devhdl < 0)
return -EINVAL;
/* A TILE MC has a single channel and one chip-select row. */
mci = edac_mc_alloc(sizeof(struct tile_edac_priv),
TILE_EDAC_NR_CSROWS, TILE_EDAC_NR_CHANS, pdev->id);
if (mci == NULL)
return -ENOMEM;
priv = mci->pvt_info;
priv->node = pdev->id;
priv->hv_devhdl = hv_devhdl;
mci->dev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR2;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->mod_name = DRV_NAME;
mci->ctl_name = "TILEPro_Memory_Controller";
mci->dev_name = dev_name(&pdev->dev);
mci->edac_check = tile_edac_check;
/*
* Initialize the MC control structure 'csrows' table
* with the mapping and control information.
*/
if (tile_edac_init_csrows(mci)) {
/* No csrows found. */
mci->edac_cap = EDAC_FLAG_NONE;
} else {
mci->edac_cap = EDAC_FLAG_SECDED;
}
platform_set_drvdata(pdev, mci);
/* Register with EDAC core */
rc = edac_mc_add_mc(mci);
if (rc) {
dev_err(&pdev->dev, "failed to register with EDAC core\n");
edac_mc_free(mci);
return rc;
}
return 0;
}
static int __devexit tile_edac_mc_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_mc_del_mc(&pdev->dev);
if (mci)
edac_mc_free(mci);
return 0;
}
static struct platform_driver tile_edac_mc_driver = {
.driver = {
.name = DRV_NAME,
.owner = THIS_MODULE,
},
.probe = tile_edac_mc_probe,
.remove = __devexit_p(tile_edac_mc_remove),
};
/*
* Driver init routine.
*/
static int __init tile_edac_init(void)
{
char hv_file[32];
struct platform_device *pdev;
int i, err, num = 0;
/* Only support POLL mode. */
edac_op_state = EDAC_OPSTATE_POLL;
err = platform_driver_register(&tile_edac_mc_driver);
if (err)
return err;
for (i = 0; i < TILE_MAX_MSHIMS; i++) {
/*
* Not all memory controllers are configured such as in the
* case of a simulator. So we register only those mshims
* that are configured by the hypervisor.
*/
sprintf(hv_file, "mshim/%d", i);
if (hv_dev_open((HV_VirtAddr)hv_file, 0) < 0)
continue;
pdev = platform_device_register_simple(DRV_NAME, i, NULL, 0);
if (IS_ERR(pdev))
continue;
mshim_pdev[i] = pdev;
num++;
}
if (num == 0) {
platform_driver_unregister(&tile_edac_mc_driver);
return -ENODEV;
}
return 0;
}
/*
* Driver cleanup routine.
*/
static void __exit tile_edac_exit(void)
{
int i;
for (i = 0; i < TILE_MAX_MSHIMS; i++) {
struct platform_device *pdev = mshim_pdev[i];
if (!pdev)
continue;
platform_set_drvdata(pdev, NULL);
platform_device_unregister(pdev);
}
platform_driver_unregister(&tile_edac_mc_driver);
}
module_init(tile_edac_init);
module_exit(tile_edac_exit);