From 72708a0a2b60e83255631f2557a85ac7daf33fac Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Wed, 16 Apr 2008 21:09:25 -0700 Subject: [PATCH] IB/ipath: HW workaround for case where chip can send but not receive Workaround a QLE7140 problem that in rare cases causes flow control problems after link recovery by forcing a link retrain after recovery. A module parameter is provided to control the behavior in case it causes problems. Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 4 ++++ drivers/infiniband/hw/ipath/ipath_intr.c | 18 ++++++++++++++++++ drivers/infiniband/hw/ipath/ipath_kernel.h | 2 ++ 3 files changed, 24 insertions(+) diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 5605f4f27521..2cad7335681b 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -82,6 +82,10 @@ module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO); MODULE_PARM_DESC(hol_timeout_ms, "duration of user app suspension after link failure"); +unsigned ipath_linkrecovery = 1; +module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue"); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("QLogic "); MODULE_DESCRIPTION("QLogic InfiniPath driver"); diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index dde5dfc9fcf5..ed82ecbb02da 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -366,6 +366,22 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, dd->ipath_ibpollcnt = 0; /* not poll*, now */ ipath_stats.sps_iblink++; + if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) { + u64 linkrecov; + linkrecov = ipath_snap_cntr(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + if (linkrecov != dd->ipath_lastlinkrecov) { + ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n", + ibcs, ib_linkstate(dd, ibcs), + ipath_ibcstatus_str[ltstate], + linkrecov); + /* and no more until active again */ + dd->ipath_lastlinkrecov = 0; + ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); + goto skip_ibchange; + } + } + if (ibstate == init || ibstate == arm || ibstate == active) { *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE; if (ibstate == init || ibstate == arm) { @@ -392,6 +408,8 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, IPATH_NOCABLE); ipath_hol_down(dd); } else { /* active */ + dd->ipath_lastlinkrecov = ipath_snap_cntr(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); *dd->ipath_statusp |= IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF; dd->ipath_flags |= IPATH_LINKACTIVE; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 960d5b7e7865..b8b81cb745b9 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -309,6 +309,7 @@ struct ipath_devdata { ipath_err_t ipath_lasthwerror; /* errors masked because they occur too fast */ ipath_err_t ipath_maskederrs; + u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */ /* time in jiffies at which to re-enable maskederrs */ unsigned long ipath_unmasktime; /* count of egrfull errors, combined for all ports */ @@ -1099,6 +1100,7 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int); #endif extern unsigned ipath_debug; /* debugging bit mask */ +extern unsigned ipath_linkrecovery; extern unsigned ipath_mtu4096; #define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */