qede: Error recovery process

This patch adds the error recovery process in the qede driver.
The process includes a partial/customized driver unload and load, which
allows it to look like a short suspend period to the kernel while
preserving the net devices' state.

Signed-off-by: Tomer Tayar <tomer.tayar@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Tomer Tayar 2019-01-20 11:36:39 +02:00 committed by David S. Miller
parent c75860e48a
commit 278396de78
4 changed files with 315 additions and 75 deletions

View File

@ -162,6 +162,7 @@ struct qede_rdma_dev {
struct list_head entry;
struct list_head rdma_event_list;
struct workqueue_struct *rdma_wq;
bool exp_recovery;
};
struct qede_ptp;
@ -264,6 +265,7 @@ struct qede_dev {
enum QEDE_STATE {
QEDE_STATE_CLOSED,
QEDE_STATE_OPEN,
QEDE_STATE_RECOVERY,
};
#define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo))
@ -462,6 +464,7 @@ struct qede_fastpath {
#define QEDE_CSUM_UNNECESSARY BIT(1)
#define QEDE_TUNN_CSUM_UNNECESSARY BIT(2)
#define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1
#ifdef CONFIG_RFS_ACCEL

View File

@ -133,23 +133,12 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
static void qede_remove(struct pci_dev *pdev);
static void qede_shutdown(struct pci_dev *pdev);
static void qede_link_update(void *dev, struct qed_link_output *link);
static void qede_schedule_recovery_handler(void *dev);
static void qede_recovery_handler(struct qede_dev *edev);
static void qede_get_eth_tlv_data(void *edev, void *data);
static void qede_get_generic_tlv_data(void *edev,
struct qed_generic_tlvs *data);
/* The qede lock is used to protect driver state change and driver flows that
* are not reentrant.
*/
void __qede_lock(struct qede_dev *edev)
{
mutex_lock(&edev->qede_lock);
}
void __qede_unlock(struct qede_dev *edev)
{
mutex_unlock(&edev->qede_lock);
}
#ifdef CONFIG_QED_SRIOV
static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
__be16 vlan_proto)
@ -231,6 +220,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
.arfs_filter_op = qede_arfs_filter_op,
#endif
.link_update = qede_link_update,
.schedule_recovery_handler = qede_schedule_recovery_handler,
.get_generic_tlv_data = qede_get_generic_tlv_data,
.get_protocol_tlv_data = qede_get_eth_tlv_data,
},
@ -950,11 +940,57 @@ err:
return -ENOMEM;
}
/* The qede lock is used to protect driver state change and driver flows that
* are not reentrant.
*/
void __qede_lock(struct qede_dev *edev)
{
mutex_lock(&edev->qede_lock);
}
void __qede_unlock(struct qede_dev *edev)
{
mutex_unlock(&edev->qede_lock);
}
/* This version of the lock should be used when acquiring the RTNL lock is also
* needed in addition to the internal qede lock.
*/
void qede_lock(struct qede_dev *edev)
{
rtnl_lock();
__qede_lock(edev);
}
void qede_unlock(struct qede_dev *edev)
{
__qede_unlock(edev);
rtnl_unlock();
}
static void qede_sp_task(struct work_struct *work)
{
struct qede_dev *edev = container_of(work, struct qede_dev,
sp_task.work);
/* The locking scheme depends on the specific flag:
* In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
* ensure that ongoing flows are ended and new ones are not started.
* In other cases - only the internal qede lock should be acquired.
*/
if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
#ifdef CONFIG_QED_SRIOV
/* SRIOV must be disabled outside the lock to avoid a deadlock.
* The recovery of the active VFs is currently not supported.
*/
qede_sriov_configure(edev->pdev, 0);
#endif
qede_lock(edev);
qede_recovery_handler(edev);
qede_unlock(edev);
}
__qede_lock(edev);
if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
@ -1031,8 +1067,13 @@ static void qede_log_probe(struct qede_dev *edev)
enum qede_probe_mode {
QEDE_PROBE_NORMAL,
QEDE_PROBE_RECOVERY,
};
#define QEDE_RDMA_PROBE_MODE(mode) \
((mode) == QEDE_PROBE_NORMAL ? QEDE_RDMA_PROBE_NORMAL \
: QEDE_RDMA_PROBE_RECOVERY)
static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
bool is_vf, enum qede_probe_mode mode)
{
@ -1051,6 +1092,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
probe_params.dp_module = dp_module;
probe_params.dp_level = dp_level;
probe_params.is_vf = is_vf;
probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY);
cdev = qed_ops->common->probe(pdev, &probe_params);
if (!cdev) {
rc = -ENODEV;
@ -1078,11 +1120,20 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
if (rc)
goto err2;
edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
dp_level);
if (!edev) {
rc = -ENOMEM;
goto err2;
if (mode != QEDE_PROBE_RECOVERY) {
edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
dp_level);
if (!edev) {
rc = -ENOMEM;
goto err2;
}
} else {
struct net_device *ndev = pci_get_drvdata(pdev);
edev = netdev_priv(ndev);
edev->cdev = cdev;
memset(&edev->stats, 0, sizeof(edev->stats));
memcpy(&edev->dev_info, &dev_info, sizeof(dev_info));
}
if (is_vf)
@ -1090,28 +1141,31 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
qede_init_ndev(edev);
rc = qede_rdma_dev_add(edev);
rc = qede_rdma_dev_add(edev, QEDE_RDMA_PROBE_MODE(mode));
if (rc)
goto err3;
/* Prepare the lock prior to the registration of the netdev,
* as once it's registered we might reach flows requiring it
* [it's even possible to reach a flow needing it directly
* from there, although it's unlikely].
*/
INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
mutex_init(&edev->qede_lock);
rc = register_netdev(edev->ndev);
if (rc) {
DP_NOTICE(edev, "Cannot register net-device\n");
goto err4;
if (mode != QEDE_PROBE_RECOVERY) {
/* Prepare the lock prior to the registration of the netdev,
* as once it's registered we might reach flows requiring it
* [it's even possible to reach a flow needing it directly
* from there, although it's unlikely].
*/
INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
mutex_init(&edev->qede_lock);
rc = register_netdev(edev->ndev);
if (rc) {
DP_NOTICE(edev, "Cannot register net-device\n");
goto err4;
}
}
edev->ops->common->set_name(cdev, edev->ndev->name);
/* PTP not supported on VFs */
if (!is_vf)
qede_ptp_enable(edev, true);
qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL));
edev->ops->register_ops(cdev, &qede_ll_ops, edev);
@ -1126,7 +1180,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
return 0;
err4:
qede_rdma_dev_remove(edev);
qede_rdma_dev_remove(edev, QEDE_RDMA_PROBE_MODE(mode));
err3:
free_netdev(edev->ndev);
err2:
@ -1162,8 +1216,13 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id)
enum qede_remove_mode {
QEDE_REMOVE_NORMAL,
QEDE_REMOVE_RECOVERY,
};
#define QEDE_RDMA_REMOVE_MODE(mode) \
((mode) == QEDE_REMOVE_NORMAL ? QEDE_RDMA_REMOVE_NORMAL \
: QEDE_RDMA_REMOVE_RECOVERY)
static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
{
struct net_device *ndev = pci_get_drvdata(pdev);
@ -1172,16 +1231,20 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
DP_INFO(edev, "Starting qede_remove\n");
qede_rdma_dev_remove(edev);
unregister_netdev(ndev);
cancel_delayed_work_sync(&edev->sp_task);
qede_rdma_dev_remove(edev, QEDE_RDMA_REMOVE_MODE(mode));
if (mode != QEDE_REMOVE_RECOVERY) {
unregister_netdev(ndev);
cancel_delayed_work_sync(&edev->sp_task);
edev->ops->common->set_power_state(cdev, PCI_D0);
pci_set_drvdata(pdev, NULL);
}
qede_ptp_disable(edev);
edev->ops->common->set_power_state(cdev, PCI_D0);
pci_set_drvdata(pdev, NULL);
/* Use global ops since we've freed edev */
qed_ops->common->slowpath_stop(cdev);
if (system_state == SYSTEM_POWER_OFF)
@ -1194,7 +1257,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
* [e.g., QED register callbacks] won't break anything when
* accessing the netdevice.
*/
free_netdev(ndev);
if (mode != QEDE_REMOVE_RECOVERY)
free_netdev(ndev);
dev_info(&pdev->dev, "Ending qede_remove successfully\n");
}
@ -1539,6 +1603,58 @@ static int qede_alloc_mem_load(struct qede_dev *edev)
return 0;
}
static void qede_empty_tx_queue(struct qede_dev *edev,
struct qede_tx_queue *txq)
{
unsigned int pkts_compl = 0, bytes_compl = 0;
struct netdev_queue *netdev_txq;
int rc, len = 0;
netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
while (qed_chain_get_cons_idx(&txq->tx_pbl) !=
qed_chain_get_prod_idx(&txq->tx_pbl)) {
DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
"Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
txq->index, qed_chain_get_cons_idx(&txq->tx_pbl),
qed_chain_get_prod_idx(&txq->tx_pbl));
rc = qede_free_tx_pkt(edev, txq, &len);
if (rc) {
DP_NOTICE(edev,
"Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
txq->index,
qed_chain_get_cons_idx(&txq->tx_pbl),
qed_chain_get_prod_idx(&txq->tx_pbl));
break;
}
bytes_compl += len;
pkts_compl++;
txq->sw_tx_cons++;
}
netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl);
}
static void qede_empty_tx_queues(struct qede_dev *edev)
{
int i;
for_each_queue(i)
if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
int cos;
for_each_cos_in_txq(edev, cos) {
struct qede_fastpath *fp;
fp = &edev->fp_array[i];
qede_empty_tx_queue(edev,
&fp->txq[cos]);
}
}
}
/* This function inits fp content and resets the SB, RXQ and TXQ structures */
static void qede_init_fp(struct qede_dev *edev)
{
@ -2053,6 +2169,7 @@ out:
enum qede_unload_mode {
QEDE_UNLOAD_NORMAL,
QEDE_UNLOAD_RECOVERY,
};
static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
@ -2068,7 +2185,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
edev->state = QEDE_STATE_CLOSED;
if (mode != QEDE_UNLOAD_RECOVERY)
edev->state = QEDE_STATE_CLOSED;
qede_rdma_dev_event_close(edev);
@ -2076,17 +2194,20 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
netif_tx_disable(edev->ndev);
netif_carrier_off(edev->ndev);
/* Reset the link */
memset(&link_params, 0, sizeof(link_params));
link_params.link_up = false;
edev->ops->common->set_link(edev->cdev, &link_params);
rc = qede_stop_queues(edev);
if (rc) {
qede_sync_free_irqs(edev);
goto out;
}
if (mode != QEDE_UNLOAD_RECOVERY) {
/* Reset the link */
memset(&link_params, 0, sizeof(link_params));
link_params.link_up = false;
edev->ops->common->set_link(edev->cdev, &link_params);
DP_INFO(edev, "Stopped Queues\n");
rc = qede_stop_queues(edev);
if (rc) {
qede_sync_free_irqs(edev);
goto out;
}
DP_INFO(edev, "Stopped Queues\n");
}
qede_vlan_mark_nonconfigured(edev);
edev->ops->fastpath_stop(edev->cdev);
@ -2102,18 +2223,26 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
qede_napi_disable_remove(edev);
if (mode == QEDE_UNLOAD_RECOVERY)
qede_empty_tx_queues(edev);
qede_free_mem_load(edev);
qede_free_fp_array(edev);
out:
if (!is_locked)
__qede_unlock(edev);
if (mode != QEDE_UNLOAD_RECOVERY)
DP_NOTICE(edev, "Link is down\n");
DP_INFO(edev, "Ending qede unload\n");
}
enum qede_load_mode {
QEDE_LOAD_NORMAL,
QEDE_LOAD_RELOAD,
QEDE_LOAD_RECOVERY,
};
static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
@ -2293,6 +2422,77 @@ static void qede_link_update(void *dev, struct qed_link_output *link)
}
}
static void qede_schedule_recovery_handler(void *dev)
{
struct qede_dev *edev = dev;
if (edev->state == QEDE_STATE_RECOVERY) {
DP_NOTICE(edev,
"Avoid scheduling a recovery handling since already in recovery state\n");
return;
}
set_bit(QEDE_SP_RECOVERY, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task, 0);
DP_INFO(edev, "Scheduled a recovery handler\n");
}
static void qede_recovery_failed(struct qede_dev *edev)
{
netdev_err(edev->ndev, "Recovery handling has failed. Power cycle is needed.\n");
netif_device_detach(edev->ndev);
if (edev->cdev)
edev->ops->common->set_power_state(edev->cdev, PCI_D3hot);
}
static void qede_recovery_handler(struct qede_dev *edev)
{
u32 curr_state = edev->state;
int rc;
DP_NOTICE(edev, "Starting a recovery process\n");
/* No need to acquire first the qede_lock since is done by qede_sp_task
* before calling this function.
*/
edev->state = QEDE_STATE_RECOVERY;
edev->ops->common->recovery_prolog(edev->cdev);
if (curr_state == QEDE_STATE_OPEN)
qede_unload(edev, QEDE_UNLOAD_RECOVERY, true);
__qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY);
rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level,
IS_VF(edev), QEDE_PROBE_RECOVERY);
if (rc) {
edev->cdev = NULL;
goto err;
}
if (curr_state == QEDE_STATE_OPEN) {
rc = qede_load(edev, QEDE_LOAD_RECOVERY, true);
if (rc)
goto err;
qede_config_rx_mode(edev->ndev);
udp_tunnel_get_rx_info(edev->ndev);
}
edev->state = curr_state;
DP_NOTICE(edev, "Recovery handling is done\n");
return;
err:
qede_recovery_failed(edev);
}
static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
{
struct netdev_queue *netdev_txq;

View File

@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev)
if (!qedr_drv)
return;
/* Leftovers from previous error recovery */
edev->rdma_info.exp_recovery = false;
edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
edev->ndev);
}
@ -87,21 +89,26 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev)
destroy_workqueue(edev->rdma_info.rdma_wq);
}
int qede_rdma_dev_add(struct qede_dev *edev)
int qede_rdma_dev_add(struct qede_dev *edev, enum qede_rdma_probe_mode mode)
{
int rc = 0;
int rc;
if (qede_rdma_supported(edev)) {
rc = qede_rdma_create_wq(edev);
if (rc)
return rc;
if (!qede_rdma_supported(edev))
return 0;
INIT_LIST_HEAD(&edev->rdma_info.entry);
mutex_lock(&qedr_dev_list_lock);
list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
_qede_rdma_dev_add(edev);
mutex_unlock(&qedr_dev_list_lock);
}
/* Cannot start qedr while recovering since it wasn't fully stopped */
if (mode == QEDE_RDMA_PROBE_RECOVERY)
return 0;
rc = qede_rdma_create_wq(edev);
if (rc)
return rc;
INIT_LIST_HEAD(&edev->rdma_info.entry);
mutex_lock(&qedr_dev_list_lock);
list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
_qede_rdma_dev_add(edev);
mutex_unlock(&qedr_dev_list_lock);
return rc;
}
@ -110,19 +117,31 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev)
{
if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
qedr_drv->remove(edev->rdma_info.qedr_dev);
edev->rdma_info.qedr_dev = NULL;
}
void qede_rdma_dev_remove(struct qede_dev *edev)
void qede_rdma_dev_remove(struct qede_dev *edev,
enum qede_rdma_remove_mode mode)
{
if (!qede_rdma_supported(edev))
return;
qede_rdma_destroy_wq(edev);
mutex_lock(&qedr_dev_list_lock);
_qede_rdma_dev_remove(edev);
list_del(&edev->rdma_info.entry);
mutex_unlock(&qedr_dev_list_lock);
/* Cannot remove qedr while recovering since it wasn't fully stopped */
if (mode == QEDE_RDMA_REMOVE_NORMAL) {
qede_rdma_destroy_wq(edev);
mutex_lock(&qedr_dev_list_lock);
if (!edev->rdma_info.exp_recovery)
_qede_rdma_dev_remove(edev);
edev->rdma_info.qedr_dev = NULL;
list_del(&edev->rdma_info.entry);
mutex_unlock(&qedr_dev_list_lock);
} else {
if (!edev->rdma_info.exp_recovery) {
mutex_lock(&qedr_dev_list_lock);
_qede_rdma_dev_remove(edev);
mutex_unlock(&qedr_dev_list_lock);
}
edev->rdma_info.exp_recovery = true;
}
}
static void _qede_rdma_dev_open(struct qede_dev *edev)
@ -204,7 +223,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv)
mutex_lock(&qedr_dev_list_lock);
list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
if (edev->rdma_info.qedr_dev)
/* If device has experienced recovery it was already removed */
if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery)
_qede_rdma_dev_remove(edev);
}
qedr_drv = NULL;
@ -284,6 +304,10 @@ static void qede_rdma_add_event(struct qede_dev *edev,
{
struct qede_rdma_event_work *event_node;
/* If a recovery was experienced avoid adding the event */
if (edev->rdma_info.exp_recovery)
return;
if (!edev->rdma_info.qedr_dev)
return;

View File

@ -55,6 +55,16 @@ struct qede_rdma_event_work {
enum qede_rdma_event event;
};
enum qede_rdma_probe_mode {
QEDE_RDMA_PROBE_NORMAL,
QEDE_RDMA_PROBE_RECOVERY,
};
enum qede_rdma_remove_mode {
QEDE_RDMA_REMOVE_NORMAL,
QEDE_RDMA_REMOVE_RECOVERY,
};
struct qedr_driver {
unsigned char name[32];
@ -74,21 +84,24 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv);
bool qede_rdma_supported(struct qede_dev *dev);
#if IS_ENABLED(CONFIG_QED_RDMA)
int qede_rdma_dev_add(struct qede_dev *dev);
int qede_rdma_dev_add(struct qede_dev *dev, enum qede_rdma_probe_mode mode);
void qede_rdma_dev_event_open(struct qede_dev *dev);
void qede_rdma_dev_event_close(struct qede_dev *dev);
void qede_rdma_dev_remove(struct qede_dev *dev);
void qede_rdma_dev_remove(struct qede_dev *dev,
enum qede_rdma_remove_mode mode);
void qede_rdma_event_changeaddr(struct qede_dev *edr);
#else
static inline int qede_rdma_dev_add(struct qede_dev *dev)
static inline int qede_rdma_dev_add(struct qede_dev *dev,
enum qede_rdma_probe_mode mode)
{
return 0;
}
static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {}
static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {}
static inline void qede_rdma_dev_remove(struct qede_dev *dev) {}
static inline void qede_rdma_dev_remove(struct qede_dev *dev,
enum qede_rdma_remove_mode mode) {}
static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {}
#endif
#endif