[PATCH] s390: lcs driver bug fixes and improvements [1/2]

Several problems occurred with the lcs device driver:
	 - device not operational anymore after cable pull/plug-in.
	 - unpredictable results occurred, e.g. kernel panic
	   using cards of type QD8F.
	 - STOPLAN and delete multicast address commands
	   were not properly recognized by the OSA card under heavy network workload.
	 - channel/device error checks were missing in the interrupt handler.
	To fix all problems at once, recovery of lcs devices has been improved.
	Missing error checks in the lcs interrupt handler have been added.
	Once a hardware problem occurs, lcs will now recover the device properly.

Signed-off-by: Frank Pavlic <fpavlic@de.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
This commit is contained in:
Klaus Wacker 2006-05-24 09:51:17 +02:00 committed by Jeff Garzik
parent ba1aa084d6
commit 27eb5ac8f0
1 changed files with 171 additions and 170 deletions

View File

@ -68,6 +68,7 @@ static void lcs_tasklet(unsigned long);
static void lcs_start_kernel_thread(struct lcs_card *card);
static void lcs_get_frames_cb(struct lcs_channel *, struct lcs_buffer *);
static int lcs_send_delipm(struct lcs_card *, struct lcs_ipm_list *);
static int lcs_recovery(void *ptr);
/**
* Debug Facility Stuff
@ -429,12 +430,6 @@ lcs_setup_card(struct lcs_card *card)
card->tx_buffer = NULL;
card->tx_emitted = 0;
/* Initialize kernel thread task used for LGW commands. */
INIT_WORK(&card->kernel_thread_starter,
(void *)lcs_start_kernel_thread,card);
card->thread_start_mask = 0;
card->thread_allowed_mask = 0;
card->thread_running_mask = 0;
init_waitqueue_head(&card->wait_q);
spin_lock_init(&card->lock);
spin_lock_init(&card->ipm_lock);
@ -675,8 +670,9 @@ lcs_ready_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer)
int index, rc;
LCS_DBF_TEXT(5, trace, "rdybuff");
BUG_ON(buffer->state != BUF_STATE_LOCKED &&
buffer->state != BUF_STATE_PROCESSED);
if (buffer->state != BUF_STATE_LOCKED &&
buffer->state != BUF_STATE_PROCESSED)
BUG();
spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
buffer->state = BUF_STATE_READY;
index = buffer - channel->iob;
@ -700,7 +696,8 @@ __lcs_processed_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer)
int index, prev, next;
LCS_DBF_TEXT(5, trace, "prcsbuff");
BUG_ON(buffer->state != BUF_STATE_READY);
if (buffer->state != BUF_STATE_READY)
BUG();
buffer->state = BUF_STATE_PROCESSED;
index = buffer - channel->iob;
prev = (index - 1) & (LCS_NUM_BUFFS - 1);
@ -732,8 +729,9 @@ lcs_release_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer)
unsigned long flags;
LCS_DBF_TEXT(5, trace, "relbuff");
BUG_ON(buffer->state != BUF_STATE_LOCKED &&
buffer->state != BUF_STATE_PROCESSED);
if (buffer->state != BUF_STATE_LOCKED &&
buffer->state != BUF_STATE_PROCESSED)
BUG();
spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
buffer->state = BUF_STATE_EMPTY;
spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
@ -1147,8 +1145,6 @@ list_modified:
list_add_tail(&ipm->list, &card->ipm_list);
}
spin_unlock_irqrestore(&card->ipm_lock, flags);
if (card->state == DEV_STATE_UP)
netif_wake_queue(card->dev);
}
/**
@ -1231,17 +1227,17 @@ lcs_set_mc_addresses(struct lcs_card *card, struct in_device *in4_dev)
if (ipm != NULL)
continue; /* Address already in list. */
ipm = (struct lcs_ipm_list *)
kmalloc(sizeof(struct lcs_ipm_list), GFP_ATOMIC);
kzalloc(sizeof(struct lcs_ipm_list), GFP_ATOMIC);
if (ipm == NULL) {
PRINT_INFO("Not enough memory to add "
"new multicast entry!\n");
break;
}
memset(ipm, 0, sizeof(struct lcs_ipm_list));
memcpy(&ipm->ipm.mac_addr, buf, LCS_MAC_LENGTH);
ipm->ipm.ip_addr = im4->multiaddr;
ipm->ipm_state = LCS_IPM_STATE_SET_REQUIRED;
spin_lock_irqsave(&card->ipm_lock, flags);
LCS_DBF_HEX(2,trace,&ipm->ipm.ip_addr,4);
list_add(&ipm->list, &card->ipm_list);
spin_unlock_irqrestore(&card->ipm_lock, flags);
}
@ -1269,7 +1265,15 @@ lcs_register_mc_addresses(void *data)
read_unlock(&in4_dev->mc_list_lock);
in_dev_put(in4_dev);
netif_carrier_off(card->dev);
netif_tx_disable(card->dev);
wait_event(card->write.wait_q,
(card->write.state != CH_STATE_RUNNING));
lcs_fix_multicast_list(card);
if (card->state == DEV_STATE_UP) {
netif_carrier_on(card->dev);
netif_wake_queue(card->dev);
}
out:
lcs_clear_thread_running_bit(card, LCS_SET_MC_THREAD);
return 0;
@ -1318,6 +1322,53 @@ lcs_check_irb_error(struct ccw_device *cdev, struct irb *irb)
return PTR_ERR(irb);
}
/**
 * Classify the channel and device status found in an irb.
 *
 * Returns 1 when one of the channel check bits tested below is set in
 * cstat, or when a unit check carries the resetting-event sense bit or
 * sense data not otherwise handled here. Returns 0 for a command reject,
 * for a unit check whose sense bytes 0-3 are all zero, and when no unit
 * check is flagged. lcs_irq schedules recovery on a non-zero result.
 * The cdev argument is not used by this function.
 */
static int
lcs_get_problem(struct ccw_device *cdev, struct irb *irb)
{
int dstat, cstat;
char *sense;
sense = (char *) irb->ecw;
cstat = irb->scsw.cstat;
dstat = irb->scsw.dstat;
/* Any of these subchannel check conditions counts as a problem. */
if (cstat & (SCHN_STAT_CHN_CTRL_CHK | SCHN_STAT_INTF_CTRL_CHK |
SCHN_STAT_CHN_DATA_CHK | SCHN_STAT_CHAIN_CHECK |
SCHN_STAT_PROT_CHECK | SCHN_STAT_PROG_CHECK)) {
LCS_DBF_TEXT(2, trace, "CGENCHK");
return 1;
}
if (dstat & DEV_STAT_UNIT_CHECK) {
/* The resetting event is tested first, so it wins over command reject. */
if (sense[LCS_SENSE_BYTE_1] &
LCS_SENSE_RESETTING_EVENT) {
LCS_DBF_TEXT(2, trace, "REVIND");
return 1;
}
/* A rejected command is traced but not reported as a problem. */
if (sense[LCS_SENSE_BYTE_0] &
LCS_SENSE_CMD_REJECT) {
LCS_DBF_TEXT(2, trace, "CMDREJ");
return 0;
}
/* Unit check with sense bytes 0-3 all zero: traced, not a problem. */
if ((!sense[LCS_SENSE_BYTE_0]) &&
(!sense[LCS_SENSE_BYTE_1]) &&
(!sense[LCS_SENSE_BYTE_2]) &&
(!sense[LCS_SENSE_BYTE_3])) {
LCS_DBF_TEXT(2, trace, "ZEROSEN");
return 0;
}
/* Every other unit check is reported as a problem. */
LCS_DBF_TEXT(2, trace, "DGENCHK");
return 1;
}
return 0;
}
/**
 * Request recovery of an lcs card.
 *
 * Sets the LCS_RECOVERY_THREAD start bit and, when
 * lcs_set_thread_start_bit() returns 0, queues the card's
 * kernel_thread_starter work item.
 * NOTE(review): presumably a non-zero return means the request was refused
 * or is already pending - confirm against lcs_set_thread_start_bit().
 */
void
lcs_schedule_recovery(struct lcs_card *card)
{
LCS_DBF_TEXT(2, trace, "startrec");
if (!lcs_set_thread_start_bit(card, LCS_RECOVERY_THREAD))
schedule_work(&card->kernel_thread_starter);
}
/**
* IRQ Handler for LCS channels
@ -1327,7 +1378,8 @@ lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
{
struct lcs_card *card;
struct lcs_channel *channel;
int index;
int rc, index;
int cstat, dstat;
if (lcs_check_irb_error(cdev, irb))
return;
@ -1338,10 +1390,23 @@ lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
else
channel = &card->write;
cstat = irb->scsw.cstat;
dstat = irb->scsw.dstat;
LCS_DBF_TEXT_(5, trace, "Rint%s",cdev->dev.bus_id);
LCS_DBF_TEXT_(5, trace, "%4x%4x",irb->scsw.cstat, irb->scsw.dstat);
LCS_DBF_TEXT_(5, trace, "%4x%4x",irb->scsw.fctl, irb->scsw.actl);
/* Check for channel and device errors presented */
rc = lcs_get_problem(cdev, irb);
if (rc || (dstat & DEV_STAT_UNIT_EXCEP)) {
PRINT_WARN("check on device %s, dstat=0x%X, cstat=0x%X \n",
cdev->dev.bus_id, dstat, cstat);
if (rc) {
lcs_schedule_recovery(card);
wake_up(&card->wait_q);
return;
}
}
/* How far in the ccw chain have we processed? */
if ((channel->state != CH_STATE_INIT) &&
(irb->scsw.fctl & SCSW_FCTL_START_FUNC)) {
@ -1367,7 +1432,6 @@ lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
else if (irb->scsw.actl & SCSW_ACTL_SUSPENDED)
/* CCW execution stopped on a suspend bit. */
channel->state = CH_STATE_SUSPENDED;
if (irb->scsw.fctl & SCSW_FCTL_HALT_FUNC) {
if (irb->scsw.cc != 0) {
ccw_device_halt(channel->ccwdev, (addr_t) channel);
@ -1376,7 +1440,6 @@ lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
/* The channel has been stopped by halt_IO. */
channel->state = CH_STATE_HALTED;
}
if (irb->scsw.fctl & SCSW_FCTL_CLEAR_FUNC) {
channel->state = CH_STATE_CLEARED;
}
@ -1452,7 +1515,7 @@ lcs_txbuffer_cb(struct lcs_channel *channel, struct lcs_buffer *buffer)
lcs_release_buffer(channel, buffer);
card = (struct lcs_card *)
((char *) channel - offsetof(struct lcs_card, write));
if (netif_queue_stopped(card->dev))
if (netif_queue_stopped(card->dev) && netif_carrier_ok(card->dev))
netif_wake_queue(card->dev);
spin_lock(&card->lock);
card->tx_emitted--;
@ -1488,6 +1551,10 @@ __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb,
card->stats.tx_carrier_errors++;
return 0;
}
if (skb->protocol == htons(ETH_P_IPV6)) {
dev_kfree_skb(skb);
return 0;
}
netif_stop_queue(card->dev);
spin_lock(&card->lock);
if (card->tx_buffer != NULL &&
@ -1632,30 +1699,6 @@ lcs_detect(struct lcs_card *card)
return rc;
}
/**
 * reset card
 *
 * Calls lcs_detect() up to 10 times, sleeping 3000 ms after each failed
 * attempt. On the first success the net queue is woken, the card state is
 * set to DEV_STATE_UP and 0 is returned. If all attempts fail an error is
 * logged and -EIO is returned.
 */
static int
lcs_resetcard(struct lcs_card *card)
{
int retries;
LCS_DBF_TEXT(2, trace, "rescard");
for (retries = 0; retries < 10; retries++) {
if (lcs_detect(card) == 0) {
netif_wake_queue(card->dev);
card->state = DEV_STATE_UP;
PRINT_INFO("LCS device %s successfully restarted!\n",
card->dev->name);
return 0;
}
/* Pause before the next detection attempt. */
msleep(3000);
}
PRINT_ERR("Error in Reseting LCS card!\n");
return -EIO;
}
/**
* LCS Stop card
*/
@ -1679,111 +1722,6 @@ lcs_stopcard(struct lcs_card *card)
return rc;
}
/**
 * LGW initiated commands
 */
/**
 * Kernel thread body for a STARTLAN request.
 *
 * data is the struct lcs_card pointer. The thread returns immediately when
 * lcs_do_run_thread() refuses LCS_STARTLAN_THREAD. Otherwise it stops the
 * net queue, calls lcs_startlan() and, on success, wakes the queue and
 * sets the card state to DEV_STATE_UP. Always returns 0.
 * NOTE(review): card->dev is checked for NULL before netif_stop_queue()
 * but dereferenced unconditionally afterwards.
 */
static int
lcs_lgw_startlan_thread(void *data)
{
struct lcs_card *card;
card = (struct lcs_card *) data;
daemonize("lgwstpln");
if (!lcs_do_run_thread(card, LCS_STARTLAN_THREAD))
return 0;
LCS_DBF_TEXT(4, trace, "lgwstpln");
if (card->dev)
netif_stop_queue(card->dev);
if (lcs_startlan(card) == 0) {
netif_wake_queue(card->dev);
card->state = DEV_STATE_UP;
PRINT_INFO("LCS Startlan for device %s succeeded!\n",
card->dev->name);
} else
PRINT_ERR("LCS Startlan for device %s failed!\n",
card->dev->name);
lcs_clear_thread_running_bit(card, LCS_STARTLAN_THREAD);
return 0;
}
/**
 * Send startup command initiated by Lan Gateway
 *
 * data is the struct lcs_card pointer. The thread returns immediately when
 * lcs_do_run_thread() refuses LCS_STARTUP_THREAD. Otherwise it stops the
 * net queue and sends the startup command with LCS_INITIATOR_LGW. If that
 * fails the card is reset; a successful reset skips the startlan step,
 * while a failed reset still falls through to lcs_startlan(). The outcome
 * is only logged; the function always returns 0.
 */
static int
lcs_lgw_startup_thread(void *data)
{
int rc;
struct lcs_card *card;
card = (struct lcs_card *) data;
daemonize("lgwstaln");
if (!lcs_do_run_thread(card, LCS_STARTUP_THREAD))
return 0;
LCS_DBF_TEXT(4, trace, "lgwstaln");
if (card->dev)
netif_stop_queue(card->dev);
rc = lcs_send_startup(card, LCS_INITIATOR_LGW);
if (rc != 0) {
PRINT_ERR("Startup for LCS device %s initiated " \
"by LGW failed!\nReseting card ...\n",
card->dev->name);
/* do a card reset */
rc = lcs_resetcard(card);
/* lcs_resetcard() already set DEV_STATE_UP on success. */
if (rc == 0)
goto Done;
}
rc = lcs_startlan(card);
if (rc == 0) {
netif_wake_queue(card->dev);
card->state = DEV_STATE_UP;
}
Done:
if (rc == 0)
PRINT_INFO("LCS Startup for device %s succeeded!\n",
card->dev->name);
else
PRINT_ERR("LCS Startup for device %s failed!\n",
card->dev->name);
lcs_clear_thread_running_bit(card, LCS_STARTUP_THREAD);
return 0;
}
/**
 * send stoplan command initiated by Lan Gateway
 *
 * data is the struct lcs_card pointer. The thread returns 0 immediately
 * when lcs_do_run_thread() refuses LCS_STOPLAN_THREAD. Otherwise it stops
 * the net queue, sends the stoplan command with LCS_INITIATOR_LGW and logs
 * the result. The card is then reset regardless of that result; if the
 * reset fails the card is stopped. Returns the result of the reset, or of
 * lcs_stopcard() when the reset failed.
 */
static int
lcs_lgw_stoplan_thread(void *data)
{
struct lcs_card *card;
int rc;
card = (struct lcs_card *) data;
daemonize("lgwstop");
if (!lcs_do_run_thread(card, LCS_STOPLAN_THREAD))
return 0;
LCS_DBF_TEXT(4, trace, "lgwstop");
if (card->dev)
netif_stop_queue(card->dev);
if (lcs_send_stoplan(card, LCS_INITIATOR_LGW) == 0)
PRINT_INFO("Stoplan for %s initiated by LGW succeeded!\n",
card->dev->name);
else
PRINT_ERR("Stoplan %s initiated by LGW failed!\n",
card->dev->name);
/*Try to reset the card, stop it on failure */
rc = lcs_resetcard(card);
if (rc != 0)
rc = lcs_stopcard(card);
lcs_clear_thread_running_bit(card, LCS_STOPLAN_THREAD);
return rc;
}
/**
* Kernel Thread helper functions for LGW initiated commands
*/
@ -1791,15 +1729,12 @@ static void
lcs_start_kernel_thread(struct lcs_card *card)
{
LCS_DBF_TEXT(5, trace, "krnthrd");
if (lcs_do_start_thread(card, LCS_STARTUP_THREAD))
kernel_thread(lcs_lgw_startup_thread, (void *) card, SIGCHLD);
if (lcs_do_start_thread(card, LCS_STARTLAN_THREAD))
kernel_thread(lcs_lgw_startlan_thread, (void *) card, SIGCHLD);
if (lcs_do_start_thread(card, LCS_STOPLAN_THREAD))
kernel_thread(lcs_lgw_stoplan_thread, (void *) card, SIGCHLD);
if (lcs_do_start_thread(card, LCS_RECOVERY_THREAD))
kernel_thread(lcs_recovery, (void *) card, SIGCHLD);
#ifdef CONFIG_IP_MULTICAST
if (lcs_do_start_thread(card, LCS_SET_MC_THREAD))
kernel_thread(lcs_register_mc_addresses, (void *) card, SIGCHLD);
kernel_thread(lcs_register_mc_addresses,
(void *) card, SIGCHLD);
#endif
}
@ -1813,19 +1748,14 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd)
if (cmd->initiator == LCS_INITIATOR_LGW) {
switch(cmd->cmd_code) {
case LCS_CMD_STARTUP:
if (!lcs_set_thread_start_bit(card,
LCS_STARTUP_THREAD))
schedule_work(&card->kernel_thread_starter);
break;
case LCS_CMD_STARTLAN:
if (!lcs_set_thread_start_bit(card,
LCS_STARTLAN_THREAD))
schedule_work(&card->kernel_thread_starter);
lcs_schedule_recovery(card);
break;
case LCS_CMD_STOPLAN:
if (!lcs_set_thread_start_bit(card,
LCS_STOPLAN_THREAD))
schedule_work(&card->kernel_thread_starter);
PRINT_WARN("Stoplan for %s initiated by LGW.\n",
card->dev->name);
if (card->dev)
netif_carrier_off(card->dev);
break;
default:
PRINT_INFO("UNRECOGNIZED LGW COMMAND\n");
@ -1941,8 +1871,11 @@ lcs_stop_device(struct net_device *dev)
LCS_DBF_TEXT(2, trace, "stopdev");
card = (struct lcs_card *) dev->priv;
netif_stop_queue(dev);
netif_carrier_off(dev);
netif_tx_disable(dev);
dev->flags &= ~IFF_UP;
wait_event(card->write.wait_q,
(card->write.state != CH_STATE_RUNNING));
rc = lcs_stopcard(card);
if (rc)
PRINT_ERR("Try it again!\n ");
@ -1968,6 +1901,7 @@ lcs_open_device(struct net_device *dev)
} else {
dev->flags |= IFF_UP;
netif_carrier_on(dev);
netif_wake_queue(dev);
card->state = DEV_STATE_UP;
}
@ -2059,10 +1993,31 @@ lcs_timeout_store (struct device *dev, struct device_attribute *attr, const char
DEVICE_ATTR(lancmd_timeout, 0644, lcs_timeout_show, lcs_timeout_store);
/**
 * Store handler for the "recover" device attribute.
 *
 * Parses buf as a hexadecimal number and schedules recovery of the card
 * when the value is 1; any other value is accepted and ignored.
 * Returns -EINVAL when the device has no driver data, -EPERM when the card
 * is not in DEV_STATE_UP, and count otherwise.
 */
static ssize_t
lcs_dev_recover_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct lcs_card *card = dev->driver_data;
char *tmp;
int i;
if (!card)
return -EINVAL;
if (card->state != DEV_STATE_UP)
return -EPERM;
/* tmp only receives the end pointer; it is not inspected afterwards. */
i = simple_strtoul(buf, &tmp, 16);
if (i == 1)
lcs_schedule_recovery(card);
return count;
}
/* Mode 0200 with a NULL show handler: the attribute can only be written. */
static DEVICE_ATTR(recover, 0200, NULL, lcs_dev_recover_store);
/* NULL-terminated list of the lcs device attributes, including "recover". */
static struct attribute * lcs_attrs[] = {
&dev_attr_portno.attr,
&dev_attr_type.attr,
&dev_attr_lancmd_timeout.attr,
&dev_attr_recover.attr,
NULL,
};
@ -2099,6 +2054,12 @@ lcs_probe_device(struct ccwgroup_device *ccwgdev)
ccwgdev->dev.driver_data = card;
ccwgdev->cdev[0]->handler = lcs_irq;
ccwgdev->cdev[1]->handler = lcs_irq;
card->gdev = ccwgdev;
INIT_WORK(&card->kernel_thread_starter,
(void *) lcs_start_kernel_thread, card);
card->thread_start_mask = 0;
card->thread_allowed_mask = 0;
card->thread_running_mask = 0;
return 0;
}
@ -2200,6 +2161,7 @@ netdev_out:
if (recover_state == DEV_STATE_RECOVER) {
lcs_set_multicast_list(card->dev);
card->dev->flags |= IFF_UP;
netif_carrier_on(card->dev);
netif_wake_queue(card->dev);
card->state = DEV_STATE_UP;
} else {
@ -2229,7 +2191,7 @@ out:
* lcs_shutdown_device, called when setting the group device offline.
*/
static int
lcs_shutdown_device(struct ccwgroup_device *ccwgdev)
__lcs_shutdown_device(struct ccwgroup_device *ccwgdev, int recovery_mode)
{
struct lcs_card *card;
enum lcs_dev_states recover_state;
@ -2239,9 +2201,11 @@ lcs_shutdown_device(struct ccwgroup_device *ccwgdev)
card = (struct lcs_card *)ccwgdev->dev.driver_data;
if (!card)
return -ENODEV;
lcs_set_allowed_threads(card, 0);
if (lcs_wait_for_threads(card, LCS_SET_MC_THREAD))
return -ERESTARTSYS;
if (recovery_mode == 0) {
lcs_set_allowed_threads(card, 0);
if (lcs_wait_for_threads(card, LCS_SET_MC_THREAD))
return -ERESTARTSYS;
}
LCS_DBF_HEX(3, setup, &card, sizeof(void*));
recover_state = card->state;
@ -2256,6 +2220,43 @@ lcs_shutdown_device(struct ccwgroup_device *ccwgdev)
return 0;
}
/**
 * Shut down the group device outside of recovery.
 *
 * Thin wrapper that calls __lcs_shutdown_device() with recovery_mode 0 and
 * returns its result.
 */
static int
lcs_shutdown_device(struct ccwgroup_device *ccwgdev)
{
return __lcs_shutdown_device(ccwgdev, 0);
}
/**
 * drive lcs recovery after startup and startlan initiated by Lan Gateway
 *
 * Kernel thread body; ptr is the struct lcs_card pointer. The thread
 * returns immediately when lcs_do_run_thread() refuses
 * LCS_RECOVERY_THREAD. Otherwise it shuts the group device down in
 * recovery mode and sets it up again with lcs_new_device(), logging
 * whether that succeeded. Always returns 0.
 */
static int
lcs_recovery(void *ptr)
{
struct lcs_card *card;
struct ccwgroup_device *gdev;
int rc;
card = (struct lcs_card *) ptr;
daemonize("lcs_recover");
LCS_DBF_TEXT(4, trace, "recover1");
if (!lcs_do_run_thread(card, LCS_RECOVERY_THREAD))
return 0;
LCS_DBF_TEXT(4, trace, "recover2");
gdev = card->gdev;
PRINT_WARN("Recovery of device %s started...\n", gdev->dev.bus_id);
/*
 * The shutdown result is overwritten by the next assignment, so only the
 * outcome of lcs_new_device() decides which message is printed.
 */
rc = __lcs_shutdown_device(gdev, 1);
rc = lcs_new_device(gdev);
if (!rc)
PRINT_INFO("Device %s successfully recovered!\n",
card->dev->name);
else
PRINT_INFO("Device %s could not be recovered!\n",
card->dev->name);
lcs_clear_thread_running_bit(card, LCS_RECOVERY_THREAD);
return 0;
}
/**
* lcs_remove_device, free buffers and card
*/