[SCSI] improved eh timeout handler
When a command runs into a timeout we need to send an 'ABORT TASK' TMF. This is typically done by the 'eh_abort_handler' LLDD callback. Conceptually, however, this function is a normal SCSI command, so there is no need to enter the error handler. This patch implements a new scsi_abort_command() function which invokes an asynchronous function scmd_eh_abort_handler() to abort the commands via the usual 'eh_abort_handler'. If the abort succeeds the command is either retried or terminated, depending on the number of allowed retries. However, 'eh_eflags' records the abort, so if the retry fails again the command is pushed onto the error handler without trying to abort it (again); it will be cleaned up by SCSI EH. [hare: fixed stray switch detected by smatch] Signed-off-by: Hannes Reinecke <hare@suse.de> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
This commit is contained in:
parent
2451079bc2
commit
e494f6a728
|
@ -169,6 +169,7 @@ void scsi_remove_host(struct Scsi_Host *shost)
|
||||||
spin_unlock_irqrestore(shost->host_lock, flags);
|
spin_unlock_irqrestore(shost->host_lock, flags);
|
||||||
|
|
||||||
scsi_autopm_get_host(shost);
|
scsi_autopm_get_host(shost);
|
||||||
|
flush_workqueue(shost->tmf_work_q);
|
||||||
scsi_forget_host(shost);
|
scsi_forget_host(shost);
|
||||||
mutex_unlock(&shost->scan_mutex);
|
mutex_unlock(&shost->scan_mutex);
|
||||||
scsi_proc_host_rm(shost);
|
scsi_proc_host_rm(shost);
|
||||||
|
@ -294,6 +295,8 @@ static void scsi_host_dev_release(struct device *dev)
|
||||||
|
|
||||||
scsi_proc_hostdir_rm(shost->hostt);
|
scsi_proc_hostdir_rm(shost->hostt);
|
||||||
|
|
||||||
|
if (shost->tmf_work_q)
|
||||||
|
destroy_workqueue(shost->tmf_work_q);
|
||||||
if (shost->ehandler)
|
if (shost->ehandler)
|
||||||
kthread_stop(shost->ehandler);
|
kthread_stop(shost->ehandler);
|
||||||
if (shost->work_q)
|
if (shost->work_q)
|
||||||
|
@ -360,7 +363,6 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
|
||||||
INIT_LIST_HEAD(&shost->eh_cmd_q);
|
INIT_LIST_HEAD(&shost->eh_cmd_q);
|
||||||
INIT_LIST_HEAD(&shost->starved_list);
|
INIT_LIST_HEAD(&shost->starved_list);
|
||||||
init_waitqueue_head(&shost->host_wait);
|
init_waitqueue_head(&shost->host_wait);
|
||||||
|
|
||||||
mutex_init(&shost->scan_mutex);
|
mutex_init(&shost->scan_mutex);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -444,9 +446,19 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
|
||||||
goto fail_kfree;
|
goto fail_kfree;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
shost->tmf_work_q = alloc_workqueue("scsi_tmf_%d",
|
||||||
|
WQ_UNBOUND | WQ_MEM_RECLAIM,
|
||||||
|
1, shost->host_no);
|
||||||
|
if (!shost->tmf_work_q) {
|
||||||
|
printk(KERN_WARNING "scsi%d: failed to create tmf workq\n",
|
||||||
|
shost->host_no);
|
||||||
|
goto fail_kthread;
|
||||||
|
}
|
||||||
scsi_proc_hostdir_add(shost->hostt);
|
scsi_proc_hostdir_add(shost->hostt);
|
||||||
return shost;
|
return shost;
|
||||||
|
|
||||||
|
fail_kthread:
|
||||||
|
kthread_stop(shost->ehandler);
|
||||||
fail_kfree:
|
fail_kfree:
|
||||||
kfree(shost);
|
kfree(shost);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
|
@ -297,6 +297,7 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask)
|
||||||
|
|
||||||
cmd->device = dev;
|
cmd->device = dev;
|
||||||
INIT_LIST_HEAD(&cmd->list);
|
INIT_LIST_HEAD(&cmd->list);
|
||||||
|
INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
|
||||||
spin_lock_irqsave(&dev->list_lock, flags);
|
spin_lock_irqsave(&dev->list_lock, flags);
|
||||||
list_add_tail(&cmd->list, &dev->cmd_list);
|
list_add_tail(&cmd->list, &dev->cmd_list);
|
||||||
spin_unlock_irqrestore(&dev->list_lock, flags);
|
spin_unlock_irqrestore(&dev->list_lock, flags);
|
||||||
|
@ -353,6 +354,8 @@ void scsi_put_command(struct scsi_cmnd *cmd)
|
||||||
list_del_init(&cmd->list);
|
list_del_init(&cmd->list);
|
||||||
spin_unlock_irqrestore(&cmd->device->list_lock, flags);
|
spin_unlock_irqrestore(&cmd->device->list_lock, flags);
|
||||||
|
|
||||||
|
cancel_delayed_work(&cmd->abort_work);
|
||||||
|
|
||||||
__scsi_put_command(cmd->device->host, cmd, &sdev->sdev_gendev);
|
__scsi_put_command(cmd->device->host, cmd, &sdev->sdev_gendev);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(scsi_put_command);
|
EXPORT_SYMBOL(scsi_put_command);
|
||||||
|
|
|
@ -53,6 +53,8 @@ static void scsi_eh_done(struct scsi_cmnd *scmd);
|
||||||
#define HOST_RESET_SETTLE_TIME (10)
|
#define HOST_RESET_SETTLE_TIME (10)
|
||||||
|
|
||||||
static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
|
static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
|
||||||
|
static int scsi_try_to_abort_cmd(struct scsi_host_template *,
|
||||||
|
struct scsi_cmnd *);
|
||||||
|
|
||||||
/* called with shost->host_lock held */
|
/* called with shost->host_lock held */
|
||||||
void scsi_eh_wakeup(struct Scsi_Host *shost)
|
void scsi_eh_wakeup(struct Scsi_Host *shost)
|
||||||
|
@ -99,6 +101,116 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* scmd_eh_abort_handler - Handle command aborts
|
||||||
|
* @work: command to be aborted.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
scmd_eh_abort_handler(struct work_struct *work)
|
||||||
|
{
|
||||||
|
struct scsi_cmnd *scmd =
|
||||||
|
container_of(work, struct scsi_cmnd, abort_work.work);
|
||||||
|
struct scsi_device *sdev = scmd->device;
|
||||||
|
unsigned long flags;
|
||||||
|
int rtn;
|
||||||
|
|
||||||
|
spin_lock_irqsave(sdev->host->host_lock, flags);
|
||||||
|
if (scsi_host_eh_past_deadline(sdev->host)) {
|
||||||
|
spin_unlock_irqrestore(sdev->host->host_lock, flags);
|
||||||
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
|
scmd_printk(KERN_INFO, scmd,
|
||||||
|
"scmd %p eh timeout, not aborting\n",
|
||||||
|
scmd));
|
||||||
|
} else {
|
||||||
|
spin_unlock_irqrestore(sdev->host->host_lock, flags);
|
||||||
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
|
scmd_printk(KERN_INFO, scmd,
|
||||||
|
"aborting command %p\n", scmd));
|
||||||
|
rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd);
|
||||||
|
if (rtn == SUCCESS) {
|
||||||
|
scmd->result |= DID_TIME_OUT << 16;
|
||||||
|
if (!scsi_noretry_cmd(scmd) &&
|
||||||
|
(++scmd->retries <= scmd->allowed)) {
|
||||||
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
|
scmd_printk(KERN_WARNING, scmd,
|
||||||
|
"scmd %p retry "
|
||||||
|
"aborted command\n", scmd));
|
||||||
|
scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
|
||||||
|
} else {
|
||||||
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
|
scmd_printk(KERN_WARNING, scmd,
|
||||||
|
"scmd %p finish "
|
||||||
|
"aborted command\n", scmd));
|
||||||
|
scsi_finish_command(scmd);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
|
scmd_printk(KERN_INFO, scmd,
|
||||||
|
"scmd %p abort failed, rtn %d\n",
|
||||||
|
scmd, rtn));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!scsi_eh_scmd_add(scmd, 0)) {
|
||||||
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
|
scmd_printk(KERN_WARNING, scmd,
|
||||||
|
"scmd %p terminate "
|
||||||
|
"aborted command\n", scmd));
|
||||||
|
scmd->result |= DID_TIME_OUT << 16;
|
||||||
|
scsi_finish_command(scmd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* scsi_abort_command - schedule a command abort
|
||||||
|
* @scmd: scmd to abort.
|
||||||
|
*
|
||||||
|
* We only need to abort commands after a command timeout
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
scsi_abort_command(struct scsi_cmnd *scmd)
|
||||||
|
{
|
||||||
|
struct scsi_device *sdev = scmd->device;
|
||||||
|
struct Scsi_Host *shost = sdev->host;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) {
|
||||||
|
/*
|
||||||
|
* Retry after abort failed, escalate to next level.
|
||||||
|
*/
|
||||||
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
|
scmd_printk(KERN_INFO, scmd,
|
||||||
|
"scmd %p previous abort failed\n", scmd));
|
||||||
|
cancel_delayed_work(&scmd->abort_work);
|
||||||
|
return FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Do not try a command abort if
|
||||||
|
* SCSI EH has already started.
|
||||||
|
*/
|
||||||
|
spin_lock_irqsave(shost->host_lock, flags);
|
||||||
|
if (scsi_host_in_recovery(shost)) {
|
||||||
|
spin_unlock_irqrestore(shost->host_lock, flags);
|
||||||
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
|
scmd_printk(KERN_INFO, scmd,
|
||||||
|
"scmd %p not aborting, host in recovery\n",
|
||||||
|
scmd));
|
||||||
|
return FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (shost->eh_deadline && !shost->last_reset)
|
||||||
|
shost->last_reset = jiffies;
|
||||||
|
spin_unlock_irqrestore(shost->host_lock, flags);
|
||||||
|
|
||||||
|
scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED;
|
||||||
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
|
scmd_printk(KERN_INFO, scmd,
|
||||||
|
"scmd %p abort scheduled\n", scmd));
|
||||||
|
queue_delayed_work(shost->tmf_work_q, &scmd->abort_work, HZ / 100);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* scsi_eh_scmd_add - add scsi cmd to error handling.
|
* scsi_eh_scmd_add - add scsi cmd to error handling.
|
||||||
* @scmd: scmd to run eh on.
|
* @scmd: scmd to run eh on.
|
||||||
|
@ -125,6 +237,8 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
|
||||||
shost->last_reset = jiffies;
|
shost->last_reset = jiffies;
|
||||||
|
|
||||||
ret = 1;
|
ret = 1;
|
||||||
|
if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED)
|
||||||
|
eh_flag &= ~SCSI_EH_CANCEL_CMD;
|
||||||
scmd->eh_eflags |= eh_flag;
|
scmd->eh_eflags |= eh_flag;
|
||||||
list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
|
list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
|
||||||
shost->host_failed++;
|
shost->host_failed++;
|
||||||
|
@ -161,6 +275,10 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
|
||||||
else if (host->hostt->eh_timed_out)
|
else if (host->hostt->eh_timed_out)
|
||||||
rtn = host->hostt->eh_timed_out(scmd);
|
rtn = host->hostt->eh_timed_out(scmd);
|
||||||
|
|
||||||
|
if (rtn == BLK_EH_NOT_HANDLED && !host->hostt->no_async_abort)
|
||||||
|
if (scsi_abort_command(scmd) == SUCCESS)
|
||||||
|
return BLK_EH_NOT_HANDLED;
|
||||||
|
|
||||||
scmd->result |= DID_TIME_OUT << 16;
|
scmd->result |= DID_TIME_OUT << 16;
|
||||||
|
|
||||||
if (unlikely(rtn == BLK_EH_NOT_HANDLED &&
|
if (unlikely(rtn == BLK_EH_NOT_HANDLED &&
|
||||||
|
@ -1577,7 +1695,7 @@ static void scsi_eh_offline_sdevs(struct list_head *work_q,
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* scsi_noretry_cmd - determinte if command should be failed fast
|
* scsi_noretry_cmd - determine if command should be failed fast
|
||||||
* @scmd: SCSI cmd to examine.
|
* @scmd: SCSI cmd to examine.
|
||||||
*/
|
*/
|
||||||
int scsi_noretry_cmd(struct scsi_cmnd *scmd)
|
int scsi_noretry_cmd(struct scsi_cmnd *scmd)
|
||||||
|
@ -1585,6 +1703,8 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)
|
||||||
switch (host_byte(scmd->result)) {
|
switch (host_byte(scmd->result)) {
|
||||||
case DID_OK:
|
case DID_OK:
|
||||||
break;
|
break;
|
||||||
|
case DID_TIME_OUT:
|
||||||
|
goto check_type;
|
||||||
case DID_BUS_BUSY:
|
case DID_BUS_BUSY:
|
||||||
return (scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT);
|
return (scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT);
|
||||||
case DID_PARITY:
|
case DID_PARITY:
|
||||||
|
@ -1598,17 +1718,18 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)
|
||||||
return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER);
|
return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (status_byte(scmd->result)) {
|
if (status_byte(scmd->result) != CHECK_CONDITION)
|
||||||
case CHECK_CONDITION:
|
return 0;
|
||||||
|
|
||||||
|
check_type:
|
||||||
/*
|
/*
|
||||||
* assume caller has checked sense and determinted
|
* assume caller has checked sense and determined
|
||||||
* the check condition was retryable.
|
* the check condition was retryable.
|
||||||
*/
|
*/
|
||||||
if (scmd->request->cmd_flags & REQ_FAILFAST_DEV ||
|
if (scmd->request->cmd_flags & REQ_FAILFAST_DEV ||
|
||||||
scmd->request->cmd_type == REQ_TYPE_BLOCK_PC)
|
scmd->request->cmd_type == REQ_TYPE_BLOCK_PC)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
else
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1659,9 +1780,13 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
|
||||||
* looks good. drop through, and check the next byte.
|
* looks good. drop through, and check the next byte.
|
||||||
*/
|
*/
|
||||||
break;
|
break;
|
||||||
|
case DID_ABORT:
|
||||||
|
if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) {
|
||||||
|
scmd->result |= DID_TIME_OUT << 16;
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
case DID_NO_CONNECT:
|
case DID_NO_CONNECT:
|
||||||
case DID_BAD_TARGET:
|
case DID_BAD_TARGET:
|
||||||
case DID_ABORT:
|
|
||||||
/*
|
/*
|
||||||
* note - this means that we just report the status back
|
* note - this means that we just report the status back
|
||||||
* to the top level driver, not that we actually think
|
* to the top level driver, not that we actually think
|
||||||
|
|
|
@ -19,6 +19,7 @@ struct scsi_nl_hdr;
|
||||||
* Scsi Error Handler Flags
|
* Scsi Error Handler Flags
|
||||||
*/
|
*/
|
||||||
#define SCSI_EH_CANCEL_CMD 0x0001 /* Cancel this cmd */
|
#define SCSI_EH_CANCEL_CMD 0x0001 /* Cancel this cmd */
|
||||||
|
#define SCSI_EH_ABORT_SCHEDULED 0x0002 /* Abort has been scheduled */
|
||||||
|
|
||||||
#define SCSI_SENSE_VALID(scmd) \
|
#define SCSI_SENSE_VALID(scmd) \
|
||||||
(((scmd)->sense_buffer[0] & 0x70) == 0x70)
|
(((scmd)->sense_buffer[0] & 0x70) == 0x70)
|
||||||
|
@ -66,6 +67,7 @@ extern int __init scsi_init_devinfo(void);
|
||||||
extern void scsi_exit_devinfo(void);
|
extern void scsi_exit_devinfo(void);
|
||||||
|
|
||||||
/* scsi_error.c */
|
/* scsi_error.c */
|
||||||
|
extern void scmd_eh_abort_handler(struct work_struct *work);
|
||||||
extern enum blk_eh_timer_return scsi_times_out(struct request *req);
|
extern enum blk_eh_timer_return scsi_times_out(struct request *req);
|
||||||
extern int scsi_error_handler(void *host);
|
extern int scsi_error_handler(void *host);
|
||||||
extern int scsi_decide_disposition(struct scsi_cmnd *cmd);
|
extern int scsi_decide_disposition(struct scsi_cmnd *cmd);
|
||||||
|
|
|
@ -55,6 +55,7 @@ struct scsi_cmnd {
|
||||||
struct scsi_device *device;
|
struct scsi_device *device;
|
||||||
struct list_head list; /* scsi_cmnd participates in queue lists */
|
struct list_head list; /* scsi_cmnd participates in queue lists */
|
||||||
struct list_head eh_entry; /* entry for the host eh_cmd_q */
|
struct list_head eh_entry; /* entry for the host eh_cmd_q */
|
||||||
|
struct delayed_work abort_work;
|
||||||
int eh_eflags; /* Used by error handlr */
|
int eh_eflags; /* Used by error handlr */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -478,6 +478,11 @@ struct scsi_host_template {
|
||||||
/* True if the controller does not support WRITE SAME */
|
/* True if the controller does not support WRITE SAME */
|
||||||
unsigned no_write_same:1;
|
unsigned no_write_same:1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* True if asynchronous aborts are not supported
|
||||||
|
*/
|
||||||
|
unsigned no_async_abort:1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Countdown for host blocking with no commands outstanding.
|
* Countdown for host blocking with no commands outstanding.
|
||||||
*/
|
*/
|
||||||
|
@ -689,6 +694,11 @@ struct Scsi_Host {
|
||||||
char work_q_name[20];
|
char work_q_name[20];
|
||||||
struct workqueue_struct *work_q;
|
struct workqueue_struct *work_q;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Task management function work queue
|
||||||
|
*/
|
||||||
|
struct workqueue_struct *tmf_work_q;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Host has rejected a command because it was busy.
|
* Host has rejected a command because it was busy.
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue