[SCSI] aacraid: OS panic after Adapter panic (hardening).

In experiments in the lab we managed to trigger an Adapter firmware
panic (BlinkLED) coincidentally while several pass-through ioctl
command from the management software were outstanding on a bug only
present on a class of RAID Adapters that require a hardware reset
rather than a commanded reset. The net result was an attempt to time
out the management software command as if it came from the SCSI layer
resulting in an OS panic.

Adapters that use commanded reset, management commands are returned
failed by the Adapter correctly. The adapter firmware panic that
resulted in this condition was also resolved, and there were no
adapters in the field with this specific firmware bug so we do not
expect any field reports. This is a rare or unlikely corner condition,
and no reports have ever been forwarded from the field.

Signed-off-by: Mark Salyzyn <aacraid@adaptec.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
This commit is contained in:
Salyzyn, Mark 2008-01-08 13:26:43 -08:00 committed by James Bottomley
parent 6dcd4a7fe5
commit b6ef70f33c
4 changed files with 25 additions and 3 deletions

View File

@ -1073,6 +1073,7 @@ struct aac_dev
(dev)->a_ops.adapter_comm(dev, comm)
#define FIB_CONTEXT_FLAG_TIMED_OUT (0x00000001)
#define FIB_CONTEXT_FLAG (0x00000002)
/*
* Define the command values

View File

@ -171,6 +171,7 @@ struct fib *aac_fib_alloc(struct aac_dev *dev)
* each I/O
*/
fibptr->hw_fib_va->header.XferState = 0;
fibptr->flags = 0;
fibptr->callback = NULL;
fibptr->callback_data = NULL;
@ -402,6 +403,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
* will have a debug mode where the adapter can notify the host
* it had a problem and the host can log that fact.
*/
fibptr->flags = 0;
if (wait && !reply) {
return -EINVAL;
} else if (!wait && reply) {
@ -450,10 +452,10 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
if (!wait) {
fibptr->callback = callback;
fibptr->callback_data = callback_data;
fibptr->flags = FIB_CONTEXT_FLAG;
}
fibptr->done = 0;
fibptr->flags = 0;
FIB_COUNTER_INCREMENT(aac_config.FibsSent);

View File

@ -120,6 +120,7 @@ unsigned int aac_response_normal(struct aac_queue * q)
* NOTE: we cannot touch the fib after this
* call, because it may have been deallocated.
*/
fib->flags = 0;
fib->callback(fib->callback_data, fib);
} else {
unsigned long flagv;
@ -313,6 +314,7 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index)
* NOTE: we cannot touch the fib after this
* call, because it may have been deallocated.
*/
fib->flags = 0;
fib->callback(fib->callback_data, fib);
} else {
unsigned long flagv;

View File

@ -536,17 +536,33 @@ static int aac_eh_abort(struct scsi_cmnd* cmd)
break;
case INQUIRY:
case READ_CAPACITY:
case TEST_UNIT_READY:
/* Mark associated FIB to not complete, eh handler does this */
for (count = 0; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) {
struct fib * fib = &aac->fibs[count];
if (fib->hw_fib_va->header.XferState &&
(fib->flags & FIB_CONTEXT_FLAG) &&
(fib->callback_data == cmd)) {
fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
ret = SUCCESS;
}
}
break;
case TEST_UNIT_READY:
/* Mark associated FIB to not complete, eh handler does this */
for (count = 0; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) {
struct scsi_cmnd * command;
struct fib * fib = &aac->fibs[count];
if ((fib->hw_fib_va->header.XferState & cpu_to_le32(Async | NoResponseExpected)) &&
(fib->flags & FIB_CONTEXT_FLAG) &&
((command = fib->callback_data)) &&
(command->device == cmd->device)) {
fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
command->SCp.phase = AAC_OWNER_ERROR_HANDLER;
if (command == cmd)
ret = SUCCESS;
}
}
}
return ret;
}
@ -569,6 +585,7 @@ static int aac_eh_reset(struct scsi_cmnd* cmd)
for (count = 0; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) {
struct fib * fib = &aac->fibs[count];
if (fib->hw_fib_va->header.XferState &&
(fib->flags & FIB_CONTEXT_FLAG) &&
(fib->callback_data == cmd)) {
fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;