cciss: PCI power management reset for kexec
The kexec kernel resets the CCISS hardware in three steps: 1. Use PCI power management states to reset the controller in the kexec kernel. 2. Clear the MSI/MSI-X bits in PCI configuration space so that MSI initialization in the kexec kernel doesn't fail. 3. Use the CCISS "No-op" message to determine when the controller firmware has recovered from the PCI PM reset. [akpm@linux-foundation.org: cleanups] Signed-off-by: Mike Miller <mike.miller@hp.com> Cc: Randy Dunlap <randy.dunlap@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
This commit is contained in:
parent
c8cbec6bdf
commit
82eb03cfd8
|
@ -3390,6 +3390,203 @@ static void free_hba(int i)
|
||||||
kfree(p);
|
kfree(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Send a message CDB to the firmware. */
|
||||||
|
static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
|
||||||
|
{
|
||||||
|
typedef struct {
|
||||||
|
CommandListHeader_struct CommandHeader;
|
||||||
|
RequestBlock_struct Request;
|
||||||
|
ErrDescriptor_struct ErrorDescriptor;
|
||||||
|
} Command;
|
||||||
|
static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
|
||||||
|
Command *cmd;
|
||||||
|
dma_addr_t paddr64;
|
||||||
|
uint32_t paddr32, tag;
|
||||||
|
void __iomem *vaddr;
|
||||||
|
int i, err;
|
||||||
|
|
||||||
|
vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
|
||||||
|
if (vaddr == NULL)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
/* The Inbound Post Queue only accepts 32-bit physical addresses for the
|
||||||
|
CCISS commands, so they must be allocated from the lower 4GiB of
|
||||||
|
memory. */
|
||||||
|
err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
|
||||||
|
if (err) {
|
||||||
|
iounmap(vaddr);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
|
||||||
|
if (cmd == NULL) {
|
||||||
|
iounmap(vaddr);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This must fit, because of the 32-bit consistent DMA mask. Also,
|
||||||
|
although there's no guarantee, we assume that the address is at
|
||||||
|
least 4-byte aligned (most likely, it's page-aligned). */
|
||||||
|
paddr32 = paddr64;
|
||||||
|
|
||||||
|
cmd->CommandHeader.ReplyQueue = 0;
|
||||||
|
cmd->CommandHeader.SGList = 0;
|
||||||
|
cmd->CommandHeader.SGTotal = 0;
|
||||||
|
cmd->CommandHeader.Tag.lower = paddr32;
|
||||||
|
cmd->CommandHeader.Tag.upper = 0;
|
||||||
|
memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
|
||||||
|
|
||||||
|
cmd->Request.CDBLen = 16;
|
||||||
|
cmd->Request.Type.Type = TYPE_MSG;
|
||||||
|
cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
|
||||||
|
cmd->Request.Type.Direction = XFER_NONE;
|
||||||
|
cmd->Request.Timeout = 0; /* Don't time out */
|
||||||
|
cmd->Request.CDB[0] = opcode;
|
||||||
|
cmd->Request.CDB[1] = type;
|
||||||
|
memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
|
||||||
|
|
||||||
|
cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
|
||||||
|
cmd->ErrorDescriptor.Addr.upper = 0;
|
||||||
|
cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
|
||||||
|
|
||||||
|
writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
|
||||||
|
|
||||||
|
for (i = 0; i < 10; i++) {
|
||||||
|
tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
|
||||||
|
if ((tag & ~3) == paddr32)
|
||||||
|
break;
|
||||||
|
schedule_timeout_uninterruptible(HZ);
|
||||||
|
}
|
||||||
|
|
||||||
|
iounmap(vaddr);
|
||||||
|
|
||||||
|
/* we leak the DMA buffer here ... no choice since the controller could
|
||||||
|
still complete the command. */
|
||||||
|
if (i == 10) {
|
||||||
|
printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
|
||||||
|
opcode, type);
|
||||||
|
return -ETIMEDOUT;
|
||||||
|
}
|
||||||
|
|
||||||
|
pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
|
||||||
|
|
||||||
|
if (tag & 2) {
|
||||||
|
printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
|
||||||
|
opcode, type);
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
|
||||||
|
printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
|
||||||
|
opcode, type);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
|
||||||
|
#define cciss_noop(p) cciss_message(p, 3, 0)
|
||||||
|
|
||||||
|
static __devinit int cciss_reset_msi(struct pci_dev *pdev)
|
||||||
|
{
|
||||||
|
/* the #defines are stolen from drivers/pci/msi.h. */
|
||||||
|
#define msi_control_reg(base) (base + PCI_MSI_FLAGS)
|
||||||
|
#define PCI_MSIX_FLAGS_ENABLE (1 << 15)
|
||||||
|
|
||||||
|
int pos;
|
||||||
|
u16 control = 0;
|
||||||
|
|
||||||
|
pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
|
||||||
|
if (pos) {
|
||||||
|
pci_read_config_word(pdev, msi_control_reg(pos), &control);
|
||||||
|
if (control & PCI_MSI_FLAGS_ENABLE) {
|
||||||
|
printk(KERN_INFO "cciss: resetting MSI\n");
|
||||||
|
pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
|
||||||
|
if (pos) {
|
||||||
|
pci_read_config_word(pdev, msi_control_reg(pos), &control);
|
||||||
|
if (control & PCI_MSIX_FLAGS_ENABLE) {
|
||||||
|
printk(KERN_INFO "cciss: resetting MSI-X\n");
|
||||||
|
pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This does a hard reset of the controller using PCI power management
|
||||||
|
* states. */
|
||||||
|
static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
|
||||||
|
{
|
||||||
|
u16 pmcsr, saved_config_space[32];
|
||||||
|
int i, pos;
|
||||||
|
|
||||||
|
printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
|
||||||
|
|
||||||
|
/* This is very nearly the same thing as
|
||||||
|
|
||||||
|
pci_save_state(pci_dev);
|
||||||
|
pci_set_power_state(pci_dev, PCI_D3hot);
|
||||||
|
pci_set_power_state(pci_dev, PCI_D0);
|
||||||
|
pci_restore_state(pci_dev);
|
||||||
|
|
||||||
|
but we can't use these nice canned kernel routines on
|
||||||
|
kexec, because they also check the MSI/MSI-X state in PCI
|
||||||
|
configuration space and do the wrong thing when it is
|
||||||
|
set/cleared. Also, the pci_save/restore_state functions
|
||||||
|
violate the ordering requirements for restoring the
|
||||||
|
configuration space from the CCISS document (see the
|
||||||
|
comment below). So we roll our own .... */
|
||||||
|
|
||||||
|
for (i = 0; i < 32; i++)
|
||||||
|
pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
|
||||||
|
|
||||||
|
pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
|
||||||
|
if (pos == 0) {
|
||||||
|
printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Quoting from the Open CISS Specification: "The Power
|
||||||
|
* Management Control/Status Register (CSR) controls the power
|
||||||
|
* state of the device. The normal operating state is D0,
|
||||||
|
* CSR=00h. The software off state is D3, CSR=03h. To reset
|
||||||
|
* the controller, place the interface device in D3 then to
|
||||||
|
* D0, this causes a secondary PCI reset which will reset the
|
||||||
|
* controller." */
|
||||||
|
|
||||||
|
/* enter the D3hot power management state */
|
||||||
|
pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
|
||||||
|
pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
|
||||||
|
pmcsr |= PCI_D3hot;
|
||||||
|
pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
|
||||||
|
|
||||||
|
schedule_timeout_uninterruptible(HZ >> 1);
|
||||||
|
|
||||||
|
/* enter the D0 power management state */
|
||||||
|
pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
|
||||||
|
pmcsr |= PCI_D0;
|
||||||
|
pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
|
||||||
|
|
||||||
|
schedule_timeout_uninterruptible(HZ >> 1);
|
||||||
|
|
||||||
|
/* Restore the PCI configuration space. The Open CISS
|
||||||
|
* Specification says, "Restore the PCI Configuration
|
||||||
|
* Registers, offsets 00h through 60h. It is important to
|
||||||
|
* restore the command register, 16-bits at offset 04h,
|
||||||
|
* last. Do not restore the configuration status register,
|
||||||
|
* 16-bits at offset 06h." Note that the offset is 2*i. */
|
||||||
|
for (i = 0; i < 32; i++) {
|
||||||
|
if (i == 2 || i == 3)
|
||||||
|
continue;
|
||||||
|
pci_write_config_word(pdev, 2*i, saved_config_space[i]);
|
||||||
|
}
|
||||||
|
wmb();
|
||||||
|
pci_write_config_word(pdev, 4, saved_config_space[2]);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is it. Find all the controllers and register them. I really hate
|
* This is it. Find all the controllers and register them. I really hate
|
||||||
* stealing all these major device numbers.
|
* stealing all these major device numbers.
|
||||||
|
@ -3404,6 +3601,24 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
|
||||||
int dac, return_code;
|
int dac, return_code;
|
||||||
InquiryData_struct *inq_buff = NULL;
|
InquiryData_struct *inq_buff = NULL;
|
||||||
|
|
||||||
|
if (reset_devices) {
|
||||||
|
/* Reset the controller with a PCI power-cycle */
|
||||||
|
if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
|
||||||
|
return -ENODEV;
|
||||||
|
|
||||||
|
/* Some devices (notably the HP Smart Array 5i Controller)
|
||||||
|
need a little pause here */
|
||||||
|
schedule_timeout_uninterruptible(30*HZ);
|
||||||
|
|
||||||
|
/* Now try to get the controller to respond to a no-op */
|
||||||
|
for (i=0; i<12; i++) {
|
||||||
|
if (cciss_noop(pdev) == 0)
|
||||||
|
break;
|
||||||
|
else
|
||||||
|
printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : ""));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
i = alloc_cciss_hba();
|
i = alloc_cciss_hba();
|
||||||
if (i < 0)
|
if (i < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
Loading…
Reference in New Issue