scsi: smartpqi: add heartbeat check

check for controller lockups

Reviewed-by: Scott Benesh <scott.benesh@microsemi.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microsemi.com>
Signed-off-by: Don Brace <don.brace@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
parent
061ef06a2d
commit
98f876674a
|
@ -490,7 +490,6 @@ struct pqi_raid_error_info {
|
|||
#define PQI_EVENT_TYPE_LOGICAL_DEVICE 0x5
|
||||
#define PQI_EVENT_TYPE_AIO_STATE_CHANGE 0xfd
|
||||
#define PQI_EVENT_TYPE_AIO_CONFIG_CHANGE 0xfe
|
||||
#define PQI_EVENT_TYPE_HEARTBEAT 0xff
|
||||
|
||||
#pragma pack()
|
||||
|
||||
|
@ -635,6 +634,58 @@ struct pqi_encryption_info {
|
|||
u32 encrypt_tweak_upper;
|
||||
};
|
||||
|
||||
#pragma pack(1)
|
||||
|
||||
#define PQI_CONFIG_TABLE_SIGNATURE "CFGTABLE"
|
||||
#define PQI_CONFIG_TABLE_MAX_LENGTH ((u16)~0)
|
||||
|
||||
/* configuration table section IDs */
|
||||
#define PQI_CONFIG_TABLE_SECTION_GENERAL_INFO 0
|
||||
#define PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES 1
|
||||
#define PQI_CONFIG_TABLE_SECTION_FIRMWARE_ERRATA 2
|
||||
#define PQI_CONFIG_TABLE_SECTION_DEBUG 3
|
||||
#define PQI_CONFIG_TABLE_SECTION_HEARTBEAT 4
|
||||
|
||||
struct pqi_config_table {
|
||||
u8 signature[8]; /* "CFGTABLE" */
|
||||
__le32 first_section_offset; /* offset in bytes from the base */
|
||||
/* address of this table to the */
|
||||
/* first section */
|
||||
};
|
||||
|
||||
struct pqi_config_table_section_header {
|
||||
__le16 section_id; /* as defined by the */
|
||||
/* PQI_CONFIG_TABLE_SECTION_* */
|
||||
/* manifest constants above */
|
||||
__le16 next_section_offset; /* offset in bytes from base */
|
||||
/* address of the table of the */
|
||||
/* next section or 0 if last entry */
|
||||
};
|
||||
|
||||
struct pqi_config_table_general_info {
|
||||
struct pqi_config_table_section_header header;
|
||||
__le32 section_length; /* size of this section in bytes */
|
||||
/* including the section header */
|
||||
__le32 max_outstanding_requests; /* max. outstanding */
|
||||
/* commands supported by */
|
||||
/* the controller */
|
||||
__le32 max_sg_size; /* max. transfer size of a single */
|
||||
/* command */
|
||||
__le32 max_sg_per_request; /* max. number of scatter-gather */
|
||||
/* entries supported in a single */
|
||||
/* command */
|
||||
};
|
||||
|
||||
struct pqi_config_table_debug {
|
||||
struct pqi_config_table_section_header header;
|
||||
__le32 scratchpad;
|
||||
};
|
||||
|
||||
struct pqi_config_table_heartbeat {
|
||||
struct pqi_config_table_section_header header;
|
||||
__le32 heartbeat_counter;
|
||||
};
|
||||
|
||||
#define PQI_MAX_OUTSTANDING_REQUESTS ((u32)~0)
|
||||
#define PQI_MAX_TRANSFER_SIZE (4 * 1024U * 1024U)
|
||||
|
||||
|
@ -645,8 +696,6 @@ struct pqi_encryption_info {
|
|||
#define PQI_HBA_BUS 2
|
||||
#define PQI_MAX_BUS PQI_HBA_BUS
|
||||
|
||||
#pragma pack(1)
|
||||
|
||||
struct report_lun_header {
|
||||
__be32 list_length;
|
||||
u8 extended_response;
|
||||
|
@ -870,7 +919,6 @@ struct pqi_io_request {
|
|||
struct list_head request_list_entry;
|
||||
};
|
||||
|
||||
#define PQI_EVENT_HEARTBEAT 0
|
||||
#define PQI_NUM_SUPPORTED_EVENTS 6
|
||||
|
||||
struct pqi_event {
|
||||
|
@ -943,7 +991,6 @@ struct pqi_ctrl_info {
|
|||
u8 inbound_spanning_supported : 1;
|
||||
u8 outbound_spanning_supported : 1;
|
||||
u8 pqi_mode_enabled : 1;
|
||||
u8 heartbeat_timer_started : 1;
|
||||
u8 update_time_worker_scheduled : 1;
|
||||
|
||||
struct list_head scsi_device_list;
|
||||
|
@ -963,7 +1010,8 @@ struct pqi_ctrl_info {
|
|||
|
||||
atomic_t num_interrupts;
|
||||
int previous_num_interrupts;
|
||||
unsigned int num_heartbeats_requested;
|
||||
u32 previous_heartbeat_count;
|
||||
__le32 __iomem *heartbeat_counter;
|
||||
struct timer_list heartbeat_timer;
|
||||
|
||||
struct semaphore sync_request_sem;
|
||||
|
|
|
@ -267,6 +267,14 @@ static inline void pqi_cancel_rescan_worker(struct pqi_ctrl_info *ctrl_info)
|
|||
cancel_delayed_work_sync(&ctrl_info->rescan_work);
|
||||
}
|
||||
|
||||
static inline u32 pqi_read_heartbeat_counter(struct pqi_ctrl_info *ctrl_info)
|
||||
{
|
||||
if (!ctrl_info->heartbeat_counter)
|
||||
return 0;
|
||||
|
||||
return readl(ctrl_info->heartbeat_counter);
|
||||
}
|
||||
|
||||
static int pqi_map_single(struct pci_dev *pci_dev,
|
||||
struct pqi_sg_descriptor *sg_descriptor, void *buffer,
|
||||
size_t buffer_length, int data_direction)
|
||||
|
@ -2708,23 +2716,18 @@ static inline unsigned int pqi_num_elements_free(unsigned int pi,
|
|||
return elements_in_queue - num_elements_used - 1;
|
||||
}
|
||||
|
||||
#define PQI_EVENT_ACK_TIMEOUT 30
|
||||
|
||||
static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
|
||||
static void pqi_send_event_ack(struct pqi_ctrl_info *ctrl_info,
|
||||
struct pqi_event_acknowledge_request *iu, size_t iu_length)
|
||||
{
|
||||
pqi_index_t iq_pi;
|
||||
pqi_index_t iq_ci;
|
||||
unsigned long flags;
|
||||
void *next_element;
|
||||
unsigned long timeout;
|
||||
struct pqi_queue_group *queue_group;
|
||||
|
||||
queue_group = &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP];
|
||||
put_unaligned_le16(queue_group->oq_id, &iu->header.response_queue_id);
|
||||
|
||||
timeout = (PQI_EVENT_ACK_TIMEOUT * HZ) + jiffies;
|
||||
|
||||
while (1) {
|
||||
spin_lock_irqsave(&queue_group->submit_lock[RAID_PATH], flags);
|
||||
|
||||
|
@ -2738,11 +2741,8 @@ static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
|
|||
spin_unlock_irqrestore(
|
||||
&queue_group->submit_lock[RAID_PATH], flags);
|
||||
|
||||
if (time_after(jiffies, timeout)) {
|
||||
dev_err(&ctrl_info->pci_dev->dev,
|
||||
"sending event acknowledge timed out\n");
|
||||
if (pqi_ctrl_offline(ctrl_info))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
next_element = queue_group->iq_element_array[RAID_PATH] +
|
||||
|
@ -2751,7 +2751,6 @@ static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
|
|||
memcpy(next_element, iu, iu_length);
|
||||
|
||||
iq_pi = (iq_pi + 1) % ctrl_info->num_elements_per_iq;
|
||||
|
||||
queue_group->iq_pi_copy[RAID_PATH] = iq_pi;
|
||||
|
||||
/*
|
||||
|
@ -2777,7 +2776,7 @@ static void pqi_acknowledge_event(struct pqi_ctrl_info *ctrl_info,
|
|||
request.event_id = event->event_id;
|
||||
request.additional_event_id = event->additional_event_id;
|
||||
|
||||
pqi_start_event_ack(ctrl_info, &request, sizeof(request));
|
||||
pqi_send_event_ack(ctrl_info, &request, sizeof(request));
|
||||
}
|
||||
|
||||
static void pqi_event_worker(struct work_struct *work)
|
||||
|
@ -2785,7 +2784,6 @@ static void pqi_event_worker(struct work_struct *work)
|
|||
unsigned int i;
|
||||
struct pqi_ctrl_info *ctrl_info;
|
||||
struct pqi_event *event;
|
||||
bool got_non_heartbeat_event = false;
|
||||
|
||||
ctrl_info = container_of(work, struct pqi_ctrl_info, event_work);
|
||||
|
||||
|
@ -2797,8 +2795,6 @@ static void pqi_event_worker(struct work_struct *work)
|
|||
if (event->pending) {
|
||||
event->pending = false;
|
||||
pqi_acknowledge_event(ctrl_info, event);
|
||||
if (i != PQI_EVENT_TYPE_HEARTBEAT)
|
||||
got_non_heartbeat_event = true;
|
||||
}
|
||||
event++;
|
||||
}
|
||||
|
@ -2848,57 +2844,58 @@ static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info)
|
|||
}
|
||||
}
|
||||
|
||||
#define PQI_HEARTBEAT_TIMER_INTERVAL (5 * HZ)
|
||||
#define PQI_MAX_HEARTBEAT_REQUESTS 5
|
||||
#define PQI_HEARTBEAT_TIMER_INTERVAL (10 * HZ)
|
||||
|
||||
static void pqi_heartbeat_timer_handler(unsigned long data)
|
||||
{
|
||||
int num_interrupts;
|
||||
u32 heartbeat_count;
|
||||
struct pqi_ctrl_info *ctrl_info = (struct pqi_ctrl_info *)data;
|
||||
|
||||
if (!ctrl_info->heartbeat_timer_started)
|
||||
pqi_check_ctrl_health(ctrl_info);
|
||||
if (pqi_ctrl_offline(ctrl_info))
|
||||
return;
|
||||
|
||||
num_interrupts = atomic_read(&ctrl_info->num_interrupts);
|
||||
heartbeat_count = pqi_read_heartbeat_counter(ctrl_info);
|
||||
|
||||
if (num_interrupts == ctrl_info->previous_num_interrupts) {
|
||||
ctrl_info->num_heartbeats_requested++;
|
||||
if (ctrl_info->num_heartbeats_requested >
|
||||
PQI_MAX_HEARTBEAT_REQUESTS) {
|
||||
if (heartbeat_count == ctrl_info->previous_heartbeat_count) {
|
||||
dev_err(&ctrl_info->pci_dev->dev,
|
||||
"no heartbeat detected - last heartbeat count: %u\n",
|
||||
heartbeat_count);
|
||||
pqi_take_ctrl_offline(ctrl_info);
|
||||
return;
|
||||
}
|
||||
ctrl_info->events[PQI_EVENT_HEARTBEAT].pending = true;
|
||||
schedule_work(&ctrl_info->event_work);
|
||||
} else {
|
||||
ctrl_info->num_heartbeats_requested = 0;
|
||||
ctrl_info->previous_num_interrupts = num_interrupts;
|
||||
}
|
||||
|
||||
ctrl_info->previous_num_interrupts = num_interrupts;
|
||||
ctrl_info->previous_heartbeat_count = heartbeat_count;
|
||||
mod_timer(&ctrl_info->heartbeat_timer,
|
||||
jiffies + PQI_HEARTBEAT_TIMER_INTERVAL);
|
||||
}
|
||||
|
||||
static void pqi_start_heartbeat_timer(struct pqi_ctrl_info *ctrl_info)
|
||||
{
|
||||
if (!ctrl_info->heartbeat_counter)
|
||||
return;
|
||||
|
||||
ctrl_info->previous_num_interrupts =
|
||||
atomic_read(&ctrl_info->num_interrupts);
|
||||
ctrl_info->previous_heartbeat_count =
|
||||
pqi_read_heartbeat_counter(ctrl_info);
|
||||
|
||||
init_timer(&ctrl_info->heartbeat_timer);
|
||||
ctrl_info->heartbeat_timer.expires =
|
||||
jiffies + PQI_HEARTBEAT_TIMER_INTERVAL;
|
||||
ctrl_info->heartbeat_timer.data = (unsigned long)ctrl_info;
|
||||
ctrl_info->heartbeat_timer.function = pqi_heartbeat_timer_handler;
|
||||
ctrl_info->heartbeat_timer_started = true;
|
||||
add_timer(&ctrl_info->heartbeat_timer);
|
||||
}
|
||||
|
||||
static inline void pqi_stop_heartbeat_timer(struct pqi_ctrl_info *ctrl_info)
|
||||
{
|
||||
if (ctrl_info->heartbeat_timer_started) {
|
||||
ctrl_info->heartbeat_timer_started = false;
|
||||
del_timer_sync(&ctrl_info->heartbeat_timer);
|
||||
}
|
||||
del_timer_sync(&ctrl_info->heartbeat_timer);
|
||||
}
|
||||
|
||||
static inline int pqi_event_type_to_event_index(unsigned int event_type)
|
||||
|
@ -2925,12 +2922,10 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
|
|||
struct pqi_event_queue *event_queue;
|
||||
struct pqi_event_response *response;
|
||||
struct pqi_event *event;
|
||||
bool need_delayed_work;
|
||||
int event_index;
|
||||
|
||||
event_queue = &ctrl_info->event_queue;
|
||||
num_events = 0;
|
||||
need_delayed_work = false;
|
||||
oq_ci = event_queue->oq_ci_copy;
|
||||
|
||||
while (1) {
|
||||
|
@ -2953,10 +2948,6 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
|
|||
event->event_id = response->event_id;
|
||||
event->additional_event_id =
|
||||
response->additional_event_id;
|
||||
if (event_index != PQI_EVENT_TYPE_HEARTBEAT) {
|
||||
event->pending = true;
|
||||
need_delayed_work = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2966,9 +2957,7 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
|
|||
if (num_events) {
|
||||
event_queue->oq_ci_copy = oq_ci;
|
||||
writel(oq_ci, event_queue->oq_ci);
|
||||
|
||||
if (need_delayed_work)
|
||||
schedule_work(&ctrl_info->event_work);
|
||||
schedule_work(&ctrl_info->event_work);
|
||||
}
|
||||
|
||||
return num_events;
|
||||
|
@ -3220,7 +3209,7 @@ static int pqi_alloc_operational_queues(struct pqi_ctrl_info *ctrl_info)
|
|||
|
||||
if (!ctrl_info->queue_memory_base) {
|
||||
dev_err(&ctrl_info->pci_dev->dev,
|
||||
"failed to allocate memory for PQI admin queues\n");
|
||||
"unable to allocate memory for PQI admin queues\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
|
@ -5672,6 +5661,55 @@ out:
|
|||
return rc;
|
||||
}
|
||||
|
||||
static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info)
|
||||
{
|
||||
u32 table_length;
|
||||
u32 section_offset;
|
||||
void __iomem *table_iomem_addr;
|
||||
struct pqi_config_table *config_table;
|
||||
struct pqi_config_table_section_header *section;
|
||||
|
||||
table_length = ctrl_info->config_table_length;
|
||||
|
||||
config_table = kmalloc(table_length, GFP_KERNEL);
|
||||
if (!config_table) {
|
||||
dev_err(&ctrl_info->pci_dev->dev,
|
||||
"unable to allocate memory for PQI configuration table\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the config table contents from I/O memory space into the
|
||||
* temporary buffer.
|
||||
*/
|
||||
table_iomem_addr = ctrl_info->iomem_base +
|
||||
ctrl_info->config_table_offset;
|
||||
memcpy_fromio(config_table, table_iomem_addr, table_length);
|
||||
|
||||
section_offset =
|
||||
get_unaligned_le32(&config_table->first_section_offset);
|
||||
|
||||
while (section_offset) {
|
||||
section = (void *)config_table + section_offset;
|
||||
|
||||
switch (get_unaligned_le16(§ion->section_id)) {
|
||||
case PQI_CONFIG_TABLE_SECTION_HEARTBEAT:
|
||||
ctrl_info->heartbeat_counter = table_iomem_addr +
|
||||
section_offset +
|
||||
offsetof(struct pqi_config_table_heartbeat,
|
||||
heartbeat_counter);
|
||||
break;
|
||||
}
|
||||
|
||||
section_offset =
|
||||
get_unaligned_le16(§ion->next_section_offset);
|
||||
}
|
||||
|
||||
kfree(config_table);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Switches the controller from PQI mode back into SIS mode. */
|
||||
|
||||
static int pqi_revert_to_sis_mode(struct pqi_ctrl_info *ctrl_info)
|
||||
|
@ -5783,6 +5821,10 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
|
|||
ctrl_info->pqi_mode_enabled = true;
|
||||
pqi_save_ctrl_mode(ctrl_info, PQI_MODE);
|
||||
|
||||
rc = pqi_process_config_table(ctrl_info);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
rc = pqi_alloc_admin_queues(ctrl_info);
|
||||
if (rc) {
|
||||
dev_err(&ctrl_info->pci_dev->dev,
|
||||
|
@ -6091,6 +6133,8 @@ static struct pqi_ctrl_info *pqi_alloc_ctrl_info(int numa_node)
|
|||
INIT_DELAYED_WORK(&ctrl_info->rescan_work, pqi_rescan_worker);
|
||||
INIT_DELAYED_WORK(&ctrl_info->update_time_work, pqi_update_time_worker);
|
||||
|
||||
init_timer(&ctrl_info->heartbeat_timer);
|
||||
|
||||
sema_init(&ctrl_info->sync_request_sem,
|
||||
PQI_RESERVED_IO_SLOTS_SYNCHRONOUS_REQUESTS);
|
||||
init_waitqueue_head(&ctrl_info->block_requests_wait);
|
||||
|
|
|
@ -422,6 +422,10 @@ void sis_soft_reset(struct pqi_ctrl_info *ctrl_info)
|
|||
|
||||
void sis_shutdown_ctrl(struct pqi_ctrl_info *ctrl_info)
|
||||
{
|
||||
if (readl(&ctrl_info->registers->sis_firmware_status) &
|
||||
SIS_CTRL_KERNEL_PANIC)
|
||||
return;
|
||||
|
||||
writel(SIS_TRIGGER_SHUTDOWN,
|
||||
&ctrl_info->registers->sis_host_to_ctrl_doorbell);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue