ath10k: perform crash dump collection in workqueue
Commit 25733c4e67
("ath10k: pci: use mutex for diagnostic window CE polling") introduced a regression where we try to sleep (grab a mutex) in an atomic context:

[ 233.602619] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:254
[ 233.602626] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0
[ 233.602636] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 5.1.0-rc2 #4
[ 233.602642] Hardware name: Google Scarlet (DT)
[ 233.602647] Call trace:
[ 233.602663] dump_backtrace+0x0/0x11c
[ 233.602672] show_stack+0x20/0x28
[ 233.602681] dump_stack+0x98/0xbc
[ 233.602690] ___might_sleep+0x154/0x16c
[ 233.602696] __might_sleep+0x78/0x88
[ 233.602704] mutex_lock+0x2c/0x5c
[ 233.602717] ath10k_pci_diag_read_mem+0x68/0x21c [ath10k_pci]
[ 233.602725] ath10k_pci_diag_read32+0x48/0x74 [ath10k_pci]
[ 233.602733] ath10k_pci_dump_registers+0x5c/0x16c [ath10k_pci]
[ 233.602741] ath10k_pci_fw_crashed_dump+0xb8/0x548 [ath10k_pci]
[ 233.602749] ath10k_pci_napi_poll+0x60/0x128 [ath10k_pci]
[ 233.602757] net_rx_action+0x140/0x388
[ 233.602766] __do_softirq+0x1b0/0x35c
[...]

ath10k_pci_fw_crashed_dump() is called from NAPI contexts, and firmware memory dumps are retrieved using the diag memory interface.

A simple reproduction case is to run this on QCA6174A / WLAN.RM.4.4.1-00132-QCARMSWP-1, which happens to be a way to b0rk the firmware:

dd if=/sys/kernel/debug/ieee80211/phy0/ath10k/mem_value bs=4K count=1 of=/dev/null

(NB: simulated firmware crashes, via debugfs, don't trigger firmware dumps.)

The fix is to move the crash-dump into a workqueue context, and avoid relying on 'data_lock' for most mutual exclusion. We only keep using it here for protecting 'fw_crash_counter', while the rest of the coredump buffers are protected by a new 'dump_mutex'.

I've tested the above with simulated firmware crashes (debugfs 'reset' file), real firmware crashes (the 'dd' command above), and a variety of reboot and suspend/resume configurations on QCA6174A.
Reported here: http://lkml.kernel.org/linux-wireless/20190325202706.GA68720@google.com
Fixes: 25733c4e67
("ath10k: pci: use mutex for diagnostic window CE polling")
Signed-off-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
This commit is contained in:
parent
b82d6c1f8f
commit
38faed1504
|
@ -1855,7 +1855,7 @@ void ath10k_ce_dump_registers(struct ath10k *ar,
|
||||||
struct ath10k_ce_crash_data ce_data;
|
struct ath10k_ce_crash_data ce_data;
|
||||||
u32 addr, id;
|
u32 addr, id;
|
||||||
|
|
||||||
lockdep_assert_held(&ar->data_lock);
|
lockdep_assert_held(&ar->dump_mutex);
|
||||||
|
|
||||||
ath10k_err(ar, "Copy Engine register dump:\n");
|
ath10k_err(ar, "Copy Engine register dump:\n");
|
||||||
|
|
||||||
|
|
|
@ -3119,6 +3119,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
|
||||||
goto err_free_wq;
|
goto err_free_wq;
|
||||||
|
|
||||||
mutex_init(&ar->conf_mutex);
|
mutex_init(&ar->conf_mutex);
|
||||||
|
mutex_init(&ar->dump_mutex);
|
||||||
spin_lock_init(&ar->data_lock);
|
spin_lock_init(&ar->data_lock);
|
||||||
|
|
||||||
INIT_LIST_HEAD(&ar->peers);
|
INIT_LIST_HEAD(&ar->peers);
|
||||||
|
|
|
@ -1063,6 +1063,9 @@ struct ath10k {
|
||||||
/* prevents concurrent FW reconfiguration */
|
/* prevents concurrent FW reconfiguration */
|
||||||
struct mutex conf_mutex;
|
struct mutex conf_mutex;
|
||||||
|
|
||||||
|
/* protects coredump data */
|
||||||
|
struct mutex dump_mutex;
|
||||||
|
|
||||||
/* protects shared structure data */
|
/* protects shared structure data */
|
||||||
spinlock_t data_lock;
|
spinlock_t data_lock;
|
||||||
|
|
||||||
|
|
|
@ -1102,7 +1102,7 @@ struct ath10k_fw_crash_data *ath10k_coredump_new(struct ath10k *ar)
|
||||||
{
|
{
|
||||||
struct ath10k_fw_crash_data *crash_data = ar->coredump.fw_crash_data;
|
struct ath10k_fw_crash_data *crash_data = ar->coredump.fw_crash_data;
|
||||||
|
|
||||||
lockdep_assert_held(&ar->data_lock);
|
lockdep_assert_held(&ar->dump_mutex);
|
||||||
|
|
||||||
if (ath10k_coredump_mask == 0)
|
if (ath10k_coredump_mask == 0)
|
||||||
/* coredump disabled */
|
/* coredump disabled */
|
||||||
|
@ -1146,7 +1146,7 @@ static struct ath10k_dump_file_data *ath10k_coredump_build(struct ath10k *ar)
|
||||||
if (!buf)
|
if (!buf)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
spin_lock_bh(&ar->data_lock);
|
mutex_lock(&ar->dump_mutex);
|
||||||
|
|
||||||
dump_data = (struct ath10k_dump_file_data *)(buf);
|
dump_data = (struct ath10k_dump_file_data *)(buf);
|
||||||
strlcpy(dump_data->df_magic, "ATH10K-FW-DUMP",
|
strlcpy(dump_data->df_magic, "ATH10K-FW-DUMP",
|
||||||
|
@ -1213,7 +1213,7 @@ static struct ath10k_dump_file_data *ath10k_coredump_build(struct ath10k *ar)
|
||||||
sofar += sizeof(*dump_tlv) + crash_data->ramdump_buf_len;
|
sofar += sizeof(*dump_tlv) + crash_data->ramdump_buf_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_unlock_bh(&ar->data_lock);
|
mutex_unlock(&ar->dump_mutex);
|
||||||
|
|
||||||
return dump_data;
|
return dump_data;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1441,7 +1441,7 @@ static void ath10k_pci_dump_registers(struct ath10k *ar,
|
||||||
__le32 reg_dump_values[REG_DUMP_COUNT_QCA988X] = {};
|
__le32 reg_dump_values[REG_DUMP_COUNT_QCA988X] = {};
|
||||||
int i, ret;
|
int i, ret;
|
||||||
|
|
||||||
lockdep_assert_held(&ar->data_lock);
|
lockdep_assert_held(&ar->dump_mutex);
|
||||||
|
|
||||||
ret = ath10k_pci_diag_read_hi(ar, &reg_dump_values[0],
|
ret = ath10k_pci_diag_read_hi(ar, &reg_dump_values[0],
|
||||||
hi_failure_state,
|
hi_failure_state,
|
||||||
|
@ -1656,7 +1656,7 @@ static void ath10k_pci_dump_memory(struct ath10k *ar,
|
||||||
int ret, i;
|
int ret, i;
|
||||||
u8 *buf;
|
u8 *buf;
|
||||||
|
|
||||||
lockdep_assert_held(&ar->data_lock);
|
lockdep_assert_held(&ar->dump_mutex);
|
||||||
|
|
||||||
if (!crash_data)
|
if (!crash_data)
|
||||||
return;
|
return;
|
||||||
|
@ -1734,14 +1734,19 @@ static void ath10k_pci_dump_memory(struct ath10k *ar,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ath10k_pci_fw_crashed_dump(struct ath10k *ar)
|
static void ath10k_pci_fw_dump_work(struct work_struct *work)
|
||||||
{
|
{
|
||||||
|
struct ath10k_pci *ar_pci = container_of(work, struct ath10k_pci,
|
||||||
|
dump_work);
|
||||||
struct ath10k_fw_crash_data *crash_data;
|
struct ath10k_fw_crash_data *crash_data;
|
||||||
|
struct ath10k *ar = ar_pci->ar;
|
||||||
char guid[UUID_STRING_LEN + 1];
|
char guid[UUID_STRING_LEN + 1];
|
||||||
|
|
||||||
spin_lock_bh(&ar->data_lock);
|
mutex_lock(&ar->dump_mutex);
|
||||||
|
|
||||||
|
spin_lock_bh(&ar->data_lock);
|
||||||
ar->stats.fw_crash_counter++;
|
ar->stats.fw_crash_counter++;
|
||||||
|
spin_unlock_bh(&ar->data_lock);
|
||||||
|
|
||||||
crash_data = ath10k_coredump_new(ar);
|
crash_data = ath10k_coredump_new(ar);
|
||||||
|
|
||||||
|
@ -1756,11 +1761,18 @@ static void ath10k_pci_fw_crashed_dump(struct ath10k *ar)
|
||||||
ath10k_ce_dump_registers(ar, crash_data);
|
ath10k_ce_dump_registers(ar, crash_data);
|
||||||
ath10k_pci_dump_memory(ar, crash_data);
|
ath10k_pci_dump_memory(ar, crash_data);
|
||||||
|
|
||||||
spin_unlock_bh(&ar->data_lock);
|
mutex_unlock(&ar->dump_mutex);
|
||||||
|
|
||||||
queue_work(ar->workqueue, &ar->restart_work);
|
queue_work(ar->workqueue, &ar->restart_work);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void ath10k_pci_fw_crashed_dump(struct ath10k *ar)
|
||||||
|
{
|
||||||
|
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
|
||||||
|
|
||||||
|
queue_work(ar->workqueue, &ar_pci->dump_work);
|
||||||
|
}
|
||||||
|
|
||||||
void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe,
|
void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe,
|
||||||
int force)
|
int force)
|
||||||
{
|
{
|
||||||
|
@ -3442,6 +3454,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar)
|
||||||
spin_lock_init(&ar_pci->ps_lock);
|
spin_lock_init(&ar_pci->ps_lock);
|
||||||
mutex_init(&ar_pci->ce_diag_mutex);
|
mutex_init(&ar_pci->ce_diag_mutex);
|
||||||
|
|
||||||
|
INIT_WORK(&ar_pci->dump_work, ath10k_pci_fw_dump_work);
|
||||||
|
|
||||||
timer_setup(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry, 0);
|
timer_setup(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry, 0);
|
||||||
|
|
||||||
if (QCA_REV_6174(ar) || QCA_REV_9377(ar))
|
if (QCA_REV_6174(ar) || QCA_REV_9377(ar))
|
||||||
|
|
|
@ -121,6 +121,8 @@ struct ath10k_pci {
|
||||||
/* For protecting ce_diag */
|
/* For protecting ce_diag */
|
||||||
struct mutex ce_diag_mutex;
|
struct mutex ce_diag_mutex;
|
||||||
|
|
||||||
|
struct work_struct dump_work;
|
||||||
|
|
||||||
struct ath10k_ce ce;
|
struct ath10k_ce ce;
|
||||||
struct timer_list rx_post_retry;
|
struct timer_list rx_post_retry;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue