From 91f36237b4b9bdce7610c7450a906d46704a566a Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Fri, 28 Jul 2023 13:44:18 +0200 Subject: [PATCH] RDMA/siw: Fix tx thread initialization. Immediately removing the siw module after insertion may crash in siw_stop_tx_thread(), if the according thread did not yet had a chance to initialize its wait queue and siw_stop_tx_thread() tries to wakeup that thread. Initializing the threads state before spwaning it fixes it. Reported-by: Guoqing Jiang Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20230728114418.124328-1-bmt@zurich.ibm.com Tested-by: Guoqing Jiang Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw.h | 3 +- drivers/infiniband/sw/siw/siw_main.c | 40 ++---------------------- drivers/infiniband/sw/siw/siw_qp_tx.c | 44 +++++++++++++++++++++++---- 3 files changed, 43 insertions(+), 44 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 8b4a710b82bc..58dddb143b9f 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -531,11 +531,12 @@ void siw_qp_llp_data_ready(struct sock *sk); void siw_qp_llp_write_space(struct sock *sk); /* QP TX path functions */ +int siw_create_tx_threads(void); +void siw_stop_tx_threads(void); int siw_run_sq(void *arg); int siw_qp_sq_process(struct siw_qp *qp); int siw_sq_start(struct siw_qp *qp); int siw_activate_tx(struct siw_qp *qp); -void siw_stop_tx_thread(int nr_cpu); int siw_get_tx_cpu(struct siw_device *sdev); void siw_put_tx_cpu(int cpu); diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index f45600d169ae..d4b6e0106851 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -87,29 +87,6 @@ static void siw_device_cleanup(struct ib_device *base_dev) xa_destroy(&sdev->mem_xa); } -static int siw_create_tx_threads(void) -{ - int cpu, assigned = 0; - - for_each_online_cpu(cpu) { - /* Skip HT cores */ - if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) - continue; - - siw_tx_thread[cpu] = - kthread_run_on_cpu(siw_run_sq, - (unsigned long *)(long)cpu, - cpu, "siw_tx/%u"); - if (IS_ERR(siw_tx_thread[cpu])) { - siw_tx_thread[cpu] = NULL; - continue; - } - - assigned++; - } - return assigned; -} - static int siw_dev_qualified(struct net_device *netdev) { /* @@ -529,7 +506,6 @@ static struct rdma_link_ops siw_link_ops = { static __init int siw_init_module(void) { int rv; - int nr_cpu; if (SENDPAGE_THRESH < SIW_MAX_INLINE) { pr_info("siw: sendpage threshold too small: %u\n", @@ -574,12 +550,8 @@ static __init int siw_init_module(void) return 0; out_error: - for (nr_cpu = 0; nr_cpu < nr_cpu_ids; nr_cpu++) { - if (siw_tx_thread[nr_cpu]) { - siw_stop_tx_thread(nr_cpu); - siw_tx_thread[nr_cpu] = NULL; - } - } + siw_stop_tx_threads(); + if (siw_crypto_shash) crypto_free_shash(siw_crypto_shash); @@ -593,14 +565,8 @@ out_error: static void __exit siw_exit_module(void) { - int cpu; + siw_stop_tx_threads(); - for_each_possible_cpu(cpu) { - if (siw_tx_thread[cpu]) { - siw_stop_tx_thread(cpu); - siw_tx_thread[cpu] = NULL; - } - } unregister_netdevice_notifier(&siw_netdev_nb); rdma_link_unregister(&siw_link_ops); ib_unregister_driver(RDMA_DRIVER_SIW); diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 7c7a51d36d0c..3ff339eceec3 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -1208,10 +1208,45 @@ struct tx_task_t { static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g); -void siw_stop_tx_thread(int nr_cpu) +int siw_create_tx_threads(void) { - kthread_stop(siw_tx_thread[nr_cpu]); - wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting); + int cpu, assigned = 0; + + for_each_online_cpu(cpu) { + struct tx_task_t *tx_task; + + /* Skip HT cores */ + if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) + continue; + + tx_task = &per_cpu(siw_tx_task_g, cpu); + init_llist_head(&tx_task->active); + init_waitqueue_head(&tx_task->waiting); + + siw_tx_thread[cpu] = + kthread_run_on_cpu(siw_run_sq, + (unsigned long *)(long)cpu, + cpu, "siw_tx/%u"); + if (IS_ERR(siw_tx_thread[cpu])) { + siw_tx_thread[cpu] = NULL; + continue; + } + assigned++; + } + return assigned; +} + +void siw_stop_tx_threads(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (siw_tx_thread[cpu]) { + kthread_stop(siw_tx_thread[cpu]); + wake_up(&per_cpu(siw_tx_task_g, cpu).waiting); + siw_tx_thread[cpu] = NULL; + } + } } int siw_run_sq(void *data) @@ -1221,9 +1256,6 @@ int siw_run_sq(void *data) struct siw_qp *qp; struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu); - init_llist_head(&tx_task->active); - init_waitqueue_head(&tx_task->waiting); - while (1) { struct llist_node *fifo_list = NULL;