net/mlx4_core: Fix racy CQ (Completion Queue) free
In mlx4_cq_completion() and mlx4_cq_event(), the radix_tree_lookup() requires an rcu_read_lock. This is mandatory: if another core frees the CQ, the radix tree's radix_tree_node_rcu_free() call_rcu() callback could run while the lookup is still walking the tree.

Additionally, since we are adding the rcu lock around the radix-tree lookup in mlx4_cq_event(), we no longer need to take the spinlock there. The synchronize_irq() call for the async event also eliminates the need for incrementing the cq reference count in mlx4_cq_event().

Other changes:

1. In mlx4_cq_free(), replace spin_lock_irq with spin_lock: we no longer take this spinlock in interrupt context. The spinlock here, therefore, simply protects against different threads simultaneously invoking mlx4_cq_free() for different cq's.

2. In mlx4_cq_free(), move the radix tree delete to before the synchronize_irq() calls. This guarantees that we will not access this cq during any subsequent interrupts, and can therefore safely free the CQ after the synchronize_irq calls. The rcu_read_lock in the interrupt handlers only needs to protect against corrupting the radix tree; the interrupt handlers may access the cq outside the rcu_read_lock because the synchronize_irq calls protect against premature freeing of the cq.

3. In mlx4_cq_event(), change the mlx4_warn message to mlx4_dbg.

4. Leave the cq reference count mechanism in place, because it is still needed for the cq completion tasklet mechanism.

Fixes: 6d90aa5cf1 ("net/mlx4_core: Make sure there are no pending async events when freeing CQ")
Fixes: 225c7b1fee ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit 291c566a28
parent b618ab4561
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -101,13 +101,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
 {
         struct mlx4_cq *cq;
 
+        rcu_read_lock();
         cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
                                cqn & (dev->caps.num_cqs - 1));
+        rcu_read_unlock();
+
         if (!cq) {
                 mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
                 return;
         }
 
+        /* Accessing the CQ outside of rcu_read_lock is safe, because
+         * the CQ is freed only after interrupt handling is completed.
+         */
         ++cq->arm_sn;
 
         cq->comp(cq);
@@ -118,23 +124,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
         struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
         struct mlx4_cq *cq;
 
-        spin_lock(&cq_table->lock);
-
+        rcu_read_lock();
         cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
-        if (cq)
-                atomic_inc(&cq->refcount);
-
-        spin_unlock(&cq_table->lock);
+        rcu_read_unlock();
 
         if (!cq) {
-                mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
+                mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn);
                 return;
         }
 
+        /* Accessing the CQ outside of rcu_read_lock is safe, because
+         * the CQ is freed only after interrupt handling is completed.
+         */
         cq->event(cq, event_type);
-
-        if (atomic_dec_and_test(&cq->refcount))
-                complete(&cq->free);
 }
 
 static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -301,9 +303,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
         if (err)
                 return err;
 
-        spin_lock_irq(&cq_table->lock);
+        spin_lock(&cq_table->lock);
         err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
-        spin_unlock_irq(&cq_table->lock);
+        spin_unlock(&cq_table->lock);
         if (err)
                 goto err_icm;
 
@@ -349,9 +351,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
         return 0;
 
 err_radix:
-        spin_lock_irq(&cq_table->lock);
+        spin_lock(&cq_table->lock);
         radix_tree_delete(&cq_table->tree, cq->cqn);
-        spin_unlock_irq(&cq_table->lock);
+        spin_unlock(&cq_table->lock);
 
 err_icm:
         mlx4_cq_free_icm(dev, cq->cqn);
@@ -370,15 +372,15 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
         if (err)
                 mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
 
+        spin_lock(&cq_table->lock);
+        radix_tree_delete(&cq_table->tree, cq->cqn);
+        spin_unlock(&cq_table->lock);
+
         synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq);
         if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq !=
             priv->eq_table.eq[MLX4_EQ_ASYNC].irq)
                 synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
 
-        spin_lock_irq(&cq_table->lock);
-        radix_tree_delete(&cq_table->tree, cq->cqn);
-        spin_unlock_irq(&cq_table->lock);
-
         if (atomic_dec_and_test(&cq->refcount))
                 complete(&cq->free);
         wait_for_completion(&cq->free);
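Condensed from the last hunk, the teardown order in mlx4_cq_free() is: unpublish, drain, release. The statements below are the patched driver code from the hunk above; the numbered comments are editorial annotation, not part of the source.

        /* 1. Unpublish: after this, no interrupt handler can look up cq. */
        spin_lock(&cq_table->lock);
        radix_tree_delete(&cq_table->tree, cq->cqn);
        spin_unlock(&cq_table->lock);

        /* 2. Drain: wait out any handler that already found cq, on both
         *    the completion EQ vector and the async event EQ.
         */
        synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq);
        if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq !=
            priv->eq_table.eq[MLX4_EQ_ASYNC].irq)
                synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq);

        /* 3. Release: the reference count stays only for the completion
         *    tasklet; wait until the last holder completes cq->free.
         */
        if (atomic_dec_and_test(&cq->refcount))
                complete(&cq->free);
        wait_for_completion(&cq->free);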