IB/mlx5: Fix implicit MR release flow
Once the implicit MR is released by ib_umem_notifier_release(), its leaves
are marked as "dying".
However, when dereg_mr()->mlx5_ib_free_implicit_mr()->mr_leaf_free() is
called, it skips running mr_leaf_free_action (i.e. umem_odp->work) for
leaves that were already marked as "dying".
As a result, ib_umem_release() is never called for those leaves and their
MRs are leaked as well.
This flow can be hit when an application exits or is killed without calling
dereg_mr().
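For context, here is a condensed sketch of the pre-fix leaf-free path. It is
trimmed and reconstructed for illustration, not the verbatim source; helper
and field names (mr_leaf_free(), umem_odp->dying, umem_odp->work,
num_leaf_free) follow the mlx5 ODP code of that era:

	/* Called by mlx5_ib_free_implicit_mr() for every leaf of the implicit MR. */
	static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
				void *cookie)
	{
		struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;

		if (mr->parent != imr)
			return 0;

		ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
					    ib_umem_end(umem_odp));

		/* If ib_umem_notifier_release() already marked this leaf as
		 * "dying", the work item is never scheduled, so
		 * mr_leaf_free_action() -> ib_umem_release() never runs and
		 * the leaf MR is leaked.
		 */
		if (umem_odp->dying)
			return 0;

		WRITE_ONCE(umem_odp->dying, 1);
		atomic_inc(&imr->num_leaf_free);
		schedule_work(&umem_odp->work);	/* runs mr_leaf_free_action() */

		return 0;
	}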
This fatal scenario is reported by a WARN_ON() in
mlx5_ib_dealloc_ucontext(), since ibcontext->per_mm_list is not empty; the
call trace can be seen below.
Originally, the "dying" mark set in ib_umem_notifier_release() was
introduced to prevent pagefault_mr() from returning a success response once
this had happened. However, the completion mechanism already covers these
flows today, so the mark is no longer needed there. Even if a success
response were returned, the firmware would not find the pages and an error
would be returned on the following call, since a released mm causes
ib_umem_odp_map_dma_pages() to permanently fail mmget_not_zero().
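As a hedged sketch of that guard (condensed, with approximate variable
names; the real ib_umem_odp_map_dma_pages() has more setup around it), the
relevant check looks roughly like this:

	struct mm_struct *owning_mm = umem_odp->umem.owning_mm;

	/* After the process has exited, mm_users is zero and can never be
	 * raised again, so this check fails permanently and every later
	 * page fault on the MR returns an error regardless of "dying".
	 */
	if (!mmget_not_zero(owning_mm))
		return -EINVAL;

	/* ... pin and DMA-map the pages ... */

	mmput(owning_mm);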
Fix the above issue by dropping the "dying" mark from these flows. The
other flows that use "dying" still need it for their synchronization
purposes.
WARNING: CPU: 1 PID: 7218 at drivers/infiniband/hw/mlx5/main.c:2004 mlx5_ib_dealloc_ucontext+0x84/0x90 [mlx5_ib]
CPU: 1 PID: 7218 Comm: ibv_rc_pingpong Tainted: G E 5.2.0-rc6+ #13
Call Trace:
uverbs_destroy_ufile_hw+0xb5/0x120 [ib_uverbs]
ib_uverbs_close+0x1f/0x80 [ib_uverbs]
__fput+0xbe/0x250
task_work_run+0x88/0xa0
do_exit+0x2cb/0xc30
? __fput+0x14b/0x250
do_group_exit+0x39/0xb0
get_signal+0x191/0x920
? _raw_spin_unlock_bh+0xa/0x20
? inet_csk_accept+0x229/0x2f0
do_signal+0x36/0x5e0
? put_unused_fd+0x5b/0x70
? __sys_accept4+0x1a6/0x1e0
? inet_hash+0x35/0x40
? release_sock+0x43/0x90
? _raw_spin_unlock_bh+0xa/0x20
? inet_listen+0x9f/0x120
exit_to_usermode_loop+0x5c/0xc6
do_syscall_64+0x182/0x1b0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Fixes: 81713d3788 ("IB/mlx5: Add implicit MR support")
Link: https://lore.kernel.org/r/20190805083010.21777-1-leon@kernel.org
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
@@ -112,10 +112,6 @@ static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
 	 * prevent any further fault handling on this MR.
 	 */
 	ib_umem_notifier_start_account(umem_odp);
-	umem_odp->dying = 1;
-	/* Make sure that the fact the umem is dying is out before we release
-	 * all pending page faults. */
-	smp_wmb();
 	complete_all(&umem_odp->notifier_completion);
 	umem_odp->umem.context->invalidate_range(
 		umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp));

@@ -579,7 +579,6 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 			u32 flags)
 {
 	int npages = 0, current_seq, page_shift, ret, np;
-	bool implicit = false;
 	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
 	bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
 	bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;

@@ -594,7 +593,6 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 		if (IS_ERR(odp))
 			return PTR_ERR(odp);
 		mr = odp->private;
-		implicit = true;
 	} else {
 		odp = odp_mr;
 	}

@@ -682,19 +680,15 @@ next_mr:
 
 out:
 	if (ret == -EAGAIN) {
-		if (implicit || !odp->dying) {
-			unsigned long timeout =
-				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
-
-			if (!wait_for_completion_timeout(
-					&odp->notifier_completion,
-					timeout)) {
-				mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
-					     current_seq, odp->notifiers_seq, odp->notifiers_count);
-			}
-		} else {
-			/* The MR is being killed, kill the QP as well. */
-			ret = -EFAULT;
-		}
+		unsigned long timeout = msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
+
+		if (!wait_for_completion_timeout(&odp->notifier_completion,
+						 timeout)) {
+			mlx5_ib_warn(
+				dev,
+				"timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
+				current_seq, odp->notifiers_seq,
+				odp->notifiers_count);
+		}
 	}
 