net/mlx5: Delay events till mlx5 interface's add complete for pci resume
mlx5_ib_add is called during mlx5_pci_resume after a pci error.
Before mlx5_ib_add completes, there are multiple events that trigger
the function mlx5_ib_event. This causes a kernel panic because
mlx5_ib_event accesses uninitialized resources.
The fix is to extend Erez Shitrit's patch 97834eba7c19
("net/mlx5: Delay events till ib registration ends") to cover
the pci resume code path.
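
For reference, the accumulate-and-replay scheme that the patch extends can be
summarized in plain C. The sketch below is only an illustration: it is
single-threaded userspace code, the names loosely mirror
delayed_event_start()/delayed_event_release()/mlx5_core_event(), and the
locking and real mlx5 structures are omitted.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* One queued event. */
struct delayed_event {
	int event;
	struct delayed_event *next;
};

/* Per-device state: whether to queue, plus the queue itself. */
struct dev_state {
	bool is_accum_events;
	bool add_succeeded;		/* stands in for dev_ctx->context */
	struct delayed_event *head, *tail;
};

/* Start accumulating: events from now on are queued, not delivered. */
static void delayed_event_start(struct dev_state *s)
{
	s->is_accum_events = true;
}

/* The real handler; in the driver this is dev_ctx->intf->event(). */
static void deliver(int event)
{
	printf("delivering event %d\n", event);
}

/* Event entry point: queue while accumulating, deliver otherwise. */
static void core_event(struct dev_state *s, int event)
{
	if (s->is_accum_events) {
		struct delayed_event *de = malloc(sizeof(*de));

		if (!de)
			return;
		de->event = event;
		de->next = NULL;
		if (s->tail)
			s->tail->next = de;
		else
			s->head = de;
		s->tail = de;
		return;
	}
	deliver(event);
}

/* Stop accumulating and drain the queue: replay if the add succeeded,
 * otherwise just free the queued events.
 */
static void delayed_event_release(struct dev_state *s)
{
	struct delayed_event *de = s->head;

	s->is_accum_events = false;
	s->head = s->tail = NULL;

	while (de) {
		struct delayed_event *next = de->next;

		if (s->add_succeeded)
			deliver(de->event);
		free(de);
		de = next;
	}
}

int main(void)
{
	struct dev_state s = { 0 };

	delayed_event_start(&s);
	core_event(&s, 1);		/* arrives while ->add() runs: queued */
	core_event(&s, 2);
	s.add_succeeded = true;		/* ->add() finished successfully */
	delayed_event_release(&s);	/* queued events replayed here */
	core_event(&s, 3);		/* delivered immediately from now on */
	return 0;
}
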
Trace:
mlx5_core 0001:01:00.6: mlx5_pci_resume was called
mlx5_core 0001:01:00.6: firmware version: 16.20.1011
mlx5_core 0001:01:00.6: mlx5_attach_interface:164:(pid 779):
mlx5_ib_event:2996:(pid 34777): warning: event on port 1
mlx5_ib_event:2996:(pid 34782): warning: event on port 1
Unable to handle kernel paging request for data at address 0x0001c104
Faulting instruction address: 0xd000000008f411fc
Oops: Kernel access of bad area, sig: 11 [#1]
...
...
Call Trace:
[c000000fff77bb70] [d000000008f4119c] mlx5_ib_event+0x64/0x470 [mlx5_ib] (unreliable)
[c000000fff77bc60] [d000000008e67130] mlx5_core_event+0xb8/0x210 [mlx5_core]
[c000000fff77bd10] [d000000008e4bd00] mlx5_eq_int+0x528/0x860[mlx5_core]
Fixes: 97834eba7c19 ("net/mlx5: Delay events till ib registration ends")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
@@ -77,35 +77,41 @@ static void add_delayed_event(struct mlx5_priv *priv,
 	list_add_tail(&delayed_event->list, &priv->waiting_events_list);
 }
 
-static void fire_delayed_event_locked(struct mlx5_device_context *dev_ctx,
-				      struct mlx5_core_dev *dev,
-				      struct mlx5_priv *priv)
+static void delayed_event_release(struct mlx5_device_context *dev_ctx,
+				  struct mlx5_priv *priv)
 {
+	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
 	struct mlx5_delayed_event *de;
 	struct mlx5_delayed_event *n;
+	struct list_head temp;
 
-	/* stop delaying events */
+	INIT_LIST_HEAD(&temp);
+
+	spin_lock_irq(&priv->ctx_lock);
 	priv->is_accum_events = false;
+	list_splice_init(&priv->waiting_events_list, &temp);
+	if (!dev_ctx->context)
+		goto out;
 
-	/* fire all accumulated events before new event comes */
-	list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) {
+	list_for_each_entry_safe(de, n, &priv->waiting_events_list, list)
 		dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
+
+out:
+	spin_unlock_irq(&priv->ctx_lock);
+
+	list_for_each_entry_safe(de, n, &temp, list) {
 		list_del(&de->list);
 		kfree(de);
 	}
 }
 
-static void cleanup_delayed_evets(struct mlx5_priv *priv)
+/* accumulating events that can come after mlx5_ib calls to
+ * ib_register_device, till adding that interface to the events list.
+ */
+static void delayed_event_start(struct mlx5_priv *priv)
 {
-	struct mlx5_delayed_event *de;
-	struct mlx5_delayed_event *n;
-
 	spin_lock_irq(&priv->ctx_lock);
-	priv->is_accum_events = false;
-	list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) {
-		list_del(&de->list);
-		kfree(de);
-	}
+	priv->is_accum_events = true;
 	spin_unlock_irq(&priv->ctx_lock);
 }
 
@@ -122,11 +128,8 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 		return;
 
 	dev_ctx->intf = intf;
-	/* accumulating events that can come after mlx5_ib calls to
-	 * ib_register_device, till adding that interface to the events list.
-	 */
 
-	priv->is_accum_events = true;
+	delayed_event_start(priv);
 
 	dev_ctx->context = intf->add(dev);
 	set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
@@ -137,8 +140,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 		spin_lock_irq(&priv->ctx_lock);
 		list_add_tail(&dev_ctx->list, &priv->ctx_list);
 
-		fire_delayed_event_locked(dev_ctx, dev, priv);
-
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 		if (dev_ctx->intf->pfault) {
 			if (priv->pfault) {
@@ -150,11 +151,12 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 		}
 #endif
 		spin_unlock_irq(&priv->ctx_lock);
-	} else {
-		kfree(dev_ctx);
-		/* delete all accumulated events */
-		cleanup_delayed_evets(priv);
 	}
+
+	delayed_event_release(dev_ctx, priv);
+
+	if (!dev_ctx->context)
+		kfree(dev_ctx);
 }
 
 static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
@@ -205,17 +207,21 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv
 	if (!dev_ctx)
 		return;
 
+	delayed_event_start(priv);
 	if (intf->attach) {
 		if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
-			return;
+			goto out;
 		intf->attach(dev, dev_ctx->context);
 		set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
 	} else {
 		if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
-			return;
+			goto out;
 		dev_ctx->context = intf->add(dev);
 		set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
 	}
+
+out:
+	delayed_event_release(dev_ctx, priv);
 }
 
 void mlx5_attach_device(struct mlx5_core_dev *dev)
@@ -414,8 +420,14 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
 	if (priv->is_accum_events)
 		add_delayed_event(priv, dev, event, param);
 
+	/* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
+	 * still in priv->ctx_list. In this case, only notify the dev_ctx if its
+	 * ADDED or ATTACHED bit are set.
+	 */
 	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-		if (dev_ctx->intf->event)
+		if (dev_ctx->intf->event &&
+		    (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
+		     test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
 			dev_ctx->intf->event(dev, dev_ctx->context, event, param);
 
 	spin_unlock_irqrestore(&priv->ctx_lock, flags);