net/mlx5: Initialize flow steering during driver probe
Currently, software objects of flow steering are created and destroyed
during reload flow. In case a device is unloaded, the following error
is printed during grace period:
mlx5_core 0000:00:0b.0: mlx5_fw_fatal_reporter_err_work:690:(pid 95):
Driver is in error state. Unloading
As a solution to fix use-after-free bugs, where we try to access
these objects, when reading the value of flow_steering_mode devlink
param[1], let's split flow steering creation and destruction into two
routines:
* init and cleanup: memory, cache, and pools allocation/free.
* create and destroy: namespaces initialization and cleanup.
While at it, re-order the cleanup function to mirror the init function.
[1]
Kasan trace:
[ 385.119849 ] BUG: KASAN: use-after-free in mlx5_devlink_fs_mode_get+0x3b/0xa0
[ 385.119849 ] Read of size 4 at addr ffff888104b79308 by task bash/291
[ 385.119849 ]
[ 385.119849 ] CPU: 1 PID: 291 Comm: bash Not tainted 5.17.0-rc1+ #2
[ 385.119849 ] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014
[ 385.119849 ] Call Trace:
[ 385.119849 ] <TASK>
[ 385.119849 ] dump_stack_lvl+0x6e/0x91
[ 385.119849 ] print_address_description.constprop.0+0x1f/0x160
[ 385.119849 ] ? mlx5_devlink_fs_mode_get+0x3b/0xa0
[ 385.119849 ] ? mlx5_devlink_fs_mode_get+0x3b/0xa0
[ 385.119849 ] kasan_report.cold+0x83/0xdf
[ 385.119849 ] ? devlink_param_notify+0x20/0x190
[ 385.119849 ] ? mlx5_devlink_fs_mode_get+0x3b/0xa0
[ 385.119849 ] mlx5_devlink_fs_mode_get+0x3b/0xa0
[ 385.119849 ] devlink_nl_param_fill+0x18a/0xa50
[ 385.119849 ] ? _raw_spin_lock_irqsave+0x8d/0xe0
[ 385.119849 ] ? devlink_flash_update_timeout_notify+0xf0/0xf0
[ 385.119849 ] ? __wake_up_common+0x4b/0x1e0
[ 385.119849 ] ? preempt_count_sub+0x14/0xc0
[ 385.119849 ] ? _raw_spin_unlock_irqrestore+0x28/0x40
[ 385.119849 ] ? __wake_up_common_lock+0xe3/0x140
[ 385.119849 ] ? __wake_up_common+0x1e0/0x1e0
[ 385.119849 ] ? __sanitizer_cov_trace_const_cmp8+0x27/0x80
[ 385.119849 ] ? __rcu_read_unlock+0x48/0x70
[ 385.119849 ] ? kasan_unpoison+0x23/0x50
[ 385.119849 ] ? __kasan_slab_alloc+0x2c/0x80
[ 385.119849 ] ? memset+0x20/0x40
[ 385.119849 ] ? __sanitizer_cov_trace_const_cmp4+0x25/0x80
[ 385.119849 ] devlink_param_notify+0xce/0x190
[ 385.119849 ] devlink_unregister+0x92/0x2b0
[ 385.119849 ] remove_one+0x41/0x140
[ 385.119849 ] pci_device_remove+0x68/0x140
[ 385.119849 ] ? pcibios_free_irq+0x10/0x10
[ 385.119849 ] __device_release_driver+0x294/0x3f0
[ 385.119849 ] device_driver_detach+0x82/0x130
[ 385.119849 ] unbind_store+0x193/0x1b0
[ 385.119849 ] ? subsys_interface_unregister+0x270/0x270
[ 385.119849 ] drv_attr_store+0x4e/0x70
[ 385.119849 ] ? drv_attr_show+0x60/0x60
[ 385.119849 ] sysfs_kf_write+0xa7/0xc0
[ 385.119849 ] kernfs_fop_write_iter+0x23a/0x2f0
[ 385.119849 ] ? sysfs_kf_bin_read+0x160/0x160
[ 385.119849 ] new_sync_write+0x311/0x430
[ 385.119849 ] ? new_sync_read+0x480/0x480
[ 385.119849 ] ? _raw_spin_lock+0x87/0xe0
[ 385.119849 ] ? __sanitizer_cov_trace_cmp4+0x25/0x80
[ 385.119849 ] ? security_file_permission+0x94/0xa0
[ 385.119849 ] vfs_write+0x4c7/0x590
[ 385.119849 ] ksys_write+0xf6/0x1e0
[ 385.119849 ] ? __x64_sys_read+0x50/0x50
[ 385.119849 ] ? fpregs_assert_state_consistent+0x99/0xa0
[ 385.119849 ] do_syscall_64+0x3d/0x90
[ 385.119849 ] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 385.119849 ] RIP: 0033:0x7fc36ef38504
[ 385.119849 ] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f
80 00 00 00 00 48 8d 05 f9 61 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f
05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53
[ 385.119849 ] RSP: 002b:00007ffde0ff3d08 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 385.119849 ] RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007fc36ef38504
[ 385.119849 ] RDX: 000000000000000c RSI: 00007fc370521040 RDI: 0000000000000001
[ 385.119849 ] RBP: 00007fc370521040 R08: 00007fc36f00b8c0 R09: 00007fc36ee4b740
[ 385.119849 ] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc36f00a760
[ 385.119849 ] R13: 000000000000000c R14: 00007fc36f005760 R15: 000000000000000c
[ 385.119849 ] </TASK>
[ 385.119849 ]
[ 385.119849 ] Allocated by task 65:
[ 385.119849 ] kasan_save_stack+0x1e/0x40
[ 385.119849 ] __kasan_kmalloc+0x81/0xa0
[ 385.119849 ] mlx5_init_fs+0x11b/0x1160
[ 385.119849 ] mlx5_load+0x13c/0x220
[ 385.119849 ] mlx5_load_one+0xda/0x160
[ 385.119849 ] mlx5_recover_device+0xb8/0x100
[ 385.119849 ] mlx5_health_try_recover+0x2f9/0x3a1
[ 385.119849 ] devlink_health_reporter_recover+0x75/0x100
[ 385.119849 ] devlink_health_report+0x26c/0x4b0
[ 385.275909 ] mlx5_fw_fatal_reporter_err_work+0x11e/0x1b0
[ 385.275909 ] process_one_work+0x520/0x970
[ 385.275909 ] worker_thread+0x378/0x950
[ 385.275909 ] kthread+0x1bb/0x200
[ 385.275909 ] ret_from_fork+0x1f/0x30
[ 385.275909 ]
[ 385.275909 ] Freed by task 65:
[ 385.275909 ] kasan_save_stack+0x1e/0x40
[ 385.275909 ] kasan_set_track+0x21/0x30
[ 385.275909 ] kasan_set_free_info+0x20/0x30
[ 385.275909 ] __kasan_slab_free+0xfc/0x140
[ 385.275909 ] kfree+0xa5/0x3b0
[ 385.275909 ] mlx5_unload+0x2e/0xb0
[ 385.275909 ] mlx5_unload_one+0x86/0xb0
[ 385.275909 ] mlx5_fw_fatal_reporter_err_work.cold+0xca/0xcf
[ 385.275909 ] process_one_work+0x520/0x970
[ 385.275909 ] worker_thread+0x378/0x950
[ 385.275909 ] kthread+0x1bb/0x200
[ 385.275909 ] ret_from_fork+0x1f/0x30
[ 385.275909 ]
[ 385.275909 ] The buggy address belongs to the object at ffff888104b79300
[ 385.275909 ] which belongs to the cache kmalloc-128 of size 128
[ 385.275909 ] The buggy address is located 8 bytes inside of
[ 385.275909 ] 128-byte region [ffff888104b79300, ffff888104b79380)
[ 385.275909 ] The buggy address belongs to the page:
[ 385.275909 ] page:00000000de44dd39 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x104b78
[ 385.275909 ] head:00000000de44dd39 order:1 compound_mapcount:0
[ 385.275909 ] flags: 0x8000000000010200(slab|head|zone=2)
[ 385.275909 ] raw: 8000000000010200 0000000000000000 dead000000000122 ffff8881000428c0
[ 385.275909 ] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
[ 385.275909 ] page dumped because: kasan: bad access detected
[ 385.275909 ]
[ 385.275909 ] Memory state around the buggy address:
[ 385.275909 ] ffff888104b79200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc
[ 385.275909 ] ffff888104b79280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 385.275909 ] >ffff888104b79300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 385.275909 ] ^
[ 385.275909 ] ffff888104b79380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 385.275909 ] ffff888104b79400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 385.275909 ]]
Fixes: e890acd5ff
("net/mlx5: Add devlink flow_steering_mode parameter")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
This commit is contained in:
parent
2c5fc6cd26
commit
b33886971d
|
@ -2663,28 +2663,6 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
|
|||
clean_tree(&root_ns->ns.node);
|
||||
}
|
||||
|
||||
void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
|
||||
{
|
||||
struct mlx5_flow_steering *steering = dev->priv.steering;
|
||||
|
||||
cleanup_root_ns(steering->root_ns);
|
||||
cleanup_root_ns(steering->fdb_root_ns);
|
||||
steering->fdb_root_ns = NULL;
|
||||
kfree(steering->fdb_sub_ns);
|
||||
steering->fdb_sub_ns = NULL;
|
||||
cleanup_root_ns(steering->port_sel_root_ns);
|
||||
cleanup_root_ns(steering->sniffer_rx_root_ns);
|
||||
cleanup_root_ns(steering->sniffer_tx_root_ns);
|
||||
cleanup_root_ns(steering->rdma_rx_root_ns);
|
||||
cleanup_root_ns(steering->rdma_tx_root_ns);
|
||||
cleanup_root_ns(steering->egress_root_ns);
|
||||
mlx5_cleanup_fc_stats(dev);
|
||||
kmem_cache_destroy(steering->ftes_cache);
|
||||
kmem_cache_destroy(steering->fgs_cache);
|
||||
mlx5_ft_pool_destroy(dev);
|
||||
kfree(steering);
|
||||
}
|
||||
|
||||
static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
|
||||
{
|
||||
struct fs_prio *prio;
|
||||
|
@ -3086,43 +3064,28 @@ cleanup:
|
|||
return err;
|
||||
}
|
||||
|
||||
int mlx5_init_fs(struct mlx5_core_dev *dev)
|
||||
void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
|
||||
{
|
||||
struct mlx5_flow_steering *steering;
|
||||
struct mlx5_flow_steering *steering = dev->priv.steering;
|
||||
|
||||
cleanup_root_ns(steering->root_ns);
|
||||
cleanup_root_ns(steering->fdb_root_ns);
|
||||
steering->fdb_root_ns = NULL;
|
||||
kfree(steering->fdb_sub_ns);
|
||||
steering->fdb_sub_ns = NULL;
|
||||
cleanup_root_ns(steering->port_sel_root_ns);
|
||||
cleanup_root_ns(steering->sniffer_rx_root_ns);
|
||||
cleanup_root_ns(steering->sniffer_tx_root_ns);
|
||||
cleanup_root_ns(steering->rdma_rx_root_ns);
|
||||
cleanup_root_ns(steering->rdma_tx_root_ns);
|
||||
cleanup_root_ns(steering->egress_root_ns);
|
||||
}
|
||||
|
||||
int mlx5_fs_core_init(struct mlx5_core_dev *dev)
|
||||
{
|
||||
struct mlx5_flow_steering *steering = dev->priv.steering;
|
||||
int err = 0;
|
||||
|
||||
err = mlx5_init_fc_stats(dev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5_ft_pool_init(dev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
steering = kzalloc(sizeof(*steering), GFP_KERNEL);
|
||||
if (!steering) {
|
||||
err = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
steering->dev = dev;
|
||||
dev->priv.steering = steering;
|
||||
|
||||
if (mlx5_fs_dr_is_supported(dev))
|
||||
steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
|
||||
else
|
||||
steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
|
||||
|
||||
steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
|
||||
sizeof(struct mlx5_flow_group), 0,
|
||||
0, NULL);
|
||||
steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
|
||||
0, NULL);
|
||||
if (!steering->ftes_cache || !steering->fgs_cache) {
|
||||
err = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
|
||||
(MLX5_CAP_GEN(dev, nic_flow_table))) ||
|
||||
((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
|
||||
|
@ -3180,8 +3143,64 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
|
|||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
mlx5_cleanup_fs(dev);
|
||||
mlx5_fs_core_cleanup(dev);
|
||||
return err;
|
||||
}
|
||||
|
||||
void mlx5_fs_core_free(struct mlx5_core_dev *dev)
|
||||
{
|
||||
struct mlx5_flow_steering *steering = dev->priv.steering;
|
||||
|
||||
kmem_cache_destroy(steering->ftes_cache);
|
||||
kmem_cache_destroy(steering->fgs_cache);
|
||||
kfree(steering);
|
||||
mlx5_ft_pool_destroy(dev);
|
||||
mlx5_cleanup_fc_stats(dev);
|
||||
}
|
||||
|
||||
int mlx5_fs_core_alloc(struct mlx5_core_dev *dev)
|
||||
{
|
||||
struct mlx5_flow_steering *steering;
|
||||
int err = 0;
|
||||
|
||||
err = mlx5_init_fc_stats(dev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5_ft_pool_init(dev);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
steering = kzalloc(sizeof(*steering), GFP_KERNEL);
|
||||
if (!steering) {
|
||||
err = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
steering->dev = dev;
|
||||
dev->priv.steering = steering;
|
||||
|
||||
if (mlx5_fs_dr_is_supported(dev))
|
||||
steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
|
||||
else
|
||||
steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
|
||||
|
||||
steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
|
||||
sizeof(struct mlx5_flow_group), 0,
|
||||
0, NULL);
|
||||
steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
|
||||
0, NULL);
|
||||
if (!steering->ftes_cache || !steering->fgs_cache) {
|
||||
err = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
mlx5_fs_core_free(dev);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
|
@ -298,8 +298,10 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
|
|||
int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
|
||||
enum mlx5_flow_steering_mode mode);
|
||||
|
||||
int mlx5_init_fs(struct mlx5_core_dev *dev);
|
||||
void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
|
||||
int mlx5_fs_core_alloc(struct mlx5_core_dev *dev);
|
||||
void mlx5_fs_core_free(struct mlx5_core_dev *dev);
|
||||
int mlx5_fs_core_init(struct mlx5_core_dev *dev);
|
||||
void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev);
|
||||
|
||||
int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports);
|
||||
void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
|
||||
|
|
|
@ -938,6 +938,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
|
|||
goto err_sf_table_cleanup;
|
||||
}
|
||||
|
||||
err = mlx5_fs_core_alloc(dev);
|
||||
if (err) {
|
||||
mlx5_core_err(dev, "Failed to alloc flow steering\n");
|
||||
goto err_fs;
|
||||
}
|
||||
|
||||
dev->dm = mlx5_dm_create(dev);
|
||||
if (IS_ERR(dev->dm))
|
||||
mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
|
||||
|
@ -948,6 +954,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
|
|||
|
||||
return 0;
|
||||
|
||||
err_fs:
|
||||
mlx5_sf_table_cleanup(dev);
|
||||
err_sf_table_cleanup:
|
||||
mlx5_sf_hw_table_cleanup(dev);
|
||||
err_sf_hw_table_cleanup:
|
||||
|
@ -985,6 +993,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
|
|||
mlx5_hv_vhca_destroy(dev->hv_vhca);
|
||||
mlx5_fw_tracer_destroy(dev->tracer);
|
||||
mlx5_dm_cleanup(dev);
|
||||
mlx5_fs_core_free(dev);
|
||||
mlx5_sf_table_cleanup(dev);
|
||||
mlx5_sf_hw_table_cleanup(dev);
|
||||
mlx5_vhca_event_cleanup(dev);
|
||||
|
@ -1191,7 +1200,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
|
|||
goto err_tls_start;
|
||||
}
|
||||
|
||||
err = mlx5_init_fs(dev);
|
||||
err = mlx5_fs_core_init(dev);
|
||||
if (err) {
|
||||
mlx5_core_err(dev, "Failed to init flow steering\n");
|
||||
goto err_fs;
|
||||
|
@ -1236,7 +1245,7 @@ err_ec:
|
|||
err_vhca:
|
||||
mlx5_vhca_event_stop(dev);
|
||||
err_set_hca:
|
||||
mlx5_cleanup_fs(dev);
|
||||
mlx5_fs_core_cleanup(dev);
|
||||
err_fs:
|
||||
mlx5_accel_tls_cleanup(dev);
|
||||
err_tls_start:
|
||||
|
@ -1265,7 +1274,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
|
|||
mlx5_ec_cleanup(dev);
|
||||
mlx5_sf_hw_table_destroy(dev);
|
||||
mlx5_vhca_event_stop(dev);
|
||||
mlx5_cleanup_fs(dev);
|
||||
mlx5_fs_core_cleanup(dev);
|
||||
mlx5_accel_ipsec_cleanup(dev);
|
||||
mlx5_accel_tls_cleanup(dev);
|
||||
mlx5_fpga_device_stop(dev);
|
||||
|
|
Loading…
Reference in New Issue