2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2004 Topspin Communications. All rights reserved.
|
|
|
|
*
|
|
|
|
* This software is available to you under a choice of one of two
|
|
|
|
* licenses. You may choose to be licensed under the terms of the GNU
|
|
|
|
* General Public License (GPL) Version 2, available from the file
|
|
|
|
* COPYING in the main directory of this source tree, or the
|
|
|
|
* OpenIB.org BSD license below:
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or
|
|
|
|
* without modification, are permitted provided that the following
|
|
|
|
* conditions are met:
|
|
|
|
*
|
|
|
|
* - Redistributions of source code must retain the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer.
|
|
|
|
*
|
|
|
|
* - Redistributions in binary form must reproduce the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer in the documentation and/or other materials
|
|
|
|
* provided with the distribution.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
|
* SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _CORE_PRIV_H
|
|
|
|
#define _CORE_PRIV_H
|
|
|
|
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/spinlock.h>
|
2017-01-10 08:02:14 +08:00
|
|
|
#include <linux/cgroup_rdma.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-08-26 04:40:04 +08:00
|
|
|
#include <rdma/ib_verbs.h>
|
2017-06-09 01:37:47 +08:00
|
|
|
#include <rdma/opa_addr.h>
|
2017-05-19 20:48:54 +08:00
|
|
|
#include <rdma/ib_mad.h>
|
2018-01-28 17:17:20 +08:00
|
|
|
#include <rdma/restrack.h>
|
2017-05-19 20:48:54 +08:00
|
|
|
#include "mad_priv.h"
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2018-01-08 18:15:38 +08:00
|
|
|
/* Total number of ports combined across all struct ib_device instances */
|
|
|
|
#define RDMA_MAX_PORTS 1024
|
|
|
|
|
IB/core: Enforce PKey security on QPs
Add new LSM hooks to allocate and free security contexts and check for
permission to access a PKey.
Allocate and free a security context when creating and destroying a QP.
This context is used for controlling access to PKeys.
When a request is made to modify a QP that changes the port, PKey index,
or alternate path, check that the QP has permission for the PKey in the
PKey table index on the subnet prefix of the port. If the QP is shared
make sure all handles to the QP also have access.
Store which port and PKey index a QP is using. After the reset to init
transition the user can modify the port, PKey index and alternate path
independently. So port and PKey settings changes can be a merge of the
previous settings and the new ones.
In order to maintain access control if there is a PKey table or subnet
prefix change, keep a list of all QPs that are using each PKey index on
each port. If a change occurs, all QPs using that device and port must
have access enforced for the new cache settings.
These changes add a transaction to the QP modify process. Association
with the old port and PKey index must be maintained if the modify fails,
and must be removed if it succeeds. Association with the new port and
PKey index must be established prior to the modify and removed if the
modify fails.
1. When a QP is modified to a particular Port, PKey index or alternate
path insert that QP into the appropriate lists.
2. Check permission to access the new settings.
3. If step 2 grants access attempt to modify the QP.
4a. If steps 2 and 3 succeed remove any prior associations.
4b. If either fails remove the new setting associations.
If a PKey table or subnet prefix changes walk the list of QPs and
check that they have permission. If not send the QP to the error state
and raise a fatal error event. If it's a shared QP make sure all the
QPs that share the real_qp have permission as well. If the QP that
owns a security structure is denied access the security structure is
marked as such and the QP is added to an error_list. Once moving
the QP to error is complete the security structure mark is cleared.
Maintaining the lists correctly turns QP destroy into a transaction.
The hardware driver for the device frees the ib_qp structure, so while
the destroy is in progress the ib_qp pointer in the ib_qp_security
struct is undefined. When the destroy process begins the ib_qp_security
structure is marked as destroying. This prevents any action from being
taken on the QP pointer. After the QP is destroyed successfully it
could still be listed on an error_list; wait for it to be processed by that
flow before cleaning up the structure.
If the destroy fails the QPs port and PKey settings are reinserted into
the appropriate lists, the destroying flag is cleared, and access control
is enforced, in case there were any cache changes during the destroy
flow.
To keep the security changes isolated a new file is used to hold security
related functionality.
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Acked-by: Doug Ledford <dledford@redhat.com>
[PM: merge fixup in ib_verbs.h and uverbs_cmd.c]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-05-19 20:48:52 +08:00
|
|
|
struct pkey_index_qp_list {
|
|
|
|
struct list_head pkey_index_list;
|
|
|
|
u16 pkey_index;
|
|
|
|
/* Lock to hold while iterating the qp_list. */
|
|
|
|
spinlock_t qp_list_lock;
|
|
|
|
struct list_head qp_list;
|
|
|
|
};
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2015-12-23 20:56:55 +08:00
|
|
|
#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
|
|
|
|
int cma_configfs_init(void);
|
|
|
|
void cma_configfs_exit(void);
|
|
|
|
#else
|
|
|
|
/*
 * Stub selected when CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS is not enabled:
 * there is nothing to initialise, so it always reports success (0).
 */
static inline int cma_configfs_init(void)
{
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Stub selected when CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS is not enabled:
 * intentionally empty, there is nothing to tear down.
 */
static inline void cma_configfs_exit(void)
{
}
|
|
|
|
#endif
|
2015-12-23 20:56:54 +08:00
|
|
|
struct cma_device;
|
|
|
|
void cma_ref_dev(struct cma_device *cma_dev);
|
|
|
|
void cma_deref_dev(struct cma_device *cma_dev);
|
2015-12-23 20:56:55 +08:00
|
|
|
typedef bool (*cma_device_filter)(struct ib_device *, void *);
|
|
|
|
/*
 * Returns a cma_device selected with the help of @filter.
 * @filter: predicate of type cma_device_filter (takes an ib_device and a
 *          void pointer).
 * @cookie: presumably forwarded to @filter as its void pointer argument -
 *          confirm against the definition, which is not in this header.
 */
struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
					     void *cookie);
|
|
|
|
/*
 * Query the default GID type for @port of @cma_dev.  The int return value
 * is defined by the implementation, which is not part of this header.
 */
int cma_get_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port);
|
|
|
|
/*
 * Set @default_gid_type as the default GID type for @port of @cma_dev.
 * The int return value is defined by the implementation, which is not
 * part of this header.
 */
int cma_set_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port,
			     enum ib_gid_type default_gid_type);
|
2017-02-14 13:21:52 +08:00
|
|
|
int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port);
|
|
|
|
int cma_set_default_roce_tos(struct cma_device *a_dev, unsigned int port,
|
|
|
|
u8 default_roce_tos);
|
2015-12-23 20:56:55 +08:00
|
|
|
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
|
2015-12-23 20:56:54 +08:00
|
|
|
|
2010-05-07 08:03:25 +08:00
|
|
|
int ib_device_register_sysfs(struct ib_device *device,
|
|
|
|
int (*port_callback)(struct ib_device *,
|
|
|
|
u8, struct kobject *));
|
2005-04-17 06:20:36 +08:00
|
|
|
void ib_device_unregister_sysfs(struct ib_device *device);
|
|
|
|
|
IB/core: Add RoCE GID table management
RoCE GIDs are based on IP addresses configured on Ethernet net-devices
which relate to the RDMA (RoCE) device port.
Currently, each of the low-level drivers that support RoCE (ocrdma,
mlx4) manages its own RoCE port GID table. As there's nothing which is
essentially vendor specific, we generalize that, and enhance the RDMA
core GID cache to do this job.
In order to populate the GID table, we listen for events:
(a) netdev up/down/change_addr events - if a netdev is built onto
our RoCE device, we need to add/delete its IPs. This involves
adding all GIDs related to this ndev, add default GIDs, etc.
(b) inet events - add new GIDs (according to the IP addresses)
to the table.
For programming the port RoCE GID table, providers must implement
the add_gid and del_gid callbacks.
RoCE GID management requires us to state the associated net_device
alongside the GID. This information is necessary in order to manage
the GID table. For example, when a net_device is removed, its
associated GIDs need to be removed as well.
RoCE mandates generating a default GID for each port, based on the
related net-device's IPv6 link local. In contrast to the GID based on
the regular IPv6 link-local (as we generate GID per IP address),
the default GID is also available when the net device is down (in
order to support loopback).
Locking is done as follows:
The patch modifies the GID table code both for new RoCE drivers
implementing the add_gid/del_gid callbacks and for current RoCE and
IB drivers that do not. The flows for updating the table are
different, so the locking requirements are too.
While updating RoCE GID table, protection against multiple writers is
achieved via mutex_lock(&table->lock). Since writing to a table
requires us to find an entry (possibly a free entry) in the table and
then modify it, this mutex protects both the find_gid and write_gid
ensuring the atomicity of the action.
Each entry in the GID cache is protected by rwlock. In RoCE, writing
(usually results from netdev notifier) involves invoking the vendor's
add_gid and del_gid callbacks, which could sleep.
Therefore, an invalid flag is added for each entry. Updates for RoCE are
done via a workqueue, thus sleeping is permitted.
In IB, updates are done in write_lock_irq(&device->cache.lock), thus
write_gid isn't allowed to sleep and add_gid/del_gid are not called.
When passing net-device into/out-of the GID cache, the device
is always passed held (dev_hold).
The code uses a single work item for updating all RDMA devices,
following a netdev or inet notifier.
The patch moves the cache from being a client (which was incorrect,
as the cache is part of the IB infrastructure) to being explicitly
initialized/freed when a device is registered/removed.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2015-07-30 23:33:26 +08:00
|
|
|
/*
 * Callback type used by the RoCE netdev enumeration helpers declared in
 * this header: receives a device/port pair, a net_device and an opaque
 * cookie, and returns nothing.
 */
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
				     struct net_device *idev, void *cookie);
|
|
|
|
|
2018-08-14 15:36:21 +08:00
|
|
|
/*
 * Predicate type used by the RoCE netdev enumeration helpers declared in
 * this header: same arguments as roce_netdev_callback, but returns bool.
 */
typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port,
				   struct net_device *idev, void *cookie);
|
IB/core: Add RoCE GID table management
RoCE GIDs are based on IP addresses configured on Ethernet net-devices
which relate to the RDMA (RoCE) device port.
Currently, each of the low-level drivers that support RoCE (ocrdma,
mlx4) manages its own RoCE port GID table. As there's nothing which is
essentially vendor specific, we generalize that, and enhance the RDMA
core GID cache to do this job.
In order to populate the GID table, we listen for events:
(a) netdev up/down/change_addr events - if a netdev is built onto
our RoCE device, we need to add/delete its IPs. This involves
adding all GIDs related to this ndev, add default GIDs, etc.
(b) inet events - add new GIDs (according to the IP addresses)
to the table.
For programming the port RoCE GID table, providers must implement
the add_gid and del_gid callbacks.
RoCE GID management requires us to state the associated net_device
alongside the GID. This information is necessary in order to manage
the GID table. For example, when a net_device is removed, its
associated GIDs need to be removed as well.
RoCE mandates generating a default GID for each port, based on the
related net-device's IPv6 link local. In contrast to the GID based on
the regular IPv6 link-local (as we generate GID per IP address),
the default GID is also available when the net device is down (in
order to support loopback).
Locking is done as follows:
The patch modifies the GID table code both for new RoCE drivers
implementing the add_gid/del_gid callbacks and for current RoCE and
IB drivers that do not. The flows for updating the table are
different, so the locking requirements are too.
While updating RoCE GID table, protection against multiple writers is
achieved via mutex_lock(&table->lock). Since writing to a table
requires us to find an entry (possibly a free entry) in the table and
then modify it, this mutex protects both the find_gid and write_gid
ensuring the atomicity of the action.
Each entry in the GID cache is protected by rwlock. In RoCE, writing
(usually results from netdev notifier) involves invoking the vendor's
add_gid and del_gid callbacks, which could sleep.
Therefore, an invalid flag is added for each entry. Updates for RoCE are
done via a workqueue, thus sleeping is permitted.
In IB, updates are done in write_lock_irq(&device->cache.lock), thus
write_gid isn't allowed to sleep and add_gid/del_gid are not called.
When passing net-device into/out-of the GID cache, the device
is always passed held (dev_hold).
The code uses a single work item for updating all RDMA devices,
following a netdev or inet notifier.
The patch moves the cache from being a client (which was incorrect,
as the cache is part of the IB infrastructure) to being explicitly
initialized/freed when a device is registered/removed.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2015-07-30 23:33:26 +08:00
|
|
|
|
|
|
|
void ib_enum_roce_netdev(struct ib_device *ib_dev,
|
|
|
|
roce_netdev_filter filter,
|
|
|
|
void *filter_cookie,
|
|
|
|
roce_netdev_callback cb,
|
|
|
|
void *cookie);
|
|
|
|
void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
|
|
|
|
void *filter_cookie,
|
|
|
|
roce_netdev_callback cb,
|
|
|
|
void *cookie);
|
|
|
|
|
2017-06-19 19:04:56 +08:00
|
|
|
/*
 * Callback type accepted by ib_enum_all_devs(): receives a device, the
 * socket buffer and netlink callback state, and an unsigned index;
 * returns int.
 */
typedef int (*nldev_callback)(struct ib_device *device,
			      struct sk_buff *skb,
			      struct netlink_callback *cb,
			      unsigned int idx);
|
|
|
|
|
|
|
|
/*
 * Enumerate devices with @nldev_cb, passing @skb and @cb along.
 * NOTE(review): how the callback's return value and index are used is
 * defined by the implementation, which is not in this header.
 */
int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
		     struct netlink_callback *cb);
|
|
|
|
|
IB/core: Add RoCE GID table management
RoCE GIDs are based on IP addresses configured on Ethernet net-devices
which relate to the RDMA (RoCE) device port.
Currently, each of the low-level drivers that support RoCE (ocrdma,
mlx4) manages its own RoCE port GID table. As there's nothing which is
essentially vendor specific, we generalize that, and enhance the RDMA
core GID cache to do this job.
In order to populate the GID table, we listen for events:
(a) netdev up/down/change_addr events - if a netdev is built onto
our RoCE device, we need to add/delete its IPs. This involves
adding all GIDs related to this ndev, add default GIDs, etc.
(b) inet events - add new GIDs (according to the IP addresses)
to the table.
For programming the port RoCE GID table, providers must implement
the add_gid and del_gid callbacks.
RoCE GID management requires us to state the associated net_device
alongside the GID. This information is necessary in order to manage
the GID table. For example, when a net_device is removed, its
associated GIDs need to be removed as well.
RoCE mandates generating a default GID for each port, based on the
related net-device's IPv6 link local. In contrast to the GID based on
the regular IPv6 link-local (as we generate GID per IP address),
the default GID is also available when the net device is down (in
order to support loopback).
Locking is done as follows:
The patch modifies the GID table code both for new RoCE drivers
implementing the add_gid/del_gid callbacks and for current RoCE and
IB drivers that do not. The flows for updating the table are
different, so the locking requirements are too.
While updating RoCE GID table, protection against multiple writers is
achieved via mutex_lock(&table->lock). Since writing to a table
requires us to find an entry (possibly a free entry) in the table and
then modify it, this mutex protects both the find_gid and write_gid
ensuring the atomicity of the action.
Each entry in the GID cache is protected by rwlock. In RoCE, writing
(usually results from netdev notifier) involves invoking the vendor's
add_gid and del_gid callbacks, which could sleep.
Therefore, an invalid flag is added for each entry. Updates for RoCE are
done via a workqueue, thus sleeping is permitted.
In IB, updates are done in write_lock_irq(&device->cache.lock), thus
write_gid isn't allowed to sleep and add_gid/del_gid are not called.
When passing net-device into/out-of the GID cache, the device
is always passed held (dev_hold).
The code uses a single work item for updating all RDMA devices,
following a netdev or inet notifier.
The patch moves the cache from being a client (which was incorrect,
as the cache is part of the IB infrastructure) to being explicitly
initialized/freed when a device is registered/removed.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2015-07-30 23:33:26 +08:00
|
|
|
/*
 * Mode argument of ib_cache_gid_set_default_gid(): by the enumerator
 * names, selects between setting and deleting the default GID.
 */
enum ib_cache_gid_default_mode {
	IB_CACHE_GID_DEFAULT_MODE_SET,
	IB_CACHE_GID_DEFAULT_MODE_DELETE
};
|
|
|
|
|
2015-12-23 20:56:55 +08:00
|
|
|
int ib_cache_gid_parse_type_str(const char *buf);
|
|
|
|
|
2015-12-23 20:56:47 +08:00
|
|
|
const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
|
|
|
|
|
IB/core: Add RoCE GID table management
RoCE GIDs are based on IP addresses configured on Ethernet net-devices
which relate to the RDMA (RoCE) device port.
Currently, each of the low-level drivers that support RoCE (ocrdma,
mlx4) manages its own RoCE port GID table. As there's nothing which is
essentially vendor specific, we generalize that, and enhance the RDMA
core GID cache to do this job.
In order to populate the GID table, we listen for events:
(a) netdev up/down/change_addr events - if a netdev is built onto
our RoCE device, we need to add/delete its IPs. This involves
adding all GIDs related to this ndev, add default GIDs, etc.
(b) inet events - add new GIDs (according to the IP addresses)
to the table.
For programming the port RoCE GID table, providers must implement
the add_gid and del_gid callbacks.
RoCE GID management requires us to state the associated net_device
alongside the GID. This information is necessary in order to manage
the GID table. For example, when a net_device is removed, its
associated GIDs need to be removed as well.
RoCE mandates generating a default GID for each port, based on the
related net-device's IPv6 link local. In contrast to the GID based on
the regular IPv6 link-local (as we generate GID per IP address),
the default GID is also available when the net device is down (in
order to support loopback).
Locking is done as follows:
The patch modifies the GID table code both for new RoCE drivers
implementing the add_gid/del_gid callbacks and for current RoCE and
IB drivers that do not. The flows for updating the table are
different, so the locking requirements are too.
While updating RoCE GID table, protection against multiple writers is
achieved via mutex_lock(&table->lock). Since writing to a table
requires us to find an entry (possibly a free entry) in the table and
then modify it, this mutex protects both the find_gid and write_gid
ensuring the atomicity of the action.
Each entry in the GID cache is protected by rwlock. In RoCE, writing
(usually results from netdev notifier) involves invoking the vendor's
add_gid and del_gid callbacks, which could sleep.
Therefore, an invalid flag is added for each entry. Updates for RoCE are
done via a workqueue, thus sleeping is permitted.
In IB, updates are done in write_lock_irq(&device->cache.lock), thus
write_gid isn't allowed to sleep and add_gid/del_gid are not called.
When passing net-device into/out-of the GID cache, the device
is always passed held (dev_hold).
The code uses a single work item for updating all RDMA devices,
following a netdev or inet notifier.
The patch moves the cache from being a client (which was incorrect,
as the cache is part of the IB infrastructure) to being explicitly
initialized/freed when a device is registered/removed.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2015-07-30 23:33:26 +08:00
|
|
|
/*
 * Set or delete (per @mode) the default GID of @port on @ib_dev.
 * @ndev:          net_device associated with the port.
 * @gid_type_mask: unsigned long mask of GID types (presumably a bitmask
 *                 of enum ib_gid_type values - confirm).
 * @mode:          IB_CACHE_GID_DEFAULT_MODE_SET or _DELETE.
 * The definition is not part of this header.
 */
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
				  struct net_device *ndev,
				  unsigned long gid_type_mask,
				  enum ib_cache_gid_default_mode mode);
|
|
|
|
|
|
|
|
/*
 * Add @gid with attributes @attr to the GID cache of @port on @ib_dev.
 * The int return value is defined by the implementation, which is not
 * part of this header.
 */
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr);
|
|
|
|
|
|
|
|
/*
 * Remove @gid with attributes @attr from the GID cache of @port on
 * @ib_dev; mirrors the signature of ib_cache_gid_add().  The int return
 * value is defined by the implementation, which is not in this header.
 */
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr);
|
|
|
|
|
|
|
|
/*
 * Remove the GIDs of @port on @ib_dev that belong to @ndev (per the
 * function name; the definition is not part of this header).
 */
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
				     struct net_device *ndev);
|
|
|
|
|
|
|
|
int roce_gid_mgmt_init(void);
|
|
|
|
void roce_gid_mgmt_cleanup(void);
|
|
|
|
|
2015-12-23 20:56:47 +08:00
|
|
|
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
|
IB/core: Add RoCE GID table management
RoCE GIDs are based on IP addresses configured on Ethernet net-devices
which relate to the RDMA (RoCE) device port.
Currently, each of the low-level drivers that support RoCE (ocrdma,
mlx4) manages its own RoCE port GID table. As there's nothing which is
essentially vendor specific, we generalize that, and enhance the RDMA
core GID cache to do this job.
In order to populate the GID table, we listen for events:
(a) netdev up/down/change_addr events - if a netdev is built onto
our RoCE device, we need to add/delete its IPs. This involves
adding all GIDs related to this ndev, add default GIDs, etc.
(b) inet events - add new GIDs (according to the IP addresses)
to the table.
For programming the port RoCE GID table, providers must implement
the add_gid and del_gid callbacks.
RoCE GID management requires us to state the associated net_device
alongside the GID. This information is necessary in order to manage
the GID table. For example, when a net_device is removed, its
associated GIDs need to be removed as well.
RoCE mandates generating a default GID for each port, based on the
related net-device's IPv6 link local. In contrast to the GID based on
the regular IPv6 link-local (as we generate GID per IP address),
the default GID is also available when the net device is down (in
order to support loopback).
Locking is done as follows:
The patch modifies the GID table code both for new RoCE drivers
implementing the add_gid/del_gid callbacks and for current RoCE and
IB drivers that do not. The flows for updating the table are
different, so the locking requirements are too.
While updating RoCE GID table, protection against multiple writers is
achieved via mutex_lock(&table->lock). Since writing to a table
requires us to find an entry (possibly a free entry) in the table and
then modify it, this mutex protects both the find_gid and write_gid
ensuring the atomicity of the action.
Each entry in the GID cache is protected by rwlock. In RoCE, writing
(usually results from netdev notifier) involves invoking the vendor's
add_gid and del_gid callbacks, which could sleep.
Therefore, an invalid flag is added for each entry. Updates for RoCE are
done via a workqueue, thus sleeping is permitted.
In IB, updates are done in write_lock_irq(&device->cache.lock), thus
write_gid isn't allowed to sleep and add_gid/del_gid are not called.
When passing net-device into/out-of the GID cache, the device
is always passed held (dev_hold).
The code uses a single work item for updating all RDMA devices,
following a netdev or inet notifier.
The patch moves the cache from being a client (which was incorrect,
as the cache is part of the IB infrastructure) to being explicitly
initialized/freed when a device is registered/removed.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2015-07-30 23:33:26 +08:00
|
|
|
|
|
|
|
int ib_cache_setup_one(struct ib_device *device);
|
|
|
|
void ib_cache_cleanup_one(struct ib_device *device);
|
|
|
|
void ib_cache_release_one(struct ib_device *device);
|
|
|
|
|
2017-01-10 08:02:14 +08:00
|
|
|
#ifdef CONFIG_CGROUP_RDMA
|
|
|
|
int ib_device_register_rdmacg(struct ib_device *device);
|
|
|
|
void ib_device_unregister_rdmacg(struct ib_device *device);
|
|
|
|
|
|
|
|
/*
 * CONFIG_CGROUP_RDMA variant: try to charge one @resource_index resource
 * of @device to @cg_obj.  The int return value is defined by the
 * implementation, which is not part of this header.
 */
int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
			 struct ib_device *device,
			 enum rdmacg_resource_type resource_index);
|
|
|
|
|
|
|
|
/*
 * CONFIG_CGROUP_RDMA variant: counterpart of ib_rdmacg_try_charge() with
 * the same arguments and no return value.
 */
void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
			struct ib_device *device,
			enum rdmacg_resource_type resource_index);
|
|
|
|
#else
|
|
|
|
/*
 * Stub for builds without CONFIG_CGROUP_RDMA: @device is ignored and
 * success (0) is always returned.
 */
static inline int ib_device_register_rdmacg(struct ib_device *device)
{
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Stub for builds without CONFIG_CGROUP_RDMA: intentionally empty,
 * @device is ignored.
 */
static inline void ib_device_unregister_rdmacg(struct ib_device *device)
{
}
|
|
|
|
|
|
|
|
static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
|
|
|
|
struct ib_device *device,
|
|
|
|
enum rdmacg_resource_type resource_index)
|
|
|
|
{ return 0; }
|
|
|
|
|
|
|
|
static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
|
|
|
|
struct ib_device *device,
|
|
|
|
enum rdmacg_resource_type resource_index)
|
|
|
|
{ }
|
|
|
|
#endif
|
|
|
|
|
2015-12-23 20:56:52 +08:00
|
|
|
/*
 * Thin wrapper: returns the result of
 * netdev_has_upper_dev_all_rcu(@dev, @upper) unchanged.
 * NOTE(review): the _rcu suffix suggests the caller must be inside an RCU
 * read-side critical section - confirm against the netdev API.
 */
static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
					 struct net_device *upper)
{
	return netdev_has_upper_dev_all_rcu(dev, upper);
}
|
|
|
|
|
2016-05-19 22:12:31 +08:00
|
|
|
int addr_init(void);
|
|
|
|
void addr_cleanup(void);
|
|
|
|
|
2016-05-19 22:12:32 +08:00
|
|
|
int ib_mad_init(void);
|
|
|
|
void ib_mad_cleanup(void);
|
|
|
|
|
2016-05-19 22:12:33 +08:00
|
|
|
int ib_sa_init(void);
|
|
|
|
void ib_sa_cleanup(void);
|
|
|
|
|
2017-06-05 15:20:11 +08:00
|
|
|
int rdma_nl_init(void);
|
|
|
|
void rdma_nl_exit(void);
|
2017-05-14 20:49:57 +08:00
|
|
|
|
2016-05-19 22:12:35 +08:00
|
|
|
/*
 * Netlink message handler (by its name, for path-resolve responses).
 * Takes the received @skb, its message header @nlh and the extended-ack
 * context @extack; the definition is not part of this header.
 */
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
			      struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack);
|
2016-05-19 22:12:35 +08:00
|
|
|
/*
 * Netlink message handler (by its name, for set-timeout requests).
 * Takes the received @skb, its message header @nlh and the extended-ack
 * context @extack; the definition is not part of this header.
 */
int ib_nl_handle_set_timeout(struct sk_buff *skb,
			     struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack);
|
2016-05-19 22:12:36 +08:00
|
|
|
/*
 * Netlink message handler (by its name, for IP-resolve responses).
 * Takes the received @skb, its message header @nlh and the extended-ack
 * context @extack; the definition is not part of this header.
 */
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
			     struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack);
|
2016-05-19 22:12:35 +08:00
|
|
|
|
2017-05-19 20:48:51 +08:00
|
|
|
int ib_get_cached_subnet_prefix(struct ib_device *device,
|
|
|
|
u8 port_num,
|
|
|
|
u64 *sn_pfx);
|
IB/core: Enforce PKey security on QPs
Add new LSM hooks to allocate and free security contexts and check for
permission to access a PKey.
Allocate and free a security context when creating and destroying a QP.
This context is used for controlling access to PKeys.
When a request is made to modify a QP that changes the port, PKey index,
or alternate path, check that the QP has permission for the PKey in the
PKey table index on the subnet prefix of the port. If the QP is shared
make sure all handles to the QP also have access.
Store which port and PKey index a QP is using. After the reset to init
transition the user can modify the port, PKey index and alternate path
independently. So port and PKey settings changes can be a merge of the
previous settings and the new ones.
In order to maintain access control if there is a PKey table or subnet
prefix change, keep a list of all QPs that are using each PKey index on
each port. If a change occurs, all QPs using that device and port must
have access enforced for the new cache settings.
These changes add a transaction to the QP modify process. Association
with the old port and PKey index must be maintained if the modify fails,
and must be removed if it succeeds. Association with the new port and
PKey index must be established prior to the modify and removed if the
modify fails.
1. When a QP is modified to a particular Port, PKey index or alternate
path insert that QP into the appropriate lists.
2. Check permission to access the new settings.
3. If step 2 grants access attempt to modify the QP.
4a. If steps 2 and 3 succeed remove any prior associations.
4b. If either fails remove the new setting associations.
If a PKey table or subnet prefix changes walk the list of QPs and
check that they have permission. If not send the QP to the error state
and raise a fatal error event. If it's a shared QP make sure all the
QPs that share the real_qp have permission as well. If the QP that
owns a security structure is denied access the security structure is
marked as such and the QP is added to an error_list. Once moving
the QP to error is complete the security structure mark is cleared.
Maintaining the lists correctly turns QP destroy into a transaction.
The hardware driver for the device frees the ib_qp structure, so while
the destroy is in progress the ib_qp pointer in the ib_qp_security
struct is undefined. When the destroy process begins the ib_qp_security
structure is marked as destroying. This prevents any action from being
taken on the QP pointer. After the QP is destroyed successfully it
could still be listed on an error_list; wait for it to be processed by that
flow before cleaning up the structure.
If the destroy fails the QP's port and PKey settings are reinserted into
the appropriate lists, the destroying flag is cleared, and access control
is enforced, in case there were any cache changes during the destroy
flow.
To keep the security changes isolated a new file is used to hold security
related functionality.
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Acked-by: Doug Ledford <dledford@redhat.com>
[PM: merge fixup in ib_verbs.h and uverbs_cmd.c]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-05-19 20:48:52 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_SECURITY_INFINIBAND
|
|
|
|
void ib_security_destroy_port_pkey_list(struct ib_device *device);
|
|
|
|
|
|
|
|
void ib_security_cache_change(struct ib_device *device,
|
|
|
|
u8 port_num,
|
|
|
|
u64 subnet_prefix);
|
|
|
|
|
|
|
|
int ib_security_modify_qp(struct ib_qp *qp,
|
|
|
|
struct ib_qp_attr *qp_attr,
|
|
|
|
int qp_attr_mask,
|
|
|
|
struct ib_udata *udata);
|
|
|
|
|
|
|
|
int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev);
|
|
|
|
void ib_destroy_qp_security_begin(struct ib_qp_security *sec);
|
|
|
|
void ib_destroy_qp_security_abort(struct ib_qp_security *sec);
|
|
|
|
void ib_destroy_qp_security_end(struct ib_qp_security *sec);
|
|
|
|
int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev);
|
|
|
|
void ib_close_shared_qp_security(struct ib_qp_security *sec);
|
2017-05-19 20:48:54 +08:00
|
|
|
int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
|
|
|
|
enum ib_qp_type qp_type);
|
|
|
|
void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent);
|
|
|
|
int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index);
|
IB/core: Enforce PKey security on QPs
Add new LSM hooks to allocate and free security contexts and check for
permission to access a PKey.
Allocate and free a security context when creating and destroying a QP.
This context is used for controlling access to PKeys.
When a request is made to modify a QP that changes the port, PKey index,
or alternate path, check that the QP has permission for the PKey in the
PKey table index on the subnet prefix of the port. If the QP is shared
make sure all handles to the QP also have access.
Store which port and PKey index a QP is using. After the reset to init
transition the user can modify the port, PKey index and alternate path
independently. So port and PKey settings changes can be a merge of the
previous settings and the new ones.
In order to maintain access control if there is a PKey table or subnet
prefix change, keep a list of all QPs that are using each PKey index on
each port. If a change occurs, all QPs using that device and port must
have access enforced for the new cache settings.
These changes add a transaction to the QP modify process. Association
with the old port and PKey index must be maintained if the modify fails,
and must be removed if it succeeds. Association with the new port and
PKey index must be established prior to the modify and removed if the
modify fails.
1. When a QP is modified to a particular Port, PKey index or alternate
path insert that QP into the appropriate lists.
2. Check permission to access the new settings.
3. If step 2 grants access attempt to modify the QP.
4a. If steps 2 and 3 succeed remove any prior associations.
4b. If either fails remove the new setting associations.
If a PKey table or subnet prefix changes walk the list of QPs and
check that they have permission. If not send the QP to the error state
and raise a fatal error event. If it's a shared QP make sure all the
QPs that share the real_qp have permission as well. If the QP that
owns a security structure is denied access the security structure is
marked as such and the QP is added to an error_list. Once moving
the QP to error is complete the security structure mark is cleared.
Maintaining the lists correctly turns QP destroy into a transaction.
The hardware driver for the device frees the ib_qp structure, so while
the destroy is in progress the ib_qp pointer in the ib_qp_security
struct is undefined. When the destroy process begins the ib_qp_security
structure is marked as destroying. This prevents any action from being
taken on the QP pointer. After the QP is destroyed successfully it
could still be listed on an error_list; wait for it to be processed by that
flow before cleaning up the structure.
If the destroy fails the QP's port and PKey settings are reinserted into
the appropriate lists, the destroying flag is cleared, and access control
is enforced, in case there were any cache changes during the destroy
flow.
To keep the security changes isolated a new file is used to hold security
related functionality.
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Acked-by: Doug Ledford <dledford@redhat.com>
[PM: merge fixup in ib_verbs.h and uverbs_cmd.c]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-05-19 20:48:52 +08:00
|
|
|
#else
|
|
|
|
/*
 * Stub used when CONFIG_SECURITY_INFINIBAND is not set; intentionally
 * does nothing.
 */
static inline void ib_security_destroy_port_pkey_list(struct ib_device *device)
{
}
|
|
|
|
|
|
|
|
static inline void ib_security_cache_change(struct ib_device *device,
|
|
|
|
u8 port_num,
|
|
|
|
u64 subnet_prefix)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int ib_security_modify_qp(struct ib_qp *qp,
|
|
|
|
struct ib_qp_attr *qp_attr,
|
|
|
|
int qp_attr_mask,
|
|
|
|
struct ib_udata *udata)
|
|
|
|
{
|
|
|
|
return qp->device->modify_qp(qp->real_qp,
|
|
|
|
qp_attr,
|
|
|
|
qp_attr_mask,
|
|
|
|
udata);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Stub used when CONFIG_SECURITY_INFINIBAND is not set; ignores its
 * arguments and always returns 0.
 */
static inline int ib_create_qp_security(struct ib_qp *qp,
					struct ib_device *dev)
{
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Stub used when CONFIG_SECURITY_INFINIBAND is not set; intentionally
 * does nothing.
 */
static inline void ib_destroy_qp_security_begin(struct ib_qp_security *sec)
{
}
|
|
|
|
|
|
|
|
/*
 * Stub used when CONFIG_SECURITY_INFINIBAND is not set; intentionally
 * does nothing.
 */
static inline void ib_destroy_qp_security_abort(struct ib_qp_security *sec)
{
}
|
|
|
|
|
|
|
|
/*
 * Stub used when CONFIG_SECURITY_INFINIBAND is not set; intentionally
 * does nothing.
 */
static inline void ib_destroy_qp_security_end(struct ib_qp_security *sec)
{
}
|
|
|
|
|
|
|
|
/*
 * Stub used when CONFIG_SECURITY_INFINIBAND is not set; ignores its
 * arguments and always returns 0.
 */
static inline int ib_open_shared_qp_security(struct ib_qp *qp,
					     struct ib_device *dev)
{
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Stub used when CONFIG_SECURITY_INFINIBAND is not set; intentionally
 * does nothing.
 */
static inline void ib_close_shared_qp_security(struct ib_qp_security *sec)
{
}
|
2017-05-19 20:48:54 +08:00
|
|
|
|
|
|
|
static inline int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
|
|
|
|
enum ib_qp_type qp_type)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Stub used when CONFIG_SECURITY_INFINIBAND is not set; intentionally
 * does nothing.
 */
static inline void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
{
}
|
|
|
|
|
|
|
|
static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
|
|
|
|
u16 pkey_index)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
IB/core: Enforce PKey security on QPs
Add new LSM hooks to allocate and free security contexts and check for
permission to access a PKey.
Allocate and free a security context when creating and destroying a QP.
This context is used for controlling access to PKeys.
When a request is made to modify a QP that changes the port, PKey index,
or alternate path, check that the QP has permission for the PKey in the
PKey table index on the subnet prefix of the port. If the QP is shared
make sure all handles to the QP also have access.
Store which port and PKey index a QP is using. After the reset to init
transition the user can modify the port, PKey index and alternate path
independently. So port and PKey settings changes can be a merge of the
previous settings and the new ones.
In order to maintain access control if there is a PKey table or subnet
prefix change, keep a list of all QPs that are using each PKey index on
each port. If a change occurs, all QPs using that device and port must
have access enforced for the new cache settings.
These changes add a transaction to the QP modify process. Association
with the old port and PKey index must be maintained if the modify fails,
and must be removed if it succeeds. Association with the new port and
PKey index must be established prior to the modify and removed if the
modify fails.
1. When a QP is modified to a particular Port, PKey index or alternate
path insert that QP into the appropriate lists.
2. Check permission to access the new settings.
3. If step 2 grants access attempt to modify the QP.
4a. If steps 2 and 3 succeed remove any prior associations.
4b. If either fails remove the new setting associations.
If a PKey table or subnet prefix changes walk the list of QPs and
check that they have permission. If not send the QP to the error state
and raise a fatal error event. If it's a shared QP make sure all the
QPs that share the real_qp have permission as well. If the QP that
owns a security structure is denied access the security structure is
marked as such and the QP is added to an error_list. Once moving
the QP to error is complete the security structure mark is cleared.
Maintaining the lists correctly turns QP destroy into a transaction.
The hardware driver for the device frees the ib_qp structure, so while
the destroy is in progress the ib_qp pointer in the ib_qp_security
struct is undefined. When the destroy process begins the ib_qp_security
structure is marked as destroying. This prevents any action from being
taken on the QP pointer. After the QP is destroyed successfully it
could still be listed on an error_list; wait for it to be processed by that
flow before cleaning up the structure.
If the destroy fails the QP's port and PKey settings are reinserted into
the appropriate lists, the destroying flag is cleared, and access control
is enforced, in case there were any cache changes during the destroy
flow.
To keep the security changes isolated a new file is used to hold security
related functionality.
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Acked-by: Doug Ledford <dledford@redhat.com>
[PM: merge fixup in ib_verbs.h and uverbs_cmd.c]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2017-05-19 20:48:52 +08:00
|
|
|
#endif
|
2017-06-18 19:39:59 +08:00
|
|
|
|
2018-01-01 19:07:15 +08:00
|
|
|
/*
 * NOTE(review): presumably returns the ib_device identified by @ifindex;
 * the definition is not in this header, so the behaviour when no device
 * matches and any reference-counting rules should be confirmed there.
 */
struct ib_device *ib_device_get_by_index(u32 ifindex);
|
2017-06-18 19:39:59 +08:00
|
|
|
/* RDMA device netlink: init/exit entry points, defined outside this header. */
void nldev_init(void);
void nldev_exit(void);
|
2018-01-28 17:17:21 +08:00
|
|
|
|
|
|
|
static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
|
|
|
|
struct ib_pd *pd,
|
|
|
|
struct ib_qp_init_attr *attr,
|
2018-02-15 10:43:36 +08:00
|
|
|
struct ib_udata *udata,
|
|
|
|
struct ib_uobject *uobj)
|
2018-01-28 17:17:21 +08:00
|
|
|
{
|
|
|
|
struct ib_qp *qp;
|
|
|
|
|
2018-02-14 20:38:43 +08:00
|
|
|
if (!dev->create_qp)
|
|
|
|
return ERR_PTR(-EOPNOTSUPP);
|
|
|
|
|
2018-01-28 17:17:21 +08:00
|
|
|
qp = dev->create_qp(pd, attr, udata);
|
|
|
|
if (IS_ERR(qp))
|
|
|
|
return qp;
|
|
|
|
|
|
|
|
qp->device = dev;
|
|
|
|
qp->pd = pd;
|
2018-02-15 10:43:36 +08:00
|
|
|
qp->uobject = uobj;
|
2018-01-28 17:17:21 +08:00
|
|
|
/*
|
|
|
|
* We don't track XRC QPs for now, because they don't have PD
|
|
|
|
* and more importantly they are created internaly by driver,
|
|
|
|
* see mlx5 create_dev_resources() as an example.
|
|
|
|
*/
|
|
|
|
if (attr->qp_type < IB_QPT_XRC_INI) {
|
|
|
|
qp->res.type = RDMA_RESTRACK_QP;
|
|
|
|
rdma_restrack_add(&qp->res);
|
|
|
|
} else
|
|
|
|
qp->res.valid = false;
|
|
|
|
|
|
|
|
return qp;
|
|
|
|
}
|
2018-03-13 22:06:16 +08:00
|
|
|
|
|
|
|
/* Forward declaration: only a pointer to the type is needed below. */
struct rdma_dev_addr;

int rdma_resolve_ip_route(struct sockaddr *src_addr,
			  const struct sockaddr *dst_addr,
			  struct rdma_dev_addr *addr);
|
|
|
|
|
2018-03-13 22:06:23 +08:00
|
|
|
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
|
|
|
|
const union ib_gid *dgid,
|
|
|
|
u8 *dmac, const struct net_device *ndev,
|
|
|
|
int *hoplimit);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif /* _CORE_PRIV_H */
|