RDMA/core: Add RDMA_NLDEV_CMD_NEWLINK/DELLINK support

Add support for new LINK messages to allow adding and deleting RDMA
interfaces.  This will be used initially for soft RDMA drivers, which
create device instances dynamically when the admin specifies a netdev
to use.  The rdma_rxe module will be the first user of these
messages.

The design is modeled after rtnetlink's RTM_NEWLINK/RTM_DELLINK: RDMA drivers
register with the RDMA core if they provide link add/delete functions.  Each driver
registers with a unique "type" string that is used to dispatch messages
coming from user space.  A new RDMA_NLDEV_ATTR is defined for the "type"
string.  User mode will pass 3 attributes in a NEWLINK message:
RDMA_NLDEV_ATTR_DEV_NAME for the desired rdma device name to be created,
RDMA_NLDEV_ATTR_LINK_TYPE for the "type" of link being added, and
RDMA_NLDEV_ATTR_NDEV_NAME for the net_device interface to use for this
link.  The DELLINK message will contain the RDMA_NLDEV_ATTR_DEV_INDEX of
the device to delete.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
Steve Wise 2019-02-15 11:03:53 -08:00 committed by Jason Gunthorpe
parent 5bb3c1e9d4
commit 3856ec4b93
4 changed files with 144 additions and 2 deletions

View File

@ -33,6 +33,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/pid.h> #include <linux/pid.h>
#include <linux/pid_namespace.h> #include <linux/pid_namespace.h>
#include <linux/mutex.h>
#include <net/netlink.h> #include <net/netlink.h>
#include <rdma/rdma_cm.h> #include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h> #include <rdma/rdma_netlink.h>
@ -113,6 +114,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
.len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
}; };
static int put_driver_name_print_type(struct sk_buff *msg, const char *name, static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@ -1200,6 +1203,117 @@ RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
/* List of registered struct rdma_link_ops, linked through their ->list member. */
static LIST_HEAD(link_ops);
/*
 * Guards link_ops: rdma_link_register()/rdma_link_unregister() take it for
 * write, nldev_newlink() takes it for read around lookup and ->newlink().
 */
static DECLARE_RWSEM(link_ops_rwsem);
static const struct rdma_link_ops *link_ops_get(const char *type)
{
const struct rdma_link_ops *ops;
list_for_each_entry(ops, &link_ops, list) {
if (!strcmp(ops->type, type))
goto out;
}
ops = NULL;
out:
return ops;
}
/**
 * rdma_link_register() - add a driver's link ops to the registered list.
 * @ops: link ops to register; @ops->type is the key used for lookup.
 *
 * If ops with the same type string are already registered, a one-time
 * warning is emitted and @ops is NOT added to the list.
 */
void rdma_link_register(struct rdma_link_ops *ops)
{
	down_write(&link_ops_rwsem);
	/*
	 * WARN_ONCE() takes the condition as its first argument and the
	 * format string after it; it evaluates to the condition, so it can
	 * drive the branch directly.
	 */
	if (WARN_ONCE(link_ops_get(ops->type),
		      "Duplicate rdma_link_ops! %s\n", ops->type))
		goto out;
	list_add(&ops->list, &link_ops);
out:
	up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_register);
/**
 * rdma_link_unregister() - remove link ops from the registered list.
 * @ops: link ops to remove.
 *
 * Unlinks @ops->list while holding link_ops_rwsem for write.
 * NOTE(review): presumably @ops must have been successfully added by
 * rdma_link_register() beforehand - confirm with callers.
 */
void rdma_link_unregister(struct rdma_link_ops *ops)
{
	down_write(&link_ops_rwsem);

	list_del(&ops->list);

	up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_unregister);
/*
 * nldev_newlink() - handle an RDMA_NLDEV_CMD_NEWLINK request.
 * @skb:    received netlink message buffer (not used here).
 * @nlh:    netlink header of the request; attributes are parsed from it.
 * @extack: extended ack passed through to nlmsg_parse().
 *
 * The request must carry RDMA_NLDEV_ATTR_DEV_NAME (name for the new rdma
 * device), RDMA_NLDEV_ATTR_LINK_TYPE (selects the registered
 * rdma_link_ops) and RDMA_NLDEV_ATTR_NDEV_NAME (net device looked up in
 * init_net).  The matching ops' ->newlink() is called with the requested
 * name and the net device.
 *
 * Return: -EINVAL if parsing fails, an attribute is missing, the device
 * name is empty or contains '%', or no link ops match the type; -ENODEV
 * if the net device does not exist; otherwise the value returned by
 * ->newlink().
 */
static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	char ibdev_name[IB_DEVICE_NAME_MAX];
	const struct rdma_link_ops *ops;
	char ndev_name[IFNAMSIZ];
	struct net_device *ndev;
	char type[IFNAMSIZ];
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
		return -EINVAL;

	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
		    sizeof(ibdev_name));
	/*
	 * Refuse unusable names up front: an empty string is not a valid
	 * device name, and names containing '%' are not accepted.
	 */
	if (!ibdev_name[0] || strchr(ibdev_name, '%'))
		return -EINVAL;

	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
		    sizeof(ndev_name));

	/* Paired with the dev_put() on the way out. */
	ndev = dev_get_by_name(&init_net, ndev_name);
	if (!ndev)
		return -ENODEV;

	down_read(&link_ops_rwsem);
	ops = link_ops_get(type);
#ifdef CONFIG_MODULES
	if (!ops) {
		/*
		 * No ops registered for this type yet: drop the lock, ask
		 * for the "rdma-link-<type>" module, then look again.
		 */
		up_read(&link_ops_rwsem);
		request_module("rdma-link-%s", type);
		down_read(&link_ops_rwsem);
		ops = link_ops_get(type);
	}
#endif
	/* ->newlink() runs with link_ops_rwsem held for read. */
	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
	up_read(&link_ops_rwsem);
	dev_put(ndev);

	return err;
}
/*
 * nldev_dellink() - handle an RDMA_NLDEV_CMD_DELLINK request.
 * @skb:    received netlink message buffer (not used here).
 * @nlh:    netlink header of the request; attributes are parsed from it.
 * @extack: extended ack passed through to nlmsg_parse().
 *
 * Looks up the ib_device named by RDMA_NLDEV_ATTR_DEV_INDEX and, if its
 * capability flags include IB_DEVICE_ALLOW_USER_UNREG, unregisters it.
 *
 * Return: 0 once the unregister call has been made; -EINVAL if parsing
 * fails, the index attribute is missing, no device has that index, or
 * the device does not allow user-triggered unregistration.
 */
static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *nla[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *ibdev;
	u32 dev_index;
	int rc;

	rc = nlmsg_parse(nlh, 0, nla, RDMA_NLDEV_ATTR_MAX - 1,
			 nldev_policy, extack);
	if (rc)
		return -EINVAL;
	if (!nla[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	dev_index = nla_get_u32(nla[RDMA_NLDEV_ATTR_DEV_INDEX]);
	ibdev = ib_device_get_by_index(dev_index);
	if (!ibdev)
		return -EINVAL;

	if (ibdev->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG) {
		/* Unregisters and drops the reference taken by the lookup. */
		ib_unregister_device_and_put(ibdev);
		return 0;
	}

	/* Not removable from user space: just release the lookup reference. */
	ib_device_put(ibdev);
	return -EINVAL;
}
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = { [RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit, .doit = nldev_get_doit,
@ -1209,6 +1323,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_set_doit, .doit = nldev_set_doit,
.flags = RDMA_NL_ADMIN_PERM, .flags = RDMA_NL_ADMIN_PERM,
}, },
[RDMA_NLDEV_CMD_NEWLINK] = {
.doit = nldev_newlink,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NLDEV_CMD_DELLINK] = {
.doit = nldev_dellink,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NLDEV_CMD_PORT_GET] = { [RDMA_NLDEV_CMD_PORT_GET] = {
.doit = nldev_port_get_doit, .doit = nldev_port_get_doit,
.dump = nldev_port_get_dumpit, .dump = nldev_port_get_dumpit,

View File

@ -238,6 +238,7 @@ enum ib_device_cap_flags {
IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35), IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35),
/* The device supports padding incoming writes to cacheline. */ /* The device supports padding incoming writes to cacheline. */
IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36),
IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37),
}; };
enum ib_signature_prot_cap { enum ib_signature_prot_cap {
@ -2622,6 +2623,8 @@ struct ib_device {
refcount_t refcount; refcount_t refcount;
struct completion unreg_completion; struct completion unreg_completion;
struct work_struct unregistration_work; struct work_struct unregistration_work;
const struct rdma_link_ops *link_ops;
}; };
struct ib_client { struct ib_client {

View File

@ -99,4 +99,15 @@ int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags);
* Returns true on success or false if no listeners. * Returns true on success or false if no listeners.
*/ */
bool rdma_nl_chk_listeners(unsigned int group); bool rdma_nl_chk_listeners(unsigned int group);
/*
 * struct rdma_link_ops - per-driver operations for creating rdma links.
 * Registered with rdma_link_register() and removed with
 * rdma_link_unregister().
 */
struct rdma_link_ops {
/* Linkage used by the core to keep registered ops on a list. */
struct list_head list;
/* Link type string identifying these ops. */
const char *type;
/*
 * Called to create a link: receives the requested rdma device name and
 * the net device to use.  The int result is handed back to the caller.
 */
int (*newlink)(const char *ibdev_name, struct net_device *ndev);
};
/* Register @ops so link requests can be dispatched by ops->type. */
void rdma_link_register(struct rdma_link_ops *ops);
/* Remove previously registered @ops. */
void rdma_link_unregister(struct rdma_link_ops *ops);
/*
 * Declare a module alias of the form "rdma-link-<type>"; @type must be a
 * string literal since it is concatenated with the prefix.
 */
#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type)
#endif /* _RDMA_NETLINK_H */ #endif /* _RDMA_NETLINK_H */

View File

@ -255,9 +255,11 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_GET, /* can dump */ RDMA_NLDEV_CMD_GET, /* can dump */
RDMA_NLDEV_CMD_SET, RDMA_NLDEV_CMD_SET,
/* 3 - 4 are free to use */ RDMA_NLDEV_CMD_NEWLINK,
RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */ RDMA_NLDEV_CMD_DELLINK,
RDMA_NLDEV_CMD_PORT_GET, /* can dump */
/* 6 - 8 are free to use */ /* 6 - 8 are free to use */
@ -465,6 +467,10 @@ enum rdma_nldev_attr {
RDMA_NLDEV_ATTR_RES_MRN, /* u32 */ RDMA_NLDEV_ATTR_RES_MRN, /* u32 */
RDMA_NLDEV_ATTR_RES_CM_IDN, /* u32 */ RDMA_NLDEV_ATTR_RES_CM_IDN, /* u32 */
RDMA_NLDEV_ATTR_RES_CTXN, /* u32 */ RDMA_NLDEV_ATTR_RES_CTXN, /* u32 */
/*
* Identifies the rdma driver. eg: "rxe" or "siw"
*/
RDMA_NLDEV_ATTR_LINK_TYPE, /* string */
/* /*
* Always the end * Always the end