Merge branch 'openvswitch-allow-specifying-ifindex-of-new-interfaces'

Andrey Zhadchenko says:

====================
openvswitch: allow specifying ifindex of new interfaces

CRIU currently does not support checkpoint/restore of OVS configurations,
but there have been several requests for it. For example,
https://github.com/lxc/lxc/issues/2909

The main problem is the ifindexes of newly created interfaces: we really
need to preserve them across restore, but the current openvswitch API does
not allow specifying an ifindex. Most of the time we can simply create an
interface via generic netlink requests and plug it into ovs, but datapaths
(generally, anything of OVS_VPORT_TYPE_INTERNAL) can only be created via
openvswitch requests, which do not support selecting an ifindex.

This patchset makes that possible.
For new datapaths I decided to use dp_ifindex in the header as the ifindex,
because it controls the ifindex for other requests too.
For internal vports I reused OVS_VPORT_ATTR_IFINDEX.

The only concern I have is that dp_ifindex was previously unused for
OVS_DP_CMD_NEW requests, so some software may not set it to zero. However,
we have been running this patch at Virtuozzo for 2 years and have not
encountered that problem. I am not sure whether it would be worth adding a
new ovs_datapath_attr instead.
====================

Link: https://lore.kernel.org/r/20220825020450.664147-1-andrey.zhadchenko@virtuozzo.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 7dea06dbb0 (Jakub Kicinski, 2022-08-26 19:31:23 -07:00)
4 changed files with 24 additions and 3 deletions
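The series itself is kernel-only; as a rough illustration of the intended
userspace side (not part of the patch), the sketch below builds an
OVS_DP_CMD_NEW request carrying the new OVS_DP_ATTR_IFINDEX attribute. It
uses libmnl for brevity; the helper name, the pre-resolved family_id
parameter (normally obtained via CTRL_CMD_GETFAMILY for the "ovs_datapath"
generic netlink family), and the zero upcall PID are illustrative
assumptions, not something the series prescribes.

	#include <stdint.h>
	#include <libmnl/libmnl.h>
	#include <linux/genetlink.h>
	#include <linux/openvswitch.h>

	/* Build an OVS_DP_CMD_NEW request asking for a specific ifindex.
	 * family_id must be resolved beforehand via the genl controller;
	 * that lookup is omitted to keep the sketch short. */
	static struct nlmsghdr *build_dp_new(char *buf, uint16_t family_id,
					     uint32_t seq, const char *dp_name,
					     uint32_t wanted_ifindex)
	{
		struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
		struct genlmsghdr *genl;
		struct ovs_header *ovs;

		nlh->nlmsg_type	 = family_id;
		nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
		nlh->nlmsg_seq	 = seq;

		genl = mnl_nlmsg_put_extra_header(nlh, sizeof(*genl));
		genl->cmd     = OVS_DP_CMD_NEW;
		genl->version = OVS_DATAPATH_VERSION;

		/* Fixed ovs_header; dp_ifindex is not consulted for NEW. */
		ovs = mnl_nlmsg_put_extra_header(nlh, sizeof(*ovs));
		ovs->dp_ifindex = 0;

		mnl_attr_put_strz(nlh, OVS_DP_ATTR_NAME, dp_name);
		mnl_attr_put_u32(nlh, OVS_DP_ATTR_UPCALL_PID, 0);
		/* New with this series: request a fixed ifindex for the
		 * datapath's local netdev. */
		mnl_attr_put_u32(nlh, OVS_DP_ATTR_IFINDEX, wanted_ifindex);
		return nlh;
	}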

include/uapi/linux/openvswitch.h

@@ -76,6 +76,8 @@ enum ovs_datapath_cmd {
  * datapath. Always present in notifications.
  * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
  * datapath. Always present in notifications.
+ * @OVS_DP_ATTR_IFINDEX: Interface index for a new datapath netdev. Only
+ * valid for %OVS_DP_CMD_NEW requests.
  *
  * These attributes follow the &struct ovs_header within the Generic Netlink
  * payload for %OVS_DP_* commands.
@@ -92,6 +94,7 @@ enum ovs_datapath_attr {
 	OVS_DP_ATTR_PER_CPU_PIDS,	/* Netlink PIDS to receive upcalls in
					 * per-cpu dispatch mode
					 */
+	OVS_DP_ATTR_IFINDEX,
 	__OVS_DP_ATTR_MAX
 };

net/openvswitch/datapath.c

@@ -1523,6 +1523,7 @@ static size_t ovs_dp_cmd_msg_size(void)
 	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
 	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
 	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
+	msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */

 	return msgsize;
 }
@@ -1534,7 +1535,8 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
 	struct ovs_header *ovs_header;
 	struct ovs_dp_stats dp_stats;
 	struct ovs_dp_megaflow_stats dp_megaflow_stats;
-	int err;
+	struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
+	int err, pids_len;

 	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
 				 flags, cmd);
@@ -1564,6 +1566,12 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
 			ovs_flow_tbl_masks_cache_size(&dp->table)))
 		goto nla_put_failure;

+	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
+		pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32);
+		if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids))
+			goto nla_put_failure;
+	}
+
 	genlmsg_end(skb, ovs_header);
 	return 0;
@@ -1787,6 +1795,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	parms.dp = dp;
 	parms.port_no = OVSP_LOCAL;
 	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
+	parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
+		? nla_get_u32(a[OVS_DP_ATTR_IFINDEX]) : 0;

 	/* So far only local changes have been made, now need the lock. */
 	ovs_lock();
@@ -2004,6 +2014,7 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
 	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
 	[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
 		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
+	[OVS_DP_ATTR_IFINDEX] = {.type = NLA_U32 },
 };

 static const struct genl_small_ops dp_datapath_genl_ops[] = {
@@ -2207,7 +2218,10 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
 	    !a[OVS_VPORT_ATTR_UPCALL_PID])
 		return -EINVAL;
-	if (a[OVS_VPORT_ATTR_IFINDEX])
+
+	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
+	if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
 		return -EOPNOTSUPP;

 	port_no = a[OVS_VPORT_ATTR_PORT_NO]
@@ -2244,11 +2258,12 @@ restart:
 	}

 	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
-	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
 	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
 	parms.dp = dp;
 	parms.port_no = port_no;
 	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
+	parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
+		? nla_get_u32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;

 	vport = new_vport(&parms);
 	err = PTR_ERR(vport);
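For symmetry with the datapath sketch earlier, a hypothetical userspace
counterpart for the vport side might look like this (again libmnl, again
with family_id assumed to be pre-resolved, this time for the "ovs_vport"
family). Note the constraint enforced in the hunk above:
OVS_VPORT_ATTR_IFINDEX is only honored for OVS_VPORT_TYPE_INTERNAL and is
rejected with -EOPNOTSUPP for every other vport type.

	#include <stdint.h>
	#include <libmnl/libmnl.h>
	#include <linux/genetlink.h>
	#include <linux/openvswitch.h>

	/* Hypothetical helper: OVS_VPORT_CMD_NEW for an internal vport
	 * with a requested ifindex. dp_ifindex selects the datapath. */
	static struct nlmsghdr *build_vport_new(char *buf, uint16_t family_id,
						uint32_t seq, int dp_ifindex,
						const char *name,
						uint32_t wanted_ifindex)
	{
		struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
		struct genlmsghdr *genl;
		struct ovs_header *ovs;

		nlh->nlmsg_type	 = family_id;
		nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
		nlh->nlmsg_seq	 = seq;

		genl = mnl_nlmsg_put_extra_header(nlh, sizeof(*genl));
		genl->cmd     = OVS_VPORT_CMD_NEW;
		genl->version = OVS_VPORT_VERSION;

		ovs = mnl_nlmsg_put_extra_header(nlh, sizeof(*ovs));
		ovs->dp_ifindex = dp_ifindex;

		mnl_attr_put_strz(nlh, OVS_VPORT_ATTR_NAME, name);
		mnl_attr_put_u32(nlh, OVS_VPORT_ATTR_TYPE, OVS_VPORT_TYPE_INTERNAL);
		mnl_attr_put_u32(nlh, OVS_VPORT_ATTR_UPCALL_PID, 0);
		/* Only valid for internal vports; else -EOPNOTSUPP. */
		mnl_attr_put_u32(nlh, OVS_VPORT_ATTR_IFINDEX, wanted_ifindex);
		return nlh;
	}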

net/openvswitch/vport-internal_dev.c

@@ -147,6 +147,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
 	}

 	dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
+	dev->ifindex = parms->desired_ifindex;
 	internal_dev = internal_dev_priv(vport->dev);
 	internal_dev->vport = vport;
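A single assignment before register_netdevice() suffices here because the
core registration path has long honored a caller-preset, nonzero ifindex
(behavior originally added to rtnetlink for CRIU-style restore). Roughly,
paraphrased from memory with error handling condensed:

	/* Sketch of the relevant logic in register_netdevice()
	 * (net/core/dev.c): keep a nonzero ifindex chosen by the
	 * caller, unless that index is already in use. */
	if (!dev->ifindex)
		dev->ifindex = dev_new_index(net);	/* allocate fresh */
	else if (__dev_get_by_index(net, dev->ifindex))
		goto err_uninit;			/* -EBUSY: taken */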

net/openvswitch/vport.h

@@ -90,12 +90,14 @@ struct vport {
  * @type: New vport's type.
  * @options: %OVS_VPORT_ATTR_OPTIONS attribute from Netlink message, %NULL if
  * none was supplied.
+ * @desired_ifindex: New vport's ifindex.
  * @dp: New vport's datapath.
  * @port_no: New vport's port number.
  */
 struct vport_parms {
 	const char *name;
 	enum ovs_vport_type type;
+	int desired_ifindex;
 	struct nlattr *options;

 	/* For ovs_vport_alloc(). */