netfilter: ipset: Introduction of new commands and protocol version 7

Two new commands (IPSET_CMD_GET_BYNAME, IPSET_CMD_GET_BYINDEX) are
introduced. The new commands make it possible to eliminate the getsockopt
operation (in the iptables set/SET match/target) and thus use only netlink
communication between userspace and kernel for ipset. With the new
protocol version, userspace can know exactly which functionality is
supported by the running kernel.

Both the kernel and userspace are fully backward compatible.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
This commit is contained in:
Jozsef Kadlecsik 2018-10-27 15:07:40 +02:00
parent 29edbc3ebd
commit 23c42a403a
3 changed files with 160 additions and 25 deletions

View File

@ -303,11 +303,11 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
/* Netlink CB args */ /* Netlink CB args */
enum { enum {
IPSET_CB_NET = 0, /* net namespace */ IPSET_CB_NET = 0, /* net namespace */
IPSET_CB_PROTO, /* ipset protocol */
IPSET_CB_DUMP, /* dump single set/all sets */ IPSET_CB_DUMP, /* dump single set/all sets */
IPSET_CB_INDEX, /* set index */ IPSET_CB_INDEX, /* set index */
IPSET_CB_PRIVATE, /* set private data */ IPSET_CB_PRIVATE, /* set private data */
IPSET_CB_ARG0, /* type specific */ IPSET_CB_ARG0, /* type specific */
IPSET_CB_ARG1,
}; };
/* register and unregister set references */ /* register and unregister set references */

View File

@ -13,8 +13,9 @@
#include <linux/types.h> #include <linux/types.h>
/* The protocol version */ /* The protocol versions */
#define IPSET_PROTOCOL 6 #define IPSET_PROTOCOL 7
#define IPSET_PROTOCOL_MIN 6
/* The max length of strings including NUL: set and type identifiers */ /* The max length of strings including NUL: set and type identifiers */
#define IPSET_MAXNAMELEN 32 #define IPSET_MAXNAMELEN 32
@ -38,17 +39,19 @@ enum ipset_cmd {
IPSET_CMD_TEST, /* 11: Test an element in a set */ IPSET_CMD_TEST, /* 11: Test an element in a set */
IPSET_CMD_HEADER, /* 12: Get set header data only */ IPSET_CMD_HEADER, /* 12: Get set header data only */
IPSET_CMD_TYPE, /* 13: Get set type */ IPSET_CMD_TYPE, /* 13: Get set type */
IPSET_CMD_GET_BYNAME, /* 14: Get set index by name */
IPSET_CMD_GET_BYINDEX, /* 15: Get set name by index */
IPSET_MSG_MAX, /* Netlink message commands */ IPSET_MSG_MAX, /* Netlink message commands */
/* Commands in userspace: */ /* Commands in userspace: */
IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */ IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 16: Enter restore mode */
IPSET_CMD_HELP, /* 15: Get help */ IPSET_CMD_HELP, /* 17: Get help */
IPSET_CMD_VERSION, /* 16: Get program version */ IPSET_CMD_VERSION, /* 18: Get program version */
IPSET_CMD_QUIT, /* 17: Quit from interactive mode */ IPSET_CMD_QUIT, /* 19: Quit from interactive mode */
IPSET_CMD_MAX, IPSET_CMD_MAX,
IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */ IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 20: Commit buffered commands */
}; };
/* Attributes at command level */ /* Attributes at command level */
@ -66,6 +69,7 @@ enum {
IPSET_ATTR_LINENO, /* 9: Restore lineno */ IPSET_ATTR_LINENO, /* 9: Restore lineno */
IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */ IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */
IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */ IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */
IPSET_ATTR_INDEX, /* 11: Kernel index of set */
__IPSET_ATTR_CMD_MAX, __IPSET_ATTR_CMD_MAX,
}; };
#define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1) #define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1)
@ -223,6 +227,7 @@ enum ipset_adt {
/* Sets are identified by an index in kernel space. Tweak with ip_set_id_t /* Sets are identified by an index in kernel space. Tweak with ip_set_id_t
* and IPSET_INVALID_ID if you want to increase the max number of sets. * and IPSET_INVALID_ID if you want to increase the max number of sets.
* Also, IPSET_ATTR_INDEX must be changed.
*/ */
typedef __u16 ip_set_id_t; typedef __u16 ip_set_id_t;

View File

@ -768,11 +768,21 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
* The commands are serialized by the nfnl mutex. * The commands are serialized by the nfnl mutex.
*/ */
/* Return the protocol version stored in the IPSET_ATTR_PROTOCOL attribute.
 * The attribute pointer is handed to nla_get_u8() without a NULL check
 * here, so callers are expected to have verified that the attribute is
 * present.
 */
static inline u8 protocol(const struct nlattr * const tb[])
{
	const struct nlattr *proto_attr = tb[IPSET_ATTR_PROTOCOL];

	return nla_get_u8(proto_attr);
}
static inline bool static inline bool
protocol_failed(const struct nlattr * const tb[]) protocol_failed(const struct nlattr * const tb[])
{ {
return !tb[IPSET_ATTR_PROTOCOL] || return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL;
nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; }
static inline bool
protocol_min_failed(const struct nlattr * const tb[])
{
return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN;
} }
static inline u32 static inline u32
@ -886,7 +896,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
u32 flags = flag_exist(nlh); u32 flags = flag_exist(nlh);
int ret = 0; int ret = 0;
if (unlikely(protocol_failed(attr) || if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_TYPENAME] || !attr[IPSET_ATTR_TYPENAME] ||
!attr[IPSET_ATTR_REVISION] || !attr[IPSET_ATTR_REVISION] ||
@ -1024,7 +1034,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
ip_set_id_t i; ip_set_id_t i;
int ret = 0; int ret = 0;
if (unlikely(protocol_failed(attr))) if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL; return -IPSET_ERR_PROTOCOL;
/* Must wait for flush to be really finished in list:set */ /* Must wait for flush to be really finished in list:set */
@ -1102,7 +1112,7 @@ static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
struct ip_set *s; struct ip_set *s;
ip_set_id_t i; ip_set_id_t i;
if (unlikely(protocol_failed(attr))) if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL; return -IPSET_ERR_PROTOCOL;
if (!attr[IPSET_ATTR_SETNAME]) { if (!attr[IPSET_ATTR_SETNAME]) {
@ -1144,7 +1154,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
ip_set_id_t i; ip_set_id_t i;
int ret = 0; int ret = 0;
if (unlikely(protocol_failed(attr) || if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_SETNAME2])) !attr[IPSET_ATTR_SETNAME2]))
return -IPSET_ERR_PROTOCOL; return -IPSET_ERR_PROTOCOL;
@ -1193,7 +1203,7 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
ip_set_id_t from_id, to_id; ip_set_id_t from_id, to_id;
char from_name[IPSET_MAXNAMELEN]; char from_name[IPSET_MAXNAMELEN];
if (unlikely(protocol_failed(attr) || if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_SETNAME2])) !attr[IPSET_ATTR_SETNAME2]))
return -IPSET_ERR_PROTOCOL; return -IPSET_ERR_PROTOCOL;
@ -1288,6 +1298,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len,
ip_set_setname_policy, NULL); ip_set_setname_policy, NULL);
cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]);
if (cda[IPSET_ATTR_SETNAME]) { if (cda[IPSET_ATTR_SETNAME]) {
struct ip_set *set; struct ip_set *set;
@ -1389,7 +1400,8 @@ dump_last:
ret = -EMSGSIZE; ret = -EMSGSIZE;
goto release_refcount; goto release_refcount;
} }
if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL,
cb->args[IPSET_CB_PROTO]) ||
nla_put_string(skb, IPSET_ATTR_SETNAME, set->name)) nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
goto nla_put_failure; goto nla_put_failure;
if (dump_flags & IPSET_FLAG_LIST_SETNAME) if (dump_flags & IPSET_FLAG_LIST_SETNAME)
@ -1404,6 +1416,9 @@ dump_last:
nla_put_u8(skb, IPSET_ATTR_REVISION, nla_put_u8(skb, IPSET_ATTR_REVISION,
set->revision)) set->revision))
goto nla_put_failure; goto nla_put_failure;
if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN &&
nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index)))
goto nla_put_failure;
ret = set->variant->head(set, skb); ret = set->variant->head(set, skb);
if (ret < 0) if (ret < 0)
goto release_refcount; goto release_refcount;
@ -1463,7 +1478,7 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const struct nlattr * const attr[], const struct nlattr * const attr[],
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
if (unlikely(protocol_failed(attr))) if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL; return -IPSET_ERR_PROTOCOL;
{ {
@ -1557,7 +1572,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
bool use_lineno; bool use_lineno;
int ret = 0; int ret = 0;
if (unlikely(protocol_failed(attr) || if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME] ||
!((attr[IPSET_ATTR_DATA] != NULL) ^ !((attr[IPSET_ATTR_DATA] != NULL) ^
(attr[IPSET_ATTR_ADT] != NULL)) || (attr[IPSET_ATTR_ADT] != NULL)) ||
@ -1612,7 +1627,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
bool use_lineno; bool use_lineno;
int ret = 0; int ret = 0;
if (unlikely(protocol_failed(attr) || if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME] ||
!((attr[IPSET_ATTR_DATA] != NULL) ^ !((attr[IPSET_ATTR_DATA] != NULL) ^
(attr[IPSET_ATTR_ADT] != NULL)) || (attr[IPSET_ATTR_ADT] != NULL)) ||
@ -1664,7 +1679,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
int ret = 0; int ret = 0;
if (unlikely(protocol_failed(attr) || if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_DATA] || !attr[IPSET_ATTR_DATA] ||
!flag_nested(attr[IPSET_ATTR_DATA]))) !flag_nested(attr[IPSET_ATTR_DATA])))
@ -1701,7 +1716,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
struct nlmsghdr *nlh2; struct nlmsghdr *nlh2;
int ret = 0; int ret = 0;
if (unlikely(protocol_failed(attr) || if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME])) !attr[IPSET_ATTR_SETNAME]))
return -IPSET_ERR_PROTOCOL; return -IPSET_ERR_PROTOCOL;
@ -1717,7 +1732,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
IPSET_CMD_HEADER); IPSET_CMD_HEADER);
if (!nlh2) if (!nlh2)
goto nlmsg_failure; goto nlmsg_failure;
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) || nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) || nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) || nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
@ -1758,7 +1773,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const char *typename; const char *typename;
int ret = 0; int ret = 0;
if (unlikely(protocol_failed(attr) || if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_TYPENAME] || !attr[IPSET_ATTR_TYPENAME] ||
!attr[IPSET_ATTR_FAMILY])) !attr[IPSET_ATTR_FAMILY]))
return -IPSET_ERR_PROTOCOL; return -IPSET_ERR_PROTOCOL;
@ -1777,7 +1792,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
IPSET_CMD_TYPE); IPSET_CMD_TYPE);
if (!nlh2) if (!nlh2)
goto nlmsg_failure; goto nlmsg_failure;
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) || nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) || nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
nla_put_u8(skb2, IPSET_ATTR_REVISION, max) || nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
@ -1828,6 +1843,111 @@ static int ip_set_protocol(struct net *net, struct sock *ctnl,
goto nlmsg_failure; goto nlmsg_failure;
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL)) if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
goto nla_put_failure; goto nla_put_failure;
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN))
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
if (ret < 0)
return ret;
return 0;
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
nlmsg_failure:
kfree_skb(skb2);
return -EMSGSIZE;
}
/* Get set by name or index, from userspace */

/* IPSET_CMD_GET_BYNAME handler: look up the set named by
 * IPSET_ATTR_SETNAME and unicast a reply to the requesting port. The
 * reply carries the protocol version echoed from the request, the family
 * of the set and the set index (IPSET_ATTR_INDEX, 16 bit, network byte
 * order).
 *
 * Returns 0 on success, -IPSET_ERR_PROTOCOL when the protocol check fails
 * or the set name is missing, -ENOENT when no set was found, -ENOMEM when
 * the reply buffer cannot be allocated, -EMSGSIZE when the reply cannot
 * be built, or the negative value returned by netlink_unicast().
 */
static int ip_set_byname(struct net *net, struct sock *ctnl,
			 struct sk_buff *skb, const struct nlmsghdr *nlh,
			 const struct nlattr * const attr[],
			 struct netlink_ext_ack *extack)
{
	struct ip_set_net *inst = ip_set_pernet(net);
	ip_set_id_t index = IPSET_INVALID_ID;
	const struct ip_set *set;
	struct sk_buff *reply;
	struct nlmsghdr *rhdr;
	int err;

	/* Request validation: protocol first, then the mandatory name */
	if (unlikely(protocol_failed(attr)))
		return -IPSET_ERR_PROTOCOL;
	if (unlikely(!attr[IPSET_ATTR_SETNAME]))
		return -IPSET_ERR_PROTOCOL;

	/* The index stays IPSET_INVALID_ID unless the lookup succeeds */
	set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &index);
	if (index == IPSET_INVALID_ID)
		return -ENOENT;

	reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!reply)
		return -ENOMEM;

	rhdr = start_msg(reply, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
			 IPSET_CMD_GET_BYNAME);
	if (!rhdr)
		goto free_reply;

	if (nla_put_u8(reply, IPSET_ATTR_PROTOCOL, protocol(attr)))
		goto cancel_msg;
	if (nla_put_u8(reply, IPSET_ATTR_FAMILY, set->family))
		goto cancel_msg;
	if (nla_put_net16(reply, IPSET_ATTR_INDEX, htons(index)))
		goto cancel_msg;
	nlmsg_end(reply, rhdr);

	err = netlink_unicast(ctnl, reply, NETLINK_CB(skb).portid, MSG_DONTWAIT);
	return err < 0 ? err : 0;

cancel_msg:
	nlmsg_cancel(reply, rhdr);
free_reply:
	kfree_skb(reply);
	return -EMSGSIZE;
}
/* Netlink attribute policy used for IPSET_CMD_GET_BYINDEX requests:
 * the protocol version is an 8 bit value and the set index a 16 bit
 * value. Attributes not listed here are left without a type constraint
 * by this table.
 */
static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = {
[IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
[IPSET_ATTR_INDEX] = { .type = NLA_U16 },
};
/* IPSET_CMD_GET_BYINDEX handler: look up the set stored at the index
 * given in IPSET_ATTR_INDEX and build a reply carrying the protocol
 * version echoed from the request and the name of the set.
 *
 * Early exits: -IPSET_ERR_PROTOCOL when the protocol check fails or the
 * index attribute is missing, -ENOENT when the index is out of range or
 * the slot is empty, -ENOMEM when the reply buffer cannot be allocated.
 * Failures while building the reply jump to the nlmsg_failure /
 * nla_put_failure labels.
 */
static int ip_set_byindex(struct net *net, struct sock *ctnl,
			  struct sk_buff *skb, const struct nlmsghdr *nlh,
			  const struct nlattr * const attr[],
			  struct netlink_ext_ack *extack)
{
	struct ip_set_net *inst = ip_set_pernet(net);
	struct sk_buff *skb2;
	struct nlmsghdr *nlh2;
	ip_set_id_t id = IPSET_INVALID_ID;
	const struct ip_set *set;
	int ret = 0;

	if (unlikely(protocol_failed(attr) ||
		     !attr[IPSET_ATTR_INDEX]))
		return -IPSET_ERR_PROTOCOL;

	/* Reject indices beyond the per-netns set array before using them */
	id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]);
	if (id >= inst->ip_set_max)
		return -ENOENT;
	set = ip_set(inst, id);
	if (set == NULL)
		return -ENOENT;

	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb2)
		return -ENOMEM;

	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
			 IPSET_CMD_GET_BYINDEX);
	if (!nlh2)
		goto nlmsg_failure;
	/* Both attributes must go into the reply (skb2): writing the set
	 * name into the request skb would leave it out of the message that
	 * is actually sent back to userspace.
	 */
	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
	    nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name))
		goto nla_put_failure;
nlmsg_end(skb2, nlh2); nlmsg_end(skb2, nlh2);
ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
@ -1913,6 +2033,16 @@ static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
.attr_count = IPSET_ATTR_CMD_MAX, .attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_protocol_policy, .policy = ip_set_protocol_policy,
}, },
[IPSET_CMD_GET_BYNAME] = {
.call = ip_set_byname,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_GET_BYINDEX] = {
.call = ip_set_byindex,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_index_policy,
},
}; };
static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = { static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
@ -1958,7 +2088,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
goto done; goto done;
} }
if (req_version->version != IPSET_PROTOCOL) { if (req_version->version < IPSET_PROTOCOL_MIN) {
ret = -EPROTO; ret = -EPROTO;
goto done; goto done;
} }