From 871185ace40df871a93866b2a7ce441276fc4ee8 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Nov 2019 12:33:59 +0100 Subject: [PATCH 01/17] netfilter: Clean up unnecessary #ifdef If CONFIG_NETFILTER_INGRESS is not enabled, nf_ingress() becomes a no-op because it solely contains an if-clause calling nf_hook_ingress_active(), for which an empty inline stub exists in <linux/netfilter_ingress.h>. All the symbols used in the if-clause's body are still available even if CONFIG_NETFILTER_INGRESS is not enabled. The additional "#ifdef CONFIG_NETFILTER_INGRESS" in nf_ingress() is thus unnecessary, so drop it. Signed-off-by: Lukas Wunner Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- net/core/dev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 2c277b8aba38..1ccead4b19bf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4932,7 +4932,6 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { -#ifdef CONFIG_NETFILTER_INGRESS if (nf_hook_ingress_active(skb)) { int ingress_retval; @@ -4946,7 +4945,6 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, rcu_read_unlock(); return ingress_retval; } -#endif /* CONFIG_NETFILTER_INGRESS */ return 0; } From 2f5e70c8ce47396bfa8f5c437574b569c02597bb Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Nov 2019 12:33:59 +0100 Subject: [PATCH 02/17] netfilter: Document ingress hook Amend kerneldoc of struct net_device to fix a "make htmldocs" warning: include/linux/netdevice.h:2045: warning: Function parameter or member 'nf_hooks_ingress' not described in 'net_device' Reported-by: kbuild test robot Signed-off-by: Lukas Wunner Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 30745068fb39..0b097bbd3663 
100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1708,6 +1708,7 @@ enum netdev_priv_flags { * @miniq_ingress: ingress/clsact qdisc specific data for * ingress processing * @ingress_queue: XXX: need comments on this one + * @nf_hooks_ingress: netfilter hooks executed for ingress packets * @broadcast: hw bcast address * * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, From cf3e204a1ca5442190018a317d9ec181b4639bd6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 13 Dec 2019 16:53:05 +0800 Subject: [PATCH 03/17] netfilter: nft_tunnel: no need to call htons() when dumping ports info->key.tp_src and tp_dst are __be16, when using nla_put_be16() to dump them, htons() is not needed, so remove it in this patch. Fixes: af308b94a2a4 ("netfilter: nf_tables: add tunnel support") Signed-off-by: Xin Long Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 3d4c2ae605a8..ef2065dd4f8a 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -501,8 +501,8 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, static int nft_tunnel_ports_dump(struct sk_buff *skb, struct ip_tunnel_info *info) { - if (nla_put_be16(skb, NFTA_TUNNEL_KEY_SPORT, htons(info->key.tp_src)) < 0 || - nla_put_be16(skb, NFTA_TUNNEL_KEY_DPORT, htons(info->key.tp_dst)) < 0) + if (nla_put_be16(skb, NFTA_TUNNEL_KEY_SPORT, info->key.tp_src) < 0 || + nla_put_be16(skb, NFTA_TUNNEL_KEY_DPORT, info->key.tp_dst) < 0) return -1; return 0; From 0705f95c332081036d85f26691e9d3cd7d901c31 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 13 Dec 2019 16:53:06 +0800 Subject: [PATCH 04/17] netfilter: nft_tunnel: add the missing ERSPAN_VERSION nla_policy ERSPAN_VERSION is an attribute parsed in kernel side, nla_policy type should be added for it, like other attributes. 
Fixes: af308b94a2a4 ("netfilter: nf_tables: add tunnel support") Signed-off-by: Xin Long Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_tunnel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index ef2065dd4f8a..6538895466e0 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -248,8 +248,9 @@ static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr, } static const struct nla_policy nft_tunnel_opts_erspan_policy[NFTA_TUNNEL_KEY_ERSPAN_MAX + 1] = { + [NFTA_TUNNEL_KEY_ERSPAN_VERSION] = { .type = NLA_U32 }, [NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX] = { .type = NLA_U32 }, - [NFTA_TUNNEL_KEY_ERSPAN_V2_DIR] = { .type = NLA_U8 }, + [NFTA_TUNNEL_KEY_ERSPAN_V2_DIR] = { .type = NLA_U8 }, [NFTA_TUNNEL_KEY_ERSPAN_V2_HWID] = { .type = NLA_U8 }, }; From 2149f36dbd44f2d6bf5357e5d096205b92cd854a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 13 Dec 2019 16:53:07 +0800 Subject: [PATCH 05/17] netfilter: nft_tunnel: also dump ERSPAN_VERSION This is not necessary, but it'll be easier to parse in userspace, also given that other places like act_tunnel_key, cls_flower and ip_tunnel_core are also doing so. 
Signed-off-by: Xin Long Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_tunnel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 6538895466e0..b3a9b10ff43d 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -479,6 +479,9 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, htonl(opts->u.vxlan.gbp))) return -1; } else if (opts->flags & TUNNEL_ERSPAN_OPT) { + if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_VERSION, + htonl(opts->u.erspan.version))) + return -1; switch (opts->u.erspan.version) { case ERSPAN_VERSION: if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX, From 73239bd9707ab2f3b7621e50468c14410cf4e2c2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 13 Dec 2019 16:53:08 +0800 Subject: [PATCH 06/17] netfilter: nft_tunnel: also dump OPTS_ERSPAN/VXLAN This patch is to add the nest attr OPTS_ERSPAN/VXLAN when dumping KEY_OPTS, and it would be helpful when parsing in userspace. Also, this is needed for supporting multiple geneve opts in the future patches. 
Signed-off-by: Xin Long Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_tunnel.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index b3a9b10ff43d..eb1740236526 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -468,17 +468,24 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, struct nft_tunnel_obj *priv) { struct nft_tunnel_opts *opts = &priv->opts; - struct nlattr *nest; + struct nlattr *nest, *inner; nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS); if (!nest) return -1; if (opts->flags & TUNNEL_VXLAN_OPT) { + inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_VXLAN); + if (!inner) + return -1; if (nla_put_be32(skb, NFTA_TUNNEL_KEY_VXLAN_GBP, htonl(opts->u.vxlan.gbp))) return -1; + nla_nest_end(skb, inner); } else if (opts->flags & TUNNEL_ERSPAN_OPT) { + inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_ERSPAN); + if (!inner) + return -1; if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_VERSION, htonl(opts->u.erspan.version))) return -1; @@ -496,6 +503,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, return -1; break; } + nla_nest_end(skb, inner); } nla_nest_end(skb, nest); From 7e03998429ee0a27c13b8690c11f9cf40f67f6e9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 13 Dec 2019 16:53:09 +0800 Subject: [PATCH 07/17] netfilter: nft_tunnel: add the missing nla_nest_cancel() When nla_put_xxx() fails under nla_nest_start_noflag(), nla_nest_cancel() should be called, so that the skb can be trimmed properly. 
Signed-off-by: Xin Long Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_tunnel.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index eb1740236526..23cd163689d5 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -443,10 +443,15 @@ static int nft_tunnel_ip_dump(struct sk_buff *skb, struct ip_tunnel_info *info) if (!nest) return -1; - if (nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_SRC, &info->key.u.ipv6.src) < 0 || - nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_DST, &info->key.u.ipv6.dst) < 0 || - nla_put_be32(skb, NFTA_TUNNEL_KEY_IP6_FLOWLABEL, info->key.label)) + if (nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_SRC, + &info->key.u.ipv6.src) < 0 || + nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_DST, + &info->key.u.ipv6.dst) < 0 || + nla_put_be32(skb, NFTA_TUNNEL_KEY_IP6_FLOWLABEL, + info->key.label)) { + nla_nest_cancel(skb, nest); return -1; + } nla_nest_end(skb, nest); } else { @@ -454,9 +459,13 @@ static int nft_tunnel_ip_dump(struct sk_buff *skb, struct ip_tunnel_info *info) if (!nest) return -1; - if (nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_SRC, info->key.u.ipv4.src) < 0 || - nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_DST, info->key.u.ipv4.dst) < 0) + if (nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_SRC, + info->key.u.ipv4.src) < 0 || + nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_DST, + info->key.u.ipv4.dst) < 0) { + nla_nest_cancel(skb, nest); return -1; + } nla_nest_end(skb, nest); } @@ -477,37 +486,42 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, if (opts->flags & TUNNEL_VXLAN_OPT) { inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_VXLAN); if (!inner) - return -1; + goto failure; if (nla_put_be32(skb, NFTA_TUNNEL_KEY_VXLAN_GBP, htonl(opts->u.vxlan.gbp))) - return -1; + goto inner_failure; nla_nest_end(skb, inner); } else if (opts->flags & TUNNEL_ERSPAN_OPT) { inner = 
nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_ERSPAN); if (!inner) - return -1; + goto failure; if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_VERSION, htonl(opts->u.erspan.version))) - return -1; + goto inner_failure; switch (opts->u.erspan.version) { case ERSPAN_VERSION: if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX, opts->u.erspan.u.index)) - return -1; + goto inner_failure; break; case ERSPAN_VERSION2: if (nla_put_u8(skb, NFTA_TUNNEL_KEY_ERSPAN_V2_HWID, get_hwid(&opts->u.erspan.u.md2)) || nla_put_u8(skb, NFTA_TUNNEL_KEY_ERSPAN_V2_DIR, opts->u.erspan.u.md2.dir)) - return -1; + goto inner_failure; break; } nla_nest_end(skb, inner); } nla_nest_end(skb, nest); - return 0; + +inner_failure: + nla_nest_cancel(skb, inner); +failure: + nla_nest_cancel(skb, nest); + return -1; } static int nft_tunnel_ports_dump(struct sk_buff *skb, From 13d74c0a9708a4f1ab0164a800ce9ea3de32f47b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 13 Dec 2019 01:58:15 +0100 Subject: [PATCH 08/17] netfilter: conntrack: remove two export symbols Not used anywhere, remove them. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 1 - net/netfilter/nf_conntrack_extend.c | 1 - 2 files changed, 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0af1898af2b8..983a9481e8f8 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2333,7 +2333,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp) return nf_conntrack_hash_resize(hashsize); } -EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); static __always_inline unsigned int total_extension_size(void) { diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index c24e5b64b00c..3dbe2329c3f1 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -37,7 +37,6 @@ void nf_ct_ext_destroy(struct nf_conn *ct) kfree(ct->ext); } -EXPORT_SYMBOL(nf_ct_ext_destroy); void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) { From db8f6f5c8de6dae924a68858ad6a4217f735be13 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Dec 2019 12:05:13 +0100 Subject: [PATCH 09/17] netfilter: nft_meta: move time handling to helper reduce size of the (large) meta evaluation function. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9740b554fdb3..ba74f3ee7264 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -33,8 +33,9 @@ static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state); -static u8 nft_meta_weekday(time64_t secs) +static u8 nft_meta_weekday(void) { + time64_t secs = ktime_get_real_seconds(); unsigned int dse; u8 wday; @@ -56,6 +57,25 @@ static u32 nft_meta_hour(time64_t secs) + tm.tm_sec; } +static noinline_for_stack void +nft_meta_get_eval_time(enum nft_meta_keys key, + u32 *dest) +{ + switch (key) { + case NFT_META_TIME_NS: + nft_reg_store64(dest, ktime_get_real_ns()); + break; + case NFT_META_TIME_DAY: + nft_reg_store8(dest, nft_meta_weekday()); + break; + case NFT_META_TIME_HOUR: + *dest = nft_meta_hour(ktime_get_real_seconds()); + break; + default: + break; + } +} + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -247,13 +267,9 @@ void nft_meta_get_eval(const struct nft_expr *expr, strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ); break; case NFT_META_TIME_NS: - nft_reg_store64(dest, ktime_get_real_ns()); - break; case NFT_META_TIME_DAY: - nft_reg_store8(dest, nft_meta_weekday(ktime_get_real_seconds())); - break; case NFT_META_TIME_HOUR: - *dest = nft_meta_hour(ktime_get_real_seconds()); + nft_meta_get_eval_time(priv->key, dest); break; default: WARN_ON(1); From 4a54594abdbee230a0471aa137b3d5405c897661 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Dec 2019 12:05:14 +0100 Subject: [PATCH 10/17] netfilter: nft_meta: move pkttype handling to helper When pkttype is loopback, nft_meta performs guesswork to detect broad/multicast packets. Place this in a helper, this is hardly a hot path. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 90 +++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 39 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index ba74f3ee7264..fe49b27dfa87 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -76,6 +76,56 @@ nft_meta_get_eval_time(enum nft_meta_keys key, } } +static noinline bool +nft_meta_get_eval_pkttype_lo(const struct nft_pktinfo *pkt, + u32 *dest) +{ + const struct sk_buff *skb = pkt->skb; + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) + nft_reg_store8(dest, PACKET_MULTICAST); + else + nft_reg_store8(dest, PACKET_BROADCAST); + break; + case NFPROTO_IPV6: + nft_reg_store8(dest, PACKET_MULTICAST); + break; + case NFPROTO_NETDEV: + switch (skb->protocol) { + case htons(ETH_P_IP): { + int noff = skb_network_offset(skb); + struct iphdr *iph, _iph; + + iph = skb_header_pointer(skb, noff, + sizeof(_iph), &_iph); + if (!iph) + return false; + + if (ipv4_is_multicast(iph->daddr)) + nft_reg_store8(dest, PACKET_MULTICAST); + else + nft_reg_store8(dest, PACKET_BROADCAST); + + break; + } + case htons(ETH_P_IPV6): + nft_reg_store8(dest, PACKET_MULTICAST); + break; + default: + WARN_ON_ONCE(1); + return false; + } + break; + default: + WARN_ON_ONCE(1); + return false; + } + + return true; +} + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -183,46 +233,8 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; } - switch (nft_pf(pkt)) { - case NFPROTO_IPV4: - if (ipv4_is_multicast(ip_hdr(skb)->daddr)) - nft_reg_store8(dest, PACKET_MULTICAST); - else - nft_reg_store8(dest, PACKET_BROADCAST); - break; - case NFPROTO_IPV6: - nft_reg_store8(dest, PACKET_MULTICAST); - break; - case NFPROTO_NETDEV: - switch (skb->protocol) { - case htons(ETH_P_IP): { - int noff = skb_network_offset(skb); - struct iphdr 
*iph, _iph; - - iph = skb_header_pointer(skb, noff, - sizeof(_iph), &_iph); - if (!iph) - goto err; - - if (ipv4_is_multicast(iph->daddr)) - nft_reg_store8(dest, PACKET_MULTICAST); - else - nft_reg_store8(dest, PACKET_BROADCAST); - - break; - } - case htons(ETH_P_IPV6): - nft_reg_store8(dest, PACKET_MULTICAST); - break; - default: - WARN_ON_ONCE(1); - goto err; - } - break; - default: - WARN_ON_ONCE(1); + if (!nft_meta_get_eval_pkttype_lo(pkt, dest)) goto err; - } break; case NFT_META_CPU: *dest = raw_smp_processor_id(); From 726b44f044e8e67cbe2209c1a5704aca981be3b2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Dec 2019 12:05:15 +0100 Subject: [PATCH 11/17] netfilter: nft_meta: move sk uid/gid handling to helper Not a hot path. Also, both have copy&paste case statements, so use a common helper for both. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 65 ++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index fe49b27dfa87..1b32440ec2e6 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -126,6 +126,41 @@ nft_meta_get_eval_pkttype_lo(const struct nft_pktinfo *pkt, return true; } +static noinline bool +nft_meta_get_eval_skugid(enum nft_meta_keys key, + u32 *dest, + const struct nft_pktinfo *pkt) +{ + struct sock *sk = skb_to_full_sk(pkt->skb); + struct socket *sock; + + if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk))) + return false; + + read_lock_bh(&sk->sk_callback_lock); + sock = sk->sk_socket; + if (!sock || !sock->file) { + read_unlock_bh(&sk->sk_callback_lock); + return false; + } + + switch (key) { + case NFT_META_SKUID: + *dest = from_kuid_munged(&init_user_ns, + sock->file->f_cred->fsuid); + break; + case NFT_META_SKGID: + *dest = from_kgid_munged(&init_user_ns, + sock->file->f_cred->fsgid); + break; + default: + break; + } + + 
read_unlock_bh(&sk->sk_callback_lock); + return true; +} + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -180,37 +215,9 @@ void nft_meta_get_eval(const struct nft_expr *expr, nft_reg_store16(dest, out->type); break; case NFT_META_SKUID: - sk = skb_to_full_sk(skb); - if (!sk || !sk_fullsock(sk) || - !net_eq(nft_net(pkt), sock_net(sk))) - goto err; - - read_lock_bh(&sk->sk_callback_lock); - if (sk->sk_socket == NULL || - sk->sk_socket->file == NULL) { - read_unlock_bh(&sk->sk_callback_lock); - goto err; - } - - *dest = from_kuid_munged(&init_user_ns, - sk->sk_socket->file->f_cred->fsuid); - read_unlock_bh(&sk->sk_callback_lock); - break; case NFT_META_SKGID: - sk = skb_to_full_sk(skb); - if (!sk || !sk_fullsock(sk) || - !net_eq(nft_net(pkt), sock_net(sk))) + if (!nft_meta_get_eval_skugid(priv->key, dest, pkt)) goto err; - - read_lock_bh(&sk->sk_callback_lock); - if (sk->sk_socket == NULL || - sk->sk_socket->file == NULL) { - read_unlock_bh(&sk->sk_callback_lock); - goto err; - } - *dest = from_kgid_munged(&init_user_ns, - sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&sk->sk_callback_lock); break; #ifdef CONFIG_IP_ROUTE_CLASSID case NFT_META_RTCLASSID: { From b1327fbc29915ef5bf0eec5abc2e2e67983cb037 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Dec 2019 12:05:16 +0100 Subject: [PATCH 12/17] netfilter: nft_meta: move cgroup handling to helper Reduce size of main eval function. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 1b32440ec2e6..3fca1c3ec361 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -161,6 +161,20 @@ nft_meta_get_eval_skugid(enum nft_meta_keys key, return true; } +#ifdef CONFIG_CGROUP_NET_CLASSID +static noinline bool +nft_meta_get_eval_cgroup(u32 *dest, const struct nft_pktinfo *pkt) +{ + struct sock *sk = skb_to_full_sk(pkt->skb); + + if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk))) + return false; + + *dest = sock_cgroup_classid(&sk->sk_cgrp_data); + return true; +} +#endif + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -168,7 +182,6 @@ void nft_meta_get_eval(const struct nft_expr *expr, const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); - struct sock *sk; u32 *dest = &regs->data[priv->dreg]; switch (priv->key) { @@ -258,11 +271,8 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; #ifdef CONFIG_CGROUP_NET_CLASSID case NFT_META_CGROUP: - sk = skb_to_full_sk(skb); - if (!sk || !sk_fullsock(sk) || - !net_eq(nft_net(pkt), sock_net(sk))) + if (!nft_meta_get_eval_cgroup(dest, pkt)) goto err; - *dest = sock_cgroup_classid(&sk->sk_cgrp_data); break; #endif case NFT_META_PRANDOM: { From a4150a1faa3604beef25ed2374d537f86059ad52 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Dec 2019 12:05:17 +0100 Subject: [PATCH 13/17] netfilter: nft_meta: move interface kind handling to helper checkpatch complains about == NULL checks in original code, so use !in instead. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 3fca1c3ec361..2f7cc64b0c15 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -175,6 +175,30 @@ nft_meta_get_eval_cgroup(u32 *dest, const struct nft_pktinfo *pkt) } #endif +static noinline bool nft_meta_get_eval_kind(enum nft_meta_keys key, + u32 *dest, + const struct nft_pktinfo *pkt) +{ + const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); + + switch (key) { + case NFT_META_IIFKIND: + if (!in || !in->rtnl_link_ops) + return false; + strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ); + break; + case NFT_META_OIFKIND: + if (!out || !out->rtnl_link_ops) + return false; + strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ); + break; + default: + return false; + } + + return true; +} + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -286,14 +310,9 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; #endif case NFT_META_IIFKIND: - if (in == NULL || in->rtnl_link_ops == NULL) - goto err; - strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ); - break; case NFT_META_OIFKIND: - if (out == NULL || out->rtnl_link_ops == NULL) + if (!nft_meta_get_eval_kind(priv->key, dest, pkt)) goto err; - strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ); break; case NFT_META_TIME_NS: case NFT_META_TIME_DAY: From 8724e819cc9a8fab4c18e791cb0bd602fb294971 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Dec 2019 12:05:18 +0100 Subject: [PATCH 14/17] netfilter: nft_meta: move all interface related keys to helper Reduces repetitiveness and reduces size of meta eval function. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 95 +++++++++++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 25 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 2f7cc64b0c15..022f1473ddd1 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -199,13 +199,79 @@ static noinline bool nft_meta_get_eval_kind(enum nft_meta_keys key, return true; } +static void nft_meta_store_ifindex(u32 *dest, const struct net_device *dev) +{ + *dest = dev ? dev->ifindex : 0; +} + +static void nft_meta_store_ifname(u32 *dest, const struct net_device *dev) +{ + strncpy((char *)dest, dev ? dev->name : "", IFNAMSIZ); +} + +static bool nft_meta_store_iftype(u32 *dest, const struct net_device *dev) +{ + if (!dev) + return false; + + nft_reg_store16(dest, dev->type); + return true; +} + +static bool nft_meta_store_ifgroup(u32 *dest, const struct net_device *dev) +{ + if (!dev) + return false; + + *dest = dev->group; + return true; +} + +static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest, + const struct nft_pktinfo *pkt) +{ + switch (key) { + case NFT_META_IIFNAME: + nft_meta_store_ifname(dest, nft_in(pkt)); + break; + case NFT_META_OIFNAME: + nft_meta_store_ifname(dest, nft_out(pkt)); + break; + case NFT_META_IIF: + nft_meta_store_ifindex(dest, nft_in(pkt)); + break; + case NFT_META_OIF: + nft_meta_store_ifindex(dest, nft_out(pkt)); + break; + case NFT_META_IIFTYPE: + if (!nft_meta_store_iftype(dest, nft_in(pkt))) + return false; + break; + case NFT_META_OIFTYPE: + if (!nft_meta_store_iftype(dest, nft_out(pkt))) + return false; + break; + case NFT_META_IIFGROUP: + if (!nft_meta_store_ifgroup(dest, nft_out(pkt))) + return false; + break; + case NFT_META_OIFGROUP: + if (!nft_meta_store_ifgroup(dest, nft_out(pkt))) + return false; + break; + default: + return false; + } + + return true; +} + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs 
*regs, const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; - const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); u32 *dest = &regs->data[priv->dreg]; switch (priv->key) { @@ -230,26 +296,15 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = skb->mark; break; case NFT_META_IIF: - *dest = in ? in->ifindex : 0; - break; case NFT_META_OIF: - *dest = out ? out->ifindex : 0; - break; case NFT_META_IIFNAME: - strncpy((char *)dest, in ? in->name : "", IFNAMSIZ); - break; case NFT_META_OIFNAME: - strncpy((char *)dest, out ? out->name : "", IFNAMSIZ); - break; case NFT_META_IIFTYPE: - if (in == NULL) - goto err; - nft_reg_store16(dest, in->type); - break; case NFT_META_OIFTYPE: - if (out == NULL) + case NFT_META_IIFGROUP: + case NFT_META_OIFGROUP: + if (!nft_meta_get_eval_ifname(priv->key, dest, pkt)) goto err; - nft_reg_store16(dest, out->type); break; case NFT_META_SKUID: case NFT_META_SKGID: @@ -283,16 +338,6 @@ void nft_meta_get_eval(const struct nft_expr *expr, case NFT_META_CPU: *dest = raw_smp_processor_id(); break; - case NFT_META_IIFGROUP: - if (in == NULL) - goto err; - *dest = in->group; - break; - case NFT_META_OIFGROUP: - if (out == NULL) - goto err; - *dest = out->group; - break; #ifdef CONFIG_CGROUP_NET_CLASSID case NFT_META_CGROUP: if (!nft_meta_get_eval_cgroup(dest, pkt)) From 6b2faee0ca91b63bd5a3b2087d4f25c76b983961 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Dec 2019 12:05:19 +0100 Subject: [PATCH 15/17] netfilter: nft_meta: place prandom handling in a helper Move this out of the main eval loop, the numgen expression provides a better alternative to meta random. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 022f1473ddd1..ac6fc95387dc 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -266,6 +266,13 @@ static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest, return true; } +static noinline u32 nft_prandom_u32(void) +{ + struct rnd_state *state = this_cpu_ptr(&nft_prandom_state); + + return prandom_u32_state(state); +} + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -344,11 +351,9 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; break; #endif - case NFT_META_PRANDOM: { - struct rnd_state *state = this_cpu_ptr(&nft_prandom_state); - *dest = prandom_u32_state(state); + case NFT_META_PRANDOM: + *dest = nft_prandom_u32(); break; - } #ifdef CONFIG_XFRM case NFT_META_SECPATH: nft_reg_store8(dest, secpath_exists(skb)); From 01a0fc82252d82eda50d4e1252b826a3ef7afb3d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Dec 2019 12:05:20 +0100 Subject: [PATCH 16/17] netfilter: nft_meta: place rtclassid handling in a helper skb_dst is an inline helper with a WARN_ON(), so this is a bit more code than it looks like. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index ac6fc95387dc..fb1a571db924 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -273,6 +273,20 @@ static noinline u32 nft_prandom_u32(void) return prandom_u32_state(state); } +#ifdef CONFIG_IP_ROUTE_CLASSID +static noinline bool +nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest) +{ + const struct dst_entry *dst = skb_dst(skb); + + if (!dst) + return false; + + *dest = dst->tclassid; + return true; +} +#endif + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -319,14 +333,10 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; break; #ifdef CONFIG_IP_ROUTE_CLASSID - case NFT_META_RTCLASSID: { - const struct dst_entry *dst = skb_dst(skb); - - if (dst == NULL) + case NFT_META_RTCLASSID: + if (!nft_meta_get_eval_rtclassid(skb, dest)) goto err; - *dest = dst->tclassid; break; - } #endif #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: From c14ceb0ec727187f71a487a592ffa91767fed66e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Dec 2019 12:05:21 +0100 Subject: [PATCH 17/17] netfilter: nft_meta: add support for slave device ifindex matching Allow to match on vrf slave ifindex or name. In case there was no slave interface involved, store 0 in the destination register just like existing iif/oif matching. sdif(name) is restricted to the ipv4/ipv6 input and forward hooks, as it depends on ip(6) stack parsing/storing info in skb->cb[]. 
Cc: Martin Willi Cc: David Ahern Cc: Shrijeet Mukherjee Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++ net/netfilter/nft_meta.c | 76 +++++++++++++++++++++--- 2 files changed, 73 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index bb9b049310df..e237ecbdcd8a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -805,6 +805,8 @@ enum nft_exthdr_attributes { * @NFT_META_TIME_NS: time since epoch (in nanoseconds) * @NFT_META_TIME_DAY: day of week (from 0 = Sunday to 6 = Saturday) * @NFT_META_TIME_HOUR: hour of day (in seconds) + * @NFT_META_SDIF: slave device interface index + * @NFT_META_SDIFNAME: slave device interface name */ enum nft_meta_keys { NFT_META_LEN, @@ -840,6 +842,8 @@ enum nft_meta_keys { NFT_META_TIME_NS, NFT_META_TIME_DAY, NFT_META_TIME_HOUR, + NFT_META_SDIF, + NFT_META_SDIFNAME, }; /** diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index fb1a571db924..951b6e87ed5d 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include /* for TCP_TIME_WAIT */ #include @@ -287,6 +288,28 @@ nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest) } #endif +static noinline u32 nft_meta_get_eval_sdif(const struct nft_pktinfo *pkt) +{ + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + return inet_sdif(pkt->skb); + case NFPROTO_IPV6: + return inet6_sdif(pkt->skb); + } + + return 0; +} + +static noinline void +nft_meta_get_eval_sdifname(u32 *dest, const struct nft_pktinfo *pkt) +{ + u32 sdif = nft_meta_get_eval_sdif(pkt); + const struct net_device *dev; + + dev = sdif ? 
dev_get_by_index_rcu(nft_net(pkt), sdif) : NULL; + nft_meta_store_ifname(dest, dev); +} + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -379,6 +402,12 @@ void nft_meta_get_eval(const struct nft_expr *expr, case NFT_META_TIME_HOUR: nft_meta_get_eval_time(priv->key, dest); break; + case NFT_META_SDIF: + *dest = nft_meta_get_eval_sdif(pkt); + break; + case NFT_META_SDIFNAME: + nft_meta_get_eval_sdifname(dest, pkt); + break; default: WARN_ON(1); goto err; @@ -459,6 +488,7 @@ int nft_meta_get_init(const struct nft_ctx *ctx, case NFT_META_MARK: case NFT_META_IIF: case NFT_META_OIF: + case NFT_META_SDIF: case NFT_META_SKUID: case NFT_META_SKGID: #ifdef CONFIG_IP_ROUTE_CLASSID @@ -480,6 +510,7 @@ int nft_meta_get_init(const struct nft_ctx *ctx, case NFT_META_OIFNAME: case NFT_META_IIFKIND: case NFT_META_OIFKIND: + case NFT_META_SDIFNAME: len = IFNAMSIZ; break; case NFT_META_PRANDOM: @@ -510,16 +541,28 @@ int nft_meta_get_init(const struct nft_ctx *ctx, } EXPORT_SYMBOL_GPL(nft_meta_get_init); -static int nft_meta_get_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) +static int nft_meta_get_validate_sdif(const struct nft_ctx *ctx) { -#ifdef CONFIG_XFRM - const struct nft_meta *priv = nft_expr_priv(expr); unsigned int hooks; - if (priv->key != NFT_META_SECPATH) - return 0; + switch (ctx->family) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD); + break; + default: + return -EOPNOTSUPP; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} + +static int nft_meta_get_validate_xfrm(const struct nft_ctx *ctx) +{ +#ifdef CONFIG_XFRM + unsigned int hooks; switch (ctx->family) { case NFPROTO_NETDEV: @@ -542,6 +585,25 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx, #endif } +static int nft_meta_get_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const 
struct nft_data **data) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + switch (priv->key) { + case NFT_META_SECPATH: + return nft_meta_get_validate_xfrm(ctx); + case NFT_META_SDIF: + case NFT_META_SDIFNAME: + return nft_meta_get_validate_sdif(ctx); + default: + break; + } + + return 0; +} + int nft_meta_set_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data)