From 3b15d09f7e6db44065aaba5fd16dc7420035c5ad Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 28 Feb 2019 13:13:26 +0800 Subject: [PATCH 01/22] time: Introduce jiffies64_to_msecs() there is a similar helper in net/netfilter/nf_tables_api.c, this maybe become a common request someday, so move it to time.c Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Acked-by: John Stultz Signed-off-by: Pablo Neira Ayuso --- include/linux/jiffies.h | 1 + kernel/time/time.c | 10 ++++++++++ net/netfilter/nf_tables_api.c | 4 +--- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index fa928242567d..1b6d31da7cbc 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -297,6 +297,7 @@ static inline u64 jiffies_to_nsecs(const unsigned long j) } extern u64 jiffies64_to_nsecs(u64 j); +extern u64 jiffies64_to_msecs(u64 j); extern unsigned long __msecs_to_jiffies(const unsigned int m); #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) diff --git a/kernel/time/time.c b/kernel/time/time.c index c3f756f8534b..9e3f79d4f5a8 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -783,6 +783,16 @@ u64 jiffies64_to_nsecs(u64 j) } EXPORT_SYMBOL(jiffies64_to_nsecs); +u64 jiffies64_to_msecs(const u64 j) +{ +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) + return (MSEC_PER_SEC / HZ) * j; +#else + return div_u64(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN); +#endif +} +EXPORT_SYMBOL(jiffies64_to_msecs); + /** * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64 * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 90e6b09ef2af..ee1b0d1445aa 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3193,9 +3193,7 @@ static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) static __be64 nf_jiffies64_to_msecs(u64 input) { - u64 ms = jiffies64_to_nsecs(input); - - return cpu_to_be64(div_u64(ms, NSEC_PER_MSEC)); + return cpu_to_be64(jiffies64_to_msecs(input)); } static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, From f7e840ee4dca312c78bd66de6f34fed84c305ede Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 17 Mar 2019 17:27:06 +0000 Subject: [PATCH 02/22] netfilter: nf_tables: remove unused parameter ctx Function nf_tables_set_desc_parse parameter ctx is not being used so remove it as it is redundant. Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ee1b0d1445aa..2d28b138ed18 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3436,8 +3436,7 @@ err: return err; } -static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, - struct nft_set_desc *desc, +static int nf_tables_set_desc_parse(struct nft_set_desc *desc, const struct nlattr *nla) { struct nlattr *da[NFTA_SET_DESC_MAX + 1]; @@ -3563,7 +3562,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); if (nla[NFTA_SET_DESC] != NULL) { - err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]); + err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]); if (err < 0) return err; } From b3dfee340a9b35ccde43f886b1dd59d634945b50 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 19 Mar 2019 22:40:21 +0800 Subject: [PATCH 03/22] netfilter: nft_redir: Make nft_redir_dump static Fix sparse warning: net/netfilter/nft_redir.c:85:5: warning: symbol 'nft_redir_dump' was not declared. Should it be static? Signed-off-by: YueHaibing Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_redir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index a340cd8a751b..02f4b4a6f887 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -82,7 +82,7 @@ static int nft_redir_init(const struct nft_ctx *ctx, return nf_ct_netns_get(ctx->net, ctx->family); } -int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_redir *priv = nft_expr_priv(expr); From 227e1e4d0d6c5ea006864c9730f1404843d6d84a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 20 Mar 2019 08:40:47 +0100 Subject: [PATCH 04/22] netfilter: nf_flowtable: skip device lookup from interface index Use the output device from the route that we cache in the flowtable entry. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 1d291a51cd45..6452550d187f 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -235,13 +235,10 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (tuplehash == NULL) return NF_ACCEPT; - outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); - if (!outdev) - return NF_ACCEPT; - dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; + outdev = rt->dst.dev; if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0) @@ -452,13 +449,10 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (tuplehash == NULL) return NF_ACCEPT; - outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); - if (!outdev) - return NF_ACCEPT; - dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; + outdev = rt->dst.dev; if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) return NF_ACCEPT; From 84c0d5e96f3ae20344fb3a79161eab18905dae56 Mon Sep 17 00:00:00 2001 From: Jacky Hu Date: Tue, 26 Mar 2019 18:31:21 +0800 Subject: [PATCH 05/22] ipvs: allow tunneling with gue encapsulation ipip packets are blocked in some public cloud environments, this patch allows gue encapsulation with the tunneling method, which would make tunneling working in those environments. Signed-off-by: Jacky Hu Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 5 ++ include/uapi/linux/ip_vs.h | 11 +++++ net/netfilter/ipvs/ip_vs_ctl.c | 35 +++++++++++++- net/netfilter/ipvs/ip_vs_xmit.c | 84 +++++++++++++++++++++++++++++++-- 4 files changed, 130 insertions(+), 5 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 047f9a5ccaad..2ac40135b576 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -600,6 +600,9 @@ struct ip_vs_dest_user_kern { /* Address family of addr */ u16 af; + + u16 tun_type; /* tunnel type */ + __be16 tun_port; /* tunnel port */ }; @@ -660,6 +663,8 @@ struct ip_vs_dest { atomic_t conn_flags; /* flags to copy to conn */ atomic_t weight; /* server weight */ atomic_t last_weight; /* server latest weight */ + __u16 tun_type; /* tunnel type */ + __be16 tun_port; /* tunnel port */ refcount_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index 1c916b2f89dc..e34f436fc79d 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -124,6 +124,13 @@ #define IP_VS_PEDATA_MAXLEN 255 +/* Tunnel types */ +enum { + IP_VS_CONN_F_TUNNEL_TYPE_IPIP = 0, /* IPIP */ + IP_VS_CONN_F_TUNNEL_TYPE_GUE, /* GUE */ + IP_VS_CONN_F_TUNNEL_TYPE_MAX, +}; + /* * The struct ip_vs_service_user and struct ip_vs_dest_user are * used to set IPVS rules through setsockopt. @@ -392,6 +399,10 @@ enum { IPVS_DEST_ATTR_STATS64, /* nested attribute for dest stats */ + IPVS_DEST_ATTR_TUN_TYPE, /* tunnel type */ + + IPVS_DEST_ATTR_TUN_PORT, /* tunnel port */ + __IPVS_DEST_ATTR_MAX, }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 4b933669fd83..ab119a7540db 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -831,6 +831,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; conn_flags |= IP_VS_CONN_F_INACTIVE; + /* set the tunnel info */ + dest->tun_type = udest->tun_type; + dest->tun_port = udest->tun_port; + /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { conn_flags |= IP_VS_CONN_F_NOOUTPUT; @@ -987,6 +991,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ERANGE; } + if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + if (udest->tun_port == 0) { + pr_err("%s(): tunnel port is zero\n", __func__); + return -EINVAL; + } + } + ip_vs_addr_copy(udest->af, &daddr, &udest->addr); /* We use function that requires RCU lock */ @@ -1051,6 +1062,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ERANGE; } + if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + if (udest->tun_port == 0) { + pr_err("%s(): tunnel port is zero\n", __func__); + return -EINVAL; + } + } + ip_vs_addr_copy(udest->af, &daddr, &udest->addr); /* We use function that requires RCU lock */ @@ -2333,6 +2351,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, udest->u_threshold = udest_compat->u_threshold; udest->l_threshold = udest_compat->l_threshold; udest->af = AF_INET; + udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP; } static int @@ -2890,6 +2909,8 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, + [IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 }, + [IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 }, }; static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, @@ -3193,6 +3214,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) IP_VS_CONN_F_FWD_MASK)) || nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight)) || + nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE, + dest->tun_type) || + nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT, + dest->tun_port) || nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) || nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) || nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, @@ -3315,12 +3340,14 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, /* If a full entry was requested, check for the additional fields */ if (full_entry) { struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, - *nla_l_thresh; + *nla_l_thresh, *nla_tun_type, *nla_tun_port; nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; + nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE]; + nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT]; if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) return -EINVAL; @@ -3330,6 +3357,12 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, udest->weight = nla_get_u32(nla_weight); udest->u_threshold = nla_get_u32(nla_u_thresh); udest->l_threshold = nla_get_u32(nla_l_thresh); + + if (nla_tun_type) + udest->tun_type = nla_get_u8(nla_tun_type); + + if (nla_tun_port) + udest->tun_port = nla_get_be16(nla_tun_port); } return 0; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 175349fcf91f..8d6f94b67772 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -32,6 +32,7 @@ #include #include /* for tcphdr */ #include +#include #include /* for csum_tcpudp_magic */ #include #include /* for icmp_send */ @@ -382,6 +383,10 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, mtu = dst_mtu(&rt->dst); } else { mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); + if (!dest) + goto err_put; + if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); if (mtu < 68) { IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto err_put; @@ -533,6 +538,10 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, mtu = dst_mtu(&rt->dst); else { mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); + if (!dest) + goto err_put; + if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); if (mtu < IPV6_MIN_MTU) { IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, IPV6_MIN_MTU); @@ -989,6 +998,41 @@ static inline int __tun_gso_type_mask(int encaps_af, int orig_af) } } +static int +ipvs_gue_encap(struct net *net, struct sk_buff *skb, + struct ip_vs_conn *cp, __u8 *next_protocol) +{ + __be16 dport; + __be16 sport = udp_flow_src_port(net, skb, 0, 0, false); + struct udphdr *udph; /* Our new UDP header */ + struct guehdr *gueh; /* Our new GUE header */ + + skb_push(skb, sizeof(struct guehdr)); + + gueh = (struct guehdr *)skb->data; + + gueh->control = 0; + gueh->version = 0; + gueh->hlen = 0; + gueh->flags = 0; + gueh->proto_ctype = *next_protocol; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + + udph = udp_hdr(skb); + + dport = cp->dest->tun_port; + udph->dest = dport; + udph->source = sport; + udph->len = htons(skb->len); + udph->check = 0; + + *next_protocol = IPPROTO_UDP; + + return 0; +} + /* * IP Tunneling transmitter * @@ -1025,6 +1069,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int ret, local; + int tun_type, gso_type; EnterFunction(10); @@ -1046,6 +1091,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); + tun_type = cp->dest->tun_type; + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr); + /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */ dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL; skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, @@ -1054,11 +1104,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af))) + gso_type = __tun_gso_type_mask(AF_INET, cp->af); + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + gso_type |= SKB_GSO_UDP_TUNNEL; + + if (iptunnel_handle_offloads(skb, gso_type)) goto tx_error; skb->transport_header = skb->network_header; + skb_set_inner_ipproto(skb, next_protocol); + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + ipvs_gue_encap(net, skb, cp, &next_protocol); + skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1102,6 +1161,8 @@ int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { + struct netns_ipvs *ipvs = cp->ipvs; + struct net *net = ipvs->net; struct rt6_info *rt; /* Route to the other host */ struct in6_addr saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -1112,10 +1173,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ipv6hdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int ret, local; + int tun_type, gso_type; EnterFunction(10); - local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + local = __ip_vs_get_out_rt_v6(ipvs, cp->af, skb, cp->dest, &cp->daddr.in6, &saddr, ipvsh, 1, IP_VS_RT_MODE_LOCAL | @@ -1134,17 +1196,31 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); + tun_type = cp->dest->tun_type; + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr); + skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, &next_protocol, &payload_len, &dsfield, &ttl, NULL); if (IS_ERR(skb)) goto tx_error; - if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af))) + gso_type = __tun_gso_type_mask(AF_INET6, cp->af); + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + gso_type |= SKB_GSO_UDP_TUNNEL; + + if (iptunnel_handle_offloads(skb, gso_type)) goto tx_error; skb->transport_header = skb->network_header; + skb_set_inner_ipproto(skb, next_protocol); + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + ipvs_gue_encap(net, skb, cp, &next_protocol); + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1167,7 +1243,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) - ip6_local_out(cp->ipvs->net, skb->sk, skb); + ip6_local_out(net, skb->sk, skb); else if (ret == NF_DROP) kfree_skb(skb); From 01902f8c85bfde343a4c2b7428d18762442f3a25 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 26 Mar 2019 20:06:20 +0800 Subject: [PATCH 06/22] netfilter: optimize nf_inet_addr_cmp optimize nf_inet_addr_cmp by 64bit xor computation similar to ipv6_addr_equal() Signed-off-by: Yuan Linsi Signed-off-by: Li RongQing Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 72cb19c3db6a..4e0145ea033e 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -24,10 +24,17 @@ static inline int NF_DROP_GETERR(int verdict) static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul1 = (const unsigned long *)a1; + const unsigned long *ul2 = (const unsigned long *)a2; + + return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL; +#else return a1->all[0] == a2->all[0] && a1->all[1] == a2->all[1] && a1->all[2] == a2->all[2] && a1->all[3] == a2->all[3]; +#endif } static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, From d164385ec572cbe3335a635ac308760e126d4ec0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Mar 2019 09:22:24 +0100 Subject: [PATCH 07/22] netfilter: nat: add inet family nat support We need minimal support from the nat core for this, as we do not want to register additional base hooks. When an inet hook is registered, interally register ipv4 and ipv6 hooks for them and unregister those when inet hooks are removed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat.h | 7 ++-- net/netfilter/nf_nat_core.c | 16 ++++------ net/netfilter/nf_nat_proto.c | 43 +++++++++++++++++++++---- net/netfilter/nft_chain_nat.c | 36 +++++++++++++++++++++ net/netfilter/nft_nat.c | 58 ++++++++++++++++++++++++++++++++-- 5 files changed, 141 insertions(+), 19 deletions(-) diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index cf332c4e0b32..423cda2c6542 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -69,9 +69,9 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, #endif } -int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, +int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, const struct nf_hook_ops *nat_ops, unsigned int ops_count); -void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, +void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, unsigned int ops_count); unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -98,6 +98,9 @@ void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops); int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops); void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops); +int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + unsigned int nf_nat_inet_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index af7dc6537758..a9ec49edd7f4 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1009,7 +1009,7 @@ static struct nf_ct_helper_expectfn follow_master_nat = { .expectfn = nf_nat_follow_master, }; -int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, +int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, const struct nf_hook_ops *orig_nat_ops, unsigned int ops_count) { struct nat_net *nat_net = net_generic(net, nat_net_id); @@ -1019,14 +1019,12 @@ int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, struct nf_hook_ops *nat_ops; int i, ret; - if (WARN_ON_ONCE(ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net))) + if (WARN_ON_ONCE(pf >= ARRAY_SIZE(nat_net->nat_proto_net))) return -EINVAL; - nat_proto_net = &nat_net->nat_proto_net[ops->pf]; + nat_proto_net = &nat_net->nat_proto_net[pf]; for (i = 0; i < ops_count; i++) { - if (WARN_ON(orig_nat_ops[i].pf != ops->pf)) - return -EINVAL; if (orig_nat_ops[i].hooknum == hooknum) { hooknum = i; break; @@ -1086,8 +1084,8 @@ int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, return ret; } -void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, - unsigned int ops_count) +void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, + unsigned int ops_count) { struct nat_net *nat_net = net_generic(net, nat_net_id); struct nf_nat_hooks_net *nat_proto_net; @@ -1096,10 +1094,10 @@ void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, int hooknum = ops->hooknum; int i; - if (ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net)) + if (pf >= ARRAY_SIZE(nat_net->nat_proto_net)) return; - nat_proto_net = &nat_net->nat_proto_net[ops->pf]; + nat_proto_net = &nat_net->nat_proto_net[pf]; mutex_lock(&nf_nat_proto_mutex); if (WARN_ON(nat_proto_net->users == 0)) diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 62743da3004f..606d0a740615 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -725,7 +725,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, return ret; } -static const struct nf_hook_ops nf_nat_ipv4_ops[] = { +const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv4_in, @@ -758,13 +758,14 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops) { - return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops, + ARRAY_SIZE(nf_nat_ipv4_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn); void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops) { - nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn); @@ -977,7 +978,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, return ret; } -static const struct nf_hook_ops nf_nat_ipv6_ops[] = { +const struct nf_hook_ops nf_nat_ipv6_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv6_in, @@ -1010,14 +1011,44 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops) { - return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops, + return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn); void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops) { - nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); + nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn); #endif /* CONFIG_IPV6 */ + +#if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT) +int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops) +{ + int ret; + + if (WARN_ON_ONCE(ops->pf != NFPROTO_INET)) + return -EINVAL; + + ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops, + ARRAY_SIZE(nf_nat_ipv6_ops)); + if (ret) + return ret; + + ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops, + ARRAY_SIZE(nf_nat_ipv4_ops)); + if (ret) + nf_nat_ipv6_unregister_fn(net, ops); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn); + +void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops) +{ + nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); +} +EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn); +#endif /* NFT INET NAT */ diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c index ee4852088d50..2f89bde3c61c 100644 --- a/net/netfilter/nft_chain_nat.c +++ b/net/netfilter/nft_chain_nat.c @@ -74,6 +74,36 @@ static const struct nft_chain_type nft_chain_nat_ipv6 = { }; #endif +#ifdef CONFIG_NF_TABLES_INET +static int nft_nat_inet_reg(struct net *net, const struct nf_hook_ops *ops) +{ + return nf_nat_inet_register_fn(net, ops); +} + +static void nft_nat_inet_unreg(struct net *net, const struct nf_hook_ops *ops) +{ + nf_nat_inet_unregister_fn(net, ops); +} + +static const struct nft_chain_type nft_chain_nat_inet = { + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_INET, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_PRE_ROUTING] = nft_nat_do_chain, + [NF_INET_LOCAL_IN] = nft_nat_do_chain, + [NF_INET_LOCAL_OUT] = nft_nat_do_chain, + [NF_INET_POST_ROUTING] = nft_nat_do_chain, + }, + .ops_register = nft_nat_inet_reg, + .ops_unregister = nft_nat_inet_unreg, +}; +#endif + static int __init nft_chain_nat_init(void) { #ifdef CONFIG_NF_TABLES_IPV6 @@ -82,6 +112,9 @@ static int __init nft_chain_nat_init(void) #ifdef CONFIG_NF_TABLES_IPV4 nft_register_chain_type(&nft_chain_nat_ipv4); #endif +#ifdef CONFIG_NF_TABLES_INET + nft_register_chain_type(&nft_chain_nat_inet); +#endif return 0; } @@ -94,6 +127,9 @@ static void __exit nft_chain_nat_exit(void) #ifdef CONFIG_NF_TABLES_IPV6 nft_unregister_chain_type(&nft_chain_nat_ipv6); #endif +#ifdef CONFIG_NF_TABLES_INET + nft_unregister_chain_type(&nft_chain_nat_inet); +#endif } module_init(nft_chain_nat_init); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index e93aed9bda88..d90d421826aa 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -140,7 +140,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); - if (family != ctx->family) + if (ctx->family != NFPROTO_INET && ctx->family != family) return -EOPNOTSUPP; switch (family) { @@ -278,13 +278,67 @@ static struct nft_expr_type nft_nat_type __read_mostly = { .owner = THIS_MODULE, }; +#ifdef CONFIG_NF_TABLES_INET +static void nft_nat_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + if (priv->family == nft_pf(pkt)) + nft_nat_eval(expr, regs, pkt); +} + +static const struct nft_expr_ops nft_nat_inet_ops = { + .type = &nft_nat_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), + .eval = nft_nat_inet_eval, + .init = nft_nat_init, + .destroy = nft_nat_destroy, + .dump = nft_nat_dump, + .validate = nft_nat_validate, +}; + +static struct nft_expr_type nft_inet_nat_type __read_mostly = { + .name = "nat", + .family = NFPROTO_INET, + .ops = &nft_nat_inet_ops, + .policy = nft_nat_policy, + .maxattr = NFTA_NAT_MAX, + .owner = THIS_MODULE, +}; + +static int nft_nat_inet_module_init(void) +{ + return nft_register_expr(&nft_inet_nat_type); +} + +static void nft_nat_inet_module_exit(void) +{ + nft_unregister_expr(&nft_inet_nat_type); +} +#else +static int nft_nat_inet_module_init(void) { return 0; } +static void nft_nat_inet_module_exit(void) { } +#endif + static int __init nft_nat_module_init(void) { - return nft_register_expr(&nft_nat_type); + int ret = nft_nat_inet_module_init(); + + if (ret) + return ret; + + ret = nft_register_expr(&nft_nat_type); + if (ret) + nft_nat_inet_module_exit(); + + return ret; } static void __exit nft_nat_module_exit(void) { + nft_nat_inet_module_exit(); nft_unregister_expr(&nft_nat_type); } From c1deb065cf3b5bcd483e3f03479f930edb151b99 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Mar 2019 09:22:25 +0100 Subject: [PATCH 08/22] netfilter: nf_tables: merge route type into core very little code, so it really doesn't make sense to have extra modules or even a kconfig knob for this. Merge them and make functionality available unconditionally. The merge makes inet family route support trivial, so add it as well here. Before: text data bss dec hex filename 835 832 0 1667 683 nft_chain_route_ipv4.ko 870 832 0 1702 6a6 nft_chain_route_ipv6.ko 111568 2556 529 114653 1bfdd nf_tables.ko After: text data bss dec hex filename 113133 2556 529 116218 1c5fa nf_tables.ko Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 15 ++ include/net/netfilter/nf_tables.h | 2 + net/ipv4/netfilter/Kconfig | 8 - net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/nft_chain_route_ipv4.c | 89 ------------ net/ipv6/netfilter/Kconfig | 8 - net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/nft_chain_route_ipv6.c | 91 ------------ net/netfilter/Makefile | 3 +- net/netfilter/nf_nat_proto.c | 16 +- net/netfilter/nf_tables_api.c | 2 + net/netfilter/nft_chain_route.c | 169 ++++++++++++++++++++++ 12 files changed, 191 insertions(+), 214 deletions(-) delete mode 100644 net/ipv4/netfilter/nft_chain_route_ipv4.c delete mode 100644 net/ipv6/netfilter/nft_chain_route_ipv6.c create mode 100644 net/netfilter/nft_chain_route.c diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 471e9467105b..12113e502656 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -87,6 +87,21 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, } int ip6_route_me_harder(struct net *net, struct sk_buff *skb); + +static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->route_me_harder(net, skb); +#else + return ip6_route_me_harder(net, skb); +#endif +} + __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3e9ab643eedf..55dff3ab44c7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1411,4 +1411,6 @@ struct nft_trans_flowtable { int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); +void __init nft_chain_route_init(void); +void nft_chain_route_fini(void); #endif /* _NET_NF_TABLES_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c98391d49200..ea688832fc4e 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -27,14 +27,6 @@ config NF_TABLES_IPV4 if NF_TABLES_IPV4 -config NFT_CHAIN_ROUTE_IPV4 - tristate "IPv4 nf_tables route chain support" - help - This option enables the "route" chain for IPv4 in nf_tables. This - chain type is used to force packet re-routing after mangling header - fields such as the source, destination, type of service and - the packet mark. - config NFT_REJECT_IPV4 select NF_REJECT_IPV4 default NFT_REJECT diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index e241f5188ebe..2cfdda7b109f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -24,7 +24,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o $(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o -obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c deleted file mode 100644 index 7d82934c46f4..000000000000 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * Copyright (c) 2012 Pablo Neira Ayuso - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int nf_route_table_hook(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - unsigned int ret; - struct nft_pktinfo pkt; - u32 mark; - __be32 saddr, daddr; - u_int8_t tos; - const struct iphdr *iph; - int err; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv4(&pkt, skb); - - mark = skb->mark; - iph = ip_hdr(skb); - saddr = iph->saddr; - daddr = iph->daddr; - tos = iph->tos; - - ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_STOLEN) { - iph = ip_hdr(skb); - - if (iph->saddr != saddr || - iph->daddr != daddr || - skb->mark != mark || - iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); - if (err < 0) - ret = NF_DROP_ERR(err); - } - } - return ret; -} - -static const struct nft_chain_type nft_chain_route_ipv4 = { - .name = "route", - .type = NFT_CHAIN_T_ROUTE, - .family = NFPROTO_IPV4, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_LOCAL_OUT), - .hooks = { - [NF_INET_LOCAL_OUT] = nf_route_table_hook, - }, -}; - -static int __init nft_chain_route_init(void) -{ - nft_register_chain_type(&nft_chain_route_ipv4); - - return 0; -} - -static void __exit nft_chain_route_exit(void) -{ - nft_unregister_chain_type(&nft_chain_route_ipv4); -} - -module_init(nft_chain_route_init); -module_exit(nft_chain_route_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_CHAIN(AF_INET, "route"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index ddc99a1653aa..3de3adb1a0c9 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -23,14 +23,6 @@ config NF_TABLES_IPV6 if NF_TABLES_IPV6 -config NFT_CHAIN_ROUTE_IPV6 - tristate "IPv6 nf_tables route chain support" - help - This option enables the "route" chain for IPv6 in nf_tables. This - chain type is used to force packet re-routing after mangling header - fields such as the source, destination, flowlabel, hop-limit and - the packet mark. - config NFT_REJECT_IPV6 select NF_REJECT_IPV6 default NFT_REJECT diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 3853c648ebaa..93aff604b243 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,7 +27,6 @@ obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o # nf_tables -obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c deleted file mode 100644 index da3f1f8cb325..000000000000 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * Copyright (c) 2012 Pablo Neira Ayuso - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int nf_route_table_hook(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - unsigned int ret; - struct nft_pktinfo pkt; - struct in6_addr saddr, daddr; - u_int8_t hop_limit; - u32 mark, flowlabel; - int err; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv6(&pkt, skb); - - /* save source/dest address, mark, hoplimit, flowlabel, priority */ - memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); - memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); - mark = skb->mark; - hop_limit = ipv6_hdr(skb)->hop_limit; - - /* flowlabel and prio (includes version, which shouldn't change either */ - flowlabel = *((u32 *)ipv6_hdr(skb)); - - ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_STOLEN && - (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || - memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || - skb->mark != mark || - ipv6_hdr(skb)->hop_limit != hop_limit || - flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(state->net, skb); - if (err < 0) - ret = NF_DROP_ERR(err); - } - - return ret; -} - -static const struct nft_chain_type nft_chain_route_ipv6 = { - .name = "route", - .type = NFT_CHAIN_T_ROUTE, - .family = NFPROTO_IPV6, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_LOCAL_OUT), - .hooks = { - [NF_INET_LOCAL_OUT] = nf_route_table_hook, - }, -}; - -static int __init nft_chain_route_init(void) -{ - nft_register_chain_type(&nft_chain_route_ipv6); - - return 0; -} - -static void __exit nft_chain_route_exit(void) -{ - nft_unregister_chain_type(&nft_chain_route_ipv6); -} - -module_init(nft_chain_route_init); -module_exit(nft_chain_route_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route"); diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 4894a85cdd0b..afbf475e02b2 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -77,7 +77,8 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ - nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o + nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \ + nft_chain_route.o nf_tables_set-objs := nf_tables_set_core.o \ nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 606d0a740615..84f5c90a7f21 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -926,20 +926,6 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb, return ret; } -static int nat_route_me_harder(struct net *net, struct sk_buff *skb) -{ -#ifdef CONFIG_IPV6_MODULE - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return -EHOSTUNREACH; - - return v6_ops->route_me_harder(net, skb); -#else - return ip6_route_me_harder(net, skb); -#endif -} - static unsigned int nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -959,7 +945,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = nat_route_me_harder(state->net, skb); + err = nf_ip6_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2d28b138ed18..a2bd439937e6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7530,6 +7530,7 @@ static int __init nf_tables_module_init(void) if (err < 0) goto err5; + nft_chain_route_init(); return err; err5: rhltable_destroy(&nft_objname_ht); @@ -7549,6 +7550,7 @@ static void __exit nf_tables_module_exit(void) nfnetlink_subsys_unregister(&nf_tables_subsys); unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); + nft_chain_route_fini(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_destroy_work); rcu_barrier(); diff --git a/net/netfilter/nft_chain_route.c b/net/netfilter/nft_chain_route.c new file mode 100644 index 000000000000..8826bbe71136 --- /dev/null +++ b/net/netfilter/nft_chain_route.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_NF_TABLES_IPV4 +static unsigned int nf_route_table_hook4(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + const struct iphdr *iph; + struct nft_pktinfo pkt; + __be32 saddr, daddr; + unsigned int ret; + u32 mark; + int err; + u8 tos; + + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); + + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = nft_do_chain(&pkt, priv); + if (ret == NF_ACCEPT) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) { + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } + } + return ret; +} + +static const struct nft_chain_type nft_chain_route_ipv4 = { + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV4, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .hooks = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook4, + }, +}; +#endif + +#ifdef CONFIG_NF_TABLES_IPV6 +static unsigned int nf_route_table_hook6(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct in6_addr saddr, daddr; + struct nft_pktinfo pkt; + u32 mark, flowlabel; + unsigned int ret; + u8 hop_limit; + int err; + + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); + + /* save source/dest address, mark, hoplimit, flowlabel, priority */ + memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); + memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); + mark = skb->mark; + hop_limit = ipv6_hdr(skb)->hop_limit; + + /* flowlabel and prio (includes version, which shouldn't change either)*/ + flowlabel = *((u32 *)ipv6_hdr(skb)); + + ret = nft_do_chain(&pkt, priv); + if (ret == NF_ACCEPT && + (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || + memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + skb->mark != mark || + ipv6_hdr(skb)->hop_limit != hop_limit || + flowlabel != *((u32 *)ipv6_hdr(skb)))) { + err = nf_ip6_route_me_harder(state->net, skb); + if (err < 0) + ret = NF_DROP_ERR(err); + } + + return ret; +} + +static const struct nft_chain_type nft_chain_route_ipv6 = { + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV6, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .hooks = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook6, + }, +}; +#endif + +#ifdef CONFIG_NF_TABLES_INET +static unsigned int nf_route_table_inet(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + switch (state->pf) { + case NFPROTO_IPV4: + return nf_route_table_hook4(priv, skb, state); + case NFPROTO_IPV6: + return nf_route_table_hook6(priv, skb, state); + default: + nft_set_pktinfo(&pkt, skb, state); + break; + } + + return nft_do_chain(&pkt, priv); +} + +static const struct nft_chain_type nft_chain_route_inet = { + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_INET, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .hooks = { + [NF_INET_LOCAL_OUT] = nf_route_table_inet, + }, +}; +#endif + +void __init nft_chain_route_init(void) +{ +#ifdef CONFIG_NF_TABLES_IPV6 + nft_register_chain_type(&nft_chain_route_ipv6); +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + nft_register_chain_type(&nft_chain_route_ipv4); +#endif +#ifdef CONFIG_NF_TABLES_INET + nft_register_chain_type(&nft_chain_route_inet); +#endif +} + +void __exit nft_chain_route_fini(void) +{ +#ifdef CONFIG_NF_TABLES_IPV6 + nft_unregister_chain_type(&nft_chain_route_ipv6); +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + nft_unregister_chain_type(&nft_chain_route_ipv4); +#endif +#ifdef CONFIG_NF_TABLES_INET + nft_unregister_chain_type(&nft_chain_route_inet); +#endif +} From 4806e975729f99c7908d1688a143f1e16d464e6c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Mar 2019 09:22:26 +0100 Subject: [PATCH 09/22] netfilter: replace NF_NAT_NEEDED with IS_ENABLED(CONFIG_NF_NAT) NF_NAT_NEEDED is true whenever nat support for either ipv4 or ipv6 is enabled. Now that the af-specific nat configuration switches have been removed, IS_ENABLED(CONFIG_NF_NAT) has the same effect. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- include/net/netfilter/nf_conntrack_expect.h | 2 +- net/netfilter/Kconfig | 5 ----- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 16 ++++++++-------- net/netfilter/nf_conntrack_sip.c | 2 +- net/openvswitch/conntrack.c | 18 +++++++++--------- 7 files changed, 21 insertions(+), 26 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 4e0145ea033e..a7252f3baeb0 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -367,7 +367,7 @@ extern struct nf_nat_hook __rcu *nf_nat_hook; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) struct nf_nat_hook *nat_hook; rcu_read_lock(); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 006e430d1cdf..93ce6b0daaba 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -48,7 +48,7 @@ struct nf_conntrack_expect { /* Expectation class */ unsigned int class; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) union nf_inet_addr saved_addr; /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6548271209a0..f4384c096d0d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -404,11 +404,6 @@ config NF_NAT forms of full Network Address Port Translation. This can be controlled by iptables, ip6tables or nft. -config NF_NAT_NEEDED - bool - depends on NF_NAT - default y - config NF_NAT_AMANDA tristate depends on NF_CONNTRACK && NF_NAT diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 334d6e5b7762..59c18804a10a 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -336,7 +336,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, exp->tuple.dst.u.all = *dst; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); #endif diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 66c596d287a5..32fe3060375a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,7 +45,7 @@ #include #include #include -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) #include #include #endif @@ -655,7 +655,7 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(0) /* CTA_HELP */ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + ctnetlink_secctx_size(ct) -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ #endif @@ -1494,7 +1494,7 @@ static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl, return -EOPNOTSUPP; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) static int ctnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, @@ -1586,7 +1586,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) static int ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) { -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) int ret; if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) @@ -2369,7 +2369,7 @@ ctnetlink_glue_build_size(const struct nf_conn *ct) + nla_total_size(0) /* CTA_HELP */ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + ctnetlink_secctx_size(ct) -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ #endif @@ -2699,7 +2699,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, struct nf_conn *master = exp->master; long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ; struct nf_conn_help *help; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *nest_parms; struct nf_conntrack_tuple nat_tuple = {}; #endif @@ -2717,7 +2717,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, CTA_EXPECT_MASTER) < 0) goto nla_put_failure; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) || exp->saved_proto.all) { nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT | NLA_F_NESTED); @@ -3180,7 +3180,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, struct nf_conntrack_expect *exp, u_int8_t u3) { -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *tb[CTA_EXPECT_NAT_MAX+1]; struct nf_conntrack_tuple nat_tuple = {}; int err; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 39fcc1ed18f3..d5454d1031a3 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -928,7 +928,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, nfct_help(exp->master)->helper != nfct_help(ct)->helper || exp->class != class) break; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) if (!direct_rtp && (!nf_inet_addr_cmp(&exp->saved_addr, &exp->tuple.dst.u3) || exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) && diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 0be3ab5bde26..626629944450 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -29,7 +29,7 @@ #include #include -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) #include #endif @@ -75,7 +75,7 @@ struct ovs_conntrack_info { struct md_mark mark; struct md_labels labels; char timeout[CTNL_TIMEOUT_NAME_MAX]; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */ #endif }; @@ -721,7 +721,7 @@ static bool skb_nfct_cached(struct net *net, return ct_executed; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) /* Modelled after nf_nat_ipv[46]_fn(). * range is only used for new, uninitialized NAT state. * Returns either NF_ACCEPT or NF_DROP. @@ -903,7 +903,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, return err; } -#else /* !CONFIG_NF_NAT_NEEDED */ +#else /* !CONFIG_NF_NAT */ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, const struct ovs_conntrack_info *info, struct sk_buff *skb, struct nf_conn *ct, @@ -1330,7 +1330,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, return 0; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) static int parse_nat(const struct nlattr *attr, struct ovs_conntrack_info *info, bool log) { @@ -1467,7 +1467,7 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { .maxlen = sizeof(struct md_labels) }, [OVS_CT_ATTR_HELPER] = { .minlen = 1, .maxlen = NF_CT_HELPER_NAME_LEN }, -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) /* NAT length is checked when parsing the nested attributes. */ [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, #endif @@ -1547,7 +1547,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, return -EINVAL; } break; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) case OVS_CT_ATTR_NAT: { int err = parse_nat(a, info, log); @@ -1677,7 +1677,7 @@ err_free_ct: return err; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, struct sk_buff *skb) { @@ -1783,7 +1783,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, return -EMSGSIZE; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) return -EMSGSIZE; #endif From 071657d2c38c54bf047cf2280fc96e4a3e8a91f2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Mar 2019 09:22:27 +0100 Subject: [PATCH 10/22] netfilter: nft_masq: add inet support This allows use of a single masquerade rule in nat inet family to handle both ipv4 and ipv6. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_masq.c | 64 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index bee156eaa400..35a1794acf4c 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -218,6 +218,61 @@ static inline int nft_masq_module_init_ipv6(void) { return 0; } static inline void nft_masq_module_exit_ipv6(void) {} #endif +#ifdef CONFIG_NF_TABLES_INET +static void nft_masq_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + return nft_masq_ipv4_eval(expr, regs, pkt); + case NFPROTO_IPV6: + return nft_masq_ipv6_eval(expr, regs, pkt); + } + + WARN_ON_ONCE(1); +} + +static void +nft_masq_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_INET); +} + +static struct nft_expr_type nft_masq_inet_type; +static const struct nft_expr_ops nft_masq_inet_ops = { + .type = &nft_masq_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), + .eval = nft_masq_inet_eval, + .init = nft_masq_init, + .destroy = nft_masq_inet_destroy, + .dump = nft_masq_dump, + .validate = nft_masq_validate, +}; + +static struct nft_expr_type nft_masq_inet_type __read_mostly = { + .family = NFPROTO_INET, + .name = "masq", + .ops = &nft_masq_inet_ops, + .policy = nft_masq_policy, + .maxattr = NFTA_MASQ_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_masq_module_init_inet(void) +{ + return nft_register_expr(&nft_masq_inet_type); +} + +static void nft_masq_module_exit_inet(void) +{ + nft_unregister_expr(&nft_masq_inet_type); +} +#else +static inline int nft_masq_module_init_inet(void) { return 0; } +static inline void nft_masq_module_exit_inet(void) {} +#endif + static int __init nft_masq_module_init(void) { int ret; @@ -226,8 +281,15 @@ static int __init nft_masq_module_init(void) if (ret < 0) return ret; + ret = nft_masq_module_init_inet(); + if (ret < 0) { + nft_masq_module_exit_ipv6(); + return ret; + } + ret = nft_register_expr(&nft_masq_ipv4_type); if (ret < 0) { + nft_masq_module_exit_inet(); nft_masq_module_exit_ipv6(); return ret; } @@ -235,6 +297,7 @@ static int __init nft_masq_module_init(void) ret = nf_nat_masquerade_ipv4_register_notifier(); if (ret < 0) { nft_masq_module_exit_ipv6(); + nft_masq_module_exit_inet(); nft_unregister_expr(&nft_masq_ipv4_type); return ret; } @@ -245,6 +308,7 @@ static int __init nft_masq_module_init(void) static void __exit nft_masq_module_exit(void) { nft_masq_module_exit_ipv6(); + nft_masq_module_exit_inet(); nft_unregister_expr(&nft_masq_ipv4_type); nf_nat_masquerade_ipv4_unregister_notifier(); } From 63ce3940f3ab1d81e7c6d310dba52aed85db6aa1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Mar 2019 09:22:28 +0100 Subject: [PATCH 11/22] netfilter: nft_redir: add inet support allows to redirect both ipv4 and ipv6 with a single rule in an inet nat table. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_redir.c | 61 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 02f4b4a6f887..da74fdc4a684 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -202,6 +202,55 @@ static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { }; #endif +#ifdef CONFIG_NF_TABLES_INET +static void nft_redir_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + return nft_redir_ipv4_eval(expr, regs, pkt); + case NFPROTO_IPV6: + return nft_redir_ipv6_eval(expr, regs, pkt); + } + + WARN_ON_ONCE(1); +} + +static void +nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_INET); +} + +static struct nft_expr_type nft_redir_inet_type; +static const struct nft_expr_ops nft_redir_inet_ops = { + .type = &nft_redir_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_inet_eval, + .init = nft_redir_init, + .destroy = nft_redir_inet_destroy, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_inet_type __read_mostly = { + .family = NFPROTO_INET, + .name = "redir", + .ops = &nft_redir_inet_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_MASQ_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_module_init_inet(void) +{ + return nft_register_expr(&nft_redir_inet_type); +} +#else +static inline int nft_redir_module_init_inet(void) { return 0; } +#endif + static int __init nft_redir_module_init(void) { int ret = nft_register_expr(&nft_redir_ipv4_type); @@ -217,6 +266,15 @@ static int __init nft_redir_module_init(void) } #endif + ret = nft_redir_module_init_inet(); + if (ret < 0) { + nft_unregister_expr(&nft_redir_ipv4_type); +#ifdef CONFIG_NF_TABLES_IPV6 + nft_unregister_expr(&nft_redir_ipv6_type); +#endif + return ret; + } + return ret; } @@ -226,6 +284,9 @@ static void __exit nft_redir_module_exit(void) #ifdef CONFIG_NF_TABLES_IPV6 nft_unregister_expr(&nft_redir_ipv6_type); #endif +#ifdef CONFIG_NF_TABLES_INET + nft_unregister_expr(&nft_redir_inet_type); +#endif } module_init(nft_redir_module_init); From 6978cdb129da13f46bcc4362639ba5ee8fc82921 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Mar 2019 09:22:29 +0100 Subject: [PATCH 12/22] kselftests: extend nft_nat with inet family based nat hooks With older nft versions, this will cause: [..] PASS: ipv6 ping to ns1 was ip6 NATted to ns2 /dev/stdin:4:30-31: Error: syntax error, unexpected to, expecting newline or semicolon ip daddr 10.0.1.99 dnat ip to 10.0.2.99 ^^ SKIP: inet nat tests PASS: ip IP masquerade for ns2 [..] as there is currently no way to detect if nft will be able to parse the inet format. redirect and masquerade tests need to be skipped in this case for inet too because nft userspace has overzealous family check and rejects their use in the inet family. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_nat.sh | 130 ++++++++++++++----- 1 file changed, 94 insertions(+), 36 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 8ec76681605c..248905130d5d 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -6,6 +6,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 ret=0 +test_inet_nat=true nft --version > /dev/null 2>&1 if [ $? -ne 0 ];then @@ -141,17 +142,24 @@ reset_counters() test_local_dnat6() { + local family=$1 local lret=0 + local IPF="" + + if [ $family = "inet" ];then + IPF="ip6" + fi + ip netns exec ns0 nft -f - </dev/null +table $family nat { chain output { type nat hook output priority 0; policy accept; - ip daddr 10.0.1.99 dnat to 10.0.2.99 + ip daddr 10.0.1.99 dnat $IPF to 10.0.2.99 } } EOF + if [ $? -ne 0 ]; then + if [ $family = "inet" ];then + echo "SKIP: inet nat tests" + test_inet_nat=false + return $ksft_skip + fi + echo "SKIP: Could not add add $family dnat hook" + return $ksft_skip + fi + # ping netns1, expect rewrite to netns2 ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null if [ $? -ne 0 ]; then @@ -264,9 +289,9 @@ EOF fi done - test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2" + test $lret -eq 0 && echo "PASS: ping to ns1 was $family NATted to ns2" - ip netns exec ns0 nft flush chain ip nat output + ip netns exec ns0 nft flush chain $family nat output reset_counters ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null @@ -313,7 +338,7 @@ EOF fi done - test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush" + test $lret -eq 0 && echo "PASS: ping to ns1 OK after $family nat output chain flush" return $lret } @@ -321,6 +346,7 @@ EOF test_masquerade6() { + local family=$1 local lret=0 ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -351,16 +377,21 @@ test_masquerade6() # add masquerading rule ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading" lret=1 fi @@ -397,19 +428,20 @@ EOF fi done - ip netns exec ns0 nft flush chain ip6 nat postrouting + ip netns exec ns0 nft flush chain $family nat postrouting if [ $? -ne 0 ]; then - echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 + echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + test $lret -eq 0 && echo "PASS: $family IPv6 masquerade for ns2" return $lret } test_masquerade() { + local family=$1 local lret=0 ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null @@ -440,16 +472,21 @@ test_masquerade() # add masquerading rule ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading" lret=1 fi @@ -485,19 +522,20 @@ EOF fi done - ip netns exec ns0 nft flush chain ip nat postrouting + ip netns exec ns0 nft flush chain $family nat postrouting if [ $? -ne 0 ]; then - echo "ERROR: Could not flush nat postrouting" 1>&2 + echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + test $lret -eq 0 && echo "PASS: $family IP masquerade for ns2" return $lret } test_redirect6() { + local family=$1 local lret=0 ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -527,16 +565,21 @@ test_redirect6() # add redirect rule ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect" + echo "ERROR: cannot ping ns1 from ns2 via ipv6 with active $family redirect" lret=1 fi @@ -560,19 +603,20 @@ EOF fi done - ip netns exec ns0 nft delete table ip6 nat + ip netns exec ns0 nft delete table $family nat if [ $? -ne 0 ]; then - echo "ERROR: Could not delete ip6 nat table" 1>&2 + echo "ERROR: Could not delete $family nat table" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2" + test $lret -eq 0 && echo "PASS: $family IPv6 redirection for ns2" return $lret } test_redirect() { + local family=$1 local lret=0 ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null @@ -603,16 +647,21 @@ test_redirect() # add redirect rule ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ip redirect" + echo "ERROR: cannot ping ns1 from ns2 with active $family ip redirect" lret=1 fi @@ -637,13 +686,13 @@ EOF fi done - ip netns exec ns0 nft delete table ip nat + ip netns exec ns0 nft delete table $family nat if [ $? -ne 0 ]; then - echo "ERROR: Could not delete nat table" 1>&2 + echo "ERROR: Could not delete $family nat table" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IP redirection for ns2" + test $lret -eq 0 && echo "PASS: $family IP redirection for ns2" return $lret } @@ -746,16 +795,25 @@ if [ $ret -eq 0 ];then fi reset_counters -test_local_dnat -test_local_dnat6 +test_local_dnat ip +test_local_dnat6 ip6 +reset_counters +$test_inet_nat && test_local_dnat inet +$test_inet_nat && test_local_dnat6 inet reset_counters -test_masquerade -test_masquerade6 +test_masquerade ip +test_masquerade6 ip6 +reset_counters +$test_inet_nat && test_masquerade inet +$test_inet_nat && test_masquerade6 inet reset_counters -test_redirect -test_redirect6 +test_redirect ip +test_redirect6 ip6 +reset_counters +$test_inet_nat && test_redirect inet +$test_inet_nat && test_redirect6 inet for i in 0 1 2; do ip netns del ns$i;done From 22c7652cdaa8cd33ce78bacceb4e826a3f795873 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 27 Mar 2019 11:36:26 +0100 Subject: [PATCH 13/22] netfilter: nft_osf: Add version option support Add version option support to the nftables "osf" expression. Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_osf.h | 11 ++++++--- include/uapi/linux/netfilter/nf_tables.h | 6 +++++ net/netfilter/nfnetlink_osf.c | 14 +++++------ net/netfilter/nft_osf.c | 30 ++++++++++++++++++++---- 4 files changed, 46 insertions(+), 15 deletions(-) diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h index c6000046c966..788613f36935 100644 --- a/include/linux/netfilter/nfnetlink_osf.h +++ b/include/linux/netfilter/nfnetlink_osf.h @@ -21,13 +21,18 @@ struct nf_osf_finger { struct nf_osf_user_finger finger; }; +struct nf_osf_data { + const char *genre; + const char *version; +}; + bool nf_osf_match(const struct sk_buff *skb, u_int8_t family, int hooknum, struct net_device *in, struct net_device *out, const struct nf_osf_info *info, struct net *net, const struct list_head *nf_osf_fingers); -const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers, - const int ttl_check); +bool nf_osf_find(const struct sk_buff *skb, + const struct list_head *nf_osf_fingers, + const int ttl_check, struct nf_osf_data *data); #endif /* _NFOSF_H */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a66c8de006cc..061bb3eb20c3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1522,15 +1522,21 @@ enum nft_flowtable_hook_attributes { * * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers) * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8) + * @NFTA_OSF_FLAGS: flags (NLA_U32) */ enum nft_osf_attributes { NFTA_OSF_UNSPEC, NFTA_OSF_DREG, NFTA_OSF_TTL, + NFTA_OSF_FLAGS, __NFTA_OSF_MAX, }; #define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1) +enum nft_osf_flags { + NFT_OSF_F_VERSION = (1 << 0), +}; + /** * enum nft_device_attributes - nf_tables device netlink attributes * diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 1f1d90c1716b..7b827bcb412c 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -255,9 +255,9 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family, } EXPORT_SYMBOL_GPL(nf_osf_match); -const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers, - const int ttl_check) +bool nf_osf_find(const struct sk_buff *skb, + const struct list_head *nf_osf_fingers, + const int ttl_check, struct nf_osf_data *data) { const struct iphdr *ip = ip_hdr(skb); const struct nf_osf_user_finger *f; @@ -265,24 +265,24 @@ const char *nf_osf_find(const struct sk_buff *skb, const struct nf_osf_finger *kf; struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; - const char *genre = NULL; memset(&ctx, 0, sizeof(ctx)); tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); if (!tcp) - return NULL; + return false; list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { f = &kf->finger; if (!nf_osf_match_one(skb, f, ttl_check, &ctx)) continue; - genre = f->genre; + data->genre = f->genre; + data->version = f->version; break; } - return genre; + return true; } EXPORT_SYMBOL_GPL(nf_osf_find); diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index b13618c764ec..87b60d6617ef 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -7,11 +7,13 @@ struct nft_osf { enum nft_registers dreg:8; u8 ttl; + u32 flags; }; static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = { [NFTA_OSF_DREG] = { .type = NLA_U32 }, [NFTA_OSF_TTL] = { .type = NLA_U8 }, + [NFTA_OSF_FLAGS] = { .type = NLA_U32 }, }; static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, @@ -20,9 +22,10 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, struct nft_osf *priv = nft_expr_priv(expr); u32 *dest = ®s->data[priv->dreg]; struct sk_buff *skb = pkt->skb; + char os_match[NFT_OSF_MAXGENRELEN + 1]; const struct tcphdr *tcp; + struct nf_osf_data data; struct tcphdr _tcph; - const char *os_name; tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); @@ -35,11 +38,17 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, return; } - os_name = nf_osf_find(skb, nf_osf_fingers, priv->ttl); - if (!os_name) + if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) { strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN); - else - strncpy((char *)dest, os_name, NFT_OSF_MAXGENRELEN); + } else { + if (priv->flags & NFT_OSF_F_VERSION) + snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s", + data.genre, data.version); + else + strlcpy(os_match, data.genre, NFT_OSF_MAXGENRELEN); + + strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN); + } } static int nft_osf_init(const struct nft_ctx *ctx, @@ -47,6 +56,7 @@ static int nft_osf_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_osf *priv = nft_expr_priv(expr); + u32 flags; int err; u8 ttl; @@ -57,6 +67,13 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->ttl = ttl; } + if (tb[NFTA_OSF_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFTA_OSF_FLAGS])); + if (flags != NFT_OSF_F_VERSION) + return -EINVAL; + priv->flags = flags; + } + priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); err = nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); @@ -73,6 +90,9 @@ static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_OSF_FLAGS, ntohl(priv->flags))) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg)) goto nla_put_failure; From 3b0a081db1f730373993c7a27936778402a3322c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Apr 2019 10:58:20 +0200 Subject: [PATCH 14/22] netfilter: make two functions static They have no external callers anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 1 - include/net/netfilter/nf_tables.h | 2 -- net/netfilter/nf_tables_api.c | 5 ++--- net/netfilter/x_tables.c | 3 +-- 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index bf384b3eedb8..1f852ef7b098 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -317,7 +317,6 @@ struct xt_table_info *xt_replace_table(struct xt_table *table, int *error); struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); -struct xt_target *xt_find_target(u8 af, const char *name, u8 revision); struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision); struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); int xt_find_revision(u8 af, const char *name, u8 revision, int target, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 55dff3ab44c7..2d5a0a1a87b8 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -475,8 +475,6 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, enum nft_trans_phase phase); int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); -void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding, bool commit); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); /** diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a2bd439937e6..e058273c5dde 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3782,8 +3782,8 @@ bind: } EXPORT_SYMBOL_GPL(nf_tables_bind_set); -void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding, bool event) +static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, bool event) { list_del_rcu(&binding->list); @@ -3794,7 +3794,6 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, GFP_KERNEL); } } -EXPORT_SYMBOL_GPL(nf_tables_unbind_set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e5e5c64df8d1..0a6656ed1534 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -227,7 +227,7 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision) EXPORT_SYMBOL_GPL(xt_request_find_match); /* Find target, grabs ref. Returns ERR_PTR() on error. */ -struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) +static struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { struct xt_target *t; int err = -ENOENT; @@ -255,7 +255,6 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) return ERR_PTR(err); } -EXPORT_SYMBOL(xt_find_target); struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { From bf8981a2aa082d9d64771b47c8a1c9c388d8cd40 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Apr 2019 10:44:06 +0200 Subject: [PATCH 15/22] netfilter: nf_nat: merge ip/ip6 masquerade headers Both are now implemented by nf_nat_masquerade.c, so no need to keep different headers. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_nat_masquerade.h | 11 ----------- include/net/netfilter/{ipv4 => }/nf_nat_masquerade.h | 12 +++++++++--- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv6/netfilter/ip6t_MASQUERADE.c | 2 +- net/netfilter/nf_nat_masquerade.c | 3 +-- net/netfilter/nft_masq.c | 3 +-- 6 files changed, 13 insertions(+), 20 deletions(-) delete mode 100644 include/net/netfilter/ipv6/nf_nat_masquerade.h rename include/net/netfilter/{ipv4 => }/nf_nat_masquerade.h (50%) diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h deleted file mode 100644 index 2917bf95c437..000000000000 --- a/include/net/netfilter/ipv6/nf_nat_masquerade.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_MASQUERADE_IPV6_H_ -#define _NF_NAT_MASQUERADE_IPV6_H_ - -unsigned int -nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, - const struct net_device *out); -int nf_nat_masquerade_ipv6_register_notifier(void); -void nf_nat_masquerade_ipv6_unregister_notifier(void); - -#endif /* _NF_NAT_MASQUERADE_IPV6_H_ */ diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/nf_nat_masquerade.h similarity index 50% rename from include/net/netfilter/ipv4/nf_nat_masquerade.h rename to include/net/netfilter/nf_nat_masquerade.h index 13d55206bb9f..cafe71822a53 100644 --- a/include/net/netfilter/ipv4/nf_nat_masquerade.h +++ b/include/net/netfilter/nf_nat_masquerade.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_MASQUERADE_IPV4_H_ -#define _NF_NAT_MASQUERADE_IPV4_H_ +#ifndef _NF_NAT_MASQUERADE_H_ +#define _NF_NAT_MASQUERADE_H_ #include @@ -12,4 +12,10 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, int nf_nat_masquerade_ipv4_register_notifier(void); void nf_nat_masquerade_ipv4_unregister_notifier(void); -#endif /*_NF_NAT_MASQUERADE_IPV4_H_ */ +unsigned int +nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + const struct net_device *out); +int nf_nat_masquerade_ipv6_register_notifier(void); +void nf_nat_masquerade_ipv6_unregister_notifier(void); + +#endif /*_NF_NAT_MASQUERADE_H_ */ diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index fd3f9e8a74da..0a2bffb6a0ad 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 29c7f1915a96..4a22343ed67a 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include static unsigned int masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index d85c4d902e7b..10053e70f69d 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -7,8 +7,7 @@ #include #include -#include -#include +#include static DEFINE_MUTEX(masq_mutex); static unsigned int masq_refcnt4 __read_mostly; diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 35a1794acf4c..0783a3e99bd7 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -14,8 +14,7 @@ #include #include #include -#include -#include +#include struct nft_masq { u32 flags; From adf82accc5f526f1e812f1a8df7292fef7dad19a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Apr 2019 10:44:07 +0200 Subject: [PATCH 16/22] netfilter: x_tables: merge ip and ipv6 masquerade modules No need to have separate modules for this. before: text data bss dec filename 2038 1168 0 3206 net/ipv4/netfilter/ipt_MASQUERADE.ko 1526 1024 0 2550 net/ipv6/netfilter/ip6t_MASQUERADE.ko after: text data bss dec filename 2521 1296 0 3817 net/netfilter/xt_MASQUERADE.ko Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 12 +-- net/ipv4/netfilter/Makefile | 1 - net/ipv6/netfilter/Kconfig | 11 +-- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_MASQUERADE.c | 81 ----------------- net/netfilter/Kconfig | 14 +++ net/netfilter/Makefile | 1 + .../xt_MASQUERADE.c} | 90 ++++++++++++++----- 8 files changed, 87 insertions(+), 124 deletions(-) delete mode 100644 net/ipv6/netfilter/ip6t_MASQUERADE.c rename net/{ipv4/netfilter/ipt_MASQUERADE.c => netfilter/xt_MASQUERADE.c} (51%) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ea688832fc4e..1412b029f37f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -224,16 +224,10 @@ if IP_NF_NAT config IP_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - select NF_NAT_MASQUERADE - default m if NETFILTER_ADVANCED=n + select NETFILTER_XT_TARGET_MASQUERADE help - Masquerading is a special case of NAT: all outgoing connections are - changed to seem to come from a particular interface's address, and - if the interface goes down, those connections are lost. This is - only useful for dialup accounts with dynamic IP address (ie. your IP - address will be different on next dialup). - - To compile it as a module, choose M here. If unsure, say N. + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE. config IP_NF_TARGET_NETMAP tristate "NETMAP target support" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 2cfdda7b109f..c50e0ec095d2 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o # targets obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o -obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 3de3adb1a0c9..086fc669279e 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -270,15 +270,10 @@ if IP6_NF_NAT config IP6_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - select NF_NAT_MASQUERADE + select NETFILTER_XT_TARGET_MASQUERADE help - Masquerading is a special case of NAT: all outgoing connections are - changed to seem to come from a particular interface's address, and - if the interface goes down, those connections are lost. This is - only useful for dialup accounts with dynamic IP address (ie. your IP - address will be different on next dialup). - - To compile it as a module, choose M here. If unsure, say N. + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE. config IP6_NF_TARGET_NPT tristate "NPT (Network Prefix translation) target support" diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 93aff604b243..731a74c60dca 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -46,7 +46,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o # targets -obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c deleted file mode 100644 index 4a22343ed67a..000000000000 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int -masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) -{ - return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par)); -} - -static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_range2 *range = par->targinfo; - - if (range->flags & NF_NAT_RANGE_MAP_IPS) - return -EINVAL; - return nf_ct_netns_get(par->net, par->family); -} - -static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par) -{ - nf_ct_netns_put(par->net, par->family); -} - -static struct xt_target masquerade_tg6_reg __read_mostly = { - .name = "MASQUERADE", - .family = NFPROTO_IPV6, - .checkentry = masquerade_tg6_checkentry, - .destroy = masquerade_tg6_destroy, - .target = masquerade_tg6, - .targetsize = sizeof(struct nf_nat_range), - .table = "nat", - .hooks = 1 << NF_INET_POST_ROUTING, - .me = THIS_MODULE, -}; - -static int __init masquerade_tg6_init(void) -{ - int err; - - err = xt_register_target(&masquerade_tg6_reg); - if (err) - return err; - - err = nf_nat_masquerade_ipv6_register_notifier(); - if (err) - xt_unregister_target(&masquerade_tg6_reg); - - return err; -} -static void __exit masquerade_tg6_exit(void) -{ - nf_nat_masquerade_ipv6_unregister_notifier(); - xt_unregister_target(&masquerade_tg6_reg); -} - -module_init(masquerade_tg6_init); -module_exit(masquerade_tg6_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_DESCRIPTION("Xtables: automatic address SNAT"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f4384c096d0d..02b281d3c167 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -997,6 +997,20 @@ config NETFILTER_XT_TARGET_REDIRECT To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_MASQUERADE + tristate "MASQUERADE target support" + depends on NF_NAT + default m if NETFILTER_ADVANCED=n + select NF_NAT_MASQUERADE + help + Masquerading is a special case of NAT: all outgoing connections are + changed to seem to come from a particular interface's address, and + if the interface goes down, those connections are lost. This is + only useful for dialup accounts with dynamic IP address (ie. your IP + address will be different on next dialup). + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_TEE tristate '"TEE" - packet cloning to alternate destination' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index afbf475e02b2..72cca6b48960 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -148,6 +148,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o obj-$(CONFIG_NETFILTER_XT_TARGET_REDIRECT) += xt_REDIRECT.o +obj-$(CONFIG_NETFILTER_XT_TARGET_MASQUERADE) += xt_MASQUERADE.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c similarity index 51% rename from net/ipv4/netfilter/ipt_MASQUERADE.c rename to net/netfilter/xt_MASQUERADE.c index 0a2bffb6a0ad..96d884718749 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/netfilter/xt_MASQUERADE.c @@ -9,17 +9,7 @@ * published by the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include #include #include #include @@ -64,38 +54,90 @@ static void masquerade_tg_destroy(const struct xt_tgdtor_param *par) nf_ct_netns_put(par->net, par->family); } -static struct xt_target masquerade_tg_reg __read_mostly = { - .name = "MASQUERADE", - .family = NFPROTO_IPV4, - .target = masquerade_tg, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = 1 << NF_INET_POST_ROUTING, - .checkentry = masquerade_tg_check, - .destroy = masquerade_tg_destroy, - .me = THIS_MODULE, +#if IS_ENABLED(CONFIG_IPV6) +static unsigned int +masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par)); +} + +static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range2 *range = par->targinfo; + + if (range->flags & NF_NAT_RANGE_MAP_IPS) + return -EINVAL; + + return nf_ct_netns_get(par->net, par->family); +} +#endif + +static struct xt_target masquerade_tg_reg[] __read_mostly = { + { +#if IS_ENABLED(CONFIG_IPV6) + .name = "MASQUERADE", + .family = NFPROTO_IPV6, + .target = masquerade_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = 1 << NF_INET_POST_ROUTING, + .checkentry = masquerade_tg6_checkentry, + .destroy = masquerade_tg_destroy, + .me = THIS_MODULE, + }, { +#endif + .name = "MASQUERADE", + .family = NFPROTO_IPV4, + .target = masquerade_tg, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .table = "nat", + .hooks = 1 << NF_INET_POST_ROUTING, + .checkentry = masquerade_tg_check, + .destroy = masquerade_tg_destroy, + .me = THIS_MODULE, + } }; static int __init masquerade_tg_init(void) { int ret; - ret = xt_register_target(&masquerade_tg_reg); + ret = xt_register_targets(masquerade_tg_reg, + ARRAY_SIZE(masquerade_tg_reg)); if (ret) return ret; ret = nf_nat_masquerade_ipv4_register_notifier(); - if (ret) - xt_unregister_target(&masquerade_tg_reg); + if (ret) { + xt_unregister_targets(masquerade_tg_reg, + ARRAY_SIZE(masquerade_tg_reg)); + return ret; + } +#if IS_ENABLED(CONFIG_IPV6) + ret = nf_nat_masquerade_ipv6_register_notifier(); + if (ret) { + xt_unregister_targets(masquerade_tg_reg, + ARRAY_SIZE(masquerade_tg_reg)); + nf_nat_masquerade_ipv4_unregister_notifier(); + return ret; + } +#endif return ret; } static void __exit masquerade_tg_exit(void) { - xt_unregister_target(&masquerade_tg_reg); + xt_unregister_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg)); nf_nat_masquerade_ipv4_unregister_notifier(); +#if IS_ENABLED(CONFIG_IPV6) + nf_nat_masquerade_ipv6_unregister_notifier(); +#endif } module_init(masquerade_tg_init); module_exit(masquerade_tg_exit); +#if IS_ENABLED(CONFIG_IPV6) +MODULE_ALIAS("ip6t_MASQUERADE"); +#endif +MODULE_ALIAS("ipt_MASQUERADE"); From 610a43149cabd0c7aa7bed19cbcf05a0249ab32a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Apr 2019 10:44:08 +0200 Subject: [PATCH 17/22] netfilter: nf_nat_masquerade: unify ipv4/6 notifier registration Only reason for having two different register functions was because of ipt_MASQUERADE and ip6t_MASQUERADE being two different modules. Previous patch merged those into xt_MASQUERADE, so we can merge this too. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat_masquerade.h | 6 +- net/netfilter/nf_nat_masquerade.c | 101 ++++++++-------------- net/netfilter/nft_masq.c | 16 +--- net/netfilter/xt_MASQUERADE.c | 16 +--- 4 files changed, 44 insertions(+), 95 deletions(-) diff --git a/include/net/netfilter/nf_nat_masquerade.h b/include/net/netfilter/nf_nat_masquerade.h index cafe71822a53..54a14d643c34 100644 --- a/include/net/netfilter/nf_nat_masquerade.h +++ b/include/net/netfilter/nf_nat_masquerade.h @@ -9,13 +9,11 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, const struct nf_nat_range2 *range, const struct net_device *out); -int nf_nat_masquerade_ipv4_register_notifier(void); -void nf_nat_masquerade_ipv4_unregister_notifier(void); +int nf_nat_masquerade_inet_register_notifiers(void); +void nf_nat_masquerade_inet_unregister_notifiers(void); unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out); -int nf_nat_masquerade_ipv6_register_notifier(void); -void nf_nat_masquerade_ipv6_unregister_notifier(void); #endif /*_NF_NAT_MASQUERADE_H_ */ diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index 10053e70f69d..8e8a65d46345 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -10,8 +10,7 @@ #include static DEFINE_MUTEX(masq_mutex); -static unsigned int masq_refcnt4 __read_mostly; -static unsigned int masq_refcnt6 __read_mostly; +static unsigned int masq_refcnt __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, @@ -136,56 +135,6 @@ static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; -int nf_nat_masquerade_ipv4_register_notifier(void) -{ - int ret = 0; - - mutex_lock(&masq_mutex); - if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) { - ret = -EOVERFLOW; - goto out_unlock; - } - - /* check if the notifier was already set */ - if (++masq_refcnt4 > 1) - goto out_unlock; - - /* Register for device down reports */ - ret = register_netdevice_notifier(&masq_dev_notifier); - if (ret) - goto err_dec; - /* Register IP address change reports */ - ret = register_inetaddr_notifier(&masq_inet_notifier); - if (ret) - goto err_unregister; - - mutex_unlock(&masq_mutex); - return ret; - -err_unregister: - unregister_netdevice_notifier(&masq_dev_notifier); -err_dec: - masq_refcnt4--; -out_unlock: - mutex_unlock(&masq_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier); - -void nf_nat_masquerade_ipv4_unregister_notifier(void) -{ - mutex_lock(&masq_mutex); - /* check if the notifier still has clients */ - if (--masq_refcnt4 > 0) - goto out_unlock; - - unregister_netdevice_notifier(&masq_dev_notifier); - unregister_inetaddr_notifier(&masq_inet_notifier); -out_unlock: - mutex_unlock(&masq_mutex); -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier); - #if IS_ENABLED(CONFIG_IPV6) static atomic_t v6_worker_count __read_mostly; @@ -321,44 +270,68 @@ static struct notifier_block masq_inet6_notifier = { .notifier_call = masq_inet6_event, }; -int nf_nat_masquerade_ipv6_register_notifier(void) +static int nf_nat_masquerade_ipv6_register_notifier(void) +{ + return register_inet6addr_notifier(&masq_inet6_notifier); +} +#else +static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; } +#endif + +int nf_nat_masquerade_inet_register_notifiers(void) { int ret = 0; mutex_lock(&masq_mutex); - if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) { + if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) { ret = -EOVERFLOW; goto out_unlock; } - /* check if the notifier is already set */ - if (++masq_refcnt6 > 1) + /* check if the notifier was already set */ + if (++masq_refcnt > 1) goto out_unlock; - ret = register_inet6addr_notifier(&masq_inet6_notifier); + /* Register for device down reports */ + ret = register_netdevice_notifier(&masq_dev_notifier); if (ret) goto err_dec; + /* Register IP address change reports */ + ret = register_inetaddr_notifier(&masq_inet_notifier); + if (ret) + goto err_unregister; + + ret = nf_nat_masquerade_ipv6_register_notifier(); + if (ret) + goto err_unreg_inet; mutex_unlock(&masq_mutex); return ret; +err_unreg_inet: + unregister_inetaddr_notifier(&masq_inet_notifier); +err_unregister: + unregister_netdevice_notifier(&masq_dev_notifier); err_dec: - masq_refcnt6--; + masq_refcnt--; out_unlock: mutex_unlock(&masq_mutex); return ret; } -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier); +EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers); -void nf_nat_masquerade_ipv6_unregister_notifier(void) +void nf_nat_masquerade_inet_unregister_notifiers(void) { mutex_lock(&masq_mutex); - /* check if the notifier still has clients */ - if (--masq_refcnt6 > 0) + /* check if the notifiers still have clients */ + if (--masq_refcnt > 0) goto out_unlock; + unregister_netdevice_notifier(&masq_dev_notifier); + unregister_inetaddr_notifier(&masq_inet_notifier); +#if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&masq_inet6_notifier); +#endif out_unlock: mutex_unlock(&masq_mutex); } -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier); -#endif +EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers); diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 0783a3e99bd7..86fd90085eaf 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -195,22 +195,12 @@ static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { static int __init nft_masq_module_init_ipv6(void) { - int ret = nft_register_expr(&nft_masq_ipv6_type); - - if (ret) - return ret; - - ret = nf_nat_masquerade_ipv6_register_notifier(); - if (ret < 0) - nft_unregister_expr(&nft_masq_ipv6_type); - - return ret; + return nft_register_expr(&nft_masq_ipv6_type); } static void nft_masq_module_exit_ipv6(void) { nft_unregister_expr(&nft_masq_ipv6_type); - nf_nat_masquerade_ipv6_unregister_notifier(); } #else static inline int nft_masq_module_init_ipv6(void) { return 0; } @@ -293,7 +283,7 @@ static int __init nft_masq_module_init(void) return ret; } - ret = nf_nat_masquerade_ipv4_register_notifier(); + ret = nf_nat_masquerade_inet_register_notifiers(); if (ret < 0) { nft_masq_module_exit_ipv6(); nft_masq_module_exit_inet(); @@ -309,7 +299,7 @@ static void __exit nft_masq_module_exit(void) nft_masq_module_exit_ipv6(); nft_masq_module_exit_inet(); nft_unregister_expr(&nft_masq_ipv4_type); - nf_nat_masquerade_ipv4_unregister_notifier(); + nf_nat_masquerade_inet_unregister_notifiers(); } module_init(nft_masq_module_init); diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c index 96d884718749..ece20d832adc 100644 --- a/net/netfilter/xt_MASQUERADE.c +++ b/net/netfilter/xt_MASQUERADE.c @@ -107,32 +107,20 @@ static int __init masquerade_tg_init(void) if (ret) return ret; - ret = nf_nat_masquerade_ipv4_register_notifier(); + ret = nf_nat_masquerade_inet_register_notifiers(); if (ret) { xt_unregister_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg)); return ret; } -#if IS_ENABLED(CONFIG_IPV6) - ret = nf_nat_masquerade_ipv6_register_notifier(); - if (ret) { - xt_unregister_targets(masquerade_tg_reg, - ARRAY_SIZE(masquerade_tg_reg)); - nf_nat_masquerade_ipv4_unregister_notifier(); - return ret; - } -#endif return ret; } static void __exit masquerade_tg_exit(void) { xt_unregister_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg)); - nf_nat_masquerade_ipv4_unregister_notifier(); -#if IS_ENABLED(CONFIG_IPV6) - nf_nat_masquerade_ipv6_unregister_notifier(); -#endif + nf_nat_masquerade_inet_unregister_notifiers(); } module_init(masquerade_tg_init); From 26f7fe4a5db5b41d2abe53e37100c8984b157fb2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Apr 2019 16:36:39 +0200 Subject: [PATCH 18/22] selftests: netfilter: add ebtables broute test case ebtables -t broute allows to redirect packets in a way that they get pushed up the stack, even if the interface is part of a bridge. In case of IP packets to non-local address, this means those IP packets are routed instead of bridged-forwarded, just as if the bridge would not have existed. Expected test output is: PASS: netns connectivity: ns1 and ns2 can reach each other PASS: ns1/ns2 connectivity with active broute rule PASS: ns1/ns2 connectivity with active broute rule and bridge forward drop Signed-off-by: Florian Westphal Acked-by: David S. Miller Acked-by: Nikolay Aleksandrov Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/Makefile | 2 +- .../selftests/netfilter/bridge_brouter.sh | 146 ++++++++++++++++++ 2 files changed, 147 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/bridge_brouter.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index c9ff2b47bd1c..80dae72a25c7 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh nft_nat.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/bridge_brouter.sh b/tools/testing/selftests/netfilter/bridge_brouter.sh new file mode 100755 index 000000000000..29f3955b9af7 --- /dev/null +++ b/tools/testing/selftests/netfilter/bridge_brouter.sh @@ -0,0 +1,146 @@ +#!/bin/bash +# +# This test is for bridge 'brouting', i.e. make some packets being routed +# rather than getting bridged even though they arrive on interface that is +# part of a bridge. + +# eth0 br0 eth0 +# setup is: ns1 <-> ns0 <-> ns2 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +ebtables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ebtables" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +if [ $? -ne 0 ]; then + echo "SKIP: Can't create veth device" + exit $ksft_skip +fi +ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 + +ip -net ns0 link set lo up +ip -net ns0 link set veth0 up +ip -net ns0 link set veth1 up + +ip -net ns0 link add br0 type bridge +if [ $? -ne 0 ]; then + echo "SKIP: Can't create bridge br0" + exit $ksft_skip +fi + +ip -net ns0 link set veth0 master br0 +ip -net ns0 link set veth1 master br0 +ip -net ns0 link set br0 up +ip -net ns0 addr add 10.0.0.1/24 dev br0 + +# place both in same subnet, ns1 and ns2 connected via ns0:br0 +for i in 1 2; do + ip -net ns$i link set lo up + ip -net ns$i link set eth0 up + ip -net ns$i addr add 10.0.0.1$i/24 dev eth0 +done + +test_ebtables_broute() +{ + local cipt + + # redirect is needed so the dstmac is rewritten to the bridge itself, + # ip stack won't process OTHERHOST (foreign unicast mac) packets. + ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP + if [ $? -ne 0 ]; then + echo "SKIP: Could not add ebtables broute redirect rule" + return $ksft_skip + fi + + # ping netns1, expected to not work (ip forwarding is off) + ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "ERROR: ping works, should have failed" 1>&2 + return 1 + fi + + # enable forwarding on both interfaces. + # neither needs an ip address, but at least the bridge needs + # an ip address in same network segment as ns1 and ns2 (ns0 + # needs to be able to determine route for to-be-forwarded packet). + ip netns exec ns0 sysctl -q net.ipv4.conf.veth0.forwarding=1 + ip netns exec ns0 sysctl -q net.ipv4.conf.veth1.forwarding=1 + + sleep 1 + + ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null + if [ $? -ne 0 ]; then + echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2 + return 1 + fi + + echo "PASS: ns1/ns2 connectivity with active broute rule" + ip netns exec ns0 ebtables -t broute -F + + # ping netns1, expected to work (frames are bridged) + ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null + if [ $? -ne 0 ]; then + echo "ERROR: ping did not work, but it should (bridged)" 1>&2 + return 1 + fi + + ip netns exec ns0 ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP + + # ping netns1, expected to not work (DROP in bridge forward) + ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2 + return 1 + fi + + # re-activate brouter + ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP + + ip netns exec ns2 ping -q -c 1 10.0.0.11 > /dev/null + if [ $? -ne 0 ]; then + echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2 + return 1 + fi + + echo "PASS: ns1/ns2 connectivity with active broute rule and bridge forward drop" + return 0 +} + +# test basic connectivity +ip netns exec ns1 ping -c 1 -q 10.0.0.12 > /dev/null +if [ $? -ne 0 ]; then + echo "ERROR: Could not reach ns2 from ns1" 1>&2 + ret=1 +fi + +ip netns exec ns2 ping -c 1 -q 10.0.0.11 > /dev/null +if [ $? -ne 0 ]; then + echo "ERROR: Could not reach ns1 from ns2" 1>&2 + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: netns connectivity: ns1 and ns2 can reach each other" +fi + +test_ebtables_broute +ret=$? +for i in 0 1 2; do ip netns del ns$i;done + +exit $ret From f12064d1b402c60c5db9c4b63d5ed6d7facb33f6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Apr 2019 16:36:40 +0200 Subject: [PATCH 19/22] bridge: reduce size of input cb to 16 bytes Reduce size of br_input_skb_cb from 24 to 16 bytes by using bitfield for those values that can only be 0 or 1. igmp is the igmp type value, so it needs to be at least u8. Furthermore, the bridge currently relies on step-by-step initialization of br_input_skb_cb fields as the skb passes through the stack. Explicitly zero out the bridge input cb instead, this avoids having to review/validate that no BR_INPUT_SKB_CB(skb)->foo test can see a 'random' value from previous protocol cb. AFAICS all current fields are always set up before they are read again, so this is not a bug fix. Signed-off-by: Florian Westphal Acked-by: David S. Miller Acked-by: Nikolay Aleksandrov Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_arp_nd_proxy.c | 18 +++++++++--------- net/bridge/br_input.c | 2 ++ net/bridge/br_private.h | 12 +++++------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index 724b474ade54..15116752365a 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -131,7 +131,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, u8 *arpptr, *sha; __be32 sip, tip; - BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0; if ((dev->flags & IFF_NOARP) || !pskb_may_pull(skb, arp_hdr_len(dev))) @@ -161,7 +161,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, return; if (ipv4_is_zeronet(sip) || sip == tip) { /* prevent flooding to neigh suppress ports */ - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } } @@ -181,7 +181,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, /* its our local ip, so don't proxy reply * and don't forward to neigh suppress ports */ - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } @@ -217,7 +217,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, */ if (replied || br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; } neigh_release(n); @@ -393,7 +393,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, struct ipv6hdr *iphdr; struct neighbour *n; - BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0; if (p && (p->flags & BR_NEIGH_SUPPRESS)) return; @@ -401,7 +401,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT && !msg->icmph.icmp6_solicited) { /* prevent flooding to neigh suppress ports */ - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } @@ -414,7 +414,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) { /* prevent flooding to neigh suppress ports */ - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } @@ -432,7 +432,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, /* its our own ip, so don't proxy reply * and don't forward to arp suppress ports */ - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } @@ -465,7 +465,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, */ if (replied || br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; } neigh_release(n); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 5ea7e56119c1..e2f93e5c72da 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -227,6 +227,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) if (!skb) return RX_HANDLER_CONSUMED; + memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); + p = br_port_get_rcu(skb->dev); if (p->flags & BR_VLAN_TUNNEL) { if (br_handle_ingress_vlan_tunnel(skb, p, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 7946aa3b6e09..e7110a6e2b7e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -425,15 +425,13 @@ struct br_input_skb_cb { struct net_device *brdev; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING - int igmp; - int mrouters_only; + u8 igmp; + u8 mrouters_only:1; #endif - - bool proxyarp_replied; - bool src_port_isolated; - + u8 proxyarp_replied:1; + u8 src_port_isolated:1; #ifdef CONFIG_BRIDGE_VLAN_FILTERING - bool vlan_filtered; + u8 vlan_filtered:1; #endif #ifdef CONFIG_NET_SWITCHDEV From 971502d77faa50a37c89bc6d172450294ad9a5fd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Apr 2019 16:36:41 +0200 Subject: [PATCH 20/22] bridge: netfilter: unroll NF_HOOK helper in bridge input path Replace NF_HOOK() based invocation of the netfilter hooks with a private copy of nf_hook_slow(). This copy has one difference: it can return the rx handler value expected by the stack, i.e. RX_HANDLER_CONSUMED or RX_HANDLER_PASS. This is needed by the next patch to invoke the ebtables "broute" table via the standard netfilter hooks rather than the custom "br_should_route_hook" indirection that is used now. When the skb is to be "brouted", we must return RX_HANDLER_PASS from the bridge rx input handler, but there is no way to indicate this via NF_HOOK(), unless perhaps by some hack such as exposing bridge_cb in the netfilter core or a percpu flag. text data bss dec filename 3369 56 0 3425 net/bridge/br_input.o.before 3458 40 0 3498 net/bridge/br_input.o.after This allows removal of the "br_should_route_hook" in the next patch. Signed-off-by: Florian Westphal Acked-by: David S. Miller Acked-by: Nikolay Aleksandrov Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 3 ++ net/bridge/br_input.c | 55 +++++++++++++++++++++++++++++--- net/netfilter/core.c | 1 + net/netfilter/nf_internals.h | 3 -- net/netfilter/nf_queue.c | 1 + 5 files changed, 56 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index a50a69f5334c..7239105d9d2e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -119,4 +119,7 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, return queue; } +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + const struct nf_hook_entries *entries, unsigned int index, + unsigned int verdict); #endif /* _NF_QUEUE_H */ diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e2f93e5c72da..4ac34fb5f943 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -206,6 +207,55 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu return 0; } +static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb) +{ +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + struct nf_hook_entries *e = NULL; + struct nf_hook_state state; + unsigned int verdict, i; + struct net *net; + int ret; + + net = dev_net(skb->dev); +#ifdef HAVE_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_BRIDGE][NF_BR_PRE_ROUTING])) + goto frame_finish; +#endif + + e = rcu_dereference(net->nf.hooks_bridge[NF_BR_PRE_ROUTING]); + if (!e) + goto frame_finish; + + nf_hook_state_init(&state, NF_BR_PRE_ROUTING, + NFPROTO_BRIDGE, skb->dev, NULL, NULL, + net, br_handle_frame_finish); + + for (i = 0; i < e->num_hook_entries; i++) { + verdict = nf_hook_entry_hookfn(&e->hooks[i], skb, &state); + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + break; + case NF_DROP: + kfree_skb(skb); + return RX_HANDLER_CONSUMED; + case NF_QUEUE: + ret = nf_queue(skb, &state, e, i, verdict); + if (ret == 1) + continue; + return RX_HANDLER_CONSUMED; + default: /* STOLEN */ + return RX_HANDLER_CONSUMED; + } + } +frame_finish: + net = dev_net(skb->dev); + br_handle_frame_finish(net, NULL, skb); +#else + br_handle_frame_finish(dev_net(skb->dev), NULL, skb); +#endif + return RX_HANDLER_CONSUMED; +} + /* * Return NULL if skb is handled * note: already called with rcu_read_lock @@ -304,10 +354,7 @@ forward: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - dev_net(skb->dev), NULL, skb, skb->dev, NULL, - br_handle_frame_finish); - break; + return nf_hook_bridge_pre(skb, pskb); default: drop: kfree_skb(skb); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 93aaec3a54ec..71f06900473e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "nf_internals.h" diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index e15779fd58e3..d6c43902ebd7 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -7,9 +7,6 @@ #include /* nf_queue.c */ -int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - const struct nf_hook_entries *entries, unsigned int index, - unsigned int verdict); void nf_queue_nf_hook_drop(struct net *net); /* nf_log.c */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index a36a77bae1d6..9dc1d6e04946 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -240,6 +240,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, return 0; } +EXPORT_SYMBOL_GPL(nf_queue); static unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, From 223fd0adfa8af36d5d9b5d38016e579ee052f367 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Apr 2019 16:36:42 +0200 Subject: [PATCH 21/22] bridge: broute: make broute a real ebtables table This makes broute a normal ebtables table, hooking at PREROUTING. The broute hook is removed. It uses skb->cb to signal to bridge rx handler that the skb should be routed instead of being bridged. This change is backwards compatible with ebtables as no userspace visible parts are changed. This means we can also remove the !ops test in ebt_register_table, it was only there for broute table sake. Signed-off-by: Florian Westphal Acked-by: David S. Miller Acked-by: Nikolay Aleksandrov Signed-off-by: Pablo Neira Ayuso --- include/linux/if_bridge.h | 3 -- net/bridge/br_input.c | 18 ++------ net/bridge/br_private.h | 3 ++ net/bridge/netfilter/ebtable_broute.c | 63 +++++++++++++++++++-------- net/bridge/netfilter/ebtables.c | 7 +-- 5 files changed, 52 insertions(+), 42 deletions(-) diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 627b788ba0ff..ef0819ced0fc 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -56,9 +56,6 @@ struct br_ip_list { extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); -typedef int br_should_route_hook_t(struct sk_buff *skb); -extern br_should_route_hook_t __rcu *br_should_route_hook; - #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 4ac34fb5f943..e0aacfedcfe1 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -24,10 +24,6 @@ #include "br_private.h" #include "br_private_tunnel.h" -/* Hook for brouter */ -br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; -EXPORT_SYMBOL(br_should_route_hook); - static int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -234,6 +230,10 @@ static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb) verdict = nf_hook_entry_hookfn(&e->hooks[i], skb, &state); switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: + if (BR_INPUT_SKB_CB(skb)->br_netfilter_broute) { + *pskb = skb; + return RX_HANDLER_PASS; + } break; case NF_DROP: kfree_skb(skb); @@ -265,7 +265,6 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) struct net_bridge_port *p; struct sk_buff *skb = *pskb; const unsigned char *dest = eth_hdr(skb)->h_dest; - br_should_route_hook_t *rhook; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return RX_HANDLER_PASS; @@ -341,15 +340,6 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) forward: switch (p->state) { case BR_STATE_FORWARDING: - rhook = rcu_dereference(br_should_route_hook); - if (rhook) { - if ((*rhook)(skb)) { - *pskb = skb; - return RX_HANDLER_PASS; - } - dest = eth_hdr(skb)->h_dest; - } - /* fall through */ case BR_STATE_LEARNING: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e7110a6e2b7e..4bea2f11da9b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -433,6 +433,9 @@ struct br_input_skb_cb { #ifdef CONFIG_BRIDGE_VLAN_FILTERING u8 vlan_filtered:1; #endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + u8 br_netfilter_broute:1; +#endif #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 276b60262981..ec2652a459da 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -15,6 +15,8 @@ #include #include +#include "../br_private.h" + /* EBT_ACCEPT means the frame will be bridged * EBT_DROP means the frame will be routed */ @@ -48,30 +50,63 @@ static const struct ebt_table broute_table = { .me = THIS_MODULE, }; -static int ebt_broute(struct sk_buff *skb) +static unsigned int ebt_broute(void *priv, struct sk_buff *skb, + const struct nf_hook_state *s) { + struct net_bridge_port *p = br_port_get_rcu(skb->dev); struct nf_hook_state state; + unsigned char *dest; int ret; + if (!p || p->state != BR_STATE_FORWARDING) + return NF_ACCEPT; + nf_hook_state_init(&state, NF_BR_BROUTING, - NFPROTO_BRIDGE, skb->dev, NULL, NULL, - dev_net(skb->dev), NULL); + NFPROTO_BRIDGE, s->in, NULL, NULL, + s->net, NULL); ret = ebt_do_table(skb, &state, state.net->xt.broute_table); - if (ret == NF_DROP) - return 1; /* route it */ - return 0; /* bridge it */ + + if (ret != NF_DROP) + return ret; + + /* DROP in ebtables -t broute means that the + * skb should be routed, not bridged. + * This is awkward, but can't be changed for compatibility + * reasons. + * + * We map DROP to ACCEPT and set the ->br_netfilter_broute flag. + */ + BR_INPUT_SKB_CB(skb)->br_netfilter_broute = 1; + + /* undo PACKET_HOST mangling done in br_input in case the dst + * address matches the logical bridge but not the port. + */ + dest = eth_hdr(skb)->h_dest; + if (skb->pkt_type == PACKET_HOST && + !ether_addr_equal(skb->dev->dev_addr, dest) && + ether_addr_equal(p->br->dev->dev_addr, dest)) + skb->pkt_type = PACKET_OTHERHOST; + + return NF_ACCEPT; } +static const struct nf_hook_ops ebt_ops_broute = { + .hook = ebt_broute, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_FIRST, +}; + static int __net_init broute_net_init(struct net *net) { - return ebt_register_table(net, &broute_table, NULL, + return ebt_register_table(net, &broute_table, &ebt_ops_broute, &net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.broute_table, NULL); + ebt_unregister_table(net, net->xt.broute_table, &ebt_ops_broute); } static struct pernet_operations broute_net_ops = { @@ -81,21 +116,11 @@ static struct pernet_operations broute_net_ops = { static int __init ebtable_broute_init(void) { - int ret; - - ret = register_pernet_subsys(&broute_net_ops); - if (ret < 0) - return ret; - /* see br_input.c */ - RCU_INIT_POINTER(br_should_route_hook, - (br_should_route_hook_t *)ebt_broute); - return 0; + return register_pernet_subsys(&broute_net_ops); } static void __exit ebtable_broute_fini(void) { - RCU_INIT_POINTER(br_should_route_hook, NULL); - synchronize_net(); unregister_pernet_subsys(&broute_net_ops); } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index eb15891f8b9f..383f0328ff68 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1221,10 +1221,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, mutex_unlock(&ebt_mutex); WRITE_ONCE(*res, table); - - if (!ops) - return 0; - ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); if (ret) { __ebt_unregister_table(net, table); @@ -1248,8 +1244,7 @@ out: void ebt_unregister_table(struct net *net, struct ebt_table *table, const struct nf_hook_ops *ops) { - if (ops) - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); __ebt_unregister_table(net, table); } From dc2f4189dcd2c87e211d30d9524ae8ebe19af577 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sat, 13 Apr 2019 14:03:36 +1000 Subject: [PATCH 22/22] bridge: only include nf_queue.h if needed After merging the netfilter-next tree, today's linux-next build (powerpc ppc44x_defconfig) failed like this: In file included from net/bridge/br_input.c:19: include/net/netfilter/nf_queue.h:16:23: error: field 'state' has incomplete type struct nf_hook_state state; ^~~~~ Fixes: 971502d77faa ("bridge: netfilter: unroll NF_HOOK helper in bridge input path") Signed-off-by: Stephen Rothwell Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e0aacfedcfe1..86dc46f6a68f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -16,7 +16,9 @@ #include #include #include +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE #include +#endif #include #include #include