ipv4: support for fib route lwtunnel encap attributes

This patch adds support in the ipv4 fib functions for parsing
user-provided encap attributes and attaching encap state data to
fib_nh and rtable.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Roopa Prabhu 2015-07-21 10:43:47 +02:00 committed by David S. Miller
parent 499a242568
commit 571e722676
5 changed files with 122 additions and 4 deletions

View File

@ -44,7 +44,9 @@ struct fib_config {
u32 fc_flow; u32 fc_flow;
u32 fc_nlflags; u32 fc_nlflags;
struct nl_info fc_nlinfo; struct nl_info fc_nlinfo;
}; struct nlattr *fc_encap;
u16 fc_encap_type;
};
struct fib_info; struct fib_info;
struct rtable; struct rtable;
@ -89,6 +91,7 @@ struct fib_nh {
struct rtable __rcu * __percpu *nh_pcpu_rth_output; struct rtable __rcu * __percpu *nh_pcpu_rth_output;
struct rtable __rcu *nh_rth_input; struct rtable __rcu *nh_rth_input;
struct fnhe_hash_bucket __rcu *nh_exceptions; struct fnhe_hash_bucket __rcu *nh_exceptions;
struct lwtunnel_state *nh_lwtstate;
}; };
/* /*

View File

@ -66,6 +66,7 @@ struct rtable {
struct list_head rt_uncached; struct list_head rt_uncached;
struct uncached_list *rt_uncached_list; struct uncached_list *rt_uncached_list;
struct lwtunnel_state *rt_lwtstate;
}; };
static inline bool rt_is_input_route(const struct rtable *rt) static inline bool rt_is_input_route(const struct rtable *rt)

View File

@ -591,6 +591,8 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_METRICS] = { .type = NLA_NESTED }, [RTA_METRICS] = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
[RTA_FLOW] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 },
[RTA_ENCAP_TYPE] = { .type = NLA_U16 },
[RTA_ENCAP] = { .type = NLA_NESTED },
}; };
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@ -656,6 +658,12 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
case RTA_TABLE: case RTA_TABLE:
cfg->fc_table = nla_get_u32(attr); cfg->fc_table = nla_get_u32(attr);
break; break;
case RTA_ENCAP:
cfg->fc_encap = attr;
break;
case RTA_ENCAP_TYPE:
cfg->fc_encap_type = nla_get_u16(attr);
break;
} }
} }

View File

@ -42,6 +42,7 @@
#include <net/ip_fib.h> #include <net/ip_fib.h>
#include <net/netlink.h> #include <net/netlink.h>
#include <net/nexthop.h> #include <net/nexthop.h>
#include <net/lwtunnel.h>
#include "fib_lookup.h" #include "fib_lookup.h"
@ -208,6 +209,7 @@ static void free_fib_info_rcu(struct rcu_head *head)
change_nexthops(fi) { change_nexthops(fi) {
if (nexthop_nh->nh_dev) if (nexthop_nh->nh_dev)
dev_put(nexthop_nh->nh_dev); dev_put(nexthop_nh->nh_dev);
lwtunnel_state_put(nexthop_nh->nh_lwtstate);
free_nh_exceptions(nexthop_nh); free_nh_exceptions(nexthop_nh);
rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
rt_fibinfo_free(&nexthop_nh->nh_rth_input); rt_fibinfo_free(&nexthop_nh->nh_rth_input);
@ -266,6 +268,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
#ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_ROUTE_CLASSID
nh->nh_tclassid != onh->nh_tclassid || nh->nh_tclassid != onh->nh_tclassid ||
#endif #endif
lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) ||
((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK)) ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
return -1; return -1;
onh++; onh++;
@ -366,6 +369,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
payload += nla_total_size((RTAX_MAX * nla_total_size(4))); payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
if (fi->fib_nhs) { if (fi->fib_nhs) {
size_t nh_encapsize = 0;
/* Also handles the special case fib_nhs == 1 */ /* Also handles the special case fib_nhs == 1 */
/* each nexthop is packed in an attribute */ /* each nexthop is packed in an attribute */
@ -374,8 +378,21 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
/* may contain flow and gateway attribute */ /* may contain flow and gateway attribute */
nhsize += 2 * nla_total_size(4); nhsize += 2 * nla_total_size(4);
/* grab encap info */
for_nexthops(fi) {
if (nh->nh_lwtstate) {
/* RTA_ENCAP (size reported by the encap state) */
nh_encapsize += lwtunnel_get_encap_size(
nh->nh_lwtstate);
/* RTA_ENCAP_TYPE (u16) */
nh_encapsize += nla_total_size(2);
}
} endfor_nexthops(fi);
/* all nexthops are packed in a nested attribute */ /* all nexthops are packed in a nested attribute */
payload += nla_total_size(fi->fib_nhs * nhsize); payload += nla_total_size((fi->fib_nhs * nhsize) +
nh_encapsize);
} }
return payload; return payload;
@ -452,6 +469,9 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
int remaining, struct fib_config *cfg) int remaining, struct fib_config *cfg)
{ {
struct net *net = cfg->fc_nlinfo.nl_net;
int ret;
change_nexthops(fi) { change_nexthops(fi) {
int attrlen; int attrlen;
@ -475,18 +495,66 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
if (nexthop_nh->nh_tclassid) if (nexthop_nh->nh_tclassid)
fi->fib_net->ipv4.fib_num_tclassid_users++; fi->fib_net->ipv4.fib_num_tclassid_users++;
#endif #endif
nla = nla_find(attrs, attrlen, RTA_ENCAP);
if (nla) {
struct lwtunnel_state *lwtstate;
struct net_device *dev = NULL;
struct nlattr *nla_entype;
nla_entype = nla_find(attrs, attrlen,
RTA_ENCAP_TYPE);
if (!nla_entype)
goto err_inval;
if (cfg->fc_oif)
dev = __dev_get_by_index(net, cfg->fc_oif);
ret = lwtunnel_build_state(dev, nla_get_u16(
nla_entype),
nla, &lwtstate);
if (ret)
goto errout;
lwtunnel_state_get(lwtstate);
nexthop_nh->nh_lwtstate = lwtstate;
}
} }
rtnh = rtnh_next(rtnh, &remaining); rtnh = rtnh_next(rtnh, &remaining);
} endfor_nexthops(fi); } endfor_nexthops(fi);
return 0; return 0;
err_inval:
ret = -EINVAL;
errout:
return ret;
} }
#endif #endif
/*
 * fib_encap_match - compare user-supplied encap attributes with a nexthop
 * @net:        namespace used to resolve @oif to a device
 * @encap_type: encap type taken from RTA_ENCAP_TYPE
 * @encap:      RTA_ENCAP attribute describing the requested encap
 * @oif:        output interface index, or 0 when none was given
 * @nh:         nexthop whose attached encap state is compared against
 *
 * Builds a temporary lwtunnel state from the user attributes and compares
 * it with the state attached to @nh.
 *
 * Returns 0 when @encap_type is LWTUNNEL_ENCAP_NONE, when the temporary
 * state cannot be built, or when lwtunnel_cmp_encap() reports 0; otherwise
 * returns the result of lwtunnel_cmp_encap().  The caller in fib_nh_match()
 * treats a non-zero result as "does not match".
 */
int fib_encap_match(struct net *net, u16 encap_type,
		    struct nlattr *encap,
		    int oif, const struct fib_nh *nh)
{
	struct lwtunnel_state *lwtstate;
	struct net_device *dev = NULL;
	int ret;

	if (encap_type == LWTUNNEL_ENCAP_NONE)
		return 0;

	if (oif)
		dev = __dev_get_by_index(net, oif);

	ret = lwtunnel_build_state(dev, encap_type, encap, &lwtstate);
	if (ret)
		return 0;

	/*
	 * The state built above exists only for this comparison and was
	 * previously never released.  Take a reference and drop it again,
	 * mirroring the get/put pairing used for nh_lwtstate elsewhere in
	 * this file.
	 * NOTE(review): assumes the final lwtunnel_state_put() frees the
	 * state -- confirm against net/lwtunnel.h.
	 */
	lwtunnel_state_get(lwtstate);
	ret = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
	lwtunnel_state_put(lwtstate);

	return ret;
}
int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
{ {
struct net *net = cfg->fc_nlinfo.nl_net;
#ifdef CONFIG_IP_ROUTE_MULTIPATH #ifdef CONFIG_IP_ROUTE_MULTIPATH
struct rtnexthop *rtnh; struct rtnexthop *rtnh;
int remaining; int remaining;
@ -496,6 +564,12 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
return 1; return 1;
if (cfg->fc_oif || cfg->fc_gw) { if (cfg->fc_oif || cfg->fc_gw) {
if (cfg->fc_encap) {
if (fib_encap_match(net, cfg->fc_encap_type,
cfg->fc_encap, cfg->fc_oif,
fi->fib_nh))
return 1;
}
if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
(!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
return 0; return 0;
@ -882,6 +956,22 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
} else { } else {
struct fib_nh *nh = fi->fib_nh; struct fib_nh *nh = fi->fib_nh;
if (cfg->fc_encap) {
struct lwtunnel_state *lwtstate;
struct net_device *dev = NULL;
if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE)
goto err_inval;
if (cfg->fc_oif)
dev = __dev_get_by_index(net, cfg->fc_oif);
err = lwtunnel_build_state(dev, cfg->fc_encap_type,
cfg->fc_encap, &lwtstate);
if (err)
goto failure;
lwtunnel_state_get(lwtstate);
nh->nh_lwtstate = lwtstate;
}
nh->nh_oif = cfg->fc_oif; nh->nh_oif = cfg->fc_oif;
nh->nh_gw = cfg->fc_gw; nh->nh_gw = cfg->fc_gw;
nh->nh_flags = cfg->fc_flags; nh->nh_flags = cfg->fc_flags;
@ -1055,6 +1145,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
goto nla_put_failure; goto nla_put_failure;
#endif #endif
if (fi->fib_nh->nh_lwtstate)
lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate);
} }
#ifdef CONFIG_IP_ROUTE_MULTIPATH #ifdef CONFIG_IP_ROUTE_MULTIPATH
if (fi->fib_nhs > 1) { if (fi->fib_nhs > 1) {
@ -1090,6 +1182,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
goto nla_put_failure; goto nla_put_failure;
#endif #endif
if (nh->nh_lwtstate)
lwtunnel_fill_encap(skb, nh->nh_lwtstate);
/* length of rtnetlink header + attributes */ /* length of rtnetlink header + attributes */
rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
} endfor_nexthops(fi); } endfor_nexthops(fi);

View File

@ -102,6 +102,7 @@
#include <net/tcp.h> #include <net/tcp.h>
#include <net/icmp.h> #include <net/icmp.h>
#include <net/xfrm.h> #include <net/xfrm.h>
#include <net/lwtunnel.h>
#include <net/netevent.h> #include <net/netevent.h>
#include <net/rtnetlink.h> #include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
@ -1355,6 +1356,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
list_del(&rt->rt_uncached); list_del(&rt->rt_uncached);
spin_unlock_bh(&ul->lock); spin_unlock_bh(&ul->lock);
} }
lwtunnel_state_put(rt->rt_lwtstate);
} }
void rt_flush_dev(struct net_device *dev) void rt_flush_dev(struct net_device *dev)
@ -1403,6 +1405,12 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
#ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid; rt->dst.tclassid = nh->nh_tclassid;
#endif #endif
if (nh->nh_lwtstate) {
lwtunnel_state_get(nh->nh_lwtstate);
rt->rt_lwtstate = nh->nh_lwtstate;
} else {
rt->rt_lwtstate = NULL;
}
if (unlikely(fnhe)) if (unlikely(fnhe))
cached = rt_bind_exception(rt, fnhe, daddr); cached = rt_bind_exception(rt, fnhe, daddr);
else if (!(rt->dst.flags & DST_NOCACHE)) else if (!(rt->dst.flags & DST_NOCACHE))
@ -1488,6 +1496,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_gateway = 0; rth->rt_gateway = 0;
rth->rt_uses_gateway = 0; rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached); INIT_LIST_HEAD(&rth->rt_uncached);
rth->rt_lwtstate = NULL;
if (our) { if (our) {
rth->dst.input= ip_local_deliver; rth->dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL; rth->rt_flags |= RTCF_LOCAL;
@ -1617,6 +1626,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_gateway = 0; rth->rt_gateway = 0;
rth->rt_uses_gateway = 0; rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached); INIT_LIST_HEAD(&rth->rt_uncached);
rth->rt_lwtstate = NULL;
RT_CACHE_STAT_INC(in_slow_tot); RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward; rth->dst.input = ip_forward;
@ -1791,6 +1801,8 @@ local_input:
rth->rt_gateway = 0; rth->rt_gateway = 0;
rth->rt_uses_gateway = 0; rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached); INIT_LIST_HEAD(&rth->rt_uncached);
rth->rt_lwtstate = NULL;
RT_CACHE_STAT_INC(in_slow_tot); RT_CACHE_STAT_INC(in_slow_tot);
if (res.type == RTN_UNREACHABLE) { if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error; rth->dst.input= ip_error;
@ -1980,7 +1992,7 @@ add:
rth->rt_gateway = 0; rth->rt_gateway = 0;
rth->rt_uses_gateway = 0; rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached); INIT_LIST_HEAD(&rth->rt_uncached);
rth->rt_lwtstate = NULL;
RT_CACHE_STAT_INC(out_slow_tot); RT_CACHE_STAT_INC(out_slow_tot);
if (flags & RTCF_LOCAL) if (flags & RTCF_LOCAL)
@ -2260,7 +2272,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_uses_gateway = ort->rt_uses_gateway; rt->rt_uses_gateway = ort->rt_uses_gateway;
INIT_LIST_HEAD(&rt->rt_uncached); INIT_LIST_HEAD(&rt->rt_uncached);
rt->rt_lwtstate = NULL;
dst_free(new); dst_free(new);
} }