Merge branch 'rt6_pmtu'

Martin KaFai Lau says:

====================
ipv6: Stop /128 route from disappearing after pmtu update

The series is separated from another patch series,
'ipv6: Only create RTF_CACHE route after encountering pmtu exception',
which can be found here:
http://thread.gmane.org/gmane.linux.network/359140

This series focus on fixing the /128 route issues.  It is currently targeted
for net-next due to the number of code churn but it is also applicable
to net (should be without conflict).  The original reported problem can be
found here:
http://thread.gmane.org/gmane.linux.network/348138

Patch 01 and 02 are to prepare the fib6 search to expect both the
RTF_CACHE clone and its original route exist at the same fib6_node.

Patch 03 fixes the /128 route disappearing bug.

Patch 04 and 05 stop rt6_info from using the inet_peer's metrics to
avoid the /128 routes (like the /128 clone and its original route)
from stepping on each others' metrics.

The second patch is by 'Steffen Klassert <steffen.klassert@secunet.com>'
which I pulled off from netdev.  The third patch is also mostly by
Steffen with one minor optimization.

Many thanks to Hannes Frederic Sowa <hannes@stressinduktion.org> on
reviewing the patches and giving advice.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2015-05-01 20:57:07 -04:00
commit 36c829633b
5 changed files with 94 additions and 145 deletions

View File

@ -109,7 +109,6 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
extern const u32 dst_default_metrics[];
#define DST_METRICS_READ_ONLY 0x1UL
#define DST_METRICS_FORCE_OVERWRITE 0x2UL
#define DST_METRICS_FLAGS 0x3UL
#define __DST_METRICS_PTR(Y) \
((u32 *)((Y) & ~DST_METRICS_FLAGS))
@ -120,11 +119,6 @@ static inline bool dst_metrics_read_only(const struct dst_entry *dst)
return dst->_metrics & DST_METRICS_READ_ONLY;
}
static inline void dst_metrics_set_force_overwrite(struct dst_entry *dst)
{
dst->_metrics |= DST_METRICS_FORCE_OVERWRITE;
}
void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old);
static inline void dst_destroy_metrics_generic(struct dst_entry *dst)

View File

@ -121,44 +121,14 @@ struct rt6_info {
struct rt6key rt6i_prefsrc;
struct inet6_dev *rt6i_idev;
unsigned long _rt6i_peer;
u32 rt6i_metric;
u32 rt6i_pmtu;
/* more non-fragment space at head required */
unsigned short rt6i_nfheader_len;
u8 rt6i_protocol;
};
static inline struct inet_peer *rt6_peer_ptr(struct rt6_info *rt)
{
return inetpeer_ptr(rt->_rt6i_peer);
}
static inline bool rt6_has_peer(struct rt6_info *rt)
{
return inetpeer_ptr_is_peer(rt->_rt6i_peer);
}
static inline void __rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer)
{
__inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer);
}
static inline bool rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer)
{
return inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer);
}
static inline void rt6_init_peer(struct rt6_info *rt, struct inet_peer_base *base)
{
inetpeer_init_ptr(&rt->_rt6i_peer, base);
}
static inline void rt6_transfer_peer(struct rt6_info *rt, struct rt6_info *ort)
{
inetpeer_transfer_peer(&rt->_rt6i_peer, &ort->_rt6i_peer);
}
static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
{
return ((struct rt6_info *)dst)->rt6i_idev;
@ -189,15 +159,6 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
rt0->rt6i_flags |= RTF_EXPIRES;
}
static inline void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
struct dst_entry *new = (struct dst_entry *) from;
rt->rt6i_flags &= ~RTF_EXPIRES;
dst_hold(new);
rt->dst.from = new;
}
static inline void ip6_rt_put(struct rt6_info *rt)
{
/* dst_release() accepts a NULL parameter.

View File

@ -2121,6 +2121,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
if (!fn)
goto out;
noflags |= RTF_CACHE;
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
if (rt->dst.dev->ifindex != dev->ifindex)
continue;

View File

@ -92,6 +92,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
static void rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
#ifdef CONFIG_IPV6_ROUTE_INFO
@ -104,65 +105,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
const struct in6_addr *gwaddr, int ifindex);
#endif
static void rt6_bind_peer(struct rt6_info *rt, int create)
{
struct inet_peer_base *base;
struct inet_peer *peer;
base = inetpeer_base_ptr(rt->_rt6i_peer);
if (!base)
return;
peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
if (peer) {
if (!rt6_set_peer(rt, peer))
inet_putpeer(peer);
}
}
static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
{
if (rt6_has_peer(rt))
return rt6_peer_ptr(rt);
rt6_bind_peer(rt, create);
return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
}
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
return __rt6_get_peer(rt, 1);
}
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
struct rt6_info *rt = (struct rt6_info *) dst;
struct inet_peer *peer;
u32 *p = NULL;
struct rt6_info *rt = (struct rt6_info *)dst;
if (!(rt->dst.flags & DST_HOST))
if (rt->rt6i_flags & RTF_CACHE)
return NULL;
else
return dst_cow_metrics_generic(dst, old);
peer = rt6_get_peer_create(rt);
if (peer) {
u32 *old_p = __DST_METRICS_PTR(old);
unsigned long prev, new;
p = peer->metrics;
if (inet_metrics_new(peer) ||
(old & DST_METRICS_FORCE_OVERWRITE))
memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
new = (unsigned long) p;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev != old) {
p = __DST_METRICS_PTR(prev);
if (prev & DST_METRICS_READ_ONLY)
p = NULL;
}
}
return p;
}
static inline const void *choose_neigh_daddr(struct rt6_info *rt,
@ -311,7 +261,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
struct dst_entry *dst = &rt->dst;
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
INIT_LIST_HEAD(&rt->rt6i_siblings);
}
return rt;
@ -323,8 +272,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
struct inet6_dev *idev = rt->rt6i_idev;
struct dst_entry *from = dst->from;
if (!(rt->dst.flags & DST_HOST))
dst_destroy_metrics_generic(dst);
dst_destroy_metrics_generic(dst);
if (idev) {
rt->rt6i_idev = NULL;
@ -333,11 +281,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
dst->from = NULL;
dst_release(from);
if (rt6_has_peer(rt)) {
struct inet_peer *peer = rt6_peer_ptr(rt);
inet_putpeer(peer);
}
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@ -652,15 +595,33 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
u32 metric, int oif, int strict,
bool *do_rr)
{
struct rt6_info *rt, *match;
struct rt6_info *rt, *match, *cont;
int mpri = -1;
match = NULL;
for (rt = rr_head; rt && rt->rt6i_metric == metric;
rt = rt->dst.rt6_next)
cont = NULL;
for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
}
match = find_match(rt, oif, strict, &mpri, match, do_rr);
for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
rt = rt->dst.rt6_next)
}
for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
}
match = find_match(rt, oif, strict, &mpri, match, do_rr);
}
if (match || !cont)
return match;
for (rt = cont; rt; rt = rt->dst.rt6_next)
match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
@ -959,7 +920,7 @@ redo_rt6_select:
if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
else if (!(rt->dst.flags & DST_HOST))
else if (!(rt->dst.flags & DST_HOST) || !(rt->dst.flags & RTF_LOCAL))
nrt = rt6_alloc_clone(rt, &fl6->daddr);
else
goto out2;
@ -985,6 +946,7 @@ redo_rt6_select:
goto redo_fib6_lookup_lock;
out2:
rt6_dst_from_metrics_check(rt);
rt->dst.lastuse = jiffies;
rt->dst.__use++;
@ -1059,7 +1021,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
new = &rt->dst;
memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
rt6_init_peer(rt, net->ipv6.peers);
new->__use = 1;
new->input = dst_discard;
@ -1093,6 +1054,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
* Destination cache support functions
*/
static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
if (rt->dst.from &&
dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
}
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
struct rt6_info *rt;
@ -1109,6 +1077,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
if (rt6_check_expired(rt))
return NULL;
rt6_dst_from_metrics_check(rt);
return dst;
}
@ -1154,14 +1124,14 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct rt6_info *rt6 = (struct rt6_info *)dst;
dst_confirm(dst);
if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
struct net *net = dev_net(dst->dev);
rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
dst_metric_set(dst, RTAX_MTU, mtu);
rt6->rt6i_pmtu = mtu;
rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
}
@ -1341,9 +1311,14 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
const struct rt6_info *rt = (const struct rt6_info *)dst;
unsigned int mtu = rt->rt6i_pmtu;
struct inet6_dev *idev;
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
goto out;
mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
goto out;
@ -1590,10 +1565,8 @@ int ip6_route_add(struct fib6_config *cfg)
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->rt6i_dst.plen = cfg->fc_dst_len;
if (rt->rt6i_dst.plen == 128) {
if (rt->rt6i_dst.plen == 128)
rt->dst.flags |= DST_HOST;
dst_metrics_set_force_overwrite(&rt->dst);
}
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@ -1785,6 +1758,9 @@ static int ip6_route_del(struct fib6_config *cfg)
if (fn) {
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
if ((rt->rt6i_flags & RTF_CACHE) &&
!(cfg->fc_flags & RTF_CACHE))
continue;
if (cfg->fc_ifindex &&
(!rt->dst.dev ||
rt->dst.dev->ifindex != cfg->fc_ifindex))
@ -1926,12 +1902,27 @@ out:
* Misc support functions
*/
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
BUG_ON(from->dst.from);
rt->rt6i_flags &= ~RTF_EXPIRES;
dst_hold(&from->dst);
rt->dst.from = &from->dst;
dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
const struct in6_addr *dest)
{
struct net *net = dev_net(ort->dst.dev);
struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
ort->rt6i_table);
struct rt6_info *rt;
if (ort->rt6i_flags & RTF_CACHE)
ort = (struct rt6_info *)ort->dst.from;
rt = ip6_dst_alloc(net, ort->dst.dev, 0,
ort->rt6i_table);
if (rt) {
rt->dst.input = ort->dst.input;
@ -1940,7 +1931,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
rt->rt6i_dst.addr = *dest;
rt->rt6i_dst.plen = 128;
dst_copy_metrics(&rt->dst, &ort->dst);
rt->dst.error = ort->dst.error;
rt->rt6i_idev = ort->rt6i_idev;
if (rt->rt6i_idev)
@ -2372,11 +2362,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
PMTU discouvery.
*/
if (rt->dst.dev == arg->dev &&
!dst_metric_locked(&rt->dst, RTAX_MTU) &&
(dst_mtu(&rt->dst) >= arg->mtu ||
(dst_mtu(&rt->dst) < arg->mtu &&
dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
if (rt->rt6i_flags & RTF_CACHE) {
/* For RTF_CACHE with rt6i_pmtu == 0
* (i.e. a redirected route),
* the metrics of its rt->dst.from has already
* been updated.
*/
if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
rt->rt6i_pmtu = arg->mtu;
} else if (dst_mtu(&rt->dst) >= arg->mtu ||
(dst_mtu(&rt->dst) < arg->mtu &&
dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
}
}
return 0;
}
@ -2433,6 +2432,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rtm->rtm_type == RTN_LOCAL)
cfg->fc_flags |= RTF_LOCAL;
if (rtm->rtm_flags & RTM_F_CLONED)
cfg->fc_flags |= RTF_CACHE;
cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@ -2603,6 +2605,7 @@ static int rt6_fill_node(struct net *net,
int iif, int type, u32 portid, u32 seq,
int prefix, int nowait, unsigned int flags)
{
u32 metrics[RTAX_MAX];
struct rtmsg *rtm;
struct nlmsghdr *nlh;
long expires;
@ -2716,7 +2719,10 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
}
if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
if (rt->rt6i_pmtu)
metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
if (rtnetlink_put_metrics(skb, metrics) < 0)
goto nla_put_failure;
if (rt->rt6i_flags & RTF_GATEWAY) {

View File

@ -71,13 +71,6 @@ static int xfrm6_get_tos(const struct flowi *fl)
return 0;
}
static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
{
struct rt6_info *rt = (struct rt6_info *)xdst;
rt6_init_peer(rt, net->ipv6.peers);
}
static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
@ -106,8 +99,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
return -ENODEV;
}
rt6_transfer_peer(&xdst->u.rt6, rt);
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
@ -255,10 +246,6 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
if (likely(xdst->u.rt6.rt6i_idev))
in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
if (rt6_has_peer(&xdst->u.rt6)) {
struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6);
inet_putpeer(peer);
}
xfrm_dst_destroy(xdst);
}
@ -308,7 +295,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
.get_tos = xfrm6_get_tos,
.init_dst = xfrm6_init_dst,
.init_path = xfrm6_init_path,
.fill_dst = xfrm6_fill_dst,
.blackhole_route = ip6_blackhole_route,