net: take care of cloned skbs in tcp_try_coalesce()

Before stealing fragments or skb head, we must make sure skbs are not
cloned.

Alexander was worried about the destination skb being cloned: in bridge
setups, a driver could be fooled if skb->data_len did not match the skb's
nr_frags.

If source skb is cloned, we must take references on pages instead.

The bug was seen when using tcpdump (when not using mmap()).

Introduce a kfree_skb_partial() helper to clean up the code.

Reported-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2012-05-02 07:55:58 +00:00 committed by David S. Miller
parent eeb7fc7bc0
commit 923dd347b8
1 changed files with 28 additions and 14 deletions

View File

@ -4532,6 +4532,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
* @sk: socket * @sk: socket
* @to: prior buffer * @to: prior buffer
* @from: buffer to add in queue * @from: buffer to add in queue
* @fragstolen: pointer to boolean
* *
* Before queueing skb @from after @to, try to merge them * Before queueing skb @from after @to, try to merge them
* to reduce overall memory use and queue lengths, if cost is small. * to reduce overall memory use and queue lengths, if cost is small.
@ -4544,10 +4545,10 @@ static bool tcp_try_coalesce(struct sock *sk,
struct sk_buff *from, struct sk_buff *from,
bool *fragstolen) bool *fragstolen)
{ {
int delta, len = from->len; int i, delta, len = from->len;
*fragstolen = false; *fragstolen = false;
if (tcp_hdr(from)->fin) if (tcp_hdr(from)->fin || skb_cloned(to))
return false; return false;
if (len <= skb_tailroom(to)) { if (len <= skb_tailroom(to)) {
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
@ -4574,7 +4575,13 @@ copyfrags:
skb_shinfo(from)->frags, skb_shinfo(from)->frags,
skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
skb_shinfo(from)->nr_frags = 0;
if (skb_cloned(from))
for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
skb_frag_ref(from, i);
else
skb_shinfo(from)->nr_frags = 0;
to->truesize += delta; to->truesize += delta;
atomic_add(delta, &sk->sk_rmem_alloc); atomic_add(delta, &sk->sk_rmem_alloc);
sk_mem_charge(sk, delta); sk_mem_charge(sk, delta);
@ -4592,13 +4599,26 @@ copyfrags:
offset = from->data - (unsigned char *)page_address(page); offset = from->data - (unsigned char *)page_address(page);
skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
page, offset, skb_headlen(from)); page, offset, skb_headlen(from));
*fragstolen = true;
if (skb_cloned(from))
get_page(page);
else
*fragstolen = true;
delta = len; /* we dont know real truesize... */ delta = len; /* we dont know real truesize... */
goto copyfrags; goto copyfrags;
} }
return false; return false;
} }
/*
 * kfree_skb_partial - release an skb after a coalescing attempt
 * @skb: buffer to release
 * @head_stolen: true when the caller reports that the skb head was stolen
 *
 * When @head_stolen is false the skb is released through __kfree_skb().
 * When it is true, only the sk_buff object itself is handed back to
 * skbuff_head_cache; nothing else is released here.
 */
static void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
{
	if (!head_stolen) {
		__kfree_skb(skb);
		return;
	}

	/* Head was stolen: give back just the sk_buff descriptor. */
	kmem_cache_free(skbuff_head_cache, skb);
}
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
@ -4642,10 +4662,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
__skb_queue_after(&tp->out_of_order_queue, skb1, skb); __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
} else { } else {
if (fragstolen) kfree_skb_partial(skb, fragstolen);
kmem_cache_free(skbuff_head_cache, skb);
else
__kfree_skb(skb);
skb = NULL; skb = NULL;
} }
@ -4804,12 +4821,9 @@ queue_and_out:
tcp_fast_path_check(sk); tcp_fast_path_check(sk);
if (eaten > 0) { if (eaten > 0)
if (fragstolen) kfree_skb_partial(skb, fragstolen);
kmem_cache_free(skbuff_head_cache, skb); else if (!sock_flag(sk, SOCK_DEAD))
else
__kfree_skb(skb);
} else if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, 0); sk->sk_data_ready(sk, 0);
return; return;
} }