Merge branch 'tcpflags'

Eric Dumazet says:

====================
tcp: no longer keep around headers in input path

Looking at tcp_try_coalesce() I was wondering why I did :

if (tcp_hdr(from)->fin)
     return false;

The answer would be to allow the aggregation, if we simply OR the FIN and PSH
flags eventually present in @from to @to packet. (Note a change is also
needed in skb_try_coalesce() to avoid calling skb_put() with 0 len)

Then, looking at tcp_recvmsg(), I realized we access tcp_hdr(skb)->syn
(and maybe tcp_hdr(skb)->fin) for every packet we process from socket
receive queue.

We have to understand TCP flags are cold in cpu caches most of the time
(assuming TCP timestamps, and that application calls recvmsg() a long
time after incoming packet was processed), and bringing a whole
cache line only to access one bit is not very nice.

It would make sense to use in TCP input path TCP_SKB_CB(skb)->tcp_flags
as we do in output path.

This saves one cache line miss, and TCP tcp_collapse() can avoid dealing
with the headers.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2014-09-15 14:41:12 -04:00
commit 437024067a
5 changed files with 22 additions and 32 deletions

View File

@ -3936,7 +3936,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return false;
if (len <= skb_tailroom(to)) {
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
if (len)
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
*delta_truesize = 0;
return true;
}

View File

@ -1510,9 +1510,9 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
offset = seq - TCP_SKB_CB(skb)->seq;
if (tcp_hdr(skb)->syn)
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
offset--;
if (offset < skb->len || tcp_hdr(skb)->fin) {
if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
*off = offset;
return skb;
}
@ -1585,7 +1585,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (offset + 1 != skb->len)
continue;
}
if (tcp_hdr(skb)->fin) {
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
sk_eat_skb(sk, skb, false);
++seq;
break;
@ -1722,11 +1722,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
break;
offset = *seq - TCP_SKB_CB(skb)->seq;
if (tcp_hdr(skb)->syn)
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
offset--;
if (offset < skb->len)
goto found_ok_skb;
if (tcp_hdr(skb)->fin)
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
WARN(!(flags & MSG_PEEK),
"recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
@ -1959,7 +1959,7 @@ skip_copy:
if (used + offset < skb->len)
continue;
if (tcp_hdr(skb)->fin)
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
if (!(flags & MSG_PEEK)) {
sk_eat_skb(sk, skb, copied_early);
@ -2160,8 +2160,10 @@ void tcp_close(struct sock *sk, long timeout)
* reader process may not have drained the data yet!
*/
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
tcp_hdr(skb)->fin;
u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
len--;
data_was_unread += len;
__kfree_skb(skb);
}

View File

@ -4093,7 +4093,7 @@ static void tcp_ofo_queue(struct sock *sk)
__skb_unlink(skb, &tp->out_of_order_queue);
__skb_queue_tail(&sk->sk_receive_queue, skb);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (tcp_hdr(skb)->fin)
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
tcp_fin(sk);
}
}
@ -4143,9 +4143,6 @@ static bool tcp_try_coalesce(struct sock *sk,
*fragstolen = false;
if (tcp_hdr(from)->fin)
return false;
/* Its possible this segment overlaps with prior segment in queue */
if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
return false;
@ -4158,6 +4155,7 @@ static bool tcp_try_coalesce(struct sock *sk,
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
return true;
}
@ -4513,7 +4511,7 @@ restart:
* - bloated or contains data before "start" or
* overlaps to the next one.
*/
if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
(tcp_win_from_space(skb->truesize) > skb->len ||
before(TCP_SKB_CB(skb)->seq, start))) {
end_of_skbs = false;
@ -4532,30 +4530,18 @@ restart:
/* Decided to skip this, advance start seq. */
start = TCP_SKB_CB(skb)->end_seq;
}
if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
if (end_of_skbs ||
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
return;
while (before(start, end)) {
int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
struct sk_buff *nskb;
unsigned int header = skb_headroom(skb);
int copy = SKB_MAX_ORDER(header, 0);
/* Too big header? This can happen with IPv6. */
if (copy < 0)
return;
if (end - start < copy)
copy = end - start;
nskb = alloc_skb(copy + header, GFP_ATOMIC);
nskb = alloc_skb(copy, GFP_ATOMIC);
if (!nskb)
return;
skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
skb_set_network_header(nskb, (skb_network_header(skb) -
skb->head));
skb_set_transport_header(nskb, (skb_transport_header(skb) -
skb->head));
skb_reserve(nskb, header);
memcpy(nskb->head, skb->head, header);
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
__skb_queue_before(list, skb, nskb);
@ -4579,8 +4565,7 @@ restart:
skb = tcp_collapse_one(sk, skb, list);
if (!skb ||
skb == tail ||
tcp_hdr(skb)->syn ||
tcp_hdr(skb)->fin)
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
return;
}
}

View File

@ -1638,6 +1638,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;

View File

@ -1415,6 +1415,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;