tcp: avoid premature drops in tcp_add_backlog()
[ Upstream commit ec00ed472bdb7d0af840da68c8c11bff9f4d9caa ]
While testing TCP performance with latest trees,
I saw suspect SOCKET_BACKLOG drops.
tcp_add_backlog() computes its limit with:

    limit = (u32)READ_ONCE(sk->sk_rcvbuf) +
            (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
    limit += 64 * 1024;
This does not take into account that sk->sk_backlog.len
is reset only at the very end of __release_sock().
Both sk->sk_backlog.len and sk->sk_rmem_alloc could reach
sk_rcvbuf in normal conditions.
We should double sk->sk_rcvbuf contribution in the formula
to absorb bubbles in the backlog, which happen more often
for very fast flows.
This change maintains decent protection against abuses.
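
As a rough illustration of how much headroom the doubled sk_rcvbuf contribution adds, here is a minimal userspace sketch (not kernel code) comparing the old and new limit computations. The buffer sizes are assumed example values, not taken from this patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Assumed example buffer sizes, purely for illustration. */
	uint32_t sk_rcvbuf = 4u * 1024 * 1024;	/* 4 MB receive buffer */
	uint32_t sk_sndbuf = 2u * 1024 * 1024;	/* 2 MB send buffer */

	/* Old formula: a single sk_rcvbuf contribution. */
	uint32_t old_limit = sk_rcvbuf + (sk_sndbuf >> 1) + 64 * 1024;

	/* New formula: sk_rcvbuf contribution is doubled, the sum is kept
	 * in 64 bits and clamped, mirroring the min_t() in the patch below.
	 */
	uint64_t new_limit = ((uint64_t)sk_rcvbuf << 1) + (sk_sndbuf >> 1) + 64 * 1024;
	if (new_limit > UINT32_MAX)
		new_limit = UINT32_MAX;

	printf("old limit: %u bytes\n", old_limit);
	printf("new limit: %llu bytes\n", (unsigned long long)new_limit);
	return 0;
}

With these example values the old limit is 5 MB + 64 KB while the new one is 9 MB + 64 KB, so a backlog that briefly holds close to sk_rcvbuf worth of data no longer trips the drop check.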
Fixes: c377411f24 ("net: sk_add_backlog() take rmem_alloc into account")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240423125620.3309458-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1822,7 +1822,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
                      enum skb_drop_reason *reason)
 {
-       u32 limit, tail_gso_size, tail_gso_segs;
+       u32 tail_gso_size, tail_gso_segs;
        struct skb_shared_info *shinfo;
        const struct tcphdr *th;
        struct tcphdr *thtail;
@@ -1831,6 +1831,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
        bool fragstolen;
        u32 gso_segs;
        u32 gso_size;
+       u64 limit;
        int delta;

        /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
@@ -1928,7 +1929,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
        __skb_push(skb, hdrlen);

 no_coalesce:
-       limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
+       /* sk->sk_backlog.len is reset only at the end of __release_sock().
+        * Both sk->sk_backlog.len and sk->sk_rmem_alloc could reach
+        * sk_rcvbuf in normal conditions.
+        */
+       limit = ((u64)READ_ONCE(sk->sk_rcvbuf)) << 1;
+
+       limit += ((u32)READ_ONCE(sk->sk_sndbuf)) >> 1;

        /* Only socket owner can try to collapse/prune rx queues
         * to reduce memory overhead, so add a little headroom here.
@@ -1936,6 +1943,8 @@ no_coalesce:
         */
        limit += 64 * 1024;

+       limit = min_t(u64, limit, UINT_MAX);
+
        if (unlikely(sk_add_backlog(sk, skb, limit))) {
                bh_unlock_sock(sk);
                *reason = SKB_DROP_REASON_SOCKET_BACKLOG;