Merge branch 'tcp-rbtree-retransmit-queue'

Eric Dumazet says:

====================
tcp: implement rb-tree based retransmit queue

This patch series implements an RB-tree based retransmit queue for TCP,
to better match modern BDP.

Tested:

On receiver: netem on ingress: delay 150ms 200us loss 1
GRO disabled to force stress and SACK storms.

for f in `seq 1 10`
do
 ./netperf -H lpaa6 -l30 -- -K bbr -o THROUGHPUT|tail -1
done | awk '{print $0} {sum += $0} END {printf "%7u\n",sum}'

Before patch:

323.87 351.48 339.59 338.62 306.72 204.07 304.93 291.88 202.47 176.88
-> 2840

After patch:

1700.83 2207.98 2070.17 1544.26 2114.76 2124.89 1693.14 1080.91 2216.82 1299.94
-> 18053

Average of 1805 Mbits instead of 284 Mbits.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit ca82214144
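Before the diff itself, a short sketch of the idea may help. With the retransmit queue kept as an rb-tree keyed by TCP_SKB_CB(skb)->seq, operations such as "find the segment covering sequence number S" (tcp_sacktag_bsearch() below) and ordered insertion (tcp_rbtree_insert()) become O(log n) descents instead of walks over a linked list that can hold thousands of skbs at modern BDPs. The following userspace sketch models only that lookup shape; the struct seg type, the unbalanced binary tree and the segment boundaries are illustrative assumptions, not the kernel implementation, which embeds struct rb_node in sk_buff and uses the rb_first()/rb_next()/rb_erase() API visible in the hunks below.

/*
 * Illustration only -- not kernel code.  A tiny userspace model of the
 * lookup shape the patch relies on: segments ordered by their starting
 * sequence number, found by an O(log n) descent instead of a list walk.
 * The plain unbalanced BST and the segment boundaries here are made up.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct seg {
	uint32_t seq;		/* first sequence number of the segment */
	uint32_t end_seq;	/* one past the last sequence number */
	struct seg *left, *right;
};

/* Wrap-safe sequence comparison, same trick as the kernel's before(). */
static bool before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

/* Ordered insert keyed by seq, mirroring the descent in tcp_rbtree_insert(). */
static void seg_insert(struct seg **p, struct seg *s)
{
	while (*p)
		p = before(s->seq, (*p)->seq) ? &(*p)->left : &(*p)->right;
	*p = s;
}

/* Find the segment whose [seq, end_seq) range covers @seq, if any. */
static struct seg *seg_lookup(struct seg *root, uint32_t seq)
{
	while (root) {
		if (before(seq, root->seq))
			root = root->left;
		else if (!before(seq, root->end_seq))
			root = root->right;
		else
			return root;
	}
	return NULL;
}

int main(void)
{
	/* Three in-flight segments with invented boundaries. */
	struct seg segs[] = {
		{ 1000, 2448, NULL, NULL },
		{ 2448, 3896, NULL, NULL },
		{ 3896, 5344, NULL, NULL },
	};
	struct seg *root = NULL;

	for (unsigned int i = 0; i < sizeof(segs) / sizeof(segs[0]); i++)
		seg_insert(&root, &segs[i]);

	struct seg *hit = seg_lookup(root, 3000);
	if (hit)
		printf("seq 3000 falls in [%lu, %lu)\n",
		       (unsigned long)hit->seq, (unsigned long)hit->end_seq);
	return 0;
}

Built with any C compiler, this prints which invented segment covers byte 3000; in the patch the same descent lets SACK processing and loss marking start from an arbitrary sequence number without scanning the whole send queue.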
@@ -3158,6 +3158,12 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
 	return __skb_grow(skb, len);
 }
 
+#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
+#define skb_rb_first(root) rb_to_skb(rb_first(root))
+#define skb_rb_last(root) rb_to_skb(rb_last(root))
+#define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode))
+#define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode))
+
 #define skb_queue_walk(queue, skb) \
 		for (skb = (queue)->next; \
 		     skb != (struct sk_buff *)(queue); \
@@ -3172,6 +3178,18 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
 		for (; skb != (struct sk_buff *)(queue); \
 		     skb = skb->next)
 
+#define skb_rbtree_walk(skb, root) \
+		for (skb = skb_rb_first(root); skb != NULL; \
+		     skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from(skb) \
+		for (; skb != NULL; \
+		     skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from_safe(skb, tmp) \
+		for (; tmp = skb ? skb_rb_next(skb) : NULL, (skb != NULL); \
+		     skb = tmp)
+
 #define skb_queue_walk_from_safe(queue, skb, tmp) \
 		for (tmp = skb->next; \
 		     skb != (struct sk_buff *)(queue); \
@@ -60,7 +60,7 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/cgroup-defs.h>
-
+#include <linux/rbtree.h>
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
 #include <linux/poll.h>
@@ -397,7 +397,10 @@ struct sock {
 	int sk_wmem_queued;
 	refcount_t sk_wmem_alloc;
 	unsigned long sk_tsq_flags;
-	struct sk_buff *sk_send_head;
+	union {
+		struct sk_buff *sk_send_head;
+		struct rb_root tcp_rtx_queue;
+	};
 	struct sk_buff_head sk_write_queue;
 	__s32 sk_peek_off;
 	int sk_write_pending;
@@ -551,7 +551,13 @@ void tcp_xmit_retransmit_queue(struct sock *);
 void tcp_simple_retransmit(struct sock *);
 void tcp_enter_recovery(struct sock *sk, bool ece_ack);
 int tcp_trim_head(struct sock *, struct sk_buff *, u32);
-int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);
+enum tcp_queue {
+	TCP_FRAG_IN_WRITE_QUEUE,
+	TCP_FRAG_IN_RTX_QUEUE,
+};
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+		 struct sk_buff *skb, u32 len,
+		 unsigned int mss_now, gfp_t gfp);
 
 void tcp_send_probe0(struct sock *);
 void tcp_send_partial(struct sock *);
@@ -1606,19 +1612,11 @@ static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb)
 	skb->_skb_refdst = _save; \
 }
 
-/* write queue abstraction */
-static inline void tcp_write_queue_purge(struct sock *sk)
-{
-	struct sk_buff *skb;
-
-	tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
-		tcp_skb_tsorted_anchor_cleanup(skb);
-		sk_wmem_free_skb(sk, skb);
-	}
-	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
-	sk_mem_reclaim(sk);
-	tcp_clear_all_retrans_hints(tcp_sk(sk));
+void tcp_write_queue_purge(struct sock *sk);
+
+static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
+{
+	return skb_rb_first(&sk->tcp_rtx_queue);
 }
 
 static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
@@ -1643,18 +1641,12 @@ static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk,
 	return skb_queue_prev(&sk->sk_write_queue, skb);
 }
 
-#define tcp_for_write_queue(skb, sk) \
-	skb_queue_walk(&(sk)->sk_write_queue, skb)
-
-#define tcp_for_write_queue_from(skb, sk) \
-	skb_queue_walk_from(&(sk)->sk_write_queue, skb)
-
 #define tcp_for_write_queue_from_safe(skb, tmp, sk) \
 	skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
 
 static inline struct sk_buff *tcp_send_head(const struct sock *sk)
 {
-	return sk->sk_send_head;
+	return skb_peek(&sk->sk_write_queue);
 }
 
 static inline bool tcp_skb_is_last(const struct sock *sk,
@@ -1663,29 +1655,30 @@ static inline bool tcp_skb_is_last(const struct sock *sk,
 	return skb_queue_is_last(&sk->sk_write_queue, skb);
 }
 
-static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb)
+static inline bool tcp_write_queue_empty(const struct sock *sk)
 {
-	if (tcp_skb_is_last(sk, skb))
-		sk->sk_send_head = NULL;
-	else
-		sk->sk_send_head = tcp_write_queue_next(sk, skb);
+	return skb_queue_empty(&sk->sk_write_queue);
+}
+
+static inline bool tcp_rtx_queue_empty(const struct sock *sk)
+{
+	return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
+}
+
+static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
+{
+	return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
 }
 
 static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
 {
-	if (sk->sk_send_head == skb_unlinked) {
-		sk->sk_send_head = NULL;
+	if (tcp_write_queue_empty(sk))
 		tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-	}
+
 	if (tcp_sk(sk)->highest_sack == skb_unlinked)
 		tcp_sk(sk)->highest_sack = NULL;
 }
 
-static inline void tcp_init_send_head(struct sock *sk)
-{
-	sk->sk_send_head = NULL;
-}
-
 static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
 {
 	__skb_queue_tail(&sk->sk_write_queue, skb);
@@ -1696,8 +1689,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb
 	__tcp_add_write_queue_tail(sk, skb);
 
 	/* Queue it, remembering where we must start sending. */
-	if (sk->sk_send_head == NULL) {
-		sk->sk_send_head = skb;
+	if (sk->sk_write_queue.next == skb) {
 		tcp_chrono_start(sk, TCP_CHRONO_BUSY);
 
 		if (tcp_sk(sk)->highest_sack == NULL)
@@ -1710,35 +1702,32 @@ static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *s
 	__skb_queue_head(&sk->sk_write_queue, skb);
 }
 
-/* Insert buff after skb on the write queue of sk. */
-static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
-						struct sk_buff *buff,
-						struct sock *sk)
-{
-	__skb_queue_after(&sk->sk_write_queue, skb, buff);
-}
-
 /* Insert new before skb on the write queue of sk. */
 static inline void tcp_insert_write_queue_before(struct sk_buff *new,
 						 struct sk_buff *skb,
 						 struct sock *sk)
 {
 	__skb_queue_before(&sk->sk_write_queue, skb, new);
-
-	if (sk->sk_send_head == skb)
-		sk->sk_send_head = new;
 }
 
 static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
 {
-	list_del(&skb->tcp_tsorted_anchor);
-	tcp_skb_tsorted_anchor_cleanup(skb);
 	__skb_unlink(skb, &sk->sk_write_queue);
 }
 
-static inline bool tcp_write_queue_empty(struct sock *sk)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb);
+
+static inline void tcp_rtx_queue_unlink(struct sk_buff *skb, struct sock *sk)
 {
-	return skb_queue_empty(&sk->sk_write_queue);
+	tcp_skb_tsorted_anchor_cleanup(skb);
+	rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
+}
+
+static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct sock *sk)
+{
+	list_del(&skb->tcp_tsorted_anchor);
+	tcp_rtx_queue_unlink(skb, sk);
+	sk_wmem_free_skb(sk, skb);
 }
 
 static inline void tcp_push_pending_frames(struct sock *sk)
@@ -1767,8 +1756,9 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
 
 static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
 {
-	tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
-						tcp_write_queue_next(sk, skb);
+	struct sk_buff *next = skb_rb_next(skb);
+
+	tcp_sk(sk)->highest_sack = next ?: tcp_send_head(sk);
 }
 
 static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
@@ -1778,7 +1768,9 @@ static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
 
 static inline void tcp_highest_sack_reset(struct sock *sk)
 {
-	tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
+	struct sk_buff *skb = tcp_rtx_queue_head(sk);
+
+	tcp_sk(sk)->highest_sack = skb ?: tcp_send_head(sk);
 }
 
 /* Called when old skb is about to be deleted (to be combined with new skb) */
@@ -1948,7 +1940,7 @@ extern void tcp_rack_reo_timeout(struct sock *sk);
 /* At how many usecs into the future should the RTO fire? */
 static inline s64 tcp_rto_delta_us(const struct sock *sk)
 {
-	const struct sk_buff *skb = tcp_write_queue_head(sk);
+	const struct sk_buff *skb = tcp_rtx_queue_head(sk);
 	u32 rto = inet_csk(sk)->icsk_rto;
 	u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
 
@@ -413,6 +413,7 @@ void tcp_init_sock(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->out_of_order_queue = RB_ROOT;
+	sk->tcp_rtx_queue = RB_ROOT;
 	tcp_init_xmit_timers(sk);
 	INIT_LIST_HEAD(&tp->tsq_node);
 	INIT_LIST_HEAD(&tp->tsorted_sent_queue);
@@ -469,8 +470,10 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
 	tcp_init_buffer_space(sk);
 }
 
-static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
+static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
 {
+	struct sk_buff *skb = tcp_write_queue_tail(sk);
+
 	if (tsflags && skb) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -699,10 +702,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	if (!tcp_send_head(sk))
-		return;
-
 	skb = tcp_write_queue_tail(sk);
+	if (!skb)
+		return;
 	if (!(flags & MSG_MORE) || forced_push(tp))
 		tcp_mark_push(tp, skb);
 
@@ -962,14 +964,14 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 		int copy, i;
 		bool can_coalesce;
 
-		if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
+		if (!skb || (copy = size_goal - skb->len) <= 0 ||
 		    !tcp_skb_can_collapse_to(skb)) {
 new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
 			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
-						  skb_queue_empty(&sk->sk_write_queue));
+						  tcp_rtx_and_write_queues_empty(sk));
 			if (!skb)
 				goto wait_for_memory;
 
@@ -1041,7 +1043,7 @@ wait_for_memory:
 
 out:
 	if (copied) {
-		tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk));
+		tcp_tx_timestamp(sk, sk->sk_tsflags);
 		if (!(flags & MSG_SENDPAGE_NOTLAST))
 			tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	}
@@ -1197,7 +1199,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			goto out_err;
 		}
 
-		skb = tcp_send_head(sk) ? tcp_write_queue_tail(sk) : NULL;
+		skb = tcp_write_queue_tail(sk);
 		uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
 		if (!uarg) {
 			err = -ENOBUFS;
@@ -1273,7 +1275,7 @@ restart:
 		int max = size_goal;
 
 		skb = tcp_write_queue_tail(sk);
-		if (tcp_send_head(sk)) {
+		if (skb) {
 			if (skb->ip_summed == CHECKSUM_NONE)
 				max = mss_now;
 			copy = max - skb->len;
@@ -1293,7 +1295,7 @@ new_segment:
 				process_backlog = false;
 				goto restart;
 			}
-			first_skb = skb_queue_empty(&sk->sk_write_queue);
+			first_skb = tcp_rtx_and_write_queues_empty(sk);
 			skb = sk_stream_alloc_skb(sk,
 						  select_size(sk, sg, first_skb),
 						  sk->sk_allocation,
@@ -1418,7 +1420,7 @@ wait_for_memory:
 
 out:
 	if (copied) {
-		tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk));
+		tcp_tx_timestamp(sk, sockc.tsflags);
 		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	}
 out_nopush:
@@ -1519,6 +1521,13 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
 
 	/* XXX -- need to support SO_PEEK_OFF */
 
+	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
+		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
+		if (err)
+			return err;
+		copied += skb->len;
+	}
+
 	skb_queue_walk(&sk->sk_write_queue, skb) {
 		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
 		if (err)
@@ -2318,6 +2327,37 @@ static inline bool tcp_need_reset(int state)
 		 TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
 }
 
+static void tcp_rtx_queue_purge(struct sock *sk)
+{
+	struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
+
+	while (p) {
+		struct sk_buff *skb = rb_to_skb(p);
+
+		p = rb_next(p);
+		/* Since we are deleting whole queue, no need to
+		 * list_del(&skb->tcp_tsorted_anchor)
+		 */
+		tcp_rtx_queue_unlink(skb, sk);
+		sk_wmem_free_skb(sk, skb);
+	}
+}
+
+void tcp_write_queue_purge(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+		tcp_skb_tsorted_anchor_cleanup(skb);
+		sk_wmem_free_skb(sk, skb);
+	}
+	tcp_rtx_queue_purge(sk);
+	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
+	sk_mem_reclaim(sk);
+	tcp_clear_all_retrans_hints(tcp_sk(sk));
+}
+
 int tcp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
@@ -2376,7 +2416,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 	 * issue in __tcp_select_window()
 	 */
 	icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
-	tcp_init_send_head(sk);
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
 	dst_release(sk->sk_rx_dst);
@@ -465,17 +465,15 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
 void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct rb_node *p;
-	struct sk_buff *skb;
 	struct dst_entry *dst;
+	struct sk_buff *skb;
 
 	if (!tp->syn_fastopen)
 		return;
 
 	if (!tp->data_segs_in) {
-		p = rb_first(&tp->out_of_order_queue);
-		if (p && !rb_next(p)) {
-			skb = rb_entry(p, struct sk_buff, rbnode);
+		skb = skb_rb_first(&tp->out_of_order_queue);
+		if (skb && !skb_rb_next(skb)) {
 			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
 				tcp_fastopen_active_disable(sk);
 				return;
@@ -1142,6 +1142,7 @@ struct tcp_sacktag_state {
 	u64 last_sackt;
 	struct rate_sample *rate;
 	int flag;
+	unsigned int mss_now;
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1191,7 +1192,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 		if (pkt_len >= skb->len && !in_sack)
 			return 0;
 
-		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+		err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+				   pkt_len, mss, GFP_ATOMIC);
 		if (err < 0)
 			return err;
 	}
@@ -1288,13 +1290,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
 /* Shift newly-SACKed bytes from this skb to the immediately previous
  * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
  */
-static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+			    struct sk_buff *skb,
 			    struct tcp_sacktag_state *state,
 			    unsigned int pcount, int shifted, int mss,
 			    bool dup_sack)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
 	u32 start_seq = TCP_SKB_CB(skb)->seq;	/* start of newly-SACKed */
 	u32 end_seq = start_seq + shifted;	/* end of newly-SACKed */
 
@@ -1363,8 +1365,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
 		TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
 
-	tcp_unlink_write_queue(skb, sk);
-	sk_wmem_free_skb(sk, skb);
+	tcp_rtx_queue_unlink_and_free(skb, sk);
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
 
@@ -1414,9 +1415,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 		goto fallback;
 
 	/* Can only happen with delayed DSACK + discard craziness */
-	if (unlikely(skb == tcp_write_queue_head(sk)))
+	prev = skb_rb_prev(skb);
+	if (!prev)
 		goto fallback;
-	prev = tcp_write_queue_prev(sk, skb);
 
 	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
 		goto fallback;
@@ -1495,18 +1496,17 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 
 	if (!skb_shift(prev, skb, len))
 		goto fallback;
-	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+	if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
 		goto out;
 
 	/* Hole filled allows collapsing with the next as well, this is very
 	 * useful when hole on every nth skb pattern happens
 	 */
-	if (prev == tcp_write_queue_tail(sk))
+	skb = skb_rb_next(prev);
+	if (!skb)
 		goto out;
-	skb = tcp_write_queue_next(sk, prev);
 
 	if (!skb_can_shift(skb) ||
-	    (skb == tcp_send_head(sk)) ||
 	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
 	    (mss != tcp_skb_seglen(skb)))
 		goto out;
@@ -1514,7 +1514,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	len = skb->len;
 	if (skb_shift(prev, skb, len)) {
 		pcount += tcp_skb_pcount(skb);
-		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+		tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+				len, mss, 0);
 	}
 
 out:
@@ -1538,13 +1539,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *tmp;
 
-	tcp_for_write_queue_from(skb, sk) {
+	skb_rbtree_walk_from(skb) {
 		int in_sack = 0;
 		bool dup_sack = dup_sack_in;
 
-		if (skb == tcp_send_head(sk))
-			break;
-
 		/* queue is in-order => we can short-circuit the walk early */
 		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
 			break;
@@ -1606,23 +1604,44 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 	return skb;
 }
 
-/* Avoid all extra work that is being done by sacktag while walking in
- * a normal way
- */
+static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
+					   struct tcp_sacktag_state *state,
+					   u32 seq)
+{
+	struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
+	struct sk_buff *skb;
+	int unack_bytes;
+
+	while (*p) {
+		parent = *p;
+		skb = rb_to_skb(parent);
+		if (before(seq, TCP_SKB_CB(skb)->seq)) {
+			p = &parent->rb_left;
+			continue;
+		}
+		if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
+			p = &parent->rb_right;
+			continue;
+		}
+
+		state->fack_count = 0;
+		unack_bytes = TCP_SKB_CB(skb)->seq - tcp_sk(sk)->snd_una;
+		if (state->mss_now && unack_bytes > 0)
+			state->fack_count = unack_bytes / state->mss_now;
+
+		return skb;
+	}
+	return NULL;
+}
+
 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
 					struct tcp_sacktag_state *state,
 					u32 skip_to_seq)
 {
-	tcp_for_write_queue_from(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-
-		if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
-			break;
-
-		state->fack_count += tcp_skb_pcount(skb);
-	}
-	return skb;
+	if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
+		return skb;
+
+	return tcp_sacktag_bsearch(sk, state, skip_to_seq);
 }
 
 static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
@@ -1744,8 +1763,9 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		}
 	}
 
-	skb = tcp_write_queue_head(sk);
+	state->mss_now = tcp_current_mss(sk);
 	state->fack_count = 0;
+	skb = NULL;
 	i = 0;
 
 	if (!tp->sacked_out) {
@@ -1969,7 +1989,7 @@ void tcp_enter_loss(struct sock *sk)
 	if (tcp_is_reno(tp))
 		tcp_reset_reno_sack(tp);
 
-	skb = tcp_write_queue_head(sk);
+	skb = tcp_rtx_queue_head(sk);
 	is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
 	if (is_reneg) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
@@ -1978,10 +1998,7 @@ void tcp_enter_loss(struct sock *sk)
 	}
 	tcp_clear_all_retrans_hints(tp);
 
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-
+	skb_rbtree_walk_from(skb) {
 		mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
 			     is_reneg);
 		if (mark_lost)
@@ -2207,20 +2224,18 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 	const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
 
 	WARN_ON(packets > tp->packets_out);
-	if (tp->lost_skb_hint) {
-		skb = tp->lost_skb_hint;
-		cnt = tp->lost_cnt_hint;
+	skb = tp->lost_skb_hint;
+	if (skb) {
 		/* Head already handled? */
-		if (mark_head && skb != tcp_write_queue_head(sk))
+		if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
 			return;
+		cnt = tp->lost_cnt_hint;
 	} else {
-		skb = tcp_write_queue_head(sk);
+		skb = tcp_rtx_queue_head(sk);
 		cnt = 0;
 	}
 
-	tcp_for_write_queue_from(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
+	skb_rbtree_walk_from(skb) {
 		/* TODO: do this better */
 		/* this is not the most efficient way to do this... */
 		tp->lost_skb_hint = skb;
@@ -2244,7 +2259,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 			/* If needed, chop off the prefix to mark as lost. */
 			lost = (packets - oldcnt) * mss;
 			if (lost < skb->len &&
-			    tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+			    tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+					 lost, mss, GFP_ATOMIC) < 0)
 				break;
 			cnt = packets;
 		}
@@ -2328,7 +2344,7 @@ static bool tcp_any_retrans_done(const struct sock *sk)
 	if (tp->retrans_out)
 		return true;
 
-	skb = tcp_write_queue_head(sk);
+	skb = tcp_rtx_queue_head(sk);
 	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
 		return true;
 
@@ -2369,9 +2385,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 	if (unmark_loss) {
 		struct sk_buff *skb;
 
-		tcp_for_write_queue(skb, sk) {
-			if (skb == tcp_send_head(sk))
-				break;
+		skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
 		tp->lost_out = 0;
@@ -2616,9 +2630,7 @@ void tcp_simple_retransmit(struct sock *sk)
 	unsigned int mss = tcp_current_mss(sk);
 	u32 prior_lost = tp->lost_out;
 
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
+	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
 		if (tcp_skb_seglen(skb) > mss &&
 		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
 			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
@@ -2712,7 +2724,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
 		 * is updated in tcp_ack()). Otherwise fall back to
 		 * the conventional recovery.
 		 */
-		if (tcp_send_head(sk) &&
+		if (!tcp_write_queue_empty(sk) &&
 		    after(tcp_wnd_end(tp), tp->snd_nxt)) {
 			*rexmit = REXMIT_NEW;
 			return;
@@ -2804,9 +2816,9 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 	bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
 				    (tcp_fackets_out(tp) > tp->reordering));
 
-	if (WARN_ON(!tp->packets_out && tp->sacked_out))
+	if (!tp->packets_out && tp->sacked_out)
 		tp->sacked_out = 0;
-	if (WARN_ON(!tp->sacked_out && tp->fackets_out))
+	if (!tp->sacked_out && tp->fackets_out)
 		tp->fackets_out = 0;
 
 	/* Now state machine starts.
@@ -3076,11 +3088,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 prior_sacked = tp->sacked_out;
 	u32 reord = tp->packets_out;
+	struct sk_buff *skb, *next;
 	bool fully_acked = true;
 	long sack_rtt_us = -1L;
 	long seq_rtt_us = -1L;
 	long ca_rtt_us = -1L;
-	struct sk_buff *skb;
 	u32 pkts_acked = 0;
 	u32 last_in_flight = 0;
 	bool rtt_update;
@@ -3088,7 +3100,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 	first_ackt = 0;
 
-	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+	for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		u8 sacked = scb->sacked;
 		u32 acked_pcount;
@@ -3106,8 +3118,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 				break;
 			fully_acked = false;
 		} else {
-			/* Speedup tcp_unlink_write_queue() and next loop */
-			prefetchw(skb->next);
 			acked_pcount = tcp_skb_pcount(skb);
 		}
 
@@ -3159,12 +3169,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		if (!fully_acked)
 			break;
 
-		tcp_unlink_write_queue(skb, sk);
-		sk_wmem_free_skb(sk, skb);
+		next = skb_rb_next(skb);
 		if (unlikely(skb == tp->retransmit_skb_hint))
 			tp->retransmit_skb_hint = NULL;
 		if (unlikely(skb == tp->lost_skb_hint))
 			tp->lost_skb_hint = NULL;
+		tcp_rtx_queue_unlink_and_free(skb, sk);
 	}
 
 	if (!skb)
@@ -3256,12 +3266,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 static void tcp_ack_probe(struct sock *sk)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_buff *head = tcp_send_head(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Was it a usable window open? */
-
-	if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
+	if (!head)
+		return;
+	if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
 		icsk->icsk_backoff = 0;
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
 		/* Socket must be waked up by subsequent tcp_data_snd_check().
@@ -3381,7 +3393,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 			tp->pred_flags = 0;
 			tcp_fast_path_check(sk);
 
-			if (tcp_send_head(sk))
+			if (!tcp_write_queue_empty(sk))
 				tcp_slow_start_after_idle_check(sk);
 
 			if (nwin > tp->max_window) {
@@ -3566,8 +3578,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	sack_state.first_sackt = 0;
 	sack_state.rate = &rs;
 
-	/* We very likely will need to access write queue head. */
-	prefetchw(sk->sk_write_queue.next);
+	/* We very likely will need to access rtx queue. */
+	prefetch(sk->tcp_rtx_queue.rb_node);
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3681,8 +3693,7 @@ no_queue:
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
 	 */
-	if (tcp_send_head(sk))
-		tcp_ack_probe(sk);
+	tcp_ack_probe(sk);
 
 	if (tp->tlp_high_seq)
 		tcp_process_tlp_ack(sk, ack, flag);
@@ -4335,7 +4346,7 @@ static void tcp_ofo_queue(struct sock *sk)
 
 	p = rb_first(&tp->out_of_order_queue);
 	while (p) {
-		skb = rb_entry(p, struct sk_buff, rbnode);
+		skb = rb_to_skb(p);
 		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
 			break;
 
@@ -4399,7 +4410,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct rb_node **p, *q, *parent;
+	struct rb_node **p, *parent;
 	struct sk_buff *skb1;
 	u32 seq, end_seq;
 	bool fragstolen;
@@ -4458,7 +4469,7 @@ coalesce_done:
 	parent = NULL;
 	while (*p) {
 		parent = *p;
-		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+		skb1 = rb_to_skb(parent);
 		if (before(seq, TCP_SKB_CB(skb1)->seq)) {
 			p = &parent->rb_left;
 			continue;
@@ -4503,9 +4514,7 @@ insert:
 
 merge_right:
 	/* Remove other segments covered by skb. */
-	while ((q = rb_next(&skb->rbnode)) != NULL) {
-		skb1 = rb_entry(q, struct sk_buff, rbnode);
-
+	while ((skb1 = skb_rb_next(skb)) != NULL) {
 		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
 			break;
 		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
@@ -4520,7 +4529,7 @@ merge_right:
 		tcp_drop(sk, skb1);
 	}
 	/* If there is no skb after us, we are the last_skb ! */
-	if (!q)
+	if (!skb1)
 		tp->ooo_last_skb = skb;
 
 add_sack:
@@ -4706,7 +4715,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li
 	if (list)
 		return !skb_queue_is_last(list, skb) ? skb->next : NULL;
 
-	return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+	return skb_rb_next(skb);
 }
 
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -4727,7 +4736,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
 }
 
 /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
-static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
@@ -4735,7 +4744,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
 
 	while (*p) {
 		parent = *p;
-		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+		skb1 = rb_to_skb(parent);
 		if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
 			p = &parent->rb_left;
 		else
@@ -4854,26 +4863,19 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb, *head;
-	struct rb_node *p;
 	u32 start, end;
 
-	p = rb_first(&tp->out_of_order_queue);
-	skb = rb_entry_safe(p, struct sk_buff, rbnode);
+	skb = skb_rb_first(&tp->out_of_order_queue);
new_range:
 	if (!skb) {
-		p = rb_last(&tp->out_of_order_queue);
-		/* Note: This is possible p is NULL here. We do not
-		 * use rb_entry_safe(), as ooo_last_skb is valid only
-		 * if rbtree is not empty.
-		 */
-		tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+		tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
 		return;
 	}
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
 
 	for (head = skb;;) {
-		skb = tcp_skb_next(skb, NULL);
+		skb = skb_rb_next(skb);
 
 		/* Range is terminated when we see a gap or when
 		 * we are at the queue end.
@@ -4916,14 +4918,14 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 	do {
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
-		tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+		tcp_drop(sk, rb_to_skb(node));
 		sk_mem_reclaim(sk);
 		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
 		    !tcp_under_memory_pressure(sk))
 			break;
 		node = prev;
 	} while (node);
-	tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+	tp->ooo_last_skb = rb_to_skb(prev);
 
 	/* Reset SACK state. A conforming SACK implementation will
 	 * do the same at a timeout based retransmit. When a connection
@@ -5538,7 +5540,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 				    struct tcp_fastopen_cookie *cookie)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
+	struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
 	u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
 	bool syn_drop = false;
 
@@ -5573,9 +5575,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
 
 	if (data) { /* Retransmit unacked data in SYN */
-		tcp_for_write_queue_from(data, sk) {
-			if (data == tcp_send_head(sk) ||
-			    __tcp_retransmit_skb(sk, data, 1))
+		skb_rbtree_walk_from(data) {
+			if (__tcp_retransmit_skb(sk, data, 1))
 				break;
 		}
 		tcp_rearm_rto(sk);
@@ -480,7 +480,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 					       TCP_TIMEOUT_INIT;
 		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 
-		skb = tcp_write_queue_head(sk);
+		skb = tcp_rtx_queue_head(sk);
 		BUG_ON(!skb);
 
 		tcp_mstamp_refresh(tp);
@ -66,15 +66,17 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
||||||
int push_one, gfp_t gfp);
|
int push_one, gfp_t gfp);
|
||||||
|
|
||||||
/* Account for new data that has been sent to the network. */
|
/* Account for new data that has been sent to the network. */
|
||||||
static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
|
static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
unsigned int prior_packets = tp->packets_out;
|
unsigned int prior_packets = tp->packets_out;
|
||||||
|
|
||||||
tcp_advance_send_head(sk, skb);
|
|
||||||
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
|
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
|
||||||
|
|
||||||
|
__skb_unlink(skb, &sk->sk_write_queue);
|
||||||
|
tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
|
||||||
|
|
||||||
tp->packets_out += tcp_skb_pcount(skb);
|
tp->packets_out += tcp_skb_pcount(skb);
|
||||||
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
|
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
|
||||||
tcp_rearm_rto(sk);
|
tcp_rearm_rto(sk);
|
||||||
|
@ -1249,12 +1251,25 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
|
||||||
TCP_SKB_CB(skb)->eor = 0;
|
TCP_SKB_CB(skb)->eor = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Insert buff after skb on the write or rtx queue of sk. */
|
||||||
|
static void tcp_insert_write_queue_after(struct sk_buff *skb,
|
||||||
|
struct sk_buff *buff,
|
||||||
|
struct sock *sk,
|
||||||
|
enum tcp_queue tcp_queue)
|
||||||
|
{
|
||||||
|
if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
|
||||||
|
__skb_queue_after(&sk->sk_write_queue, skb, buff);
|
||||||
|
else
|
||||||
|
tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
|
||||||
|
}
|
||||||
|
|
||||||
/* Function to create two new TCP segments. Shrinks the given segment
|
/* Function to create two new TCP segments. Shrinks the given segment
|
||||||
* to the specified size and appends a new segment with the rest of the
|
* to the specified size and appends a new segment with the rest of the
|
||||||
* packet to the list. This won't be called frequently, I hope.
|
* packet to the list. This won't be called frequently, I hope.
|
||||||
* Remember, these are still headerless SKBs at this point.
|
* Remember, these are still headerless SKBs at this point.
|
||||||
*/
|
*/
|
||||||
int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
|
int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
|
||||||
|
struct sk_buff *skb, u32 len,
|
||||||
unsigned int mss_now, gfp_t gfp)
|
unsigned int mss_now, gfp_t gfp)
|
||||||
{
|
{
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
@ -1337,7 +1352,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
|
||||||
|
|
||||||
/* Link BUFF into the send queue. */
|
/* Link BUFF into the send queue. */
|
||||||
__skb_header_release(buff);
|
__skb_header_release(buff);
|
||||||
tcp_insert_write_queue_after(skb, buff, sk);
|
tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
|
||||||
list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
|
+	list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
 
 	return 0;
@@ -1625,10 +1640,10 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 		 * is caused by insufficient sender buffer:
 		 * 1) just sent some data (see tcp_write_xmit)
 		 * 2) not cwnd limited (this else condition)
-		 * 3) no more data to send (null tcp_send_head )
+		 * 3) no more data to send (tcp_write_queue_empty())
 		 * 4) application is hitting buffer limit (SOCK_NOSPACE)
 		 */
-		if (!tcp_send_head(sk) && sk->sk_socket &&
+		if (tcp_write_queue_empty(sk) && sk->sk_socket &&
 		    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
 		    (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
 			tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
@@ -1824,7 +1839,8 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
  * know that all the data is in scatter-gather pages, and that the
  * packet has never been sent out before (and thus is not cloned).
  */
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+			struct sk_buff *skb, unsigned int len,
 			unsigned int mss_now, gfp_t gfp)
 {
 	struct sk_buff *buff;
@@ -1833,7 +1849,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* All of a TSO frame must be composed of paged data. */
 	if (skb->len != skb->data_len)
-		return tcp_fragment(sk, skb, len, mss_now, gfp);
+		return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
 
 	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
 	if (unlikely(!buff))
@@ -1869,7 +1885,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* Link BUFF into the send queue. */
 	__skb_header_release(buff);
-	tcp_insert_write_queue_after(skb, buff, sk);
+	tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
 
 	return 0;
 }
@@ -1939,8 +1955,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 		goto send_now;
 	}
 
-	head = tcp_write_queue_head(sk);
-
+	/* TODO : use tsorted_sent_queue ? */
+	head = tcp_rtx_queue_head(sk);
+	if (!head)
+		goto send_now;
 	age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
 	/* If next ACK is likely to come too late (half srtt), do not defer */
 	if (age < (tp->srtt_us >> 4))
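
Note: tcp_rtx_queue_head(), used in the hunk above, is not defined in the hunks shown here. A minimal sketch of such a helper, assuming the rb-tree retransmit queue rooted at sk->tcp_rtx_queue and the skb_rb_first() accessor over struct rb_root (illustrative only, not part of the patch):

/* Illustrative sketch, not part of this patch: lowest-sequence (oldest
 * unacked) skb in the rb-tree retransmit queue, or NULL when it is empty.
 */
static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
{
	return skb_rb_first(&sk->tcp_rtx_queue);
}
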
@@ -2158,13 +2176,12 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 	limit <<= factor;
 
 	if (refcount_read(&sk->sk_wmem_alloc) > limit) {
-		/* Always send the 1st or 2nd skb in write queue.
+		/* Always send skb if rtx queue is empty.
 		 * No need to wait for TX completion to call us back,
 		 * after softirq/tasklet schedule.
 		 * This helps when TX completions are delayed too much.
 		 */
-		if (skb == sk->sk_write_queue.next ||
-		    skb->prev == sk->sk_write_queue.next)
+		if (tcp_rtx_queue_empty(sk))
 			return false;
 
 		set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@ -2215,7 +2232,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
 	 * it's the "most interesting" or current chrono we are
 	 * tracking and starts busy chrono if we have pending data.
 	 */
-	if (tcp_write_queue_empty(sk))
+	if (tcp_rtx_and_write_queues_empty(sk))
 		tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
 	else if (type == tp->chrono_type)
 		tcp_chrono_set(tp, TCP_CHRONO_BUSY);
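
Note: tcp_rtx_queue_empty() and tcp_rtx_and_write_queues_empty(), referenced in the two hunks above, are defined elsewhere in the series. A plausible sketch, assuming the write queue stays a plain sk_buff list while the retransmit queue becomes an rb-tree (illustrative only):

/* Illustrative sketch, not part of this patch. */
static inline bool tcp_rtx_queue_empty(const struct sock *sk)
{
	return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
}

static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
{
	return tcp_rtx_queue_empty(sk) && skb_queue_empty(&sk->sk_write_queue);
}
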
@@ -2310,7 +2327,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 						  nonagle);
 
 		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+		    unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+					  skb, limit, mss_now, gfp)))
 			break;
 
 		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@ -2350,7 +2368,7 @@ repair:
 		tcp_cwnd_validate(sk, is_cwnd_limited);
 		return false;
 	}
-	return !tp->packets_out && tcp_send_head(sk);
+	return !tp->packets_out && !tcp_write_queue_empty(sk);
 }
 
 bool tcp_schedule_loss_probe(struct sock *sk)
@@ -2374,7 +2392,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 		return false;
 
 	if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
-	     tcp_send_head(sk))
+	     !tcp_write_queue_empty(sk))
 		return false;
 
 	/* Probe timeout is 2*rtt. Add minimum RTO to account
@@ -2427,18 +2445,14 @@ void tcp_send_loss_probe(struct sock *sk)
 	int mss = tcp_current_mss(sk);
 
 	skb = tcp_send_head(sk);
-	if (skb) {
-		if (tcp_snd_wnd_test(tp, skb, mss)) {
-			pcount = tp->packets_out;
-			tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
-			if (tp->packets_out > pcount)
-				goto probe_sent;
-			goto rearm_timer;
-		}
-		skb = tcp_write_queue_prev(sk, skb);
-	} else {
-		skb = tcp_write_queue_tail(sk);
+	if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
+		pcount = tp->packets_out;
+		tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+		if (tp->packets_out > pcount)
+			goto probe_sent;
+		goto rearm_timer;
 	}
+	skb = skb_rb_last(&sk->tcp_rtx_queue);
 
 	/* At most one outstanding TLP retransmission. */
 	if (tp->tlp_high_seq)
@@ -2456,10 +2470,11 @@ void tcp_send_loss_probe(struct sock *sk)
 		goto rearm_timer;
 
 	if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
-		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+		if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+					  (pcount - 1) * mss, mss,
 					  GFP_ATOMIC)))
 			goto rearm_timer;
-		skb = tcp_write_queue_next(sk, skb);
+		skb = skb_rb_next(skb);
 	}
 
 	if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
@@ -2659,7 +2674,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+	struct sk_buff *next_skb = skb_rb_next(skb);
 	int skb_size, next_skb_size;
 
 	skb_size = skb->len;
@@ -2676,8 +2691,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 	}
 	tcp_highest_sack_combine(sk, next_skb, skb);
 
-	tcp_unlink_write_queue(next_skb, sk);
-
 	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
 		skb->ip_summed = CHECKSUM_PARTIAL;
 
@@ -2705,7 +2718,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 
 	tcp_skb_collapse_tstamp(skb, next_skb);
 
-	sk_wmem_free_skb(sk, next_skb);
+	tcp_rtx_queue_unlink_and_free(next_skb, sk);
 	return true;
 }
 
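
Note: tcp_rtx_queue_unlink_and_free() replaces the old unlink + sk_wmem_free_skb() pair here; its body is not shown in these hunks. One plausible shape, assuming the skb is linked on both the rtx rb-tree and the tsorted list (illustrative sketch, details may differ):

/* Illustrative sketch, not part of this patch: drop the skb from the
 * rb-tree retransmit queue and release its memory accounting.
 */
static void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct sock *sk)
{
	list_del(&skb->tcp_tsorted_anchor);	/* assumes it is always linked */
	rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
	sk_wmem_free_skb(sk, skb);
}
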
@@ -2716,8 +2729,6 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
 		return false;
 	if (skb_cloned(skb))
 		return false;
-	if (skb == tcp_send_head(sk))
-		return false;
 	/* Some heuristics for collapsing over SACK'd could be invented */
 	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
 		return false;
@@ -2740,7 +2751,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
 		return;
 
-	tcp_for_write_queue_from_safe(skb, tmp, sk) {
+	skb_rbtree_walk_from_safe(skb, tmp) {
 		if (!tcp_can_collapse(sk, skb))
 			break;
 
@@ -2815,7 +2826,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 
 	len = cur_mss * segs;
 	if (skb->len > len) {
-		if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+		if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
+				 cur_mss, GFP_ATOMIC))
 			return -ENOMEM; /* We'll try again later. */
 	} else {
 		if (skb_unclone(skb, GFP_ATOMIC))
@@ -2906,29 +2918,24 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 void tcp_xmit_retransmit_queue(struct sock *sk)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_buff *skb, *rtx_head = NULL, *hole = NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-	struct sk_buff *hole = NULL;
 	u32 max_segs;
 	int mib_idx;
 
 	if (!tp->packets_out)
 		return;
 
-	if (tp->retransmit_skb_hint) {
-		skb = tp->retransmit_skb_hint;
-	} else {
-		skb = tcp_write_queue_head(sk);
+	skb = tp->retransmit_skb_hint;
+	if (!skb) {
+		rtx_head = tcp_rtx_queue_head(sk);
+		skb = rtx_head;
 	}
 
 	max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
-	tcp_for_write_queue_from(skb, sk) {
+	skb_rbtree_walk_from(skb) {
 		__u8 sacked;
 		int segs;
 
-		if (skb == tcp_send_head(sk))
-			break;
-
 		if (tcp_pacing_check(sk))
 			break;
 
@@ -2973,7 +2980,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += tcp_skb_pcount(skb);
 
-		if (skb == tcp_write_queue_head(sk) &&
+		if (skb == rtx_head &&
 		    icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,
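
Note: the retransmit loop above now resumes from tp->retransmit_skb_hint and walks the rb-tree in ascending sequence order via skb_rbtree_walk_from(). A small usage sketch of the same walk pattern (hypothetical helper name, illustrative only):

/* Illustrative sketch, not part of this patch: count the segments still
 * sitting in the rtx queue, starting from an optional hint skb.
 */
static unsigned int tcp_rtx_segs_from(const struct sock *sk, struct sk_buff *hint)
{
	struct sk_buff *skb = hint ?: skb_rb_first(&sk->tcp_rtx_queue);
	unsigned int segs = 0;

	skb_rbtree_walk_from(skb)	/* stops when skb becomes NULL */
		segs += tcp_skb_pcount(skb);
	return segs;
}
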
@@ -3015,12 +3022,15 @@ void tcp_send_fin(struct sock *sk)
 	 * Note: in the latter case, FIN packet will be sent after a timeout,
 	 * as TCP stack thinks it has already been transmitted.
 	 */
-	if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
+	if (!tskb && tcp_under_memory_pressure(sk))
+		tskb = skb_rb_last(&sk->tcp_rtx_queue);
+
+	if (tskb) {
 coalesce:
 		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
 		TCP_SKB_CB(tskb)->end_seq++;
 		tp->write_seq++;
-		if (!tcp_send_head(sk)) {
+		if (tcp_write_queue_empty(sk)) {
 			/* This means tskb was already sent.
 			 * Pretend we included the FIN on previous transmit.
 			 * We need to set tp->snd_nxt to the value it would have
@@ -3086,9 +3096,9 @@ int tcp_send_synack(struct sock *sk)
 {
 	struct sk_buff *skb;
 
-	skb = tcp_write_queue_head(sk);
+	skb = tcp_rtx_queue_head(sk);
 	if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
-		pr_debug("%s: wrong queue state\n", __func__);
+		pr_err("%s: wrong queue state\n", __func__);
 		return -EFAULT;
 	}
 	if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
@@ -3101,10 +3111,9 @@ int tcp_send_synack(struct sock *sk)
 			if (!nskb)
 				return -ENOMEM;
 			INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
-			tcp_unlink_write_queue(skb, sk);
+			tcp_rtx_queue_unlink_and_free(skb, sk);
 			__skb_header_release(nskb);
-			__tcp_add_write_queue_head(sk, nskb);
-			sk_wmem_free_skb(sk, skb);
+			tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
 			sk->sk_wmem_queued += nskb->truesize;
 			sk_mem_charge(sk, nskb->truesize);
 			skb = nskb;
@@ -3327,7 +3336,6 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
 
 	tcb->end_seq += skb->len;
 	__skb_header_release(skb);
-	__tcp_add_write_queue_tail(sk, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
 	tp->write_seq = tcb->end_seq;
@@ -3405,12 +3413,13 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
 	if (!err) {
 		tp->syn_data = (fo->copied > 0);
+		tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
 		goto done;
 	}
 
-	/* data was not sent, this is our new send_head */
-	sk->sk_send_head = syn_data;
+	/* data was not sent, put it in write_queue */
+	__skb_queue_tail(&sk->sk_write_queue, syn_data);
 	tp->packets_out -= tcp_skb_pcount(syn_data);
 
 fallback:
@@ -3453,6 +3462,7 @@ int tcp_connect(struct sock *sk)
 	tp->retrans_stamp = tcp_time_stamp(tp);
 	tcp_connect_queue_skb(sk, buff);
 	tcp_ecn_send_syn(sk, buff);
+	tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
 
 	/* Send off SYN; include data in Fast Open. */
 	err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
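
Note: tcp_rbtree_insert(), used here and in the SYN-data path above, is not defined in these hunks. A sketch of an insert keyed on TCP_SKB_CB(skb)->seq using plain <linux/rbtree.h> primitives (illustrative; the real helper may differ):

/* Illustrative sketch, not part of this patch: link an skb into the
 * retransmit rb-tree ordered by its start sequence number.
 */
static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct sk_buff *skb1;

	while (*p) {
		parent = *p;
		skb1 = rb_to_skb(parent);
		if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	rb_link_node(&skb->rbnode, parent, p);
	rb_insert_color(&skb->rbnode, root);
}
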
@@ -3647,7 +3657,8 @@ int tcp_write_wakeup(struct sock *sk, int mib)
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
 			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
-			if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+			if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+					 skb, seg_size, mss, GFP_ATOMIC))
 				return -1;
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(skb, mss);
@@ -3677,7 +3688,7 @@ void tcp_send_probe0(struct sock *sk)
 
 	err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
 
-	if (tp->packets_out || !tcp_send_head(sk)) {
+	if (tp->packets_out || tcp_write_queue_empty(sk)) {
 		/* Cancel probe timer, if it is not required. */
 		icsk->icsk_probes_out = 0;
 		icsk->icsk_backoff = 0;
@@ -156,8 +156,13 @@ static bool retransmits_timed_out(struct sock *sk,
 		return false;
 
 	start_ts = tcp_sk(sk)->retrans_stamp;
-	if (unlikely(!start_ts))
-		start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk));
+	if (unlikely(!start_ts)) {
+		struct sk_buff *head = tcp_rtx_queue_head(sk);
+
+		if (!head)
+			return false;
+		start_ts = tcp_skb_timestamp(head);
+	}
 
 	if (likely(timeout == 0)) {
 		linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
@@ -304,11 +309,12 @@ static void tcp_delack_timer(unsigned long data)
 static void tcp_probe_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_buff *skb = tcp_send_head(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int max_probes;
 	u32 start_ts;
 
-	if (tp->packets_out || !tcp_send_head(sk)) {
+	if (tp->packets_out || !skb) {
 		icsk->icsk_probes_out = 0;
 		return;
 	}
@@ -321,9 +327,9 @@ static void tcp_probe_timer(struct sock *sk)
 	 * corresponding system limit. We also implement similar policy when
 	 * we use RTO to probe window in tcp_retransmit_timer().
 	 */
-	start_ts = tcp_skb_timestamp(tcp_send_head(sk));
+	start_ts = tcp_skb_timestamp(skb);
 	if (!start_ts)
-		tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp;
+		skb->skb_mstamp = tp->tcp_mstamp;
 	else if (icsk->icsk_user_timeout &&
 		 (s32)(tcp_time_stamp(tp) - start_ts) >
 		 jiffies_to_msecs(icsk->icsk_user_timeout))
@@ -408,7 +414,7 @@ void tcp_retransmit_timer(struct sock *sk)
 	if (!tp->packets_out)
 		goto out;
 
-	WARN_ON(tcp_write_queue_empty(sk));
+	WARN_ON(tcp_rtx_queue_empty(sk));
 
 	tp->tlp_high_seq = 0;
 
@@ -441,7 +447,7 @@ void tcp_retransmit_timer(struct sock *sk)
 			goto out;
 		}
 		tcp_enter_loss(sk);
-		tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1);
+		tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1);
 		__sk_dst_reset(sk);
 		goto out_reset_timer;
 	}
@@ -473,7 +479,7 @@ void tcp_retransmit_timer(struct sock *sk)
 
 	tcp_enter_loss(sk);
 
-	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) {
+	if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
 		/* Retransmission failed because of local congestion,
 		 * do not backoff.
 		 */
@@ -647,7 +653,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	elapsed = keepalive_time_when(tp);
 
 	/* It is alive without keepalive 8) */
-	if (tp->packets_out || tcp_send_head(sk))
+	if (tp->packets_out || !tcp_write_queue_empty(sk))
 		goto resched;
 
 	elapsed = keepalive_time_elapsed(tp);
@@ -148,12 +148,6 @@ struct netem_skb_cb {
 	psched_time_t	time_to_send;
 };
 
-
-static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
-{
-	return rb_entry(rb, struct sk_buff, rbnode);
-}
-
 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
 {
 	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
@@ -364,7 +358,7 @@ static void tfifo_reset(struct Qdisc *sch)
 	struct rb_node *p = rb_first(&q->t_root);
 
 	while (p) {
-		struct sk_buff *skb = netem_rb_to_skb(p);
+		struct sk_buff *skb = rb_to_skb(p);
 
 		p = rb_next(p);
 		rb_erase(&skb->rbnode, &q->t_root);
@@ -382,7 +376,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 		struct sk_buff *skb;
 
 		parent = *p;
-		skb = netem_rb_to_skb(parent);
+		skb = rb_to_skb(parent);
 		if (tnext >= netem_skb_cb(skb)->time_to_send)
 			p = &parent->rb_right;
 		else
@@ -538,7 +532,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			struct sk_buff *t_skb;
 			struct netem_skb_cb *t_last;
 
-			t_skb = netem_rb_to_skb(rb_last(&q->t_root));
+			t_skb = skb_rb_last(&q->t_root);
 			t_last = netem_skb_cb(t_skb);
 			if (!last ||
 			    t_last->time_to_send > last->time_to_send) {
@@ -617,7 +611,7 @@ deliver:
 	if (p) {
 		psched_time_t time_to_send;
 
-		skb = netem_rb_to_skb(p);
+		skb = rb_to_skb(p);
 
 		/* if more time remaining? */
 		time_to_send = netem_skb_cb(skb)->time_to_send;