tcp: allow for bigger reordering level
While testing the upcoming patch from Yaogong (converting the out-of-order queue into an RB tree), I hit the max reordering level of the Linux TCP stack. The reordering level was limited to 127 for no good reason, and some network setups [1] can easily reach this limit and get limited throughput. Allow a new max limit of 300, and add a sysctl so that admins can set even bigger (or lower) values if needed. [1] Aggregation of links, per-packet load balancing, fabrics not doing deep packet inspection, alternative TCP congestion modules... Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Yaogong Wang <wygivan@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
7aef06db0f
commit
dca145ffaa
|
@ -2230,11 +2230,8 @@ balance-rr: This mode is the only mode that will permit a single
|
|||
|
||||
It is possible to adjust TCP/IP's congestion limits by
|
||||
altering the net.ipv4.tcp_reordering sysctl parameter. The
|
||||
usual default value is 3, and the maximum useful value is 127.
|
||||
For a four interface balance-rr bond, expect that a single
|
||||
TCP/IP stream will utilize no more than approximately 2.3
|
||||
interface's worth of throughput, even after adjusting
|
||||
tcp_reordering.
|
||||
usual default value is 3. But keep in mind that the TCP stack is able
|
||||
to automatically increase this when it detects reordering.
|
||||
|
||||
Note that the fraction of packets that will be delivered out of
|
||||
order is highly variable, and is unlikely to be zero. The level
|
||||
|
|
|
@ -376,9 +376,17 @@ tcp_orphan_retries - INTEGER
|
|||
may consume significant resources. Cf. tcp_max_orphans.
|
||||
|
||||
tcp_reordering - INTEGER
|
||||
Maximal reordering of packets in a TCP stream.
|
||||
Initial reordering level of packets in a TCP stream.
|
||||
The TCP stack can then dynamically adjust the flow reordering level
|
||||
between this initial value and tcp_max_reordering.
|
||||
Default: 3
|
||||
|
||||
tcp_max_reordering - INTEGER
|
||||
Maximal reordering level of packets in a TCP stream.
|
||||
300 is a fairly conservative value, but you might increase it
|
||||
if paths are using per-packet load balancing (like bonding rr mode).
|
||||
Default: 300
|
||||
|
||||
tcp_retrans_collapse - BOOLEAN
|
||||
Bug-to-bug compatibility with some broken printers.
|
||||
On retransmit try to send bigger packets to work around bugs in
|
||||
|
|
|
@ -204,10 +204,10 @@ struct tcp_sock {
|
|||
|
||||
u16 urg_data; /* Saved octet of OOB data and control flags */
|
||||
u8 ecn_flags; /* ECN status bits. */
|
||||
u8 reordering; /* Packet reordering metric. */
|
||||
u8 keepalive_probes; /* num of allowed keep alive probes */
|
||||
u32 reordering; /* Packet reordering metric. */
|
||||
u32 snd_up; /* Urgent pointer */
|
||||
|
||||
u8 keepalive_probes; /* num of allowed keep alive probes */
|
||||
/*
|
||||
* Options received (usually on last packet, some only on SYN packets).
|
||||
*/
|
||||
|
|
|
@ -70,9 +70,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
|
|||
/* After receiving this amount of duplicate ACKs fast retransmit starts. */
|
||||
#define TCP_FASTRETRANS_THRESH 3
|
||||
|
||||
/* Maximal reordering. */
|
||||
#define TCP_MAX_REORDERING 127
|
||||
|
||||
/* Maximal number of ACKs sent quickly to accelerate slow-start. */
|
||||
#define TCP_MAX_QUICKACKS 16U
|
||||
|
||||
|
@ -252,6 +249,7 @@ extern int sysctl_tcp_abort_on_overflow;
|
|||
extern int sysctl_tcp_max_orphans;
|
||||
extern int sysctl_tcp_fack;
|
||||
extern int sysctl_tcp_reordering;
|
||||
extern int sysctl_tcp_max_reordering;
|
||||
extern int sysctl_tcp_dsack;
|
||||
extern long sysctl_tcp_mem[3];
|
||||
extern int sysctl_tcp_wmem[3];
|
||||
|
|
|
@ -495,6 +495,13 @@ static struct ctl_table ipv4_table[] = {
|
|||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "tcp_max_reordering",
|
||||
.data = &sysctl_tcp_max_reordering,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "tcp_dsack",
|
||||
.data = &sysctl_tcp_dsack,
|
||||
|
|
|
@ -81,6 +81,7 @@ int sysctl_tcp_window_scaling __read_mostly = 1;
|
|||
int sysctl_tcp_sack __read_mostly = 1;
|
||||
int sysctl_tcp_fack __read_mostly = 1;
|
||||
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
|
||||
int sysctl_tcp_max_reordering __read_mostly = 300;
|
||||
EXPORT_SYMBOL(sysctl_tcp_reordering);
|
||||
int sysctl_tcp_dsack __read_mostly = 1;
|
||||
int sysctl_tcp_app_win __read_mostly = 31;
|
||||
|
@ -833,7 +834,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
|
|||
if (metric > tp->reordering) {
|
||||
int mib_idx;
|
||||
|
||||
tp->reordering = min(TCP_MAX_REORDERING, metric);
|
||||
tp->reordering = min(sysctl_tcp_max_reordering, metric);
|
||||
|
||||
/* This exciting event is worth to be remembered. 8) */
|
||||
if (ts)
|
||||
|
|
Loading…
Reference in New Issue