Merge branch 'tcp-add-three-static-keys'

Eric Dumazet says:

====================
tcp: add three static keys

Recent addition of per TCP socket rx/tx cache brought regressions
for some workloads, as reported by Feng Tang.

It seems better to make them opt-in, before we adopt better
heuristics.

The last patch adds high_order_alloc_disable sysctl to ask TCP
sendmsg() to exclusively use order-0 allocations, as mm layer
has specific optimizations.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit 35fc07aee8
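Usage, as a sketch that is not part of the commit: on a kernel carrying this
series, all three knobs default to 0 and an administrator opts in by writing 1
to the new sysctls. The helper below is a hypothetical illustration; the
/proc/sys paths follow from the ctl_table changes in the diff (ipv4_table and
net_core_table).

	#include <stdio.h>

	static int write_sysctl(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		fputs(val, f);
		return fclose(f);
	}

	int main(void)
	{
		/* Per-socket skb caches: opt in explicitly. */
		write_sysctl("/proc/sys/net/ipv4/tcp_rx_skb_cache", "1");
		write_sysctl("/proc/sys/net/ipv4/tcp_tx_skb_cache", "1");
		/* Restrict TCP sendmsg() to order-0 page allocations. */
		write_sysctl("/proc/sys/net/core/high_order_alloc_disable", "1");
		return 0;
	}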
@@ -772,6 +772,14 @@ tcp_challenge_ack_limit - INTEGER
 	in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks)
 	Default: 100
 
+tcp_rx_skb_cache - BOOLEAN
+	Controls a per TCP socket cache of one skb, that might help
+	performance of some workloads. This might be dangerous
+	on systems with a lot of TCP sockets, since it increases
+	memory usage.
+
+	Default: 0 (disabled)
+
 UDP variables:
 
 udp_l3mdev_accept - BOOLEAN
@@ -600,7 +600,6 @@ void bpf_map_area_free(void *base);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
 
 extern int sysctl_unprivileged_bpf_disabled;
-extern int sysctl_bpf_stats_enabled;
 
 int bpf_map_new_fd(struct bpf_map *map, int flags);
 int bpf_prog_new_fd(struct bpf_prog *prog);
@@ -63,6 +63,9 @@ extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
 					 void __user *, size_t *, loff_t *);
 extern int proc_do_large_bitmap(struct ctl_table *, int,
 				void __user *, size_t *, loff_t *);
+extern int proc_do_static_key(struct ctl_table *table, int write,
+			      void __user *buffer, size_t *lenp,
+			      loff_t *ppos);
 
 /*
  * Register a set of sysctl names by calling register_sysctl_table
@@ -1463,12 +1463,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
 		__sk_mem_reclaim(sk, 1 << 20);
 }
 
+DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
 static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 {
 	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
 	sk->sk_wmem_queued -= skb->truesize;
 	sk_mem_uncharge(sk, skb->truesize);
-	if (!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
+	if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
+	    !sk->sk_tx_skb_cache && !skb_cloned(skb)) {
 		skb_zcopy_clear(skb, true);
 		sk->sk_tx_skb_cache = skb;
 		return;
@@ -2433,13 +2435,11 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
  * This routine must be called with interrupts disabled or with the socket
  * locked so that the sk_buff queue operation is ok.
  */
+DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
 static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
 {
 	__skb_unlink(skb, &sk->sk_receive_queue);
-	if (
-#ifdef CONFIG_RPS
-	    !static_branch_unlikely(&rps_needed) &&
-#endif
+	if (static_branch_unlikely(&tcp_rx_skb_cache_key) &&
 	    !sk->sk_rx_skb_cache) {
 		sk->sk_rx_skb_cache = skb;
 		skb_orphan(skb);
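For context, the guards in the two hunks above rely on the kernel's jump-label
machinery: a static key compiles the disabled path down to a NOP that is
live-patched into a jump when the key flips, so sockets that do not opt in pay
essentially nothing. A minimal sketch of the pattern, using a hypothetical
my_feature_key that is not part of this commit:

	#include <linux/jump_label.h>

	/* Hypothetical key, initialized false: the guarded branch costs a NOP. */
	DEFINE_STATIC_KEY_FALSE(my_feature_key);

	void my_hot_path(void)
	{
		/* Patched to a jump only after static_branch_enable() runs. */
		if (static_branch_unlikely(&my_feature_key)) {
			/* opt-in work, e.g. populating a per-socket cache */
		}
	}

	void my_feature_set(bool on)
	{
		if (on)
			static_branch_enable(&my_feature_key);
		else
			static_branch_disable(&my_feature_key);
	}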
@@ -2534,6 +2534,8 @@ extern int sysctl_optmem_max;
 extern __u32 sysctl_wmem_default;
 extern __u32 sysctl_rmem_default;
 
+DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
+
 static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
 {
 	/* Does this proto have per netns sysctl_wmem ? */
@@ -2097,7 +2097,6 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
 
 DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 EXPORT_SYMBOL(bpf_stats_enabled_key);
-int sysctl_bpf_stats_enabled __read_mostly;
 
 /* All definitions of tracepoints related to BPF. */
 #define CREATE_TRACE_POINTS
@@ -230,11 +230,6 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
 #endif
 static int proc_dopipe_max_size(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos);
-#ifdef CONFIG_BPF_SYSCALL
-static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
-					  void __user *buffer, size_t *lenp,
-					  loff_t *ppos);
-#endif
 
 #ifdef CONFIG_MAGIC_SYSRQ
 /* Note: sysrq code uses its own private copy */
@@ -1253,12 +1248,10 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.procname	= "bpf_stats_enabled",
-		.data		= &sysctl_bpf_stats_enabled,
-		.maxlen		= sizeof(sysctl_bpf_stats_enabled),
+		.data		= &bpf_stats_enabled_key.key,
+		.maxlen		= sizeof(bpf_stats_enabled_key),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax_bpf_stats,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.proc_handler	= proc_do_static_key,
 	},
 #endif
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
@@ -3374,26 +3367,35 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
 
 #endif /* CONFIG_PROC_SYSCTL */
 
-#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
-static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
-					  void __user *buffer, size_t *lenp,
-					  loff_t *ppos)
+#if defined(CONFIG_SYSCTL)
+int proc_do_static_key(struct ctl_table *table, int write,
+		       void __user *buffer, size_t *lenp,
+		       loff_t *ppos)
 {
-	int ret, bpf_stats = *(int *)table->data;
-	struct ctl_table tmp = *table;
+	struct static_key *key = (struct static_key *)table->data;
+	static DEFINE_MUTEX(static_key_mutex);
+	int val, ret;
+	struct ctl_table tmp = {
+		.data   = &val,
+		.maxlen = sizeof(val),
+		.mode   = table->mode,
+		.extra1 = &zero,
+		.extra2 = &one,
+	};
 
 	if (write && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	tmp.data = &bpf_stats;
+	mutex_lock(&static_key_mutex);
+	val = static_key_enabled(key);
 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 	if (write && !ret) {
-		*(int *)table->data = bpf_stats;
-		if (bpf_stats)
-			static_branch_enable(&bpf_stats_enabled_key);
+		if (val)
+			static_key_enable(key);
 		else
-			static_branch_disable(&bpf_stats_enabled_key);
+			static_key_disable(key);
 	}
+	mutex_unlock(&static_key_mutex);
 	return ret;
 }
 #endif
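The new handler is generic: it serializes writers behind a local mutex,
round-trips the value through proc_dointvec_minmax() clamped to 0/1, and uses
static_key_enable()/static_key_disable() because at this generic site only a
plain struct static_key pointer is available. Any key can be exposed by
pointing .data at the struct static_key embedded in it, exactly as the entries
later in this diff do. A hypothetical sketch with illustrative names that are
not from this commit:

	#include <linux/sysctl.h>
	#include <linux/jump_label.h>

	/* Hypothetical: expose my_feature_key (defined elsewhere with
	 * DEFINE_STATIC_KEY_FALSE) as a 0/1 sysctl. */
	static struct ctl_table my_table[] = {
		{
			.procname	= "my_feature",
			.data		= &my_feature_key.key,	/* cast back inside the handler */
			.maxlen		= sizeof(my_feature_key),
			.mode		= 0644,
			.proc_handler	= proc_do_static_key,
		},
		{ }
	};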
@@ -2320,6 +2320,7 @@ static void sk_leave_memory_pressure(struct sock *sk)
 
 /* On 32bit arches, an skb frag is limited to 2^15 */
 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
+DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
 
 /**
  * skb_page_frag_refill - check that a page_frag contains enough room
@@ -2344,7 +2345,8 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
 	}
 
 	pfrag->offset = 0;
-	if (SKB_FRAG_PAGE_ORDER) {
+	if (SKB_FRAG_PAGE_ORDER &&
+	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
 		/* Avoid direct reclaim but allow kswapd to wake */
 		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
 					  __GFP_COMP | __GFP_NOWARN |
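SKB_FRAG_PAGE_ORDER is get_order(32768), i.e. order 3 on 4 KiB-page systems,
so frag refills normally grab 32 KiB compound pages; with the key enabled the
branch above is skipped and the code falls through to an order-0 allocation.
A userspace stand-in for get_order() (my_get_order is not the kernel's
implementation), just to make the arithmetic concrete:

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PAGE_SIZE  (1UL << PAGE_SHIFT)

	/* Smallest order such that (PAGE_SIZE << order) >= size. */
	static unsigned int my_get_order(unsigned long size)
	{
		unsigned int order = 0;

		while ((PAGE_SIZE << order) < size)
			order++;
		return order;
	}

	int main(void)
	{
		/* 32768 bytes -> order 3 (eight contiguous 4 KiB pages). */
		printf("order = %u\n", my_get_order(32768));
		return 0;
	}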
@@ -562,6 +562,13 @@ static struct ctl_table net_core_table[] = {
 		.extra1		= &zero,
 		.extra2		= &two,
 	},
+	{
+		.procname	= "high_order_alloc_disable",
+		.data		= &net_high_order_alloc_disable_key.key,
+		.maxlen		= sizeof(net_high_order_alloc_disable_key),
+		.mode		= 0644,
+		.proc_handler	= proc_do_static_key,
+	},
 	{ }
 };
 
@@ -51,6 +51,11 @@ static int comp_sack_nr_max = 255;
 static u32 u32_max_div_HZ = UINT_MAX / HZ;
 static int one_day_secs = 24 * 3600;
 
+DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
+EXPORT_SYMBOL(tcp_rx_skb_cache_key);
+
+DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
+
 /* obsolete */
 static int sysctl_tcp_low_latency __read_mostly;
 
@@ -559,6 +564,18 @@ static struct ctl_table ipv4_table[] = {
 		.extra1	= &sysctl_fib_sync_mem_min,
 		.extra2	= &sysctl_fib_sync_mem_max,
 	},
+	{
+		.procname	= "tcp_rx_skb_cache",
+		.data		= &tcp_rx_skb_cache_key.key,
+		.mode		= 0644,
+		.proc_handler	= proc_do_static_key,
+	},
+	{
+		.procname	= "tcp_tx_skb_cache",
+		.data		= &tcp_tx_skb_cache_key.key,
+		.mode		= 0644,
+		.proc_handler	= proc_do_static_key,
+	},
 	{ }
 };
 