Merge branch 'net-sysctl-races-round-4'
Kuniyuki Iwashima says:

====================
sysctl: Fix data-races around ipv4_net_table (Round 4).

This series fixes data-races around 17 knobs after fib_multipath_use_neigh
in ipv4_net_table.

tcp_fack was skipped because it's obsolete and there are no readers.

So, round 5 will start with tcp_dsack; 2 rounds are left for 27 knobs.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit 3b15b3e93e
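The fix pattern is the same at every site: the sysctl handler may rewrite a knob at any time while the data path reads it without any lock, so the writer is annotated with WRITE_ONCE() and every lockless reader with READ_ONCE(), which keeps the compiler from tearing, fusing, or re-loading the access. A minimal kernel-style sketch of the pattern follows; demo_net and the demo_* names are illustrative only and not taken from this commit.

/* Illustrative sketch only -- demo_net and demo_* are made-up names. */
#include <linux/compiler.h>	/* READ_ONCE() / WRITE_ONCE() */

struct demo_net {
	int sysctl_demo_knob;	/* may change at any time via sysctl */
};

/* Writer side (sysctl handler): publish the new value with a single store. */
static void demo_set_knob(struct demo_net *net, int val)
{
	WRITE_ONCE(net->sysctl_demo_knob, val);
}

/* Reader side (data path, no lock held): load the value exactly once. */
static bool demo_knob_enabled(const struct demo_net *net)
{
	return READ_ONCE(net->sysctl_demo_knob) != 0;
}

Each hunk below converts one lockless reader (or, in the ipv4_privileged_ports() hunks, the writer) to this form; the knobs are plain integers, so an annotated load/store is enough and no memory barrier is needed.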
@@ -1236,8 +1236,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
 	csk->sndbuf = newsk->sk_sndbuf;
 	csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
 	RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
-					   sock_net(newsk)->
-						ipv4.sysctl_tcp_window_scaling,
+					   READ_ONCE(sock_net(newsk)->
+						     ipv4.sysctl_tcp_window_scaling),
 					   tp->window_clamp);
 	neigh_release(n);
 	inet_inherit_port(&tcp_hashinfo, lsk, newsk);
@@ -1384,7 +1384,7 @@ static void chtls_pass_accept_request(struct sock *sk,
 #endif
 	}
 	if (req->tcpopt.wsf <= 14 &&
-	    sock_net(sk)->ipv4.sysctl_tcp_window_scaling) {
+	    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
 		inet_rsk(oreq)->wscale_ok = 1;
 		inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
 	}
@@ -10324,7 +10324,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
 	unsigned long *fields = config->fields;
 	u32 hash_fields;
 
-	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+	switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
 	case 0:
 		mlxsw_sp_mp4_hash_outer_addr(config);
 		break;
@@ -10342,7 +10342,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
 		mlxsw_sp_mp_hash_inner_l3(config);
 		break;
 	case 3:
-		hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+		hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
 		/* Outer */
 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
@@ -357,7 +357,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name)
 
 static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port)
 {
-	return port < net->ipv4.sysctl_ip_prot_sock;
+	return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
 }
 
 #else
@@ -1403,8 +1403,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	s32 delta;
 
-	if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
-	    ca_ops->cong_control)
+	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+	    tp->packets_out || ca_ops->cong_control)
 		return;
 	delta = tcp_jiffies32 - tp->lsndtime;
 	if (delta > inet_csk(sk)->icsk_rto)
@@ -238,7 +238,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
 					int dif, int sdif)
 {
 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-	return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
+	return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept),
 				 bound_dev_if, dif, sdif);
 #else
 	return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
@@ -64,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net,
 		.daddr = *(struct in6_addr *)daddr,
 	};
 
-	if (net->ipv4.sysctl_tcp_timestamps != 1)
+	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
 		return 0;
 
 	ts_secret_init();
@@ -120,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #ifdef CONFIG_INET
 u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
 {
-	if (net->ipv4.sysctl_tcp_timestamps != 1)
+	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
 		return 0;
 
 	ts_secret_init();
@@ -2216,7 +2216,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
 	}
 
 	change_nexthops(fi) {
-		if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+		if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
 			if (!fib_good_nh(nexthop_nh))
 				continue;
 			if (!first) {
@@ -1929,7 +1929,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net,
 					   const struct sk_buff *skb,
 					   bool *p_has_inner)
 {
-	u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+	u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
 	struct flow_keys keys, hash_keys;
 
 	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -1958,7 +1958,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net,
 					   const struct sk_buff *skb,
 					   bool has_inner)
 {
-	u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+	u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
 	struct flow_keys keys, hash_keys;
 
 	/* We assume the packet carries an encapsulation, but if none was
@@ -2018,7 +2018,7 @@ static u32 fib_multipath_custom_hash_skb(const struct net *net,
 static u32 fib_multipath_custom_hash_fl4(const struct net *net,
 					 const struct flowi4 *fl4)
 {
-	u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+	u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
 	struct flow_keys hash_keys;
 
 	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -2048,7 +2048,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 	struct flow_keys hash_keys;
 	u32 mhash = 0;
 
-	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+	switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
 	case 0:
 		memset(&hash_keys, 0, sizeof(hash_keys));
 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -247,12 +247,12 @@ bool cookie_timestamp_decode(const struct net *net,
 		return true;
 	}
 
-	if (!net->ipv4.sysctl_tcp_timestamps)
+	if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
 		return false;
 
 	tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
 
-	if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
+	if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
 		return false;
 
 	if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
@@ -261,7 +261,7 @@ bool cookie_timestamp_decode(const struct net *net,
 	tcp_opt->wscale_ok = 1;
 	tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
 
-	return net->ipv4.sysctl_tcp_window_scaling != 0;
+	return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
 }
 EXPORT_SYMBOL(cookie_timestamp_decode);
 
@@ -84,7 +84,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
 		 * port limit.
 		 */
 		if ((range[1] < range[0]) ||
-		    (range[0] < net->ipv4.sysctl_ip_prot_sock))
+		    (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
 			ret = -EINVAL;
 		else
 			set_local_port_range(net, range);
@@ -110,7 +110,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
 		.extra2 = &ip_privileged_port_max,
 	};
 
-	pports = net->ipv4.sysctl_ip_prot_sock;
+	pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
 
 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 
@@ -122,7 +122,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
 		if (range[0] < pports)
 			ret = -EINVAL;
 		else
-			net->ipv4.sysctl_ip_prot_sock = pports;
+			WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports);
 	}
 
 	return ret;
@@ -1051,7 +1051,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
 			 tp->undo_marker ? tp->undo_retrans : 0);
 #endif
 		tp->reordering = min_t(u32, (metric + mss - 1) / mss,
-				       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+				       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
 	}
 
 	/* This exciting event is worth to be remembered. 8) */
@@ -2030,7 +2030,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
 		return;
 
 	tp->reordering = min_t(u32, tp->packets_out + addend,
-			       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
 	tp->reord_seen++;
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
 }
@@ -2095,7 +2095,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
 
 static bool tcp_is_rack(const struct sock *sk)
 {
-	return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
+	return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+	       TCP_RACK_LOSS_DETECTION;
 }
 
 /* If we detect SACK reneging, forget all SACK information
@@ -4060,7 +4061,7 @@ void tcp_parse_options(const struct net *net,
 				break;
 			case TCPOPT_WINDOW:
 				if (opsize == TCPOLEN_WINDOW && th->syn &&
-				    !estab && net->ipv4.sysctl_tcp_window_scaling) {
+				    !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
 					__u8 snd_wscale = *(__u8 *)ptr;
 					opt_rx->wscale_ok = 1;
 					if (snd_wscale > TCP_MAX_WSCALE) {
@@ -4076,7 +4077,7 @@ void tcp_parse_options(const struct net *net,
 			case TCPOPT_TIMESTAMP:
 				if ((opsize == TCPOLEN_TIMESTAMP) &&
 				    ((estab && opt_rx->tstamp_ok) ||
-				     (!estab && net->ipv4.sysctl_tcp_timestamps))) {
+				     (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
 					opt_rx->saw_tstamp = 1;
 					opt_rx->rcv_tsval = get_unaligned_be32(ptr);
 					opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -4084,7 +4085,7 @@ void tcp_parse_options(const struct net *net,
 				break;
 			case TCPOPT_SACK_PERM:
 				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
-				    !estab && net->ipv4.sysctl_tcp_sack) {
+				    !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
 					opt_rx->sack_ok = TCP_SACK_SEEN;
 					tcp_sack_reset(opt_rx);
 				}
@@ -5571,7 +5572,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 ptr = ntohs(th->urg_ptr);
 
-	if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
+	if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
 		ptr--;
 	ptr += ntohl(th->seq);
 
@@ -173,7 +173,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		 * Oh well... nobody has a sufficient solution to this
 		 * protocol bug yet.
 		 */
-		if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
+		if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
 kill:
 			inet_twsk_deschedule_put(tw);
 			return TCP_TW_SUCCESS;
@@ -781,7 +781,7 @@ listen_overflow:
 	if (sk != req->rsk_listener)
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
 
-	if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
+	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) {
 		inet_rsk(req)->acked = 1;
 		return NULL;
 	}
@@ -791,18 +791,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	opts->mss = tcp_advertise_mss(sk);
 	remaining -= TCPOLEN_MSS_ALIGNED;
 
-	if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
+	if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
 		opts->options |= OPTION_TS;
 		opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
 		opts->tsecr = tp->rx_opt.ts_recent;
 		remaining -= TCPOLEN_TSTAMP_ALIGNED;
 	}
-	if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
+	if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) {
 		opts->ws = tp->rx_opt.rcv_wscale;
 		opts->options |= OPTION_WSCALE;
 		remaining -= TCPOLEN_WSCALE_ALIGNED;
 	}
-	if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
+	if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) {
 		opts->options |= OPTION_SACK_ADVERTISE;
 		if (unlikely(!(OPTION_TS & opts->options)))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
@@ -1898,7 +1898,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 	if (tp->packets_out > tp->snd_cwnd_used)
 		tp->snd_cwnd_used = tp->packets_out;
 
-	if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
+	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
 	    (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
 	    !ca_ops->cong_control)
 		tcp_cwnd_application_limited(sk);
@@ -2741,7 +2741,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
 	if (rcu_access_pointer(tp->fastopen_rsk))
 		return false;
 
-	early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
+	early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * not in loss recovery, that are either limited by cwnd or application.
 	 */
@@ -3105,7 +3105,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 	struct sk_buff *skb = to, *tmp;
 	bool first = true;
 
-	if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
+	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
 		return;
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
 		return;
@@ -3647,7 +3647,7 @@ static void tcp_connect_init(struct sock *sk)
 	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
 	 */
 	tp->tcp_header_len = sizeof(struct tcphdr);
-	if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
+	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
 		tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -3683,7 +3683,7 @@ static void tcp_connect_init(struct sock *sk)
 				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
-				  sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
+				  READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling),
 				  &rcv_wscale,
 				  rcv_wnd);
 
@@ -14,7 +14,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
 			return 0;
 
 		if (tp->sacked_out >= tp->reordering &&
-		    !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
+		    !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+		      TCP_RACK_NO_DUPTHRESH))
 			return 0;
 	}
 
@@ -187,7 +188,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+	if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+	     TCP_RACK_STATIC_REO_WND) ||
 	    !rs->prior_delivered)
 		return;
 
@@ -578,7 +578,7 @@ out_reset_timer:
 	 * linear-timeout retransmissions into a black hole
 	 */
 	if (sk->sk_state == TCP_ESTABLISHED &&
-	    (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
+	    (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
 	    tcp_stream_is_thin(tp) &&
 	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
 		icsk->icsk_backoff = 0;