469 lines
10 KiB
C
469 lines
10 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/* Multipath TCP
|
|
*
|
|
* Copyright (c) 2019, Tessares SA.
|
|
*/
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
#include <linux/sysctl.h>
|
|
#endif
|
|
#include <linux/proc_fs.h>
|
|
|
|
#include <net/net_namespace.h>
|
|
#include <net/netns/generic.h>
|
|
|
|
#include "protocol.h"
|
|
#include "ctrl.h"
|
|
|
|
#define MPTCP_SYSCTL_PATH "net/mptcp"
|
|
|
|
static int mptcp_pernet_id;
|
|
/* Per network-namespace MPTCP settings; one instance per netns, looked up
 * through mptcp_pernet_id.
 */
struct mptcp_pernet {
#ifdef CONFIG_SYSCTL
	/* handle returned by register_net_sysctl() for this netns */
	struct ctl_table_header *ctl_table_hdr;
#endif

	/* backs the "add_addr_timeout" sysctl (handled as jiffies) */
	unsigned int add_addr_timeout;
	/* backs the "stale_loss_cnt" sysctl */
	unsigned int stale_loss_cnt;
	/* backs the "enabled" sysctl, 0 or 1 */
	int mptcp_enabled;
	/* backs the "checksum_enabled" sysctl, 0 or 1 */
	int checksum_enabled;
	/* backs the "allow_join_initial_addr_port" sysctl, 0 or 1 */
	int allow_join_initial_addr_port;
	/* backs the "tcp_enabled" sysctl */
	int tcp_enabled;
	/* backs the "dup_addr" sysctl */
	int dup_addr;
};
|
|
|
|
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
|
|
{
|
|
return net_generic(net, mptcp_pernet_id);
|
|
}
|
|
|
|
int mptcp_is_enabled(const struct net *net)
|
|
{
|
|
return mptcp_get_pernet(net)->mptcp_enabled;
|
|
}
|
|
EXPORT_SYMBOL(mptcp_is_enabled);
|
|
|
|
unsigned int mptcp_get_add_addr_timeout(const struct net *net)
|
|
{
|
|
return mptcp_get_pernet(net)->add_addr_timeout;
|
|
}
|
|
|
|
int mptcp_is_checksum_enabled(const struct net *net)
|
|
{
|
|
return mptcp_get_pernet(net)->checksum_enabled;
|
|
}
|
|
|
|
int mptcp_allow_join_id0(const struct net *net)
|
|
{
|
|
return mptcp_get_pernet(net)->allow_join_initial_addr_port;
|
|
}
|
|
|
|
unsigned int mptcp_stale_loss_cnt(const struct net *net)
|
|
{
|
|
return mptcp_get_pernet(net)->stale_loss_cnt;
|
|
}
|
|
|
|
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
|
|
{
|
|
pernet->mptcp_enabled = 1;
|
|
pernet->add_addr_timeout = TCP_RTO_MAX;
|
|
pernet->checksum_enabled = 0;
|
|
pernet->allow_join_initial_addr_port = 1;
|
|
pernet->stale_loss_cnt = 4;
|
|
}
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
static struct ctl_table mptcp_sysctl_table[] = {
|
|
{
|
|
.procname = "enabled",
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
/* users with CAP_NET_ADMIN or root (not and) can change this
|
|
* value, same as other sysctl or the 'net' tree.
|
|
*/
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_ONE
|
|
},
|
|
{
|
|
.procname = "add_addr_timeout",
|
|
.maxlen = sizeof(unsigned int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_jiffies,
|
|
},
|
|
{
|
|
.procname = "checksum_enabled",
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_ONE
|
|
},
|
|
{
|
|
.procname = "allow_join_initial_addr_port",
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_ONE
|
|
},
|
|
{
|
|
.procname = "stale_loss_cnt",
|
|
.maxlen = sizeof(unsigned int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_douintvec_minmax,
|
|
},
|
|
{
|
|
.procname = "tcp_enabled",
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec,
|
|
},
|
|
{
|
|
.procname = "dup_addr",
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec,
|
|
},
|
|
{}
|
|
};
|
|
|
|
int mptcp_tcp_enabled(const struct net *net)
|
|
{
|
|
return mptcp_is_enabled(net) && mptcp_get_pernet(net)->tcp_enabled;
|
|
}
|
|
EXPORT_SYMBOL_GPL(mptcp_tcp_enabled);
|
|
|
|
int mptcp_dup_addr_enabled(struct net *net)
|
|
{
|
|
return mptcp_get_pernet(net)->dup_addr;
|
|
}
|
|
|
|
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
|
|
{
|
|
struct ctl_table_header *hdr;
|
|
struct ctl_table *table;
|
|
|
|
table = mptcp_sysctl_table;
|
|
if (!net_eq(net, &init_net)) {
|
|
table = kmemdup(table, sizeof(mptcp_sysctl_table), GFP_KERNEL);
|
|
if (!table)
|
|
goto err_alloc;
|
|
}
|
|
|
|
table[0].data = &pernet->mptcp_enabled;
|
|
table[1].data = &pernet->add_addr_timeout;
|
|
table[2].data = &pernet->checksum_enabled;
|
|
table[3].data = &pernet->allow_join_initial_addr_port;
|
|
table[4].data = &pernet->stale_loss_cnt;
|
|
table[5].data = &pernet->tcp_enabled;
|
|
table[6].data = &pernet->dup_addr;
|
|
|
|
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
|
|
if (!hdr)
|
|
goto err_reg;
|
|
|
|
pernet->ctl_table_hdr = hdr;
|
|
|
|
return 0;
|
|
|
|
err_reg:
|
|
if (!net_eq(net, &init_net))
|
|
kfree(table);
|
|
err_alloc:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
|
|
{
|
|
struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg;
|
|
|
|
unregister_net_sysctl_table(pernet->ctl_table_hdr);
|
|
|
|
kfree(table);
|
|
}
|
|
|
|
#else
|
|
|
|
/* !CONFIG_SYSCTL stub: there is nothing to register, always succeeds. */
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
{
	int ret = 0;

	return ret;
}
|
|
|
|
/* !CONFIG_SYSCTL stub: there is nothing to unregister. */
static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
{
}
|
|
|
|
#endif /* CONFIG_SYSCTL */
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
static inline int mptcp_seq_get_family(struct seq_file *seq)
|
|
{
|
|
struct mptcp_seq_afinfo *info;
|
|
|
|
info = (struct mptcp_seq_afinfo *)PDE_DATA(file_inode(seq->file));
|
|
return info->family;
|
|
}
|
|
|
|
static inline int mptcp_seq_check_family(struct seq_file *seq,
|
|
struct sock *sk)
|
|
{
|
|
return sk->sk_family != mptcp_seq_get_family(seq);
|
|
}
|
|
|
|
/* Position the dump on the @pos-th (0-based) socket whose family matches
 * the proc entry behind @seq.
 *
 * The per-netns socket list is walked first, under rcu_read_lock(); once it
 * is exhausted the walk continues with the sockets handed out by
 * mptcp_token_iter_next().
 *
 * Returns the socket with a reference held, or NULL when fewer than
 * @pos + 1 matching sockets exist.  When the socket comes from the list
 * walk, state->listen_budget is left true and the RCU read lock is still
 * held on return; otherwise listen_budget is false and the lock has been
 * released.
 *
 * Entries skipped on the way to @pos have their reference dropped again;
 * previously each skipped entry leaked one reference.
 */
static struct sock *mptcp_get_idx(struct seq_file *seq, loff_t pos)
{
	struct mptcp_iter_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct mptcp_sock *msk;
	struct sock *sk;

	state->listen_budget = true;
	state->num = pos;

	rcu_read_lock();

	/* look the msk up in the per-netns socket list */
	sk_for_each_rcu(sk, &net->mptcp.sklist) {
		if (mptcp_seq_check_family(seq, sk) ||
		    !refcount_inc_not_zero(&sk->sk_refcnt))
			continue;

		if (!(pos--))
			return sk;

		/* not the requested entry: release the reference just taken */
		sock_put(sk);
	}

	rcu_read_unlock();
	state->listen_budget = false;
	state->s_slot = 0;
	state->s_num = 0;

	/* look the msk up through the token iterator, which hands every
	 * socket out with a reference held
	 */
	while ((msk = mptcp_token_iter_next(net, &state->s_slot,
					    &state->s_num)) != NULL) {
		sk = (struct sock *)msk;

		/* only sockets of the right family consume a position */
		if (!mptcp_seq_check_family(seq, sk) && !(pos--))
			return sk;

		/* wrong family or not there yet: drop the reference */
		sock_put(sk);
	}

	return NULL;
}
|
|
|
|
/* seq_file ->start(): position 0 is the header line (SEQ_START_TOKEN);
 * position N > 0 maps to the (N - 1)-th socket of the dump.
 */
static void *mptcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos == 0)
		return SEQ_START_TOKEN;

	return mptcp_get_idx(seq, *pos - 1);
}
|
|
|
|
static struct sock *mptcp_get_next(struct seq_file *seq, struct sock *sk)
|
|
{
|
|
struct mptcp_iter_state *state = seq->private;
|
|
struct net *net = seq_file_net(seq);
|
|
|
|
sock_put(sk);
|
|
|
|
if (state->listen_budget) {
|
|
sk = sk_next(sk);
|
|
if (sk && refcount_inc_not_zero(&sk->sk_refcnt))
|
|
return sk;
|
|
|
|
rcu_read_unlock();
|
|
state->listen_budget = false;
|
|
state->s_slot = 0;
|
|
state->s_num = 0;
|
|
}
|
|
|
|
return (struct sock *)mptcp_token_iter_next(net, &state->s_slot,
|
|
&state->s_num);
|
|
}
|
|
|
|
/* seq_file ->next(): move from @v to the next socket of the wanted family
 * and bump *@pos.  Coming from the header token, the dump starts at the
 * first socket; otherwise sockets of the other family are stepped over and
 * state->num is incremented once.
 */
static void *mptcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mptcp_iter_state *state = seq->private;
	struct sock *cur;

	if (v == SEQ_START_TOKEN) {
		cur = mptcp_get_idx(seq, 0);
	} else {
		cur = v;
		do {
			cur = mptcp_get_next(seq, cur);
		} while (cur && mptcp_seq_check_family(seq, cur));
		state->num++;
	}

	++*pos;
	return cur;
}
|
|
|
|
static void mptcp_seq_stop(struct seq_file *seq, void *v)
|
|
{
|
|
}
|
|
|
|
static inline int mpctp_format_sock(struct seq_file *seq, struct sock *sk)
|
|
{
|
|
const struct inet_connection_sock *icsk = inet_csk(sk);
|
|
struct mptcp_iter_state *state = seq->private;
|
|
const struct inet_sock *inet = inet_sk(sk);
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
|
__u16 destp = ntohs(inet->inet_dport);
|
|
__u16 srcp = ntohs(inet->inet_sport);
|
|
const struct in6_addr *dest6, *src6;
|
|
unsigned long timer_expires;
|
|
__be32 src, dest;
|
|
int timer_active;
|
|
int rx_queue = 0;
|
|
|
|
/* retrans timer seems to be the only timer that used */
|
|
if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
|
|
timer_active = 1;
|
|
timer_expires = icsk->icsk_timeout;
|
|
} else {
|
|
timer_active = 0;
|
|
timer_expires = jiffies;
|
|
}
|
|
|
|
if (mptcp_is_fully_established(sk))
|
|
rx_queue = msk->ack_seq - msk->copied_seq - 1;
|
|
|
|
if (mptcp_seq_get_family(seq) == AF_INET6)
|
|
goto fmt_ipv6;
|
|
|
|
src = inet->inet_rcv_saddr;
|
|
dest = inet->inet_daddr;
|
|
seq_printf(seq, MPTCP_SEQ_CONT,
|
|
state->num, src, srcp, dest, destp, sk->sk_state,
|
|
msk->write_seq - msk->snd_una,
|
|
rx_queue,
|
|
timer_active,
|
|
jiffies_delta_to_clock_t(timer_expires - jiffies),
|
|
atomic_read(&msk->subflow_count),
|
|
icsk->icsk_retransmits,
|
|
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
|
|
sock_i_ino(sk),
|
|
refcount_read(&sk->sk_refcnt), sk);
|
|
|
|
return 0;
|
|
|
|
fmt_ipv6:
|
|
dest6 = &sk->sk_v6_daddr;
|
|
src6 = &sk->sk_v6_rcv_saddr;
|
|
seq_printf(seq, MPTCP6_SEQ_CONT, state->num,
|
|
src6->s6_addr32[0], src6->s6_addr32[1],
|
|
src6->s6_addr32[2], src6->s6_addr32[3], srcp,
|
|
dest6->s6_addr32[0], dest6->s6_addr32[1],
|
|
dest6->s6_addr32[2], dest6->s6_addr32[3], destp,
|
|
sk->sk_state,
|
|
msk->write_seq - msk->snd_una,
|
|
rx_queue,
|
|
timer_active,
|
|
jiffies_delta_to_clock_t(timer_expires - jiffies),
|
|
atomic_read(&msk->subflow_count),
|
|
icsk->icsk_retransmits,
|
|
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
|
|
sock_i_ino(sk),
|
|
refcount_read(&sk->sk_refcnt), sk);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int mptcp_seq_show(struct seq_file *seq, void *v)
|
|
{
|
|
if (v == SEQ_START_TOKEN) {
|
|
const char *header = MPTCP_SEQ_HEADER;
|
|
|
|
if (mptcp_seq_get_family(seq) == AF_INET6)
|
|
header = MPTCP6_SEQ_HEADER;
|
|
|
|
seq_puts(seq, header);
|
|
return 0;
|
|
}
|
|
return mpctp_format_sock(seq, v);
|
|
}
|
|
|
|
static const struct seq_operations mptcp_seq_ops = {
|
|
.start = mptcp_seq_start,
|
|
.next = mptcp_seq_next,
|
|
.stop = mptcp_seq_stop,
|
|
.show = mptcp_seq_show,
|
|
};
|
|
#endif
|
|
|
|
struct mptcp_seq_afinfo mptcp_seq_afinfo = {
|
|
.family = AF_INET,
|
|
};
|
|
|
|
struct mptcp_seq_afinfo mptcp6_seq_afinfo = {
|
|
.family = AF_INET6,
|
|
};
|
|
|
|
/* Per-netns constructor: load the default settings, initialise the
 * per-netns socket list and its lock, create the "mptcp" (and, with
 * CONFIG_MPTCP_IPV6, "mptcp6") proc entries and register the sysctl table.
 *
 * Returns 0 on success or a negative errno.  On failure everything created
 * here is removed again, including the proc entries when the sysctl
 * registration is what failed.
 */
static int __net_init mptcp_net_init(struct net *net)
{
	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
	int err;

	mptcp_pernet_set_defaults(pernet);

	mutex_init(&net->mptcp.sklist_lock);
	INIT_HLIST_HEAD(&net->mptcp.sklist);

#ifdef CONFIG_PROC_FS
	if (!proc_create_net_data("mptcp", 0444, net->proc_net,
				  &mptcp_seq_ops,
				  sizeof(struct mptcp_iter_state),
				  &mptcp_seq_afinfo))
		return -ENOMEM;

#ifdef CONFIG_MPTCP_IPV6
	if (!proc_create_net_data("mptcp6", 0444, net->proc_net,
				  &mptcp_seq_ops,
				  sizeof(struct mptcp_iter_state),
				  &mptcp6_seq_afinfo)) {
		remove_proc_entry("mptcp", net->proc_net);
		return -ENOMEM;
	}
#endif
#endif

	err = mptcp_pernet_new_table(net, pernet);
#ifdef CONFIG_PROC_FS
	if (err) {
		/* undo the proc entries created above, in reverse order */
#ifdef CONFIG_MPTCP_IPV6
		remove_proc_entry("mptcp6", net->proc_net);
#endif
		remove_proc_entry("mptcp", net->proc_net);
	}
#endif

	return err;
}
|
|
|
|
/* Note: the callback will only be called per extra netns */
|
|
static void __net_exit mptcp_net_exit(struct net *net)
|
|
{
|
|
struct mptcp_pernet *pernet = mptcp_get_pernet(net);
|
|
|
|
remove_proc_entry("mptcp", net->proc_net);
|
|
remove_proc_entry("mptcp6", net->proc_net);
|
|
mptcp_pernet_del_table(pernet);
|
|
}
|
|
|
|
static struct pernet_operations mptcp_pernet_ops = {
|
|
.init = mptcp_net_init,
|
|
.exit = mptcp_net_exit,
|
|
.id = &mptcp_pernet_id,
|
|
.size = sizeof(struct mptcp_pernet),
|
|
};
|
|
|
|
/* Boot-time MPTCP setup: initialise the join cookies and the protocol, then
 * register the per-netns operations.  A registration failure is fatal.
 */
void __init mptcp_init(void)
{
	int err;

	mptcp_join_cookie_init();
	mptcp_proto_init();

	err = register_pernet_subsys(&mptcp_pernet_ops);
	if (err < 0)
		panic("Failed to register MPTCP pernet subsystem.\n");
}
|
|
|
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
|
/* IPv6 counterpart of mptcp_init(): returns whatever
 * mptcp_proto_v6_init() returns.
 */
int __init mptcpv6_init(void)
{
	return mptcp_proto_v6_init();
}
|
|
#endif
|