Merge branch 'mptcp-socket-options'
Mat Martineau says: ==================== mptcp: Improve socket option handling MPTCP sockets have previously had limited socket option support. The architecture of MPTCP sockets (one userspace-facing MPTCP socket that manages one or more in-kernel TCP subflow sockets) adds complexity for passing options through to lower levels. This patch set adds MPTCP support for socket options commonly used with TCP. Patch 1 reverts an interim socket option fix (a socket option blocklist) that was merged in the net tree for v5.12. Patch 2 moves the socket option code to a separate file, with no functional changes. Patch 3 adds an allowlist for socket options that are known to function with MPTCP. Later patches in this set add more allowed options. Patches 4 and 5 add infrastructure for syncing MPTCP-level options with the TCP subflows. Patches 6-12 add support for specific socket options. Patch 13 adds a socket option self test. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
c133acf38c
|
@ -2,7 +2,7 @@
|
|||
obj-$(CONFIG_MPTCP) += mptcp.o
|
||||
|
||||
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
|
||||
mib.o pm_netlink.o
|
||||
mib.o pm_netlink.o sockopt.o
|
||||
|
||||
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
|
||||
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
|
||||
|
|
|
@ -90,16 +90,6 @@ static bool mptcp_is_tcpsk(struct sock *sk)
|
|||
return false;
|
||||
}
|
||||
|
||||
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
|
||||
{
|
||||
sock_owned_by_me((const struct sock *)msk);
|
||||
|
||||
if (likely(!__mptcp_check_fallback(msk)))
|
||||
return NULL;
|
||||
|
||||
return msk->first;
|
||||
}
|
||||
|
||||
static int __mptcp_socket_create(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
|
@ -740,18 +730,47 @@ wake:
|
|||
sk->sk_data_ready(sk);
|
||||
}
|
||||
|
||||
void __mptcp_flush_join_list(struct mptcp_sock *msk)
|
||||
static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
bool ret = false;
|
||||
|
||||
if (likely(list_empty(&msk->join_list)))
|
||||
return;
|
||||
return false;
|
||||
|
||||
spin_lock_bh(&msk->join_list_lock);
|
||||
list_for_each_entry(subflow, &msk->join_list, node)
|
||||
list_for_each_entry(subflow, &msk->join_list, node) {
|
||||
u32 sseq = READ_ONCE(subflow->setsockopt_seq);
|
||||
|
||||
mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
|
||||
if (READ_ONCE(msk->setsockopt_seq) != sseq)
|
||||
ret = true;
|
||||
}
|
||||
list_splice_tail_init(&msk->join_list, &msk->conn_list);
|
||||
spin_unlock_bh(&msk->join_list_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __mptcp_flush_join_list(struct mptcp_sock *msk)
|
||||
{
|
||||
if (likely(!mptcp_do_flush_join_list(msk)))
|
||||
return;
|
||||
|
||||
if (!test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags))
|
||||
mptcp_schedule_work((struct sock *)msk);
|
||||
}
|
||||
|
||||
static void mptcp_flush_join_list(struct mptcp_sock *msk)
|
||||
{
|
||||
bool sync_needed = test_and_clear_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags);
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (!mptcp_do_flush_join_list(msk) && !sync_needed)
|
||||
return;
|
||||
|
||||
mptcp_sockopt_sync_all(msk);
|
||||
}
|
||||
|
||||
static bool mptcp_timer_pending(struct sock *sk)
|
||||
|
@ -1467,7 +1486,7 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
|
|||
int ret = 0;
|
||||
|
||||
prev_ssk = ssk;
|
||||
__mptcp_flush_join_list(msk);
|
||||
mptcp_flush_join_list(msk);
|
||||
ssk = mptcp_subflow_get_send(msk);
|
||||
|
||||
/* try to keep the subflow socket lock across
|
||||
|
@ -1893,7 +1912,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
|
|||
unsigned int moved = 0;
|
||||
bool ret, done;
|
||||
|
||||
__mptcp_flush_join_list(msk);
|
||||
mptcp_flush_join_list(msk);
|
||||
do {
|
||||
struct sock *ssk = mptcp_subflow_recv_lookup(msk);
|
||||
bool slowpath;
|
||||
|
@ -2317,7 +2336,7 @@ static void mptcp_worker(struct work_struct *work)
|
|||
goto unlock;
|
||||
|
||||
mptcp_check_data_fin_ack(sk);
|
||||
__mptcp_flush_join_list(msk);
|
||||
mptcp_flush_join_list(msk);
|
||||
|
||||
mptcp_check_fastclose(msk);
|
||||
|
||||
|
@ -2380,6 +2399,9 @@ static int __mptcp_init_sock(struct sock *sk)
|
|||
/* re-use the csk retrans timer for MPTCP-level retrans */
|
||||
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
|
||||
timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
|
||||
|
||||
tcp_assign_congestion_control(sk);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2517,7 +2539,7 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
|
|||
}
|
||||
}
|
||||
|
||||
__mptcp_flush_join_list(msk);
|
||||
mptcp_flush_join_list(msk);
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
|
||||
|
||||
|
@ -2573,6 +2595,8 @@ static void __mptcp_destroy_sock(struct sock *sk)
|
|||
WARN_ON_ONCE(msk->rmem_released);
|
||||
sk_stream_kill_queues(sk);
|
||||
xfrm_sk_free_policy(sk);
|
||||
|
||||
tcp_cleanup_congestion_control(sk);
|
||||
sk_refcnt_debug_release(sk);
|
||||
mptcp_dispose_initial_subflow(msk);
|
||||
sock_put(sk);
|
||||
|
@ -2654,7 +2678,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
|
|||
struct mptcp_subflow_context *subflow;
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
|
||||
__mptcp_flush_join_list(msk);
|
||||
mptcp_do_flush_join_list(msk);
|
||||
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
|
||||
|
@ -2703,6 +2728,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
|
|||
msk->snd_nxt = msk->write_seq;
|
||||
msk->snd_una = msk->write_seq;
|
||||
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
|
||||
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
|
||||
|
||||
if (mp_opt->mp_capable) {
|
||||
msk->can_ack = true;
|
||||
|
@ -2811,161 +2837,6 @@ static void mptcp_destroy(struct sock *sk)
|
|||
sk_sockets_allocated_dec(sk);
|
||||
}
|
||||
|
||||
static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
struct socket *ssock;
|
||||
int ret;
|
||||
|
||||
switch (optname) {
|
||||
case SO_REUSEPORT:
|
||||
case SO_REUSEADDR:
|
||||
lock_sock(sk);
|
||||
ssock = __mptcp_nmpc_socket(msk);
|
||||
if (!ssock) {
|
||||
release_sock(sk);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
|
||||
if (ret == 0) {
|
||||
if (optname == SO_REUSEPORT)
|
||||
sk->sk_reuseport = ssock->sk->sk_reuseport;
|
||||
else if (optname == SO_REUSEADDR)
|
||||
sk->sk_reuse = ssock->sk->sk_reuse;
|
||||
}
|
||||
release_sock(sk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
|
||||
}
|
||||
|
||||
static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
int ret = -EOPNOTSUPP;
|
||||
struct socket *ssock;
|
||||
|
||||
switch (optname) {
|
||||
case IPV6_V6ONLY:
|
||||
lock_sock(sk);
|
||||
ssock = __mptcp_nmpc_socket(msk);
|
||||
if (!ssock) {
|
||||
release_sock(sk);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
|
||||
if (ret == 0)
|
||||
sk->sk_ipv6only = ssock->sk->sk_ipv6only;
|
||||
|
||||
release_sock(sk);
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool mptcp_unsupported(int level, int optname)
|
||||
{
|
||||
if (level == SOL_IP) {
|
||||
switch (optname) {
|
||||
case IP_ADD_MEMBERSHIP:
|
||||
case IP_ADD_SOURCE_MEMBERSHIP:
|
||||
case IP_DROP_MEMBERSHIP:
|
||||
case IP_DROP_SOURCE_MEMBERSHIP:
|
||||
case IP_BLOCK_SOURCE:
|
||||
case IP_UNBLOCK_SOURCE:
|
||||
case MCAST_JOIN_GROUP:
|
||||
case MCAST_LEAVE_GROUP:
|
||||
case MCAST_JOIN_SOURCE_GROUP:
|
||||
case MCAST_LEAVE_SOURCE_GROUP:
|
||||
case MCAST_BLOCK_SOURCE:
|
||||
case MCAST_UNBLOCK_SOURCE:
|
||||
case MCAST_MSFILTER:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (level == SOL_IPV6) {
|
||||
switch (optname) {
|
||||
case IPV6_ADDRFORM:
|
||||
case IPV6_ADD_MEMBERSHIP:
|
||||
case IPV6_DROP_MEMBERSHIP:
|
||||
case IPV6_JOIN_ANYCAST:
|
||||
case IPV6_LEAVE_ANYCAST:
|
||||
case MCAST_JOIN_GROUP:
|
||||
case MCAST_LEAVE_GROUP:
|
||||
case MCAST_JOIN_SOURCE_GROUP:
|
||||
case MCAST_LEAVE_SOURCE_GROUP:
|
||||
case MCAST_BLOCK_SOURCE:
|
||||
case MCAST_UNBLOCK_SOURCE:
|
||||
case MCAST_MSFILTER:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static int mptcp_setsockopt(struct sock *sk, int level, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
struct sock *ssk;
|
||||
|
||||
pr_debug("msk=%p", msk);
|
||||
|
||||
if (mptcp_unsupported(level, optname))
|
||||
return -ENOPROTOOPT;
|
||||
|
||||
if (level == SOL_SOCKET)
|
||||
return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
|
||||
|
||||
/* @@ the meaning of setsockopt() when the socket is connected and
|
||||
* there are multiple subflows is not yet defined. It is up to the
|
||||
* MPTCP-level socket to configure the subflows until the subflow
|
||||
* is in TCP fallback, when TCP socket options are passed through
|
||||
* to the one remaining subflow.
|
||||
*/
|
||||
lock_sock(sk);
|
||||
ssk = __mptcp_tcp_fallback(msk);
|
||||
release_sock(sk);
|
||||
if (ssk)
|
||||
return tcp_setsockopt(ssk, level, optname, optval, optlen);
|
||||
|
||||
if (level == SOL_IPV6)
|
||||
return mptcp_setsockopt_v6(msk, optname, optval, optlen);
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* getsockopt() entry point for MPTCP sockets.
 *
 * Only a socket in TCP fallback is served: the request is passed through
 * to its one remaining subflow via tcp_getsockopt(). In every other case
 * -EOPNOTSUPP is returned.
 */
static int mptcp_getsockopt(struct sock *sk, int level, int optname,
			    char __user *optval, int __user *option)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sock *fallback_ssk;

	pr_debug("msk=%p", msk);

	/* @@ what getsockopt() means on a connected socket with several
	 * subflows is still undefined; only the TCP fallback case, with a
	 * single remaining subflow, is passed through.
	 */
	lock_sock(sk);
	fallback_ssk = __mptcp_tcp_fallback(msk);
	release_sock(sk);

	if (!fallback_ssk)
		return -EOPNOTSUPP;

	return tcp_getsockopt(fallback_ssk, level, optname, optval, option);
}
|
||||
|
||||
void __mptcp_data_acked(struct sock *sk)
|
||||
{
|
||||
if (!sock_owned_by_user(sk))
|
||||
|
@ -3375,7 +3246,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
|
|||
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
|
||||
* This is needed so NOSPACE flag can be set from tcp stack.
|
||||
*/
|
||||
__mptcp_flush_join_list(msk);
|
||||
mptcp_flush_join_list(msk);
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
|
||||
|
|
|
@ -108,6 +108,7 @@
|
|||
#define MPTCP_CLEAN_UNA 7
|
||||
#define MPTCP_ERROR_REPORT 8
|
||||
#define MPTCP_RETRANSMIT 9
|
||||
#define MPTCP_WORK_SYNC_SETSOCKOPT 10
|
||||
|
||||
static inline bool before64(__u64 seq1, __u64 seq2)
|
||||
{
|
||||
|
@ -255,6 +256,8 @@ struct mptcp_sock {
|
|||
u64 time; /* start time of measurement window */
|
||||
u64 rtt_us; /* last maximum rtt of subflows */
|
||||
} rcvq_space;
|
||||
|
||||
u32 setsockopt_seq;
|
||||
};
|
||||
|
||||
#define mptcp_lock_sock(___sk, cb) do { \
|
||||
|
@ -413,6 +416,8 @@ struct mptcp_subflow_context {
|
|||
long delegated_status;
|
||||
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
|
||||
|
||||
u32 setsockopt_seq;
|
||||
|
||||
struct sock *tcp_sock; /* tcp sk backpointer */
|
||||
struct sock *conn; /* parent mptcp_sock */
|
||||
const struct inet_connection_sock_af_ops *icsk_af_ops;
|
||||
|
@ -571,6 +576,11 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
|
|||
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
|
||||
bool mptcp_finish_join(struct sock *sk);
|
||||
bool mptcp_schedule_work(struct sock *sk);
|
||||
int mptcp_setsockopt(struct sock *sk, int level, int optname,
|
||||
sockptr_t optval, unsigned int optlen);
|
||||
int mptcp_getsockopt(struct sock *sk, int level, int optname,
|
||||
char __user *optval, int __user *option);
|
||||
|
||||
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
|
||||
void __mptcp_data_acked(struct sock *sk);
|
||||
void __mptcp_error_report(struct sock *sk);
|
||||
|
@ -730,6 +740,12 @@ unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
|
|||
unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
|
||||
unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
|
||||
|
||||
int mptcp_setsockopt(struct sock *sk, int level, int optname,
|
||||
sockptr_t optval, unsigned int optlen);
|
||||
|
||||
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
|
||||
void mptcp_sockopt_sync_all(struct mptcp_sock *msk);
|
||||
|
||||
static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
|
||||
{
|
||||
return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
|
||||
|
|
|
@ -0,0 +1,756 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Multipath TCP
|
||||
*
|
||||
* Copyright (c) 2021, Red Hat.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "MPTCP: " fmt
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <net/sock.h>
|
||||
#include <net/protocol.h>
|
||||
#include <net/tcp.h>
|
||||
#include <net/mptcp.h>
|
||||
#include "protocol.h"
|
||||
|
||||
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
|
||||
{
|
||||
sock_owned_by_me((const struct sock *)msk);
|
||||
|
||||
if (likely(!__mptcp_check_fallback(msk)))
|
||||
return NULL;
|
||||
|
||||
return msk->first;
|
||||
}
|
||||
|
||||
static u32 sockopt_seq_reset(const struct sock *sk)
|
||||
{
|
||||
sock_owned_by_me(sk);
|
||||
|
||||
/* Highbits contain state. Allows to distinguish sockopt_seq
|
||||
* of listener and established:
|
||||
* s0 = new_listener()
|
||||
* sockopt(s0) - seq is 1
|
||||
* s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0)
|
||||
* sockopt(s0) - seq increments to 2 on s0
|
||||
* sockopt(s1) // seq increments to 2 on s1 (different option)
|
||||
* new ssk completes join, inherits options from s0 // seq 2
|
||||
* Needs sync from mptcp join logic, but ssk->seq == msk->seq
|
||||
*
|
||||
* Set High order bits to sk_state so ssk->seq == msk->seq test
|
||||
* will fail.
|
||||
*/
|
||||
|
||||
return (u32)sk->sk_state << 24u;
|
||||
}
|
||||
|
||||
static void sockopt_seq_inc(struct mptcp_sock *msk)
|
||||
{
|
||||
u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff;
|
||||
|
||||
msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq;
|
||||
}
|
||||
|
||||
/* Fetch an integer option value from @optval into @val.
 *
 * Returns 0 on success, -EINVAL when @optlen is shorter than an int and
 * -EFAULT when the copy fails. @msk is not used.
 */
static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval,
				unsigned int optlen, int *val)
{
	if (optlen >= sizeof(int))
		return copy_from_sockptr(val, optval, sizeof(*val)) ? -EFAULT : 0;

	return -EINVAL;
}
|
||||
|
||||
static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
|
||||
lock_sock(sk);
|
||||
sockopt_seq_inc(msk);
|
||||
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
bool slow = lock_sock_fast(ssk);
|
||||
|
||||
switch (optname) {
|
||||
case SO_DEBUG:
|
||||
sock_valbool_flag(ssk, SOCK_DBG, !!val);
|
||||
break;
|
||||
case SO_KEEPALIVE:
|
||||
if (ssk->sk_prot->keepalive)
|
||||
ssk->sk_prot->keepalive(ssk, !!val);
|
||||
sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
|
||||
break;
|
||||
case SO_PRIORITY:
|
||||
ssk->sk_priority = val;
|
||||
break;
|
||||
case SO_SNDBUF:
|
||||
case SO_SNDBUFFORCE:
|
||||
ssk->sk_userlocks |= SOCK_SNDBUF_LOCK;
|
||||
WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
|
||||
break;
|
||||
case SO_RCVBUF:
|
||||
case SO_RCVBUFFORCE:
|
||||
ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
|
||||
WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
|
||||
break;
|
||||
case SO_MARK:
|
||||
if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
|
||||
ssk->sk_mark = sk->sk_mark;
|
||||
sk_dst_reset(ssk);
|
||||
}
|
||||
break;
|
||||
case SO_INCOMING_CPU:
|
||||
WRITE_ONCE(ssk->sk_incoming_cpu, val);
|
||||
break;
|
||||
}
|
||||
|
||||
subflow->setsockopt_seq = msk->setsockopt_seq;
|
||||
unlock_sock_fast(ssk, slow);
|
||||
}
|
||||
|
||||
release_sock(sk);
|
||||
}
|
||||
|
||||
static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val)
|
||||
{
|
||||
sockptr_t optval = KERNEL_SOCKPTR(&val);
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
int ret;
|
||||
|
||||
ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
|
||||
optval, sizeof(val));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mptcp_sol_socket_sync_intval(msk, optname, val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
|
||||
{
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
|
||||
WRITE_ONCE(sk->sk_incoming_cpu, val);
|
||||
|
||||
mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
|
||||
}
|
||||
|
||||
static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
int val, ret;
|
||||
|
||||
ret = mptcp_get_int_option(msk, optval, optlen, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
switch (optname) {
|
||||
case SO_KEEPALIVE:
|
||||
mptcp_sol_socket_sync_intval(msk, optname, val);
|
||||
return 0;
|
||||
case SO_DEBUG:
|
||||
case SO_MARK:
|
||||
case SO_PRIORITY:
|
||||
case SO_SNDBUF:
|
||||
case SO_SNDBUFFORCE:
|
||||
case SO_RCVBUF:
|
||||
case SO_RCVBUFFORCE:
|
||||
return mptcp_sol_socket_intval(msk, optname, val);
|
||||
case SO_INCOMING_CPU:
|
||||
mptcp_so_incoming_cpu(msk, val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -ENOPROTOOPT;
|
||||
}
|
||||
|
||||
/* SO_LINGER: set the option on the MPTCP socket, then mirror it on every
 * subflow.
 *
 * The linger value is copied in once and handed to sock_setsockopt() as a
 * kernel pointer. Each subflow either gets SOCK_LINGER cleared, or gets
 * the msk linger time plus SOCK_LINGER, and is stamped with the new
 * setsockopt sequence number.
 *
 * Returns 0 on success, -EINVAL for a short @optlen, -EFAULT when the
 * copy fails, or the sock_setsockopt() error.
 */
static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
					      unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_subflow_context *subflow;
	struct linger ling;
	int err;

	if (optlen < sizeof(ling))
		return -EINVAL;

	if (copy_from_sockptr(&ling, optval, sizeof(ling)))
		return -EFAULT;

	err = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER,
			      KERNEL_SOCKPTR(&ling), sizeof(ling));
	if (err)
		return err;

	lock_sock(sk);
	sockopt_seq_inc(msk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow;

		slow = lock_sock_fast(ssk);

		if (ling.l_onoff) {
			ssk->sk_lingertime = sk->sk_lingertime;
			sock_set_flag(ssk, SOCK_LINGER);
		} else {
			sock_reset_flag(ssk, SOCK_LINGER);
		}

		subflow->setsockopt_seq = msk->setsockopt_seq;
		unlock_sock_fast(ssk, slow);
	}
	release_sock(sk);

	return 0;
}
|
||||
|
||||
static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
struct socket *ssock;
|
||||
int ret;
|
||||
|
||||
switch (optname) {
|
||||
case SO_REUSEPORT:
|
||||
case SO_REUSEADDR:
|
||||
case SO_BINDTODEVICE:
|
||||
case SO_BINDTOIFINDEX:
|
||||
lock_sock(sk);
|
||||
ssock = __mptcp_nmpc_socket(msk);
|
||||
if (!ssock) {
|
||||
release_sock(sk);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
|
||||
if (ret == 0) {
|
||||
if (optname == SO_REUSEPORT)
|
||||
sk->sk_reuseport = ssock->sk->sk_reuseport;
|
||||
else if (optname == SO_REUSEADDR)
|
||||
sk->sk_reuse = ssock->sk->sk_reuse;
|
||||
else if (optname == SO_BINDTODEVICE)
|
||||
sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
|
||||
else if (optname == SO_BINDTOIFINDEX)
|
||||
sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
|
||||
}
|
||||
release_sock(sk);
|
||||
return ret;
|
||||
case SO_KEEPALIVE:
|
||||
case SO_PRIORITY:
|
||||
case SO_SNDBUF:
|
||||
case SO_SNDBUFFORCE:
|
||||
case SO_RCVBUF:
|
||||
case SO_RCVBUFFORCE:
|
||||
case SO_MARK:
|
||||
case SO_INCOMING_CPU:
|
||||
case SO_DEBUG:
|
||||
return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
|
||||
case SO_LINGER:
|
||||
return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
|
||||
case SO_NO_CHECK:
|
||||
case SO_DONTROUTE:
|
||||
case SO_BROADCAST:
|
||||
case SO_BSDCOMPAT:
|
||||
case SO_PASSCRED:
|
||||
case SO_PASSSEC:
|
||||
case SO_RXQ_OVFL:
|
||||
case SO_WIFI_STATUS:
|
||||
case SO_NOFCS:
|
||||
case SO_SELECT_ERR_QUEUE:
|
||||
return 0;
|
||||
}
|
||||
|
||||
return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
|
||||
}
|
||||
|
||||
static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
int ret = -EOPNOTSUPP;
|
||||
struct socket *ssock;
|
||||
|
||||
switch (optname) {
|
||||
case IPV6_V6ONLY:
|
||||
lock_sock(sk);
|
||||
ssock = __mptcp_nmpc_socket(msk);
|
||||
if (!ssock) {
|
||||
release_sock(sk);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
|
||||
if (ret == 0)
|
||||
sk->sk_ipv6only = ssock->sk->sk_ipv6only;
|
||||
|
||||
release_sock(sk);
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool mptcp_supported_sockopt(int level, int optname)
|
||||
{
|
||||
if (level == SOL_SOCKET) {
|
||||
switch (optname) {
|
||||
case SO_DEBUG:
|
||||
case SO_REUSEPORT:
|
||||
case SO_REUSEADDR:
|
||||
|
||||
/* the following ones need a better implementation,
|
||||
* but are quite common we want to preserve them
|
||||
*/
|
||||
case SO_BINDTODEVICE:
|
||||
case SO_SNDBUF:
|
||||
case SO_SNDBUFFORCE:
|
||||
case SO_RCVBUF:
|
||||
case SO_RCVBUFFORCE:
|
||||
case SO_KEEPALIVE:
|
||||
case SO_PRIORITY:
|
||||
case SO_LINGER:
|
||||
case SO_TIMESTAMP_OLD:
|
||||
case SO_TIMESTAMP_NEW:
|
||||
case SO_TIMESTAMPNS_OLD:
|
||||
case SO_TIMESTAMPNS_NEW:
|
||||
case SO_TIMESTAMPING_OLD:
|
||||
case SO_TIMESTAMPING_NEW:
|
||||
case SO_RCVLOWAT:
|
||||
case SO_RCVTIMEO_OLD:
|
||||
case SO_RCVTIMEO_NEW:
|
||||
case SO_SNDTIMEO_OLD:
|
||||
case SO_SNDTIMEO_NEW:
|
||||
case SO_MARK:
|
||||
case SO_INCOMING_CPU:
|
||||
case SO_BINDTOIFINDEX:
|
||||
case SO_BUSY_POLL:
|
||||
case SO_PREFER_BUSY_POLL:
|
||||
case SO_BUSY_POLL_BUDGET:
|
||||
|
||||
/* next ones are no-op for plain TCP */
|
||||
case SO_NO_CHECK:
|
||||
case SO_DONTROUTE:
|
||||
case SO_BROADCAST:
|
||||
case SO_BSDCOMPAT:
|
||||
case SO_PASSCRED:
|
||||
case SO_PASSSEC:
|
||||
case SO_RXQ_OVFL:
|
||||
case SO_WIFI_STATUS:
|
||||
case SO_NOFCS:
|
||||
case SO_SELECT_ERR_QUEUE:
|
||||
return true;
|
||||
}
|
||||
|
||||
/* SO_OOBINLINE is not supported, let's avoid the related mess */
|
||||
/* SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
|
||||
* SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
|
||||
* we must be careful with subflows
|
||||
*/
|
||||
/* SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks
|
||||
* explicitly the sk_protocol field
|
||||
*/
|
||||
/* SO_PEEK_OFF is unsupported, as it is for plain TCP */
|
||||
/* SO_MAX_PACING_RATE is unsupported, we must be careful with subflows */
|
||||
/* SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
|
||||
* but likely needs careful design
|
||||
*/
|
||||
/* SO_ZEROCOPY is currently unsupported, TODO in sndmsg */
|
||||
/* SO_TXTIME is currently unsupported */
|
||||
return false;
|
||||
}
|
||||
if (level == SOL_IP) {
|
||||
switch (optname) {
|
||||
/* should work fine */
|
||||
case IP_FREEBIND:
|
||||
case IP_TRANSPARENT:
|
||||
|
||||
/* the following are control cmsg related */
|
||||
case IP_PKTINFO:
|
||||
case IP_RECVTTL:
|
||||
case IP_RECVTOS:
|
||||
case IP_RECVOPTS:
|
||||
case IP_RETOPTS:
|
||||
case IP_PASSSEC:
|
||||
case IP_RECVORIGDSTADDR:
|
||||
case IP_CHECKSUM:
|
||||
case IP_RECVFRAGSIZE:
|
||||
|
||||
/* common stuff that need some love */
|
||||
case IP_TOS:
|
||||
case IP_TTL:
|
||||
case IP_BIND_ADDRESS_NO_PORT:
|
||||
case IP_MTU_DISCOVER:
|
||||
case IP_RECVERR:
|
||||
|
||||
/* possibly less common may deserve some love */
|
||||
case IP_MINTTL:
|
||||
|
||||
/* the following is apparently a no-op for plain TCP */
|
||||
case IP_RECVERR_RFC4884:
|
||||
return true;
|
||||
}
|
||||
|
||||
/* IP_OPTIONS is not supported, needs subflow care */
|
||||
/* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */
|
||||
/* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF,
|
||||
* IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP,
|
||||
* IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE,
|
||||
* MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP,
|
||||
* MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE,
|
||||
* MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal
|
||||
* with mcast stuff
|
||||
*/
|
||||
/* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */
|
||||
return false;
|
||||
}
|
||||
if (level == SOL_IPV6) {
|
||||
switch (optname) {
|
||||
case IPV6_V6ONLY:
|
||||
|
||||
/* the following are control cmsg related */
|
||||
case IPV6_RECVPKTINFO:
|
||||
case IPV6_2292PKTINFO:
|
||||
case IPV6_RECVHOPLIMIT:
|
||||
case IPV6_2292HOPLIMIT:
|
||||
case IPV6_RECVRTHDR:
|
||||
case IPV6_2292RTHDR:
|
||||
case IPV6_RECVHOPOPTS:
|
||||
case IPV6_2292HOPOPTS:
|
||||
case IPV6_RECVDSTOPTS:
|
||||
case IPV6_2292DSTOPTS:
|
||||
case IPV6_RECVTCLASS:
|
||||
case IPV6_FLOWINFO:
|
||||
case IPV6_RECVPATHMTU:
|
||||
case IPV6_RECVORIGDSTADDR:
|
||||
case IPV6_RECVFRAGSIZE:
|
||||
|
||||
/* the following ones need some love but are quite common */
|
||||
case IPV6_TCLASS:
|
||||
case IPV6_TRANSPARENT:
|
||||
case IPV6_FREEBIND:
|
||||
case IPV6_PKTINFO:
|
||||
case IPV6_2292PKTOPTIONS:
|
||||
case IPV6_UNICAST_HOPS:
|
||||
case IPV6_MTU_DISCOVER:
|
||||
case IPV6_MTU:
|
||||
case IPV6_RECVERR:
|
||||
case IPV6_FLOWINFO_SEND:
|
||||
case IPV6_FLOWLABEL_MGR:
|
||||
case IPV6_MINHOPCOUNT:
|
||||
case IPV6_DONTFRAG:
|
||||
case IPV6_AUTOFLOWLABEL:
|
||||
|
||||
/* the following one is a no-op for plain TCP */
|
||||
case IPV6_RECVERR_RFC4884:
|
||||
return true;
|
||||
}
|
||||
|
||||
/* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are
|
||||
* not supported
|
||||
*/
|
||||
/* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF,
|
||||
* IPV6_MULTICAST_IF, IPV6_ADDRFORM,
|
||||
* IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST,
|
||||
* IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP,
|
||||
* MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP,
|
||||
* MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER
|
||||
* are not supported better not deal with mcast
|
||||
*/
|
||||
/* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */
|
||||
|
||||
/* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */
|
||||
/* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */
|
||||
return false;
|
||||
}
|
||||
if (level == SOL_TCP) {
|
||||
switch (optname) {
|
||||
/* the following are no-op or should work just fine */
|
||||
case TCP_THIN_DUPACK:
|
||||
case TCP_DEFER_ACCEPT:
|
||||
|
||||
/* the following need some love */
|
||||
case TCP_MAXSEG:
|
||||
case TCP_NODELAY:
|
||||
case TCP_THIN_LINEAR_TIMEOUTS:
|
||||
case TCP_CONGESTION:
|
||||
case TCP_ULP:
|
||||
case TCP_CORK:
|
||||
case TCP_KEEPIDLE:
|
||||
case TCP_KEEPINTVL:
|
||||
case TCP_KEEPCNT:
|
||||
case TCP_SYNCNT:
|
||||
case TCP_SAVE_SYN:
|
||||
case TCP_LINGER2:
|
||||
case TCP_WINDOW_CLAMP:
|
||||
case TCP_QUICKACK:
|
||||
case TCP_USER_TIMEOUT:
|
||||
case TCP_TIMESTAMP:
|
||||
case TCP_NOTSENT_LOWAT:
|
||||
case TCP_TX_DELAY:
|
||||
return true;
|
||||
}
|
||||
|
||||
/* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */
|
||||
|
||||
/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
|
||||
* TCP_REPAIR_WINDOW are not supported, better avoid this mess
|
||||
*/
|
||||
/* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE,
|
||||
* are not supported fastopen is currently unsupported
|
||||
*/
|
||||
/* TCP_INQ is currently unsupported, needs some recvmsg work */
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* TCP_CONGESTION: apply the requested congestion control algorithm to
 * every subflow and, when no subflow reported an error, to the msk too.
 *
 * The name is copied from @optval (at most TCP_CA_NAME_MAX - 1 bytes) and
 * NUL-terminated. Every subflow is attempted even after a failure, and
 * each is stamped with the new setsockopt sequence number.
 *
 * Returns 0 on success, -EINVAL for an empty option, -EFAULT when the
 * copy fails, otherwise the first error from a subflow.
 */
static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval,
					       unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_subflow_context *subflow;
	char name[TCP_CA_NAME_MAX];
	bool cap_net_admin;
	int first_err = 0;
	int copied;

	if (!optlen)
		return -EINVAL;

	copied = strncpy_from_sockptr(name, optval,
				      min_t(long, TCP_CA_NAME_MAX - 1, optlen));
	if (copied < 0)
		return -EFAULT;

	name[copied] = 0;

	cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN);

	lock_sock(sk);
	sockopt_seq_inc(msk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		int err;

		lock_sock(ssk);
		err = tcp_set_congestion_control(ssk, name, true, cap_net_admin);
		/* remember only the first failure, keep going anyway */
		if (first_err == 0 && err < 0)
			first_err = err;
		subflow->setsockopt_seq = msk->setsockopt_seq;
		release_sock(ssk);
	}

	/* the result of setting it on the msk itself is ignored */
	if (first_err == 0)
		tcp_set_congestion_control(sk, name, false, cap_net_admin);

	release_sock(sk);
	return first_err;
}
|
||||
|
||||
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
switch (optname) {
|
||||
case TCP_ULP:
|
||||
return -EOPNOTSUPP;
|
||||
case TCP_CONGESTION:
|
||||
return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
|
||||
}
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
int mptcp_setsockopt(struct sock *sk, int level, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
struct sock *ssk;
|
||||
|
||||
pr_debug("msk=%p", msk);
|
||||
|
||||
if (!mptcp_supported_sockopt(level, optname))
|
||||
return -ENOPROTOOPT;
|
||||
|
||||
if (level == SOL_SOCKET)
|
||||
return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
|
||||
|
||||
/* @@ the meaning of setsockopt() when the socket is connected and
|
||||
* there are multiple subflows is not yet defined. It is up to the
|
||||
* MPTCP-level socket to configure the subflows until the subflow
|
||||
* is in TCP fallback, when TCP socket options are passed through
|
||||
* to the one remaining subflow.
|
||||
*/
|
||||
lock_sock(sk);
|
||||
ssk = __mptcp_tcp_fallback(msk);
|
||||
release_sock(sk);
|
||||
if (ssk)
|
||||
return tcp_setsockopt(ssk, level, optname, optval, optlen);
|
||||
|
||||
if (level == SOL_IPV6)
|
||||
return mptcp_setsockopt_v6(msk, optname, optval, optlen);
|
||||
|
||||
if (level == SOL_TCP)
|
||||
return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen);
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Answer a getsockopt() from a single subflow socket.
 *
 * With the msk lock held, the query goes to msk->first when it is set;
 * otherwise to the socket returned by __mptcp_nmpc_socket(). Returns the
 * tcp_getsockopt() result, or -EINVAL when neither socket is available.
 */
static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
					  char __user *optval, int __user *optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct socket *nmpc_sock;
	struct sock *first;
	int err = -EINVAL;

	lock_sock(sk);

	first = msk->first;
	if (first) {
		err = tcp_getsockopt(first, level, optname, optval, optlen);
	} else {
		nmpc_sock = __mptcp_nmpc_socket(msk);
		if (nmpc_sock)
			err = tcp_getsockopt(nmpc_sock->sk, level, optname,
					     optval, optlen);
	}

	release_sock(sk);
	return err;
}
|
||||
|
||||
/* Handle a SOL_TCP getsockopt() on the MPTCP-level socket.
 *
 * TCP_ULP, TCP_CONGESTION, TCP_INFO and TCP_CC_INFO are answered through
 * mptcp_getsockopt_first_sf_only(); any other option name yields
 * -EOPNOTSUPP.
 */
static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
				    char __user *optval, int __user *optlen)
{
	bool first_sf_only = optname == TCP_ULP ||
			     optname == TCP_CONGESTION ||
			     optname == TCP_INFO ||
			     optname == TCP_CC_INFO;

	if (!first_sf_only)
		return -EOPNOTSUPP;

	return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
					      optval, optlen);
}
|
||||
|
||||
/* getsockopt() entry point for MPTCP sockets.
 *
 * When __mptcp_tcp_fallback() returns a subflow, the request is passed
 * through to it with tcp_getsockopt(). Otherwise only SOL_TCP is
 * handled, by mptcp_getsockopt_sol_tcp(); every other level fails with
 * -EOPNOTSUPP.
 */
int mptcp_getsockopt(struct sock *sk, int level, int optname,
		     char __user *optval, int __user *option)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sock *fallback_ssk;

	pr_debug("msk=%p", msk);

	/* The meaning of getsockopt() on a connected socket with multiple
	 * subflows is not yet defined: the MPTCP-level socket answers for
	 * the subflows, except in TCP fallback, where socket options are
	 * passed through to the one remaining subflow.
	 */
	lock_sock(sk);
	fallback_ssk = __mptcp_tcp_fallback(msk);
	release_sock(sk);
	if (fallback_ssk)
		return tcp_getsockopt(fallback_ssk, level, optname, optval, option);

	if (level != SOL_TCP)
		return -EOPNOTSUPP;

	return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
}
|
||||
|
||||
static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
|
||||
{
|
||||
static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK;
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
|
||||
if (ssk->sk_prot->keepalive) {
|
||||
if (sock_flag(sk, SOCK_KEEPOPEN))
|
||||
ssk->sk_prot->keepalive(ssk, 1);
|
||||
else
|
||||
ssk->sk_prot->keepalive(ssk, 0);
|
||||
}
|
||||
|
||||
ssk->sk_priority = sk->sk_priority;
|
||||
ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
|
||||
ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
|
||||
|
||||
if (sk->sk_userlocks & tx_rx_locks) {
|
||||
ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
|
||||
if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
|
||||
WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
|
||||
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
|
||||
WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
|
||||
}
|
||||
|
||||
if (sock_flag(sk, SOCK_LINGER)) {
|
||||
ssk->sk_lingertime = sk->sk_lingertime;
|
||||
sock_set_flag(ssk, SOCK_LINGER);
|
||||
} else {
|
||||
sock_reset_flag(ssk, SOCK_LINGER);
|
||||
}
|
||||
|
||||
if (sk->sk_mark != ssk->sk_mark) {
|
||||
ssk->sk_mark = sk->sk_mark;
|
||||
sk_dst_reset(ssk);
|
||||
}
|
||||
|
||||
sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));
|
||||
|
||||
if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
|
||||
tcp_set_congestion_control(ssk, inet_csk(sk)->icsk_ca_ops->name, false, true);
|
||||
}
|
||||
|
||||
static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
|
||||
{
|
||||
bool slow = lock_sock_fast(ssk);
|
||||
|
||||
sync_socket_options(msk, ssk);
|
||||
|
||||
unlock_sock_fast(ssk, slow);
|
||||
}
|
||||
|
||||
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
|
||||
|
||||
msk_owned_by_me(msk);
|
||||
|
||||
if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
|
||||
__mptcp_sockopt_sync(msk, ssk);
|
||||
|
||||
subflow->setsockopt_seq = msk->setsockopt_seq;
|
||||
}
|
||||
}
|
||||
|
||||
void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
u32 seq;
|
||||
|
||||
seq = sockopt_seq_reset(sk);
|
||||
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
u32 sseq = READ_ONCE(subflow->setsockopt_seq);
|
||||
|
||||
if (sseq != msk->setsockopt_seq) {
|
||||
__mptcp_sockopt_sync(msk, ssk);
|
||||
WRITE_ONCE(subflow->setsockopt_seq, seq);
|
||||
} else if (sseq != seq) {
|
||||
WRITE_ONCE(subflow->setsockopt_seq, seq);
|
||||
}
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
msk->setsockopt_seq = seq;
|
||||
}
|
|
@ -679,6 +679,9 @@ create_child:
|
|||
goto out;
|
||||
}
|
||||
|
||||
/* ssk inherits options of listener sk */
|
||||
ctx->setsockopt_seq = listener->setsockopt_seq;
|
||||
|
||||
if (ctx->mp_capable) {
|
||||
/* this can't race with mptcp_close(), as the msk is
|
||||
* not yet exposed to user-space
|
||||
|
@ -694,6 +697,7 @@ create_child:
|
|||
* created mptcp socket
|
||||
*/
|
||||
new_msk->sk_destruct = mptcp_sock_destruct;
|
||||
mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
|
||||
mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
|
||||
mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
|
||||
ctx->conn = new_msk;
|
||||
|
@ -1317,6 +1321,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
|
|||
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
|
||||
|
||||
mptcp_add_pending_subflow(msk, subflow);
|
||||
mptcp_sockopt_sync(msk, ssk);
|
||||
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
|
||||
if (err && err != -EINPROGRESS)
|
||||
goto failed_unlink;
|
||||
|
|
|
@ -6,7 +6,7 @@ KSFT_KHDR_INSTALL := 1
|
|||
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
|
||||
|
||||
TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
|
||||
simult_flows.sh
|
||||
simult_flows.sh mptcp_sockopt.sh
|
||||
|
||||
TEST_GEN_FILES = mptcp_connect pm_nl_ctl
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ static bool cfg_join;
|
|||
static bool cfg_remove;
|
||||
static unsigned int cfg_do_w;
|
||||
static int cfg_wait;
|
||||
static uint32_t cfg_mark;
|
||||
|
||||
static void die_usage(void)
|
||||
{
|
||||
|
@ -69,6 +70,7 @@ static void die_usage(void)
|
|||
fprintf(stderr, "\t-p num -- use port num\n");
|
||||
fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
|
||||
fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
|
||||
fprintf(stderr, "\t-M mark -- set socket packet mark\n");
|
||||
fprintf(stderr, "\t-u -- check mptcp ulp\n");
|
||||
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
|
||||
exit(1);
|
||||
|
@ -140,6 +142,17 @@ static void set_sndbuf(int fd, unsigned int size)
|
|||
}
|
||||
}
|
||||
|
||||
static void set_mark(int fd, uint32_t mark)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
|
||||
if (err) {
|
||||
perror("set SO_MARK");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
static int sock_listen_mptcp(const char * const listenaddr,
|
||||
const char * const port)
|
||||
{
|
||||
|
@ -248,6 +261,9 @@ static int sock_connect_mptcp(const char * const remoteaddr,
|
|||
continue;
|
||||
}
|
||||
|
||||
if (cfg_mark)
|
||||
set_mark(sock, cfg_mark);
|
||||
|
||||
if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
|
||||
break; /* success */
|
||||
|
||||
|
@ -830,7 +846,7 @@ static void parse_opts(int argc, char **argv)
|
|||
{
|
||||
int c;
|
||||
|
||||
while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:")) != -1) {
|
||||
while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:")) != -1) {
|
||||
switch (c) {
|
||||
case 'j':
|
||||
cfg_join = true;
|
||||
|
@ -880,6 +896,9 @@ static void parse_opts(int argc, char **argv)
|
|||
case 'w':
|
||||
cfg_wait = atoi(optarg)*1000000;
|
||||
break;
|
||||
case 'M':
|
||||
cfg_mark = strtol(optarg, NULL, 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -911,6 +930,8 @@ int main(int argc, char *argv[])
|
|||
set_rcvbuf(fd, cfg_rcvbuf);
|
||||
if (cfg_sndbuf)
|
||||
set_sndbuf(fd, cfg_sndbuf);
|
||||
if (cfg_mark)
|
||||
set_mark(fd, cfg_mark);
|
||||
|
||||
return main_loop_s(fd);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,276 @@
|
|||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
ret=0
|
||||
sin=""
|
||||
sout=""
|
||||
cin=""
|
||||
cout=""
|
||||
ksft_skip=4
|
||||
timeout_poll=30
|
||||
timeout_test=$((timeout_poll * 2 + 1))
|
||||
mptcp_connect=""
|
||||
do_all_tests=1
|
||||
|
||||
# Install OUTPUT rules in netns $1, for both iptables and ip6tables, that
# accept TCP packets carrying mark $2 and drop TCP packets with mark 0.
# $1: network namespace
# $2: expected packet mark
add_mark_rules()
{
	local ns=$1
	local m=$2

	for t in iptables ip6tables; do
		local add_rule="ip netns exec $ns $t -A OUTPUT -p tcp"

		# just to debug: check we have multiple subflows connection requests
		$add_rule --syn -m mark --mark $m -j ACCEPT

		# RST packets might be handled by an internal dummy socket
		$add_rule --tcp-flags RST RST -m mark --mark 0 -j ACCEPT

		$add_rule -m mark --mark $m -j ACCEPT
		$add_rule -m mark --mark 0 -j DROP
	done
}
|
||||
|
||||
# Create the two test namespaces (ns1, ns2), connect them with four veth
# pairs carrying IPv4 and IPv6 addresses, register those addresses as
# MPTCP "signal" endpoints, raise the path-manager limits and install the
# packet-mark rules (mark 1 for ns1, mark 2 for ns2).
# Exits with $ksft_skip when a namespace cannot be created.
init()
{
	# No 'sec' variable is ever assigned in this script, so take the
	# timestamp for the namespace suffix directly from date(1); with an
	# unset variable printf would always emit "0".
	rndh=$(printf %x "$(date +%s)")-$(mktemp -u XXXXXX)

	ns1="ns1-$rndh"
	ns2="ns2-$rndh"

	for netns in "$ns1" "$ns2";do
		ip netns add $netns || exit $ksft_skip
		ip -net $netns link set lo up
		ip netns exec $netns sysctl -q net.mptcp.enabled=1
		ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
		ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
	done

	for i in $(seq 1 4); do
		ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
		ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
		ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
		ip -net "$ns1" link set ns1eth$i up

		ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i
		ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad
		ip -net "$ns2" link set ns2eth$i up

		# let $ns2 reach any $ns1 address from any interface
		ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i

		ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal
		ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal

		ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal
		ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal
	done

	ip netns exec $ns1 ./pm_nl_ctl limits 8 8
	ip netns exec $ns2 ./pm_nl_ctl limits 8 8

	add_mark_rules $ns1 1
	add_mark_rules $ns2 2
}
|
||||
|
||||
# Delete both test namespaces and remove the four temporary data files.
cleanup()
{
	ip netns del $ns1
	ip netns del $ns2

	rm -f "$cin" "$cout" "$sin" "$sout"
}
|
||||
|
||||
# Skip the whole test when one of the required tools cannot be run.
if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi

if ! iptables -V > /dev/null 2>&1; then
	echo "SKIP: Could not run all tests without iptables tool"
	exit $ksft_skip
fi

if ! ip6tables -V > /dev/null 2>&1; then
	echo "SKIP: Could not run all tests without ip6tables tool"
	exit $ksft_skip
fi
|
||||
|
||||
# Verify that the DROP rule in the OUTPUT chain of netns $1 did not match
# any packet: every counter printed before "DROP" must be 0.
# $1: network namespace
# $2: address family, 6 selects ip6tables, anything else iptables
# Returns 1 (after printing a FAIL line on stderr) on a non-zero counter.
check_mark()
{
	local ns=$1
	local af=$2

	tables=iptables
	[ $af -eq 6 ] && tables=ip6tables

	counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP)

	# keep only the counter columns preceding the target name
	values=${counters%DROP*}

	for v in $values; do
		[ $v -ne 0 ] || continue

		echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2
		return 1
	done

	return 0
}
|
||||
|
||||
# Debug helper: list file $1 on stderr, then print its last 27 bytes.
print_file_err()
{
	local path="$1"

	ls -l "$path" 1>&2
	echo "Trailing bytes are: "
	tail -c 27 "$path"
}
|
||||
|
||||
# Compare the sent file $1 with the received file $2.
# $3: description used in the failure message
# On mismatch prints details for both files, sets the global ret to 1
# and returns 1; returns 0 when the files are identical.
check_transfer()
{
	in=$1
	out=$2
	what=$3

	if cmp "$in" "$out" > /dev/null 2>&1; then
		return 0
	fi

	echo "[ FAIL ] $what does not match (in, out):"
	print_file_err "$in"
	print_file_err "$out"
	ret=1

	return 1
}
|
||||
|
||||
# $1: IP address
# Succeeds when $1 contains a ':' (an empty argument also succeeds).
is_v6()
{
	case "$1" in
	""|*:*)
		return 0
		;;
	esac

	return 1
}
|
||||
|
||||
# Run one transfer between a listener and a connector and validate it.
# $1: listener namespace    $2: connector namespace
# $3: client protocol       $4: server protocol
# $5: address the client connects to
# The listener is started with packet mark 1, the connector with mark 2.
# Returns 0 only when both programs exit successfully, no unmarked packet
# hit the DROP rules, and the server received the client's file intact.
do_transfer()
{
	listener_ns="$1"
	connector_ns="$2"
	cl_proto="$3"
	srv_proto="$4"
	connect_addr="$5"

	port=12001

	:> "$cout"
	:> "$sout"

	mptcp_connect="./mptcp_connect -r 20"

	local local_addr
	if is_v6 "${connect_addr}"; then
		local_addr="::"
	else
		local_addr="0.0.0.0"
	fi

	timeout ${timeout_test} \
		ip netns exec ${listener_ns} \
			$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} \
				${local_addr} < "$sin" > "$sout" &
	spid=$!

	# give the listener time to come up before connecting
	sleep 1

	timeout ${timeout_test} \
		ip netns exec ${connector_ns} \
			$mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} \
				$connect_addr < "$cin" > "$cout" &

	cpid=$!

	wait $cpid
	retc=$?
	wait $spid
	rets=$?

	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
		echo " client exit code $retc, server $rets" 1>&2
		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
		ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"

		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
		ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"

		ret=1
		return 1
	fi

	# A failed mark check must fail the transfer: record it in retc so
	# the final status test below sees it instead of discarding it.
	if [ $local_addr = "::" ];then
		check_mark $listener_ns 6 || retc=1
		check_mark $connector_ns 6 || retc=1
	else
		check_mark $listener_ns 4 || retc=1
		check_mark $connector_ns 4 || retc=1
	fi

	check_transfer $cin $sout "file received by server"

	rets=$?

	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
		return 0
	fi

	return 1
}
|
||||
|
||||
# Create test input file $1: $3 KB of random data followed by an
# end-of-file marker line.
# $2: who sends the file (only used in the log message)
make_file()
{
	name=$1
	who=$2
	size=$3

	{
		dd if=/dev/urandom bs=1024 count=$size 2> /dev/null
		echo -e "\nMPTCP_TEST_FILE_END_MARKER"
	} > "$name"

	echo "Created $name (size $size KB) containing data sent by $who"
}
|
||||
|
||||
# Run an MPTCP<->MPTCP transfer and record a failure in the global ret.
# $1: listener namespace
# $2: connector namespace
# $3: address to connect to
run_tests()
{
	listener_ns="$1"
	connector_ns="$2"
	connect_addr="$3"

	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
	lret=$?

	[ $lret -eq 0 ] && return 0

	ret=$lret
}
|
||||
|
||||
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
cout=$(mktemp)

# Register the handler before init: init can exit early when a namespace
# cannot be created, and the temporary files (plus any namespace already
# set up) must still be removed in that case.
trap cleanup EXIT

init
make_file "$cin" "client" 1
make_file "$sin" "server" 1

run_tests $ns1 $ns2 10.0.1.1
run_tests $ns1 $ns2 dead:beef:1::1


if [ $ret -eq 0 ];then
	echo "PASS: all packets had packet mark set"
fi

exit $ret
|
Loading…
Reference in New Issue