diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 2dfbfdff45a8..037e054b01a2 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -324,8 +324,7 @@ ETH_HLEN + ETH_FCS_LEN, \ cache_line_size()) -#define MVNETA_SKB_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \ - NET_IP_ALIGN) +#define MVNETA_SKB_HEADROOM max(XDP_PACKET_HEADROOM, NET_SKB_PAD) #define MVNETA_SKB_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \ MVNETA_SKB_HEADROOM)) #define MVNETA_SKB_SIZE(len) (SKB_DATA_ALIGN(len) + MVNETA_SKB_PAD) @@ -1167,6 +1166,7 @@ bm_mtu_err: mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, 1 << pp->id); pp->bm_priv = NULL; + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; mvreg_write(pp, MVNETA_ACC_MODE, MVNETA_ACC_MODE_EXT1); netdev_info(pp->dev, "fail to update MTU, fall back to software BM\n"); } @@ -4948,7 +4948,6 @@ static int mvneta_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); pp->id = global_port_id++; - pp->rx_offset_correction = MVNETA_SKB_HEADROOM; /* Obtain access to BM resources if enabled and already initialized */ bm_node = of_parse_phandle(dn, "buffer-manager", 0); @@ -4973,6 +4972,10 @@ static int mvneta_probe(struct platform_device *pdev) } of_node_put(bm_node); + /* sw buffer management */ + if (!pp->bm_priv) + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; + err = mvneta_init(&pdev->dev, pp); if (err < 0) goto err_netdev; @@ -5130,6 +5133,7 @@ static int mvneta_resume(struct device *device) err = mvneta_bm_port_init(pdev, pp); if (err < 0) { dev_info(&pdev->dev, "use SW buffer management\n"); + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; pp->bm_priv = NULL; } } diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1cf73e6f85ca..3dc964010fef 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -148,9 +148,7 @@ struct tcp_request_sock { const struct tcp_request_sock_ops *af_specific; u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; -#if IS_ENABLED(CONFIG_MPTCP) bool is_mptcp; -#endif u32 txhash; u32 rcv_isn; u32 snt_isn; diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 27627e2d1bc2..c971d25431ea 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -174,15 +174,12 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, #endif /* CONFIG_MPTCP */ -void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped); - #if IS_ENABLED(CONFIG_MPTCP_IPV6) int mptcpv6_init(void); +void mptcpv6_handle_mapped(struct sock *sk, bool mapped); #elif IS_ENABLED(CONFIG_IPV6) -static inline int mptcpv6_init(void) -{ - return 0; -} +static inline int mptcpv6_init(void) { return 0; } +static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { } #endif #endif /* __NET_MPTCP_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 4a180f2a13e3..e55d5f765807 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -476,6 +476,13 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, if (!inet_get_convert_csum(sk)) features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + /* UDP segmentation expects packets of type CHECKSUM_PARTIAL or + * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial + * packets in udp_gro_complete_segment. As does UDP GSO, verified by + * udp_send_skb. But when those packets are looped in dev_loopback_xmit + * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this + * specific case, where PARTIAL is both correct and required. + */ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 536e032d95c8..ea46fc6aa883 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -802,16 +802,12 @@ net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata) if (!n_hw_metadata) return NULL; - trap_group_name = kmemdup(hw_metadata->trap_group_name, - strlen(hw_metadata->trap_group_name) + 1, - GFP_ATOMIC | __GFP_ZERO); + trap_group_name = kstrdup(hw_metadata->trap_group_name, GFP_ATOMIC); if (!trap_group_name) goto free_hw_metadata; n_hw_metadata->trap_group_name = trap_group_name; - trap_name = kmemdup(hw_metadata->trap_name, - strlen(hw_metadata->trap_name) + 1, - GFP_ATOMIC | __GFP_ZERO); + trap_name = kstrdup(hw_metadata->trap_name, GFP_ATOMIC); if (!trap_name) goto free_trap_group; n_hw_metadata->trap_name = trap_name; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cdad6ed532c4..09c44bf2e1d2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1242,6 +1242,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, return 0; memset(&vf_vlan_info, 0, sizeof(vf_vlan_info)); + memset(&node_guid, 0, sizeof(node_guid)); + memset(&port_guid, 0, sizeof(port_guid)); vf_mac.vf = vf_vlan.vf = @@ -1290,8 +1292,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, sizeof(vf_trust), &vf_trust)) goto nla_put_vf_failure; - memset(&node_guid, 0, sizeof(node_guid)); - memset(&port_guid, 0, sizeof(port_guid)); if (dev->netdev_ops->ndo_get_vf_guid && !dev->netdev_ops->ndo_get_vf_guid(dev, vfs_num, &node_guid, &port_guid)) { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 345b2b0ff618..9a4f6b16c9bc 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -349,6 +349,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = 0; treq->tfo_listener = false; + + if (IS_ENABLED(CONFIG_MPTCP)) + treq->is_mptcp = 0; + if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e8b840a4767e..e325b4506e25 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6637,6 +6637,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, af_ops->init_req(req, sk, skb); + if (IS_ENABLED(CONFIG_MPTCP) && want_cookie) + tcp_rsk(req)->is_mptcp = 0; + if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 30915f6f31e3..13235a012388 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -178,6 +178,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->tfo_listener = false; + if (IS_ENABLED(CONFIG_MPTCP)) + treq->is_mptcp = 0; + if (security_inet_conn_request(sk, skb, req)) goto out_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 33a578a3eb3a..eaf09e6b7844 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -239,7 +239,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, icsk->icsk_af_ops = &ipv6_mapped; if (sk_is_mptcp(sk)) - mptcp_handle_ipv6_mapped(sk, true); + mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; #ifdef CONFIG_TCP_MD5SIG tp->af_specific = &tcp_sock_ipv6_mapped_specific; @@ -251,7 +251,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, icsk->icsk_ext_hdr_len = exthdrlen; icsk->icsk_af_ops = &ipv6_specific; if (sk_is_mptcp(sk)) - mptcp_handle_ipv6_mapped(sk, false); + mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; #ifdef CONFIG_TCP_MD5SIG tp->af_specific = &tcp_sock_ipv6_specific; @@ -1208,7 +1208,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; if (sk_is_mptcp(newsk)) - mptcp_handle_ipv6_mapped(newsk, true); + mptcpv6_handle_mapped(newsk, true); newsk->sk_backlog_rcv = tcp_v4_do_rcv; #ifdef CONFIG_TCP_MD5SIG newtp->af_specific = &tcp_sock_ipv6_mapped_specific; diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index 5db56d2218c5..49f6054e7f4e 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -10,17 +10,19 @@ config MPTCP uses the TCP protocol, and TCP options carry header information for MPTCP. +if MPTCP + config MPTCP_IPV6 bool "MPTCP: IPv6 support for Multipath TCP" - depends on MPTCP select IPV6 default y config MPTCP_HMAC_TEST bool "Tests for MPTCP HMAC implementation" - default n help This option enable boot time self-test for the HMAC implementation used by the MPTCP code Say N if you are unsure. + +endif diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 39fdca79ce90..3bccee455688 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -28,7 +28,7 @@ static void __mptcp_close(struct sock *sk, long timeout); static const struct proto_ops *tcp_proto_ops(struct sock *sk) { -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_MPTCP_IPV6) if (sk->sk_family == AF_INET6) return &inet6_stream_ops; #endif @@ -644,19 +644,21 @@ static void __mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); + LIST_HEAD(conn_list); mptcp_token_destroy(msk->token); inet_sk_state_store(sk, TCP_CLOSE); - list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { + list_splice_init(&msk->conn_list, &conn_list); + + release_sock(sk); + + list_for_each_entry_safe(subflow, tmp, &conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); __mptcp_close_ssk(sk, ssk, subflow, timeout); } - if (msk->cached_ext) - __skb_ext_put(msk->cached_ext); - release_sock(sk); sk_common_release(sk); } @@ -776,18 +778,19 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, static void mptcp_destroy(struct sock *sk) { + struct mptcp_sock *msk = mptcp_sk(sk); + + if (msk->cached_ext) + __skb_ext_put(msk->cached_ext); } static int mptcp_setsockopt(struct sock *sk, int level, int optname, - char __user *uoptval, unsigned int optlen) + char __user *optval, unsigned int optlen) { struct mptcp_sock *msk = mptcp_sk(sk); - char __kernel *optval; int ret = -EOPNOTSUPP; struct socket *ssock; - - /* will be treated as __user in tcp_setsockopt */ - optval = (char __kernel __force *)uoptval; + struct sock *ssk; pr_debug("msk=%p", msk); @@ -796,27 +799,28 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, */ lock_sock(sk); ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); - if (!IS_ERR(ssock)) { - pr_debug("subflow=%p", ssock->sk); - ret = kernel_setsockopt(ssock, level, optname, optval, optlen); + if (IS_ERR(ssock)) { + release_sock(sk); + return ret; } + + ssk = ssock->sk; + sock_hold(ssk); release_sock(sk); + ret = tcp_setsockopt(ssk, level, optname, optval, optlen); + sock_put(ssk); + return ret; } static int mptcp_getsockopt(struct sock *sk, int level, int optname, - char __user *uoptval, int __user *uoption) + char __user *optval, int __user *option) { struct mptcp_sock *msk = mptcp_sk(sk); - char __kernel *optval; int ret = -EOPNOTSUPP; - int __kernel *option; struct socket *ssock; - - /* will be treated as __user in tcp_getsockopt */ - optval = (char __kernel __force *)uoptval; - option = (int __kernel __force *)uoption; + struct sock *ssk; pr_debug("msk=%p", msk); @@ -825,12 +829,18 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, */ lock_sock(sk); ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); - if (!IS_ERR(ssock)) { - pr_debug("subflow=%p", ssock->sk); - ret = kernel_getsockopt(ssock, level, optname, optval, option); + if (IS_ERR(ssock)) { + release_sock(sk); + return ret; } + + ssk = ssock->sk; + sock_hold(ssk); release_sock(sk); + ret = tcp_getsockopt(ssk, level, optname, optval, option); + sock_put(ssk); + return ret; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 1662e1178949..65122edf60aa 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -186,6 +186,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); + if (tcp_rsk(req)->is_mptcp == 0) + goto create_child; + /* if the sk is MP_CAPABLE, we try to fetch the client key */ subflow_req = mptcp_subflow_rsk(req); if (subflow_req->mp_capable) { @@ -582,9 +585,9 @@ subflow_default_af_ops(struct sock *sk) return &subflow_specific; } -void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped) -{ #if IS_ENABLED(CONFIG_MPTCP_IPV6) +void mptcpv6_handle_mapped(struct sock *sk, bool mapped) +{ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock_af_ops *target; @@ -599,8 +602,8 @@ void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped) subflow->icsk_af_ops = icsk->icsk_af_ops; icsk->icsk_af_ops = target; -#endif } +#endif int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) { @@ -621,7 +624,9 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) */ sf->sk->sk_net_refcnt = 1; get_net(net); +#ifdef CONFIG_PROC_FS this_cpu_add(*net->core.sock_inuse, 1); +#endif err = tcp_set_ulp(sf->sk, "mptcp"); release_sock(sf->sk); @@ -767,7 +772,7 @@ static void subflow_ulp_clone(const struct request_sock *req, struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk); struct mptcp_subflow_context *new_ctx; - if (!subflow_req->mp_capable) { + if (!tcp_rsk(req)->is_mptcp || !subflow_req->mp_capable) { subflow_ulp_fallback(newsk, old_ctx); return; } diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index dba70377bbd9..a36974e9c601 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -377,7 +377,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, if (mask != q->tab_mask) { struct sk_buff **ntab; - ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *), GFP_KERNEL | __GFP_ZERO); + ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL); if (!ntab) return -ENOMEM;