/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the UDP protocol.
 *
 * Version:	@(#)udp.h	1.0.2	04/28/93
 *
 * Author:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 */
#ifndef _LINUX_UDP_H
#define _LINUX_UDP_H

#include <net/inet_sock.h>
#include <linux/skbuff.h>
#include <net/netns/hash.h>
#include <uapi/linux/udp.h>
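
/* The skb's transport header offset must already point at the UDP header. */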
static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
{
	return (struct udphdr *)skb_transport_header(skb);
}
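
/* As udp_hdr(), but for the inner header of an encapsulated (tunnelled)
 * packet; valid once the inner transport header offset has been set.
 */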
static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb)
{
	return (struct udphdr *)skb_inner_transport_header(skb);
}
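
/* Lower bound on the UDP hash table size; the actual size is chosen at
 * boot based on available memory.
 */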
#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256)
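
/* Hash a local port number into one of the hash slots; net_hash_mix()
 * perturbs the result per network namespace.
 */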
static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
{
	return (num + net_hash_mix(net)) & mask;
}

struct udp_sock {
	/* inet_sock has to be the first member */
	struct inet_sock inet;
#define udp_port_hash		inet.sk.__sk_common.skc_u16hashes[0]
#define udp_portaddr_hash	inet.sk.__sk_common.skc_u16hashes[1]
#define udp_portaddr_node	inet.sk.__sk_common.skc_portaddr_node
	int		 pending;	/* Any pending frames ? */
	unsigned int	 corkflag;	/* Cork is required */
	__u8		 encap_type;	/* Is this an Encapsulation socket? */
	unsigned char	 no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
			 no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
			 encap_enabled:1, /* This socket enabled encap
					   * processing; UDP tunnels and
					   * other encapsulation layers
					   * set this
					   */
			 gro_enabled:1;	/* Can accept GRO packets */
	/*
	 * Following member retains the information to create a UDP header
	 * when the socket is uncorked.
	 */
__u16 len; /* total length of pending frames */
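	/* Segment size for sends using UDP GSO (the UDP_SEGMENT socket
	 * option or the equivalent cmsg); set at setsockopt or sendmsg time.
	 */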
__u16 gso_size;
	/*
	 * Fields specific to UDP-Lite.
	 */
	__u16		 pcslen;
	__u16		 pcrlen;
	/* indicator bits used by pcflag: */
#define UDPLITE_BIT	 0x1		/* set by udplite proto init function */
#define UDPLITE_SEND_CC	 0x2		/* set via udplite setsockopt */
#define UDPLITE_RECV_CC	 0x4		/* set via udplite setsockopt */
	__u8		 pcflag;	/* marks socket as UDP-Lite if > 0 */
	__u8		 unused[3];
	/*
	 * For encapsulation sockets.
	 */
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
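	/* Lets tunnel drivers (e.g. VXLAN, GENEVE) claim ICMP errors that
	 * could not be matched to a regular socket.
	 */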
	int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb);
	void (*encap_destroy)(struct sock *sk);

	/* GRO functions for UDP socket */
	struct sk_buff *	(*gro_receive)(struct sock *sk,
					       struct list_head *head,
					       struct sk_buff *skb);
	int			(*gro_complete)(struct sock *sk,
						struct sk_buff *skb,
						int nhoff);

	/* udp_recvmsg() tries to use this before splicing sk_receive_queue */
	struct sk_buff_head	reader_queue ____cacheline_aligned_in_smp;

	/* This field is dirtied by udp_recvmsg() */
	int			forward_deficit;
};
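
/*
 * With the UDP_SEGMENT socket option (or the equivalent cmsg), the payload
 * of a single send is split into datagrams of the requested segment size.
 * The segment size must not exceed the MTU, the total length may, and the
 * last segment may be shorter. At most UDP_MAX_SEGMENTS segments are
 * allowed per send. A minimal userspace sketch (fd and buf are assumed to
 * be an already-connected UDP socket and a 16 * 1400 byte buffer):
 *
 *	int gso_size = 1400;
 *
 *	setsockopt(fd, SOL_UDP, UDP_SEGMENT, &gso_size, sizeof(gso_size));
 *	send(fd, buf, 16 * 1400, 0);	(emitted as 16 separate datagrams)
 */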
#define UDP_MAX_SEGMENTS (1 << 6UL)
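
/* The cast below relies on inet_sock, and thus struct sock, being the
 * first member of struct udp_sock.
 */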
static inline struct udp_sock *udp_sk(const struct sock *sk)
{
	return (struct udp_sock *)sk;
}
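
/* Per-socket control over zero UDP checksums on IPv6, as used by tunnel
 * protocols (a zero checksum is otherwise invalid for UDP over IPv6).
 */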
static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
{
	udp_sk(sk)->no_check6_tx = val;
}

static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
{
	udp_sk(sk)->no_check6_rx = val;
}

static inline bool udp_get_no_check6_tx(struct sock *sk)
{
	return udp_sk(sk)->no_check6_tx;
}

static inline bool udp_get_no_check6_rx(struct sock *sk)
{
	return udp_sk(sk)->no_check6_rx;
}
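
/* If the skb was built by UDP GRO, report the segment size to userspace
 * as a UDP_GRO cmsg.
 */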
static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
				 struct sk_buff *skb)
{
	int gso_size;

	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
		gso_size = skb_shinfo(skb)->gso_size;
		put_cmsg(msg, SOL_UDP, UDP_GRO, sizeof(gso_size), &gso_size);
	}
}
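
/* True if a GSO packet reached a socket that did not ask for GRO: the
 * caller then needs to segment it before handing it on.
 */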
static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
{
	return !udp_sk(sk)->gro_enabled && skb_is_gso(skb) &&
	       skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4;
}
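
/* Iterate over the secondary hash chain, keyed on (local port, local
 * address) via udp_portaddr_hash above.
 */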
#define udp_portaddr_for_each_entry(__sk, list) \
	hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)

#define udp_portaddr_for_each_entry_rcu(__sk, list) \
	hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)

#define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE)

#endif /* _LINUX_UDP_H */