2019-05-27 14:55:01 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* INET An implementation of the TCP/IP protocol suite for the LINUX
|
|
|
|
* operating system. INET is implemented using the BSD Socket
|
|
|
|
* interface as the means of communication with the user level.
|
|
|
|
*
|
|
|
|
* Definitions for the UDP module.
|
|
|
|
*
|
|
|
|
* Version: @(#)udp.h 1.0.2 05/07/93
|
|
|
|
*
|
2005-05-06 07:16:16 +08:00
|
|
|
* Authors: Ross Biro
|
2005-04-17 06:20:36 +08:00
|
|
|
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
|
|
|
|
*
|
|
|
|
* Fixes:
|
|
|
|
* Alan Cox : Turned on udp checksums. I don't want to
|
|
|
|
* chase 'memory corruption' bugs that aren't!
|
|
|
|
*/
|
|
|
|
#ifndef _UDP_H
|
|
|
|
#define _UDP_H
|
|
|
|
|
|
|
|
#include <linux/list.h>
|
2011-11-24 09:12:59 +08:00
|
|
|
#include <linux/bug.h>
|
2005-12-27 12:43:12 +08:00
|
|
|
#include <net/inet_sock.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <net/sock.h>
|
|
|
|
#include <net/snmp.h>
|
2006-11-28 03:10:57 +08:00
|
|
|
#include <net/ip.h>
|
|
|
|
#include <linux/ipv6.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/seq_file.h>
|
2006-10-20 05:23:57 +08:00
|
|
|
#include <linux/poll.h>
|
2020-06-24 06:31:15 +08:00
|
|
|
#include <linux/indirect_call_wrapper.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-11-28 03:10:57 +08:00
|
|
|
/**
|
|
|
|
* struct udp_skb_cb - UDP(-Lite) private variables
|
|
|
|
*
|
|
|
|
* @header: private variables used by IPv4/IPv6
|
|
|
|
* @cscov: checksum coverage length (UDP-Lite only)
|
|
|
|
* @partial_cov: if set indicates partial csum coverage
|
|
|
|
*/
|
|
|
|
struct udp_skb_cb {
|
|
|
|
union {
|
|
|
|
struct inet_skb_parm h4;
|
2011-12-10 17:48:31 +08:00
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
2006-11-28 03:10:57 +08:00
|
|
|
struct inet6_skb_parm h6;
|
|
|
|
#endif
|
|
|
|
} header;
|
|
|
|
__u16 cscov;
|
|
|
|
__u8 partial_cov;
|
|
|
|
};
|
|
|
|
#define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb))
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2009-11-08 18:17:05 +08:00
|
|
|
/**
|
|
|
|
* struct udp_hslot - UDP hash slot
|
|
|
|
*
|
|
|
|
* @head: head of list of sockets
|
|
|
|
* @count: number of sockets in 'head' list
|
|
|
|
* @lock: spinlock protecting changes to head/count
|
|
|
|
*/
|
2008-10-29 16:41:45 +08:00
|
|
|
struct udp_hslot {
|
2016-04-01 23:52:13 +08:00
|
|
|
struct hlist_head head;
|
2009-11-08 18:17:05 +08:00
|
|
|
int count;
|
2008-10-29 16:41:45 +08:00
|
|
|
spinlock_t lock;
|
|
|
|
} __attribute__((aligned(2 * sizeof(long))));
|
2009-10-07 08:37:59 +08:00
|
|
|
|
2009-11-08 18:17:58 +08:00
|
|
|
/**
 *	struct udp_table - UDP table
 *
 *	@hash:	hash table, sockets are hashed on (local port)
 *	@hash2:	hash table, sockets are hashed on (local port, local address)
 *	@mask:	number of slots in hash tables, minus 1
 *	@log:	log2(number of slots in hash table)
 */
struct udp_table {
	struct udp_hslot	*hash;
	struct udp_hslot	*hash2;
	unsigned int		mask;
	unsigned int		log;
};
|
|
|
|
extern struct udp_table udp_table;
|
2013-09-24 02:33:36 +08:00
|
|
|
void udp_table_init(struct udp_table *, const char *);
|
2009-10-07 08:37:59 +08:00
|
|
|
static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
|
2012-04-15 13:58:06 +08:00
|
|
|
struct net *net, unsigned int num)
|
2009-10-07 08:37:59 +08:00
|
|
|
{
|
|
|
|
return &table->hash[udp_hashfn(net, num, table->mask)];
|
|
|
|
}
|
2009-11-08 18:17:58 +08:00
|
|
|
/*
|
|
|
|
* For secondary hash, net_hash_mix() is performed before calling
|
|
|
|
* udp_hashslot2(), this explains difference with udp_hashslot()
|
|
|
|
*/
|
|
|
|
static inline struct udp_hslot *udp_hashslot2(struct udp_table *table,
|
|
|
|
unsigned int hash)
|
|
|
|
{
|
|
|
|
return &table->hash2[hash & table->mask];
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
extern struct proto udp_prot;
|
|
|
|
|
2010-11-10 07:24:26 +08:00
|
|
|
extern atomic_long_t udp_memory_allocated;
|
2007-12-31 16:29:24 +08:00
|
|
|
|
|
|
|
/* sysctl variables for udp */
|
2010-11-10 07:24:26 +08:00
|
|
|
extern long sysctl_udp_mem[3];
|
2007-12-31 16:29:24 +08:00
|
|
|
extern int sysctl_udp_rmem_min;
|
|
|
|
extern int sysctl_udp_wmem_min;
|
|
|
|
|
2005-12-27 12:43:12 +08:00
|
|
|
struct sk_buff;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-11-28 03:10:57 +08:00
|
|
|
/*
|
|
|
|
* Generic checksumming routines for UDP(-Lite) v4 and v6
|
|
|
|
*/
|
2006-11-15 13:40:42 +08:00
|
|
|
static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb)
|
2006-11-28 03:10:57 +08:00
|
|
|
{
|
2014-06-15 14:24:20 +08:00
|
|
|
return (UDP_SKB_CB(skb)->cscov == skb->len ?
|
|
|
|
__skb_checksum_complete(skb) :
|
|
|
|
__skb_checksum_complete_head(skb, UDP_SKB_CB(skb)->cscov));
|
2006-11-28 03:10:57 +08:00
|
|
|
}
|
|
|
|
|
2006-11-21 10:06:37 +08:00
|
|
|
/*
 * Returns 0 when skb_csum_unnecessary() reports that no check is needed
 * or when __udp_lib_checksum_complete() yields zero, and 1 otherwise.
 */
static inline int udp_lib_checksum_complete(struct sk_buff *skb)
{
	if (skb_csum_unnecessary(skb))
		return 0;

	/* Collapse the __sum16 result to a plain 0/1 truth value */
	return __udp_lib_checksum_complete(skb) ? 1 : 0;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* udp_csum_outgoing - compute UDPv4/v6 checksum over fragments
|
|
|
|
* @sk: socket we are writing to
|
|
|
|
* @skb: sk_buff containing the filled-in UDP header
|
|
|
|
* (checksum field must be zeroed out)
|
|
|
|
*/
|
2006-11-15 13:35:48 +08:00
|
|
|
static inline __wsum udp_csum_outgoing(struct sock *sk, struct sk_buff *skb)
|
2006-11-28 03:10:57 +08:00
|
|
|
{
|
2007-04-26 09:04:18 +08:00
|
|
|
__wsum csum = csum_partial(skb_transport_header(skb),
|
|
|
|
sizeof(struct udphdr), 0);
|
2006-11-28 03:10:57 +08:00
|
|
|
skb_queue_walk(&sk->sk_write_queue, skb) {
|
|
|
|
csum = csum_add(csum, skb->csum);
|
|
|
|
}
|
|
|
|
return csum;
|
|
|
|
}
|
|
|
|
|
2011-03-01 10:36:48 +08:00
|
|
|
static inline __wsum udp_csum(struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
__wsum csum = csum_partial(skb_transport_header(skb),
|
|
|
|
sizeof(struct udphdr), skb->csum);
|
|
|
|
|
|
|
|
for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) {
|
|
|
|
csum = csum_add(csum, skb->csum);
|
|
|
|
}
|
|
|
|
return csum;
|
|
|
|
}
|
|
|
|
|
2014-06-05 08:19:48 +08:00
|
|
|
/*
 * Thin wrapper around csum_tcpudp_magic() that fixes the protocol
 * argument to IPPROTO_UDP.
 */
static inline __sum16 udp_v4_check(int len, __be32 saddr,
				   __be32 daddr, __wsum base)
{
	__sum16 check;

	check = csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
	return check;
}
|
|
|
|
|
|
|
|
void udp_set_csum(bool nocheck, struct sk_buff *skb,
|
|
|
|
__be32 saddr, __be32 daddr, int len);
|
|
|
|
|
2016-04-06 00:41:15 +08:00
|
|
|
static inline void udp_csum_pull_header(struct sk_buff *skb)
|
|
|
|
{
|
2016-06-01 06:22:41 +08:00
|
|
|
if (!skb->csum_valid && skb->ip_summed == CHECKSUM_NONE)
|
|
|
|
skb->csum = csum_partial(skb->data, sizeof(struct udphdr),
|
2016-04-06 00:41:15 +08:00
|
|
|
skb->csum);
|
|
|
|
skb_pull_rcsum(skb, sizeof(struct udphdr));
|
|
|
|
UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr);
|
|
|
|
}
|
|
|
|
|
2016-04-05 23:22:51 +08:00
|
|
|
typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
|
|
|
|
__be16 dport);
|
|
|
|
|
2020-06-24 06:31:15 +08:00
|
|
|
INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
|
|
|
|
struct sk_buff *));
|
|
|
|
INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
|
|
|
|
INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
|
|
|
|
struct sk_buff *));
|
|
|
|
INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
|
2018-06-24 13:13:49 +08:00
|
|
|
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
|
2020-01-25 18:26:45 +08:00
|
|
|
struct udphdr *uh, struct sock *sk);
|
2016-04-05 23:22:51 +08:00
|
|
|
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
|
2014-08-23 04:34:44 +08:00
|
|
|
|
2018-04-27 01:42:16 +08:00
|
|
|
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
|
2018-05-08 02:08:34 +08:00
|
|
|
netdev_features_t features);
|
2018-04-27 01:42:16 +08:00
|
|
|
|
2014-08-23 04:34:44 +08:00
|
|
|
static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct udphdr *uh;
|
|
|
|
unsigned int hlen, off;
|
|
|
|
|
|
|
|
off = skb_gro_offset(skb);
|
|
|
|
hlen = off + sizeof(*uh);
|
|
|
|
uh = skb_gro_header_fast(skb, off);
|
|
|
|
if (skb_gro_header_hard(skb, hlen))
|
|
|
|
uh = skb_gro_header_slow(skb, hlen, off);
|
|
|
|
|
|
|
|
return uh;
|
|
|
|
}
|
|
|
|
|
2006-11-28 03:10:57 +08:00
|
|
|
/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
|
2016-02-11 00:50:35 +08:00
|
|
|
/*
 * Not expected to be called: the body triggers BUG() unconditionally.
 * The trailing return only keeps the int signature well-formed.
 */
static inline int udp_lib_hash(struct sock *sk)
{
	BUG();

	return 0;
}
|
|
|
|
|
2013-09-24 02:33:36 +08:00
|
|
|
void udp_lib_unhash(struct sock *sk);
|
|
|
|
void udp_lib_rehash(struct sock *sk, u16 new_hash);
|
2006-11-28 03:10:57 +08:00
|
|
|
|
|
|
|
/* Close @sk by handing it to sk_common_release(); @timeout is not used. */
static inline void udp_lib_close(struct sock *sk, long timeout)
{
	sk_common_release(sk);
}
|
|
|
|
|
2013-09-24 02:33:36 +08:00
|
|
|
int udp_lib_get_port(struct sock *sk, unsigned short snum,
|
|
|
|
unsigned int hash2_nulladdr);
|
2006-11-28 03:10:57 +08:00
|
|
|
|
2015-02-25 01:17:31 +08:00
|
|
|
u32 udp_flow_hashrnd(void);
|
|
|
|
|
2014-07-02 12:32:39 +08:00
|
|
|
/*
 * Derive a source port from the flow hash of @skb.
 *
 * @net:     namespace queried for the default local port range
 * @skb:     packet whose hash selects the port
 * @min:     lower bound of the port range
 * @max:     upper bound of the port range; when @min >= @max the range
 *           from inet_get_local_port_range() is used instead
 * @use_eth: when no hash is available, hash the first 2 * ETH_ALEN bytes
 *           at skb->data rather than falling back to udp_flow_hashrnd()
 *
 * The hash is scaled into the range, so the result is @min plus a value
 * below (@max - @min), returned in network byte order.
 */
static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
				       int min, int max, bool use_eth)
{
	u32 hash;
	u64 scaled;

	/* Use default range */
	if (min >= max)
		inet_get_local_port_range(net, &min, &max);

	hash = skb_get_hash(skb);
	if (unlikely(!hash)) {
		/* No normal hash could be found.  If the caller indicated an
		 * Ethernet packet, compute one from the start of the frame;
		 * otherwise settle for some consistent random value.
		 */
		hash = use_eth ? jhash(skb->data, 2 * ETH_ALEN,
				       (__force u32) skb->protocol)
			       : udp_flow_hashrnd();
	}

	/* Since this is being sent on the wire obfuscate hash a bit
	 * to minimize possibility that any useful information to an
	 * attacker is leaked. Only upper 16 bits are relevant in the
	 * computation for 16 bit port value.
	 */
	hash ^= hash << 16;

	scaled = ((u64) hash * (max - min)) >> 32;

	return htons(scaled + min);
}
|
|
|
|
|
2018-06-08 17:35:40 +08:00
|
|
|
static inline int udp_rqueue_get(struct sock *sk)
|
|
|
|
{
|
|
|
|
return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit);
|
|
|
|
}
|
|
|
|
|
net: ensure unbound datagram socket to be chosen when not in a VRF
Ensure an unbound datagram skt is chosen when not in a VRF. The check
for a device match in compute_score() for UDP must be performed when
there is no device match. For this, a failure is returned when there is
no device match. This ensures that bound sockets are never selected,
even if there is no unbound socket.
Allow IPv6 packets to be sent over a datagram skt bound to a VRF. These
packets are currently blocked, as flowi6_oif was set to that of the
master vrf device, and the ipi6_ifindex is that of the slave device.
Allow these packets to be sent by checking the device with ipi6_ifindex
has the same L3 scope as that of the bound device of the skt, which is
the master vrf device. Note that this check always succeeds if the skt
is unbound.
Even though the right datagram skt is now selected by compute_score(),
a different skt is being returned that is bound to the wrong vrf. The
difference between these and stream sockets is the handling of the skt
option for SO_REUSEPORT. While the handling when adding a skt for reuse
correctly checks that the bound device of the skt is a match, the skts
in the hashslot are already incorrect. So for the same hash, a skt for
the wrong vrf may be selected for the required port. The root cause is
that the skt is immediately placed into a slot when it is created,
but when the skt is then bound using SO_BINDTODEVICE, it remains in the
same slot. The solution is to move the skt to the correct slot by
forcing a rehash.
Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Tested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-07 23:36:04 +08:00
|
|
|
static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
|
|
|
|
int dif, int sdif)
|
|
|
|
{
|
|
|
|
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
|
|
|
|
return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
|
|
|
|
bound_dev_if, dif, sdif);
|
|
|
|
#else
|
|
|
|
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2006-11-28 03:10:57 +08:00
|
|
|
/* net/ipv4/udp.c */
|
2016-11-15 23:37:53 +08:00
|
|
|
void udp_destruct_sock(struct sock *sk);
|
udp: implement memory accounting helpers
Avoid using the generic helpers.
Use the receive queue spin lock to protect the memory
accounting operation, both on enqueue and on dequeue.
On dequeue perform partial memory reclaiming, trying to
leave a quantum of forward allocated memory.
On enqueue use a custom helper, to allow some optimizations:
- use a plain spin_lock() variant instead of the slightly
costly spin_lock_irqsave(),
- avoid dst_force check, since the calling code has already
dropped the skb dst
- avoid orphaning the skb, since skb_steal_sock() already did
the work for us
The above needs custom memory reclaiming on shutdown, provided
by the udp_destruct_sock().
v5 -> v6:
- don't orphan the skb on enqueue
v4 -> v5:
- replace the mem_lock with the receive queue spin lock
- ensure that the bh is always allowed to enqueue at least
a skb, even if sk_rcvbuf is exceeded
v3 -> v4:
- reworked memory accounting, simplifying the schema
- provide a helper for both memory scheduling and enqueuing
v1 -> v2:
- use a udp specific destructor to perform memory reclaiming
- remove a couple of helpers, unneeded after the above cleanup
- do not reclaim memory on dequeue if not under memory
pressure
- reworked the fwd accounting schema to avoid potential
integer overflow
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-21 19:55:46 +08:00
|
|
|
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
|
|
|
|
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
|
2016-11-04 18:28:59 +08:00
|
|
|
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
|
2017-05-16 17:20:14 +08:00
|
|
|
struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
|
2019-04-08 16:15:59 +08:00
|
|
|
int noblock, int *off, int *err);
|
2016-11-04 18:28:59 +08:00
|
|
|
/*
 * Convenience wrapper around __skb_recv_udp() for callers that do not
 * need the offset: a local offset initialised to 0 is passed in and its
 * final value is discarded.
 */
static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
					   int noblock, int *err)
{
	int unused_off = 0;

	return __skb_recv_udp(sk, flags, noblock, &unused_off, err);
}
|
udp: implement memory accounting helpers
Avoid using the generic helpers.
Use the receive queue spin lock to protect the memory
accounting operation, both on enqueue and on dequeue.
On dequeue perform partial memory reclaiming, trying to
leave a quantum of forward allocated memory.
On enqueue use a custom helper, to allow some optimizations:
- use a plain spin_lock() variant instead of the slightly
costly spin_lock_irqsave(),
- avoid dst_force check, since the calling code has already
dropped the skb dst
- avoid orphaning the skb, since skb_steal_sock() already did
the work for us
The above needs custom memory reclaiming on shutdown, provided
by the udp_destruct_sock().
v5 -> v6:
- don't orphan the skb on enqueue
v4 -> v5:
- replace the mem_lock with the receive queue spin lock
- ensure that the bh is always allowed to enqueue at least
a skb, even if sk_rcvbuf is exceeded
v3 -> v4:
- reworked memory accounting, simplifying the schema
- provide a helper for both memory scheduling and enqueuing
v1 -> v2:
- use a udp specific destructor to perform memory reclaiming
- remove a couple of helpers, unneeded after the above cleanup
- do not reclaim memory on dequeue if not under memory
pressure
- reworked the fwd accounting schema to avoid potential
integer overflow
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-21 19:55:46 +08:00
|
|
|
|
2017-09-28 21:51:36 +08:00
|
|
|
int udp_v4_early_demux(struct sk_buff *skb);
|
2017-08-25 20:31:01 +08:00
|
|
|
bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
|
2013-09-24 02:33:36 +08:00
|
|
|
int udp_get_port(struct sock *sk, unsigned short snum,
|
|
|
|
int (*saddr_cmp)(const struct sock *,
|
|
|
|
const struct sock *));
|
2018-11-08 19:19:21 +08:00
|
|
|
int udp_err(struct sk_buff *, u32);
|
2016-08-24 12:06:33 +08:00
|
|
|
int udp_abort(struct sock *sk, int err);
|
2015-03-02 15:37:48 +08:00
|
|
|
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
|
2013-09-24 02:33:36 +08:00
|
|
|
int udp_push_pending_frames(struct sock *sk);
|
|
|
|
void udp_flush_pending_frames(struct sock *sk);
|
2018-04-27 01:42:20 +08:00
|
|
|
int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
|
2013-09-24 02:33:36 +08:00
|
|
|
void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
|
|
|
|
int udp_rcv(struct sk_buff *skb);
|
|
|
|
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
|
udp: implement memory accounting helpers
Avoid using the generic helpers.
Use the receive queue spin lock to protect the memory
accounting operation, both on enqueue and on dequeue.
On dequeue perform partial memory reclaiming, trying to
leave a quantum of forward allocated memory.
On enqueue use a custom helper, to allow some optimizations:
- use a plain spin_lock() variant instead of the slightly
costly spin_lock_irqsave(),
- avoid dst_force check, since the calling code has already
dropped the skb dst
- avoid orphaning the skb, since skb_steal_sock() already did
the work for us
The above needs custom memory reclaiming on shutdown, provided
by the udp_destruct_sock().
v5 -> v6:
- don't orphan the skb on enqueue
v4 -> v5:
- replace the mem_lock with the receive queue spin lock
- ensure that the bh is always allowed to enqueue at least
a skb, even if sk_rcvbuf is exceeded
v3 -> v4:
- reworked memory accounting, simplifying the schema
- provide a helper for both memory scheduling and enqueuing
v1 -> v2:
- use a udp specific destructor to perform memory reclaiming
- remove a couple of helpers, unneeded after the above cleanup
- do not reclaim memory on dequeue if not under memory
pressure
- reworked the fwd accounting schema to avoid potential
integer overflow
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-21 19:55:46 +08:00
|
|
|
int udp_init_sock(struct sock *sk);
|
2018-03-31 06:08:05 +08:00
|
|
|
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
|
2016-10-21 00:39:40 +08:00
|
|
|
int __udp_disconnect(struct sock *sk, int flags);
|
2013-09-24 02:33:36 +08:00
|
|
|
int udp_disconnect(struct sock *sk, int flags);
|
2018-06-29 00:43:44 +08:00
|
|
|
__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
|
2013-09-24 02:33:36 +08:00
|
|
|
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
|
2014-09-30 11:22:29 +08:00
|
|
|
netdev_features_t features,
|
|
|
|
bool is_ipv6);
|
2013-09-24 02:33:36 +08:00
|
|
|
int udp_lib_getsockopt(struct sock *sk, int level, int optname,
|
|
|
|
char __user *optval, int __user *optlen);
|
|
|
|
int udp_lib_setsockopt(struct sock *sk, int level, int optname,
|
|
|
|
char __user *optval, unsigned int optlen,
|
|
|
|
int (*push_pending_frames)(struct sock *));
|
|
|
|
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
|
|
|
|
__be32 daddr, __be16 dport, int dif);
|
|
|
|
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
|
2017-08-07 23:44:16 +08:00
|
|
|
__be32 daddr, __be16 dport, int dif, int sdif,
|
2016-01-05 06:41:47 +08:00
|
|
|
struct udp_table *tbl, struct sk_buff *skb);
|
2016-04-05 23:22:50 +08:00
|
|
|
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
|
|
|
|
__be16 sport, __be16 dport);
|
2013-09-24 02:33:36 +08:00
|
|
|
struct sock *udp6_lib_lookup(struct net *net,
|
|
|
|
const struct in6_addr *saddr, __be16 sport,
|
|
|
|
const struct in6_addr *daddr, __be16 dport,
|
|
|
|
int dif);
|
|
|
|
struct sock *__udp6_lib_lookup(struct net *net,
|
|
|
|
const struct in6_addr *saddr, __be16 sport,
|
|
|
|
const struct in6_addr *daddr, __be16 dport,
|
2017-08-07 23:44:20 +08:00
|
|
|
int dif, int sdif, struct udp_table *tbl,
|
2016-01-05 06:41:47 +08:00
|
|
|
struct sk_buff *skb);
|
2016-04-05 23:22:50 +08:00
|
|
|
struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
|
|
|
|
__be16 sport, __be16 dport);
|
2008-10-01 22:48:10 +08:00
|
|
|
|
2017-06-27 01:01:50 +08:00
|
|
|
/* UDP uses skb->dev_scratch to cache as much information as possible and avoid
|
|
|
|
* possibly multiple cache miss on dequeue()
|
|
|
|
*/
|
|
|
|
struct udp_dev_scratch {
|
2017-07-25 23:57:47 +08:00
|
|
|
/* skb->truesize and the stateless bit are embedded in a single field;
|
|
|
|
* do not use a bitfield since the compiler emits better/smaller code
|
|
|
|
* this way
|
|
|
|
*/
|
|
|
|
u32 _tsize_state;
|
|
|
|
|
|
|
|
#if BITS_PER_LONG == 64
|
|
|
|
/* len and the bit needed to compute skb_csum_unnecessary
|
|
|
|
* will be on cold cache lines at recvmsg time.
|
|
|
|
* skb->len can be stored on 16 bits since the udp header has been
|
|
|
|
* already validated and pulled.
|
|
|
|
*/
|
2017-06-27 01:01:50 +08:00
|
|
|
u16 len;
|
|
|
|
bool is_linear;
|
|
|
|
bool csum_unnecessary;
|
2017-07-25 23:57:47 +08:00
|
|
|
#endif
|
2017-06-27 01:01:50 +08:00
|
|
|
};
|
|
|
|
|
2017-07-25 23:57:47 +08:00
|
|
|
static inline struct udp_dev_scratch *udp_skb_scratch(struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
return (struct udp_dev_scratch *)&skb->dev_scratch;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if BITS_PER_LONG == 64
|
2017-06-27 01:01:50 +08:00
|
|
|
static inline unsigned int udp_skb_len(struct sk_buff *skb)
|
|
|
|
{
|
2017-07-25 23:57:47 +08:00
|
|
|
return udp_skb_scratch(skb)->len;
|
2017-06-27 01:01:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb)
|
|
|
|
{
|
2017-07-25 23:57:47 +08:00
|
|
|
return udp_skb_scratch(skb)->csum_unnecessary;
|
2017-06-27 01:01:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool udp_skb_is_linear(struct sk_buff *skb)
|
|
|
|
{
|
2017-07-25 23:57:47 +08:00
|
|
|
return udp_skb_scratch(skb)->is_linear;
|
2017-06-27 01:01:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
static inline unsigned int udp_skb_len(struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
return skb->len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
return skb_csum_unnecessary(skb);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool udp_skb_is_linear(struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
return !skb_is_nonlinear(skb);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
|
|
|
|
struct iov_iter *to)
|
|
|
|
{
|
2017-08-15 03:31:38 +08:00
|
|
|
int n;
|
2017-06-27 01:01:50 +08:00
|
|
|
|
2017-08-15 03:31:38 +08:00
|
|
|
n = copy_to_iter(skb->data + off, len, to);
|
|
|
|
if (n == len)
|
2017-06-27 01:01:50 +08:00
|
|
|
return 0;
|
|
|
|
|
2017-08-15 03:31:38 +08:00
|
|
|
iov_iter_revert(to, n);
|
2017-06-27 01:01:50 +08:00
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
2006-11-28 03:10:57 +08:00
|
|
|
/*
|
|
|
|
* SNMP statistics for UDP and UDP-Lite
|
|
|
|
*/
|
net: snmp: kill various STATS_USER() helpers
In the old days (before linux-3.0), SNMP counters were duplicated,
one for user context, and one for BH context.
After commit 8f0ea0fe3a03 ("snmp: reduce percpu needs by 50%")
we have a single copy, and what really matters is preemption being
enabled or disabled, since we use this_cpu_inc() or __this_cpu_inc()
respectively.
We therefore kill SNMP_INC_STATS_USER(), SNMP_ADD_STATS_USER(),
NET_INC_STATS_USER(), NET_ADD_STATS_USER(), SCTP_INC_STATS_USER(),
SNMP_INC_STATS64_USER(), SNMP_ADD_STATS64_USER(), TCP_ADD_STATS_USER(),
UDP_INC_STATS_USER(), UDP6_INC_STATS_USER(), and XFRM_INC_STATS_USER()
Following patches will rename __BH helpers to make clear their
usage is not tied to BH being disabled.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-28 07:44:27 +08:00
|
|
|
/*
 * Increment @field in the UDP-Lite MIB of @net when @is_udplite is set,
 * in the plain UDP MIB otherwise.  UDP_INC_STATS() uses SNMP_INC_STATS(),
 * the double-underscore variant uses __SNMP_INC_STATS().
 */
#define UDP_INC_STATS(net, field, is_udplite)				      \
do {									      \
	if (is_udplite)							      \
		SNMP_INC_STATS((net)->mib.udplite_statistics, field);	      \
	else								      \
		SNMP_INC_STATS((net)->mib.udp_statistics, field);	      \
} while (0)

#define __UDP_INC_STATS(net, field, is_udplite)				      \
do {									      \
	if (is_udplite)							      \
		__SNMP_INC_STATS((net)->mib.udplite_statistics, field);     \
	else								      \
		__SNMP_INC_STATS((net)->mib.udp_statistics, field);	      \
} while (0)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-04-28 07:44:30 +08:00
|
|
|
/*
 * IPv6 counterpart of __UDP_INC_STATS(): bump @field in the UDP-Lite or
 * UDP IPv6 MIB of @net via __SNMP_INC_STATS(), selected by @is_udplite.
 */
#define __UDP6_INC_STATS(net, field, is_udplite)			      \
do {									      \
	if (is_udplite)							      \
		__SNMP_INC_STATS((net)->mib.udplite_stats_in6, field);	      \
	else								      \
		__SNMP_INC_STATS((net)->mib.udp_stats_in6, field);	      \
} while (0)
|
net: snmp: kill various STATS_USER() helpers
In the old days (before linux-3.0), SNMP counters were duplicated,
one for user context, and one for BH context.
After commit 8f0ea0fe3a03 ("snmp: reduce percpu needs by 50%")
we have a single copy, and what really matters is preemption being
enabled or disabled, since we use this_cpu_inc() or __this_cpu_inc()
respectively.
We therefore kill SNMP_INC_STATS_USER(), SNMP_ADD_STATS_USER(),
NET_INC_STATS_USER(), NET_ADD_STATS_USER(), SCTP_INC_STATS_USER(),
SNMP_INC_STATS64_USER(), SNMP_ADD_STATS64_USER(), TCP_ADD_STATS_USER(),
UDP_INC_STATS_USER(), UDP6_INC_STATS_USER(), and XFRM_INC_STATS_USER()
Following patches will rename __BH helpers to make clear their
usage is not tied to BH being disabled.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-28 07:44:27 +08:00
|
|
|
/*
 * IPv6 counterpart of UDP_INC_STATS(): bump @field in the UDP-Lite or
 * UDP IPv6 MIB of @net via SNMP_INC_STATS(), selected by @__lite.
 */
#define UDP6_INC_STATS(net, field, __lite)				      \
do {									      \
	if (__lite)							      \
		SNMP_INC_STATS((net)->mib.udplite_stats_in6, field);	      \
	else								      \
		SNMP_INC_STATS((net)->mib.udp_stats_in6, field);	      \
} while (0)
|
2007-12-12 03:30:32 +08:00
|
|
|
|
2011-12-10 17:48:31 +08:00
|
|
|
/*
 * __UDPX_MIB - select the MIB that matches a socket
 * @sk:   socket; IS_UDPLITE(sk) chooses between the UDP-Lite and UDP MIBs
 *        of sock_net(sk)
 * @ipv4: true selects the IPv4 MIBs, false the IPv6 ones.  Without
 *        CONFIG_IPV6 the argument is ignored and the IPv4 MIBs are
 *        always used.
 *
 * @ipv4 is parenthesized in the expansion so that an argument containing
 * a low-precedence operator cannot be re-associated with the ?: below.
 */
#if IS_ENABLED(CONFIG_IPV6)
#define __UDPX_MIB(sk, ipv4)						\
({									\
	(ipv4) ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \
				 sock_net(sk)->mib.udp_statistics) :	\
		(IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_stats_in6 :	\
				 sock_net(sk)->mib.udp_stats_in6);	\
})
#else
#define __UDPX_MIB(sk, ipv4)						\
({									\
	IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics :		\
			 sock_net(sk)->mib.udp_statistics;		\
})
#endif

/*
 * Increment @field in the MIB picked by __UDPX_MIB(), treating the socket
 * as IPv4 when its sk_family is AF_INET.
 */
#define __UDPX_INC_STATS(sk, field) \
	__SNMP_INC_STATS(__UDPX_MIB(sk, (sk)->sk_family == AF_INET), field)
|
|
|
|
|
2018-04-11 03:31:50 +08:00
|
|
|
#ifdef CONFIG_PROC_FS
|
2005-04-17 06:20:36 +08:00
|
|
|
struct udp_seq_afinfo {
|
2011-10-30 14:46:30 +08:00
|
|
|
sa_family_t family;
|
|
|
|
struct udp_table *udp_table;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct udp_iter_state {
|
2008-03-29 09:23:33 +08:00
|
|
|
struct seq_net_private p;
|
2005-04-17 06:20:36 +08:00
|
|
|
int bucket;
|
|
|
|
};
|
|
|
|
|
2018-04-11 03:31:50 +08:00
|
|
|
void *udp_seq_start(struct seq_file *seq, loff_t *pos);
|
|
|
|
void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos);
|
|
|
|
void udp_seq_stop(struct seq_file *seq, void *v);
|
|
|
|
|
2018-04-11 01:42:55 +08:00
|
|
|
extern const struct seq_operations udp_seq_ops;
|
|
|
|
extern const struct seq_operations udp6_seq_ops;
|
2005-08-16 13:18:02 +08:00
|
|
|
|
2013-09-24 02:33:36 +08:00
|
|
|
int udp4_proc_init(void);
|
|
|
|
void udp4_proc_exit(void);
|
2018-04-11 03:31:50 +08:00
|
|
|
#endif /* CONFIG_PROC_FS */
|
2007-12-31 16:29:24 +08:00
|
|
|
|
2013-09-24 02:33:36 +08:00
|
|
|
int udpv4_offload_init(void);
|
2013-06-08 18:56:03 +08:00
|
|
|
|
2013-09-24 02:33:36 +08:00
|
|
|
void udp_init(void);
|
2009-07-09 16:09:47 +08:00
|
|
|
|
2018-10-05 23:31:40 +08:00
|
|
|
DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
|
2013-09-24 02:33:36 +08:00
|
|
|
void udp_encap_enable(void);
|
2012-04-27 16:24:08 +08:00
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
2018-10-05 23:31:40 +08:00
|
|
|
DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
|
2013-09-24 02:33:36 +08:00
|
|
|
void udpv6_encap_enable(void);
|
2012-04-27 16:24:08 +08:00
|
|
|
#endif
|
2017-03-24 03:34:16 +08:00
|
|
|
|
2018-11-07 19:38:33 +08:00
|
|
|
/*
 * Segment a GSO packet on the receive path.
 *
 * @sk:   receiving socket; charged with the drops on failure
 * @skb:  packet to segment; always released by this function
 * @ipv4: selects the IPv4 or IPv6 MIB for error accounting
 *
 * Returns the segment list from __skb_gso_segment() on success.  On
 * failure (error pointer or NULL) gso_segs drops are added to
 * sk->sk_drops and UDP_MIB_INERRORS, @skb is freed and NULL is returned.
 */
static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
					      struct sk_buff *skb, bool ipv4)
{
	netdev_features_t features = NETIF_F_SG;
	struct sk_buff *segs;
	int segs_nr;

	/* Avoid csum recalculation by skb_segment unless userspace explicitly
	 * asks for the final checksum values
	 */
	if (!inet_get_convert_csum(sk))
		features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;

	/* UDP segmentation expects packets of type CHECKSUM_PARTIAL or
	 * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial
	 * packets in udp_gro_complete_segment. As does UDP GSO, verified by
	 * udp_send_skb. But when those packets are looped in dev_loopback_xmit
	 * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this
	 * specific case, where PARTIAL is both correct and required.
	 */
	if (skb->pkt_type == PACKET_LOOPBACK)
		skb->ip_summed = CHECKSUM_PARTIAL;

	/* the GSO CB lays after the UDP one, no need to save and restore any
	 * CB fragment
	 */
	segs = __skb_gso_segment(skb, features, false);
	if (!IS_ERR_OR_NULL(segs)) {
		consume_skb(skb);
		return segs;
	}

	/* Segmentation failed: account every would-be segment as dropped */
	segs_nr = skb_shinfo(skb)->gso_segs;
	atomic_add(segs_nr, &sk->sk_drops);
	SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr);
	kfree_skb(skb);

	return NULL;
}
|
|
|
|
|
2020-03-09 19:12:38 +08:00
|
|
|
#ifdef CONFIG_BPF_STREAM_PARSER
|
|
|
|
struct sk_psock;
|
|
|
|
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
|
|
|
|
#endif /* CONFIG_BPF_STREAM_PARSER */
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif /* _UDP_H */
|