Merge branch 'udp-peek'
Willem de Bruijn says: ==================== udp: support SO_PEEK_OFF Support peeking at a non-zero offset for UDP sockets. Match the existing behavior on Unix datagram sockets. 1/3 makes the sk_peek_offset functions safe to use outside locks 2/3 removes udp headers before enqueue, to simplify offset arithmetic 3/3 introduces SO_PEEK_OFFSET support, with Unix socket peek semantics. Changes v1->v2 - squash patches 3 and 4 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
66f8779036
|
@ -2949,7 +2949,12 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
|
|||
struct iov_iter *from, int len);
|
||||
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
|
||||
void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
|
||||
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
|
||||
void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len);
|
||||
static inline void skb_free_datagram_locked(struct sock *sk,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
__skb_free_datagram_locked(sk, skb, 0);
|
||||
}
|
||||
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
|
||||
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
|
||||
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
|
||||
|
|
|
@ -457,28 +457,32 @@ struct sock {
|
|||
#define SK_CAN_REUSE 1
|
||||
#define SK_FORCE_REUSE 2
|
||||
|
||||
int sk_set_peek_off(struct sock *sk, int val);
|
||||
|
||||
static inline int sk_peek_offset(struct sock *sk, int flags)
|
||||
{
|
||||
if ((flags & MSG_PEEK) && (sk->sk_peek_off >= 0))
|
||||
return sk->sk_peek_off;
|
||||
else
|
||||
return 0;
|
||||
if (unlikely(flags & MSG_PEEK)) {
|
||||
s32 off = READ_ONCE(sk->sk_peek_off);
|
||||
if (off >= 0)
|
||||
return off;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void sk_peek_offset_bwd(struct sock *sk, int val)
|
||||
{
|
||||
if (sk->sk_peek_off >= 0) {
|
||||
if (sk->sk_peek_off >= val)
|
||||
sk->sk_peek_off -= val;
|
||||
else
|
||||
sk->sk_peek_off = 0;
|
||||
s32 off = READ_ONCE(sk->sk_peek_off);
|
||||
|
||||
if (unlikely(off >= 0)) {
|
||||
off = max_t(s32, off - val, 0);
|
||||
WRITE_ONCE(sk->sk_peek_off, off);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void sk_peek_offset_fwd(struct sock *sk, int val)
|
||||
{
|
||||
if (sk->sk_peek_off >= 0)
|
||||
sk->sk_peek_off += val;
|
||||
sk_peek_offset_bwd(sk, -val);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1862,6 +1866,7 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer,
|
|||
|
||||
void sk_stop_timer(struct sock *sk, struct timer_list *timer);
|
||||
|
||||
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
|
||||
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
|
||||
|
||||
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
|
||||
|
|
|
@ -158,6 +158,15 @@ static inline __sum16 udp_v4_check(int len, __be32 saddr,
|
|||
void udp_set_csum(bool nocheck, struct sk_buff *skb,
|
||||
__be32 saddr, __be32 daddr, int len);
|
||||
|
||||
static inline void udp_csum_pull_header(struct sk_buff *skb)
|
||||
{
|
||||
if (skb->ip_summed == CHECKSUM_NONE)
|
||||
skb->csum = csum_partial(udp_hdr(skb), sizeof(struct udphdr),
|
||||
skb->csum);
|
||||
skb_pull_rcsum(skb, sizeof(struct udphdr));
|
||||
UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr);
|
||||
}
|
||||
|
||||
struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
|
||||
struct udphdr *uh);
|
||||
int udp_gro_complete(struct sk_buff *skb, int nhoff);
|
||||
|
|
|
@ -301,16 +301,19 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
|
|||
}
|
||||
EXPORT_SYMBOL(skb_free_datagram);
|
||||
|
||||
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
|
||||
void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
|
||||
{
|
||||
bool slow;
|
||||
|
||||
if (likely(atomic_read(&skb->users) == 1))
|
||||
smp_rmb();
|
||||
else if (likely(!atomic_dec_and_test(&skb->users)))
|
||||
else if (likely(!atomic_dec_and_test(&skb->users))) {
|
||||
sk_peek_offset_bwd(sk, len);
|
||||
return;
|
||||
}
|
||||
|
||||
slow = lock_sock_fast(sk);
|
||||
sk_peek_offset_bwd(sk, len);
|
||||
skb_orphan(skb);
|
||||
sk_mem_reclaim_partial(sk);
|
||||
unlock_sock_fast(sk, slow);
|
||||
|
@ -318,7 +321,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
|
|||
/* skb is now orphaned, can be freed outside of locked section */
|
||||
__kfree_skb(skb);
|
||||
}
|
||||
EXPORT_SYMBOL(skb_free_datagram_locked);
|
||||
EXPORT_SYMBOL(__skb_free_datagram_locked);
|
||||
|
||||
/**
|
||||
* skb_kill_datagram - Free a datagram skbuff forcibly
|
||||
|
|
|
@ -402,9 +402,8 @@ static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
|
|||
}
|
||||
|
||||
|
||||
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
||||
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
int err;
|
||||
unsigned long flags;
|
||||
struct sk_buff_head *list = &sk->sk_receive_queue;
|
||||
|
||||
|
@ -414,10 +413,6 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
err = sk_filter(sk, skb);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
|
||||
atomic_inc(&sk->sk_drops);
|
||||
return -ENOBUFS;
|
||||
|
@ -440,6 +435,18 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
|||
sk->sk_data_ready(sk);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(__sock_queue_rcv_skb);
|
||||
|
||||
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = sk_filter(sk, skb);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return __sock_queue_rcv_skb(sk, skb);
|
||||
}
|
||||
EXPORT_SYMBOL(sock_queue_rcv_skb);
|
||||
|
||||
int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
|
||||
|
@ -2180,6 +2187,15 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
|
|||
}
|
||||
EXPORT_SYMBOL(__sk_mem_reclaim);
|
||||
|
||||
int sk_set_peek_off(struct sock *sk, int val)
|
||||
{
|
||||
if (val < 0)
|
||||
return -EINVAL;
|
||||
|
||||
sk->sk_peek_off = val;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_set_peek_off);
|
||||
|
||||
/*
|
||||
* Set of default routines for initialising struct proto_ops when
|
||||
|
|
|
@ -948,6 +948,7 @@ const struct proto_ops inet_dgram_ops = {
|
|||
.recvmsg = inet_recvmsg,
|
||||
.mmap = sock_no_mmap,
|
||||
.sendpage = inet_sendpage,
|
||||
.set_peek_off = sk_set_peek_off,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_setsockopt = compat_sock_common_setsockopt,
|
||||
.compat_getsockopt = compat_sock_common_getsockopt,
|
||||
|
|
|
@ -1294,7 +1294,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
|
|||
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
|
||||
struct sk_buff *skb;
|
||||
unsigned int ulen, copied;
|
||||
int peeked, off = 0;
|
||||
int peeked, peeking, off;
|
||||
int err;
|
||||
int is_udplite = IS_UDPLITE(sk);
|
||||
bool checksum_valid = false;
|
||||
|
@ -1304,15 +1304,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
|
|||
return ip_recv_error(sk, msg, len, addr_len);
|
||||
|
||||
try_again:
|
||||
peeking = off = sk_peek_offset(sk, flags);
|
||||
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
|
||||
&peeked, &off, &err);
|
||||
if (!skb)
|
||||
goto out;
|
||||
return err;
|
||||
|
||||
ulen = skb->len - sizeof(struct udphdr);
|
||||
ulen = skb->len;
|
||||
copied = len;
|
||||
if (copied > ulen)
|
||||
copied = ulen;
|
||||
if (copied > ulen - off)
|
||||
copied = ulen - off;
|
||||
else if (copied < ulen)
|
||||
msg->msg_flags |= MSG_TRUNC;
|
||||
|
||||
|
@ -1322,18 +1323,16 @@ try_again:
|
|||
* coverage checksum (UDP-Lite), do it before the copy.
|
||||
*/
|
||||
|
||||
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
|
||||
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
|
||||
checksum_valid = !udp_lib_checksum_complete(skb);
|
||||
if (!checksum_valid)
|
||||
goto csum_copy_err;
|
||||
}
|
||||
|
||||
if (checksum_valid || skb_csum_unnecessary(skb))
|
||||
err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
|
||||
msg, copied);
|
||||
err = skb_copy_datagram_msg(skb, off, msg, copied);
|
||||
else {
|
||||
err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr),
|
||||
msg);
|
||||
err = skb_copy_and_csum_datagram_msg(skb, off, msg);
|
||||
|
||||
if (err == -EINVAL)
|
||||
goto csum_copy_err;
|
||||
|
@ -1346,7 +1345,8 @@ try_again:
|
|||
UDP_INC_STATS_USER(sock_net(sk),
|
||||
UDP_MIB_INERRORS, is_udplite);
|
||||
}
|
||||
goto out_free;
|
||||
skb_free_datagram_locked(sk, skb);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (!peeked)
|
||||
|
@ -1370,9 +1370,7 @@ try_again:
|
|||
if (flags & MSG_TRUNC)
|
||||
err = ulen;
|
||||
|
||||
out_free:
|
||||
skb_free_datagram_locked(sk, skb);
|
||||
out:
|
||||
__skb_free_datagram_locked(sk, skb, peeking ? -err : err);
|
||||
return err;
|
||||
|
||||
csum_copy_err:
|
||||
|
@ -1500,7 +1498,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
|||
sk_incoming_cpu_update(sk);
|
||||
}
|
||||
|
||||
rc = sock_queue_rcv_skb(sk, skb);
|
||||
rc = __sock_queue_rcv_skb(sk, skb);
|
||||
if (rc < 0) {
|
||||
int is_udplite = IS_UDPLITE(sk);
|
||||
|
||||
|
@ -1616,10 +1614,14 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
|||
}
|
||||
}
|
||||
|
||||
if (rcu_access_pointer(sk->sk_filter) &&
|
||||
udp_lib_checksum_complete(skb))
|
||||
goto csum_error;
|
||||
if (rcu_access_pointer(sk->sk_filter)) {
|
||||
if (udp_lib_checksum_complete(skb))
|
||||
goto csum_error;
|
||||
if (sk_filter(sk, skb))
|
||||
goto drop;
|
||||
}
|
||||
|
||||
udp_csum_pull_header(skb);
|
||||
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
|
||||
UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
|
||||
is_udplite);
|
||||
|
|
|
@ -561,6 +561,7 @@ const struct proto_ops inet6_dgram_ops = {
|
|||
.recvmsg = inet_recvmsg, /* ok */
|
||||
.mmap = sock_no_mmap,
|
||||
.sendpage = sock_no_sendpage,
|
||||
.set_peek_off = sk_set_peek_off,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_setsockopt = compat_sock_common_setsockopt,
|
||||
.compat_getsockopt = compat_sock_common_getsockopt,
|
||||
|
|
|
@ -357,7 +357,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
|||
struct inet_sock *inet = inet_sk(sk);
|
||||
struct sk_buff *skb;
|
||||
unsigned int ulen, copied;
|
||||
int peeked, off = 0;
|
||||
int peeked, peeking, off;
|
||||
int err;
|
||||
int is_udplite = IS_UDPLITE(sk);
|
||||
bool checksum_valid = false;
|
||||
|
@ -371,15 +371,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
|||
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
|
||||
|
||||
try_again:
|
||||
peeking = off = sk_peek_offset(sk, flags);
|
||||
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
|
||||
&peeked, &off, &err);
|
||||
if (!skb)
|
||||
goto out;
|
||||
return err;
|
||||
|
||||
ulen = skb->len - sizeof(struct udphdr);
|
||||
ulen = skb->len;
|
||||
copied = len;
|
||||
if (copied > ulen)
|
||||
copied = ulen;
|
||||
if (copied > ulen - off)
|
||||
copied = ulen - off;
|
||||
else if (copied < ulen)
|
||||
msg->msg_flags |= MSG_TRUNC;
|
||||
|
||||
|
@ -391,17 +392,16 @@ try_again:
|
|||
* coverage checksum (UDP-Lite), do it before the copy.
|
||||
*/
|
||||
|
||||
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
|
||||
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
|
||||
checksum_valid = !udp_lib_checksum_complete(skb);
|
||||
if (!checksum_valid)
|
||||
goto csum_copy_err;
|
||||
}
|
||||
|
||||
if (checksum_valid || skb_csum_unnecessary(skb))
|
||||
err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
|
||||
msg, copied);
|
||||
err = skb_copy_datagram_msg(skb, off, msg, copied);
|
||||
else {
|
||||
err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg);
|
||||
err = skb_copy_and_csum_datagram_msg(skb, off, msg);
|
||||
if (err == -EINVAL)
|
||||
goto csum_copy_err;
|
||||
}
|
||||
|
@ -418,7 +418,8 @@ try_again:
|
|||
UDP_MIB_INERRORS,
|
||||
is_udplite);
|
||||
}
|
||||
goto out_free;
|
||||
skb_free_datagram_locked(sk, skb);
|
||||
return err;
|
||||
}
|
||||
if (!peeked) {
|
||||
if (is_udp4)
|
||||
|
@ -466,9 +467,7 @@ try_again:
|
|||
if (flags & MSG_TRUNC)
|
||||
err = ulen;
|
||||
|
||||
out_free:
|
||||
skb_free_datagram_locked(sk, skb);
|
||||
out:
|
||||
__skb_free_datagram_locked(sk, skb, peeking ? -err : err);
|
||||
return err;
|
||||
|
||||
csum_copy_err:
|
||||
|
@ -554,7 +553,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
|||
sk_incoming_cpu_update(sk);
|
||||
}
|
||||
|
||||
rc = sock_queue_rcv_skb(sk, skb);
|
||||
rc = __sock_queue_rcv_skb(sk, skb);
|
||||
if (rc < 0) {
|
||||
int is_udplite = IS_UDPLITE(sk);
|
||||
|
||||
|
@ -648,8 +647,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
|||
if (rcu_access_pointer(sk->sk_filter)) {
|
||||
if (udp_lib_checksum_complete(skb))
|
||||
goto csum_error;
|
||||
if (sk_filter(sk, skb))
|
||||
goto drop;
|
||||
}
|
||||
|
||||
udp_csum_pull_header(skb);
|
||||
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
|
||||
UDP6_INC_STATS_BH(sock_net(sk),
|
||||
UDP_MIB_RCVBUFERRORS, is_udplite);
|
||||
|
|
Loading…
Reference in New Issue