udp: enable MSG_PEEK at non-zero offset

Enable peeking at UDP datagrams at the offset specified with socket
option SOL_SOCKET/SO_PEEK_OFF. Peek at any datagram in the queue, up
to the end of the given datagram.

Implement the SO_PEEK_OFF semantics introduced in commit ef64a54f6e
("sock: Introduce the SO_PEEK_OFF sock option"). Increase the offset
on peek, decrease it on regular reads.

When peeking, always checksum the packet immediately, to avoid
recomputation on subsequent peeks and final read.

The socket lock is not held for the duration of udp_recvmsg, so
peek and read operations can run concurrently. Only the last store
to sk_peek_off is preserved.

Signed-off-by: Sam Kumar <samanthakumar@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
samanthakumar 2016-04-05 12:41:16 -04:00 committed by David S. Miller
parent e6afc8ace6
commit 627d2d6b55
8 changed files with 47 additions and 26 deletions

View File

@ -2949,7 +2949,12 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
struct iov_iter *from, int len); struct iov_iter *from, int len);
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
void skb_free_datagram(struct sock *sk, struct sk_buff *skb); void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len);
/*
 * skb_free_datagram_locked - compatibility wrapper for existing callers.
 *
 * Forwards to __skb_free_datagram_locked() with a len argument of 0, so
 * callers that do not supply a length keep the two-argument signature.
 */
static inline void skb_free_datagram_locked(struct sock *sk,
struct sk_buff *skb)
{
__skb_free_datagram_locked(sk, skb, 0);
}
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);

View File

@ -457,6 +457,8 @@ struct sock {
#define SK_CAN_REUSE 1 #define SK_CAN_REUSE 1
#define SK_FORCE_REUSE 2 #define SK_FORCE_REUSE 2
int sk_set_peek_off(struct sock *sk, int val);
static inline int sk_peek_offset(struct sock *sk, int flags) static inline int sk_peek_offset(struct sock *sk, int flags)
{ {
if (unlikely(flags & MSG_PEEK)) { if (unlikely(flags & MSG_PEEK)) {

View File

@ -301,16 +301,19 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
} }
EXPORT_SYMBOL(skb_free_datagram); EXPORT_SYMBOL(skb_free_datagram);
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{ {
bool slow; bool slow;
if (likely(atomic_read(&skb->users) == 1)) if (likely(atomic_read(&skb->users) == 1))
smp_rmb(); smp_rmb();
else if (likely(!atomic_dec_and_test(&skb->users))) else if (likely(!atomic_dec_and_test(&skb->users))) {
sk_peek_offset_bwd(sk, len);
return; return;
}
slow = lock_sock_fast(sk); slow = lock_sock_fast(sk);
sk_peek_offset_bwd(sk, len);
skb_orphan(skb); skb_orphan(skb);
sk_mem_reclaim_partial(sk); sk_mem_reclaim_partial(sk);
unlock_sock_fast(sk, slow); unlock_sock_fast(sk, slow);
@ -318,7 +321,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
/* skb is now orphaned, can be freed outside of locked section */ /* skb is now orphaned, can be freed outside of locked section */
__kfree_skb(skb); __kfree_skb(skb);
} }
EXPORT_SYMBOL(skb_free_datagram_locked); EXPORT_SYMBOL(__skb_free_datagram_locked);
/** /**
* skb_kill_datagram - Free a datagram skbuff forcibly * skb_kill_datagram - Free a datagram skbuff forcibly

View File

@ -2187,6 +2187,15 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
} }
EXPORT_SYMBOL(__sk_mem_reclaim); EXPORT_SYMBOL(__sk_mem_reclaim);
/*
 * sk_set_peek_off - store a new peek offset on a socket.
 * @sk:  socket whose sk_peek_off field is updated
 * @val: requested offset
 *
 * Returns 0 after storing @val in sk->sk_peek_off, or -EINVAL (leaving
 * the field untouched) when @val is negative.
 */
int sk_set_peek_off(struct sock *sk, int val)
{
	if (val >= 0) {
		sk->sk_peek_off = val;
		return 0;
	}

	/* Negative offsets are refused by this setter. */
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
/* /*
* Set of default routines for initialising struct proto_ops when * Set of default routines for initialising struct proto_ops when

View File

@ -948,6 +948,7 @@ const struct proto_ops inet_dgram_ops = {
.recvmsg = inet_recvmsg, .recvmsg = inet_recvmsg,
.mmap = sock_no_mmap, .mmap = sock_no_mmap,
.sendpage = inet_sendpage, .sendpage = inet_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt, .compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt, .compat_getsockopt = compat_sock_common_getsockopt,

View File

@ -1294,7 +1294,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
struct sk_buff *skb; struct sk_buff *skb;
unsigned int ulen, copied; unsigned int ulen, copied;
int peeked, off = 0; int peeked, peeking, off;
int err; int err;
int is_udplite = IS_UDPLITE(sk); int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false; bool checksum_valid = false;
@ -1304,15 +1304,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
return ip_recv_error(sk, msg, len, addr_len); return ip_recv_error(sk, msg, len, addr_len);
try_again: try_again:
peeking = off = sk_peek_offset(sk, flags);
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, &off, &err); &peeked, &off, &err);
if (!skb) if (!skb)
goto out; return err;
ulen = skb->len; ulen = skb->len;
copied = len; copied = len;
if (copied > ulen) if (copied > ulen - off)
copied = ulen; copied = ulen - off;
else if (copied < ulen) else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC; msg->msg_flags |= MSG_TRUNC;
@ -1322,16 +1323,16 @@ try_again:
* coverage checksum (UDP-Lite), do it before the copy. * coverage checksum (UDP-Lite), do it before the copy.
*/ */
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
checksum_valid = !udp_lib_checksum_complete(skb); checksum_valid = !udp_lib_checksum_complete(skb);
if (!checksum_valid) if (!checksum_valid)
goto csum_copy_err; goto csum_copy_err;
} }
if (checksum_valid || skb_csum_unnecessary(skb)) if (checksum_valid || skb_csum_unnecessary(skb))
err = skb_copy_datagram_msg(skb, 0, msg, copied); err = skb_copy_datagram_msg(skb, off, msg, copied);
else { else {
err = skb_copy_and_csum_datagram_msg(skb, 0, msg); err = skb_copy_and_csum_datagram_msg(skb, off, msg);
if (err == -EINVAL) if (err == -EINVAL)
goto csum_copy_err; goto csum_copy_err;
@ -1344,7 +1345,8 @@ try_again:
UDP_INC_STATS_USER(sock_net(sk), UDP_INC_STATS_USER(sock_net(sk),
UDP_MIB_INERRORS, is_udplite); UDP_MIB_INERRORS, is_udplite);
} }
goto out_free; skb_free_datagram_locked(sk, skb);
return err;
} }
if (!peeked) if (!peeked)
@ -1368,9 +1370,7 @@ try_again:
if (flags & MSG_TRUNC) if (flags & MSG_TRUNC)
err = ulen; err = ulen;
out_free: __skb_free_datagram_locked(sk, skb, peeking ? -err : err);
skb_free_datagram_locked(sk, skb);
out:
return err; return err;
csum_copy_err: csum_copy_err:

View File

@ -561,6 +561,7 @@ const struct proto_ops inet6_dgram_ops = {
.recvmsg = inet_recvmsg, /* ok */ .recvmsg = inet_recvmsg, /* ok */
.mmap = sock_no_mmap, .mmap = sock_no_mmap,
.sendpage = sock_no_sendpage, .sendpage = sock_no_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt, .compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt, .compat_getsockopt = compat_sock_common_getsockopt,

View File

@ -357,7 +357,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct inet_sock *inet = inet_sk(sk); struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb; struct sk_buff *skb;
unsigned int ulen, copied; unsigned int ulen, copied;
int peeked, off = 0; int peeked, peeking, off;
int err; int err;
int is_udplite = IS_UDPLITE(sk); int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false; bool checksum_valid = false;
@ -371,15 +371,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return ipv6_recv_rxpmtu(sk, msg, len, addr_len); return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again: try_again:
peeking = off = sk_peek_offset(sk, flags);
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, &off, &err); &peeked, &off, &err);
if (!skb) if (!skb)
goto out; return err;
ulen = skb->len; ulen = skb->len;
copied = len; copied = len;
if (copied > ulen) if (copied > ulen - off)
copied = ulen; copied = ulen - off;
else if (copied < ulen) else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC; msg->msg_flags |= MSG_TRUNC;
@ -391,16 +392,16 @@ try_again:
* coverage checksum (UDP-Lite), do it before the copy. * coverage checksum (UDP-Lite), do it before the copy.
*/ */
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
checksum_valid = !udp_lib_checksum_complete(skb); checksum_valid = !udp_lib_checksum_complete(skb);
if (!checksum_valid) if (!checksum_valid)
goto csum_copy_err; goto csum_copy_err;
} }
if (checksum_valid || skb_csum_unnecessary(skb)) if (checksum_valid || skb_csum_unnecessary(skb))
err = skb_copy_datagram_msg(skb, 0, msg, copied); err = skb_copy_datagram_msg(skb, off, msg, copied);
else { else {
err = skb_copy_and_csum_datagram_msg(skb, 0, msg); err = skb_copy_and_csum_datagram_msg(skb, off, msg);
if (err == -EINVAL) if (err == -EINVAL)
goto csum_copy_err; goto csum_copy_err;
} }
@ -417,7 +418,8 @@ try_again:
UDP_MIB_INERRORS, UDP_MIB_INERRORS,
is_udplite); is_udplite);
} }
goto out_free; skb_free_datagram_locked(sk, skb);
return err;
} }
if (!peeked) { if (!peeked) {
if (is_udp4) if (is_udp4)
@ -465,9 +467,7 @@ try_again:
if (flags & MSG_TRUNC) if (flags & MSG_TRUNC)
err = ulen; err = ulen;
out_free: __skb_free_datagram_locked(sk, skb, peeking ? -err : err);
skb_free_datagram_locked(sk, skb);
out:
return err; return err;
csum_copy_err: csum_copy_err: