ipv4: PKTINFO doesn't need dst reference
Le lundi 07 novembre 2011 à 15:33 +0100, Eric Dumazet a écrit : > At least, in recent kernels we don't change dst->refcnt in the forwarding > path (using NOREF skb->dst) > > One particular point is the atomic_inc(dst->refcnt) we have to perform > when queuing a UDP packet if the socket asked for PKTINFO stuff (for example a > typical DNS server has to set up this option) > > I have one patch somewhere that stores the information in skb->cb[] and > avoids the atomic_{inc|dec}(dst->refcnt). > OK I found it, I did some extra tests and believe it's ready. [PATCH net-next] ipv4: IP_PKTINFO doesn't need dst reference When a socket uses IP_PKTINFO notifications, we currently force a dst reference for each received skb. The reader has to access dst to get the needed information (rt_iif & rt_spec_dst) and must release the dst reference. We also forced a dst reference if the skb was put in the socket backlog, even without IP_PKTINFO handling. This happens under stress/load. We can instead store the needed information in skb->cb[], so that only the softirq handler really accesses dst, improving cache hit ratios. This removes two atomic operations per packet, and false sharing as well. On a benchmark using a single-threaded receiver (doing only recvmsg() calls), I can reach 720.000 pps instead of 570.000 pps. IP_PKTINFO is typically used by DNS servers, and any multihomed-aware UDP application. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
acb32ba3de
commit
d826eb14ec
|
@ -450,7 +450,7 @@ extern int ip_options_rcv_srr(struct sk_buff *skb);
|
|||
* Functions provided by ip_sockglue.c
|
||||
*/
|
||||
|
||||
extern int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
|
||||
extern void ipv4_pktinfo_prepare(struct sk_buff *skb);
|
||||
extern void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb);
|
||||
extern int ip_cmsg_send(struct net *net,
|
||||
struct msghdr *msg, struct ipcm_cookie *ipc);
|
||||
|
|
|
@ -55,20 +55,13 @@
|
|||
/*
|
||||
* SOL_IP control messages.
|
||||
*/
|
||||
#define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb))
|
||||
|
||||
static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
|
||||
{
|
||||
struct in_pktinfo info;
|
||||
struct rtable *rt = skb_rtable(skb);
|
||||
struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
|
||||
|
||||
info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
|
||||
if (rt) {
|
||||
info.ipi_ifindex = rt->rt_iif;
|
||||
info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
|
||||
} else {
|
||||
info.ipi_ifindex = 0;
|
||||
info.ipi_spec_dst.s_addr = 0;
|
||||
}
|
||||
|
||||
put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
|
||||
}
|
||||
|
@ -992,20 +985,28 @@ e_inval:
|
|||
}
|
||||
|
||||
/**
|
||||
* ip_queue_rcv_skb - Queue an skb into sock receive queue
|
||||
* ipv4_pktinfo_prepare - transfer some info from rtable to skb
|
||||
* @sk: socket
|
||||
* @skb: buffer
|
||||
*
|
||||
* Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option
|
||||
* is not set, we drop skb dst entry now, while dst cache line is hot.
|
||||
* To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst
|
||||
* in skb->cb[] before dst drop.
|
||||
* This way, the receiver doesn't incur cache line misses to read rtable.
|
||||
*/
|
||||
int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
||||
void ipv4_pktinfo_prepare(struct sk_buff *skb)
|
||||
{
|
||||
if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
|
||||
skb_dst_drop(skb);
|
||||
return sock_queue_rcv_skb(sk, skb);
|
||||
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
|
||||
const struct rtable *rt = skb_rtable(skb);
|
||||
|
||||
if (rt) {
|
||||
pktinfo->ipi_ifindex = rt->rt_iif;
|
||||
pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst;
|
||||
} else {
|
||||
pktinfo->ipi_ifindex = 0;
|
||||
pktinfo->ipi_spec_dst.s_addr = 0;
|
||||
}
|
||||
skb_dst_drop(skb);
|
||||
}
|
||||
EXPORT_SYMBOL(ip_queue_rcv_skb);
|
||||
|
||||
int ip_setsockopt(struct sock *sk, int level,
|
||||
int optname, char __user *optval, unsigned int optlen)
|
||||
|
|
|
@ -292,7 +292,8 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
|
|||
{
|
||||
/* Charge it to the socket. */
|
||||
|
||||
if (ip_queue_rcv_skb(sk, skb) < 0) {
|
||||
ipv4_pktinfo_prepare(skb);
|
||||
if (sock_queue_rcv_skb(sk, skb) < 0) {
|
||||
kfree_skb(skb);
|
||||
return NET_RX_DROP;
|
||||
}
|
||||
|
|
|
@ -1357,7 +1357,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
|||
if (inet_sk(sk)->inet_daddr)
|
||||
sock_rps_save_rxhash(sk, skb);
|
||||
|
||||
rc = ip_queue_rcv_skb(sk, skb);
|
||||
rc = sock_queue_rcv_skb(sk, skb);
|
||||
if (rc < 0) {
|
||||
int is_udplite = IS_UDPLITE(sk);
|
||||
|
||||
|
@ -1473,6 +1473,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
|||
|
||||
rc = 0;
|
||||
|
||||
ipv4_pktinfo_prepare(skb);
|
||||
bh_lock_sock(sk);
|
||||
if (!sock_owned_by_user(sk))
|
||||
rc = __udp_queue_rcv_skb(sk, skb);
|
||||
|
|
|
@ -383,7 +383,8 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
|||
}
|
||||
|
||||
/* Charge it to the socket. */
|
||||
if (ip_queue_rcv_skb(sk, skb) < 0) {
|
||||
skb_dst_drop(skb);
|
||||
if (sock_queue_rcv_skb(sk, skb) < 0) {
|
||||
kfree_skb(skb);
|
||||
return NET_RX_DROP;
|
||||
}
|
||||
|
|
|
@ -538,7 +538,9 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
|
|||
goto drop;
|
||||
}
|
||||
|
||||
if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) {
|
||||
skb_dst_drop(skb);
|
||||
rc = sock_queue_rcv_skb(sk, skb);
|
||||
if (rc < 0) {
|
||||
/* Note that an ENOMEM error is charged twice */
|
||||
if (rc == -ENOMEM)
|
||||
UDP6_INC_STATS_BH(sock_net(sk),
|
||||
|
|
Loading…
Reference in New Issue