Merge branch 'so_incoming_cpu'

Eric Dumazet says:

====================
net: SO_INCOMING_CPU support

The SO_INCOMING_CPU socket option (read via getsockopt()) provides
an alternative to RPS/RFS for high-performance servers using
multiqueue NICs.

TCP should use sk_mark_napi_id() for established sockets only.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2014-11-11 13:00:11 -05:00
commit b00394c007
21 changed files with 56 additions and 4 deletions

View File

@ -87,4 +87,6 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */

View File

@ -80,4 +80,6 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* _UAPI__ASM_AVR32_SOCKET_H */ #endif /* _UAPI__ASM_AVR32_SOCKET_H */

View File

@ -82,6 +82,8 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */

View File

@ -80,5 +80,7 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */

View File

@ -89,4 +89,6 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* _ASM_IA64_SOCKET_H */ #endif /* _ASM_IA64_SOCKET_H */

View File

@ -80,4 +80,6 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* _ASM_M32R_SOCKET_H */ #endif /* _ASM_M32R_SOCKET_H */

View File

@ -98,4 +98,6 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */

View File

@ -80,4 +80,6 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */

View File

@ -79,4 +79,6 @@
#define SO_BPF_EXTENSIONS 0x4029 #define SO_BPF_EXTENSIONS 0x4029
#define SO_INCOMING_CPU 0x402A
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */

View File

@ -87,4 +87,6 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* _ASM_POWERPC_SOCKET_H */ #endif /* _ASM_POWERPC_SOCKET_H */

View File

@ -86,4 +86,6 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */

View File

@ -76,6 +76,8 @@
#define SO_BPF_EXTENSIONS 0x0032 #define SO_BPF_EXTENSIONS 0x0032
#define SO_INCOMING_CPU 0x0033
/* Security levels - as per NRL IPv6 - don't actually do anything */ /* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002

View File

@ -91,4 +91,6 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* _XTENSA_SOCKET_H */ #endif /* _XTENSA_SOCKET_H */

View File

@ -273,6 +273,7 @@ struct cg_proto;
* @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting
* @sk_rxhash: flow hash received from netif layer * @sk_rxhash: flow hash received from netif layer
* @sk_incoming_cpu: record cpu processing incoming packets
* @sk_txhash: computed flow hash for use on transmit * @sk_txhash: computed flow hash for use on transmit
* @sk_filter: socket filtering instructions * @sk_filter: socket filtering instructions
* @sk_protinfo: private area, net family specific, when not using slab * @sk_protinfo: private area, net family specific, when not using slab
@ -350,6 +351,12 @@ struct sock {
#ifdef CONFIG_RPS #ifdef CONFIG_RPS
__u32 sk_rxhash; __u32 sk_rxhash;
#endif #endif
u16 sk_incoming_cpu;
/* 16bit hole
* Warning: sk_incoming_cpu can be set from softirq.
* Do not use this hole without fully understanding the possible issues.
*/
__u32 sk_txhash; __u32 sk_txhash;
#ifdef CONFIG_NET_RX_BUSY_POLL #ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sk_napi_id; unsigned int sk_napi_id;
@ -833,6 +840,11 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
return sk->sk_backlog_rcv(sk, skb); return sk->sk_backlog_rcv(sk, skb);
} }
/*
 * sk_incoming_cpu_update - remember which CPU is handling @sk's input
 * @sk: socket whose sk_incoming_cpu field is refreshed
 *
 * Stores the value of raw_smp_processor_id() (the CPU executing this
 * call) into sk->sk_incoming_cpu. The store is a plain assignment; no
 * locking is taken here.
 */
static inline void sk_incoming_cpu_update(struct sock *sk)
{
	const int this_cpu = raw_smp_processor_id();

	sk->sk_incoming_cpu = this_cpu;
}
static inline void sock_rps_record_flow_hash(__u32 hash) static inline void sock_rps_record_flow_hash(__u32 hash)
{ {
#ifdef CONFIG_RPS #ifdef CONFIG_RPS

View File

@ -82,4 +82,6 @@
#define SO_BPF_EXTENSIONS 48 #define SO_BPF_EXTENSIONS 48
#define SO_INCOMING_CPU 49
#endif /* __ASM_GENERIC_SOCKET_H */ #endif /* __ASM_GENERIC_SOCKET_H */

View File

@ -1213,6 +1213,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_max_pacing_rate; v.val = sk->sk_max_pacing_rate;
break; break;
case SO_INCOMING_CPU:
v.val = sk->sk_incoming_cpu;
break;
default: default:
return -ENOPROTOOPT; return -ENOPROTOOPT;
} }
@ -1517,6 +1521,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_err = 0; newsk->sk_err = 0;
newsk->sk_priority = 0; newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id();
/* /*
* Before updating sk_refcnt, we must commit prior changes to memory * Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details) * (Documentation/RCU/rculist_nulls.txt for details)

View File

@ -1429,6 +1429,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
struct dst_entry *dst = sk->sk_rx_dst; struct dst_entry *dst = sk->sk_rx_dst;
sock_rps_save_rxhash(sk, skb); sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
if (dst) { if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
dst->ops->check(dst, 0) == NULL) { dst->ops->check(dst, 0) == NULL) {
@ -1450,6 +1451,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
if (nsk != sk) { if (nsk != sk) {
sock_rps_save_rxhash(nsk, skb); sock_rps_save_rxhash(nsk, skb);
sk_mark_napi_id(sk, skb);
if (tcp_child_process(sk, nsk, skb)) { if (tcp_child_process(sk, nsk, skb)) {
rsk = nsk; rsk = nsk;
goto reset; goto reset;
@ -1661,7 +1663,7 @@ process:
if (sk_filter(sk, skb)) if (sk_filter(sk, skb))
goto discard_and_relse; goto discard_and_relse;
sk_mark_napi_id(sk, skb); sk_incoming_cpu_update(sk);
skb->dev = NULL; skb->dev = NULL;
bh_lock_sock_nested(sk); bh_lock_sock_nested(sk);

View File

@ -1445,6 +1445,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (inet_sk(sk)->inet_daddr) { if (inet_sk(sk)->inet_daddr) {
sock_rps_save_rxhash(sk, skb); sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb); sk_mark_napi_id(sk, skb);
sk_incoming_cpu_update(sk);
} }
rc = sock_queue_rcv_skb(sk, skb); rc = sock_queue_rcv_skb(sk, skb);

View File

@ -1293,6 +1293,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
struct dst_entry *dst = sk->sk_rx_dst; struct dst_entry *dst = sk->sk_rx_dst;
sock_rps_save_rxhash(sk, skb); sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
if (dst) { if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
dst->ops->check(dst, np->rx_dst_cookie) == NULL) { dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
@ -1322,6 +1323,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
*/ */
if (nsk != sk) { if (nsk != sk) {
sock_rps_save_rxhash(nsk, skb); sock_rps_save_rxhash(nsk, skb);
sk_mark_napi_id(sk, skb);
if (tcp_child_process(sk, nsk, skb)) if (tcp_child_process(sk, nsk, skb))
goto reset; goto reset;
if (opt_skb) if (opt_skb)
@ -1454,7 +1456,7 @@ process:
if (sk_filter(sk, skb)) if (sk_filter(sk, skb))
goto discard_and_relse; goto discard_and_relse;
sk_mark_napi_id(sk, skb); sk_incoming_cpu_update(sk);
skb->dev = NULL; skb->dev = NULL;
bh_lock_sock_nested(sk); bh_lock_sock_nested(sk);

View File

@ -577,6 +577,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (!ipv6_addr_any(&sk->sk_v6_daddr)) { if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
sock_rps_save_rxhash(sk, skb); sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb); sk_mark_napi_id(sk, skb);
sk_incoming_cpu_update(sk);
} }
rc = sock_queue_rcv_skb(sk, skb); rc = sock_queue_rcv_skb(sk, skb);

View File

@ -205,9 +205,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN)) if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN))
goto out_free; goto out_free;
if (!sctp_ulpevent_is_notification(event)) if (!sctp_ulpevent_is_notification(event)) {
sk_mark_napi_id(sk, skb); sk_mark_napi_id(sk, skb);
sk_incoming_cpu_update(sk);
}
/* Check if the user wishes to receive this event. */ /* Check if the user wishes to receive this event. */
if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe)) if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe))
goto out_free; goto out_free;