sock: add tracepoint for send recv length
Add 2 tracepoints to monitor the tcp/udp traffic per process and per cgroup. Regarding monitoring the tcp/udp traffic of each process, there are two existing solutions: the first one is https://www.atoptool.nl/netatop.php, and the second is via kprobe/kretprobe. The netatop solution is implemented by registering hook functions at the hook points provided by the netfilter framework. These hook functions may run in soft interrupt context and cannot directly obtain the pid. Some data structures are added to bind packets and processes, for example struct taskinfobucket, struct taskinfo ... Every time the process sends and receives packets it needs multiple hashmaps, resulting in low performance, and it has the problem of inaccurate tcp/udp traffic statistics (for example: multiple threads share sockets). We can obtain the information with kretprobe, but as we know, kprobe gets the result by trapping into an exception, which loses performance compared to tracepoint. We compared the performance of tracepoints with the above two methods, and the results are as follows: ab -n 1000000 -c 1000 -r http://127.0.0.1/index.html without trace: Time per request: 39.660 [ms] (mean) Time per request: 0.040 [ms] (mean, across all concurrent requests) netatop: Time per request: 50.717 [ms] (mean) Time per request: 0.051 [ms] (mean, across all concurrent requests) kretprobe: Time per request: 43.168 [ms] (mean) Time per request: 0.043 [ms] (mean, across all concurrent requests) tracepoint: Time per request: 41.004 [ms] (mean) Time per request: 0.041 [ms] (mean, across all concurrent requests) It can be seen that tracepoint has better performance. Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com> Signed-off-by: Xiongchun Duan <duanxiongchun@bytedance.com> Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
8e8b6c63cc
commit
6e6eda44b9
|
@ -263,6 +263,51 @@ TRACE_EVENT(inet_sk_error_report,
|
||||||
__entry->error)
|
__entry->error)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* sock send/recv msg length
|
||||||
|
*/
|
||||||
|
DECLARE_EVENT_CLASS(sock_msg_length,
|
||||||
|
|
||||||
|
TP_PROTO(struct sock *sk, int ret, int flags),
|
||||||
|
|
||||||
|
TP_ARGS(sk, ret, flags),
|
||||||
|
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(void *, sk)
|
||||||
|
__field(__u16, family)
|
||||||
|
__field(__u16, protocol)
|
||||||
|
__field(int, ret)
|
||||||
|
__field(int, flags)
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->sk = sk;
|
||||||
|
__entry->family = sk->sk_family;
|
||||||
|
__entry->protocol = sk->sk_protocol;
|
||||||
|
__entry->ret = ret;
|
||||||
|
__entry->flags = flags;
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_printk("sk address = %p, family = %s protocol = %s, length = %d, error = %d, flags = 0x%x",
|
||||||
|
__entry->sk, show_family_name(__entry->family),
|
||||||
|
show_inet_protocol_name(__entry->protocol),
|
||||||
|
!(__entry->flags & MSG_PEEK) ?
|
||||||
|
(__entry->ret > 0 ? __entry->ret : 0) : 0,
|
||||||
|
__entry->ret < 0 ? __entry->ret : 0,
|
||||||
|
__entry->flags)
|
||||||
|
);
|
||||||
|
|
||||||
|
DEFINE_EVENT(sock_msg_length, sock_send_length,
|
||||||
|
TP_PROTO(struct sock *sk, int ret, int flags),
|
||||||
|
|
||||||
|
TP_ARGS(sk, ret, flags)
|
||||||
|
);
|
||||||
|
|
||||||
|
DEFINE_EVENT(sock_msg_length, sock_recv_length,
|
||||||
|
TP_PROTO(struct sock *sk, int ret, int flags),
|
||||||
|
|
||||||
|
TP_ARGS(sk, ret, flags)
|
||||||
|
);
|
||||||
#endif /* _TRACE_SOCK_H */
|
#endif /* _TRACE_SOCK_H */
|
||||||
|
|
||||||
/* This part must be outside protection */
|
/* This part must be outside protection */
|
||||||
|
|
33
net/socket.c
33
net/socket.c
|
@ -106,6 +106,7 @@
|
||||||
#include <net/busy_poll.h>
|
#include <net/busy_poll.h>
|
||||||
#include <linux/errqueue.h>
|
#include <linux/errqueue.h>
|
||||||
#include <linux/ptp_clock_kernel.h>
|
#include <linux/ptp_clock_kernel.h>
|
||||||
|
#include <trace/events/sock.h>
|
||||||
|
|
||||||
#ifdef CONFIG_NET_RX_BUSY_POLL
|
#ifdef CONFIG_NET_RX_BUSY_POLL
|
||||||
unsigned int sysctl_net_busy_read __read_mostly;
|
unsigned int sysctl_net_busy_read __read_mostly;
|
||||||
|
@ -709,12 +710,22 @@ INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
|
||||||
size_t));
|
size_t));
|
||||||
INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
|
INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
|
||||||
size_t));
|
size_t));
|
||||||
|
|
||||||
|
/*
 * Out-of-line (noinline) wrapper that emits the sock_send_length tracepoint.
 *
 * @sk:    socket handed to the tracepoint
 * @ret:   value reported to the tracepoint as the send result
 * @flags: accepted but not forwarded; the tracepoint is always emitted
 *         with a flags value of 0
 */
static noinline void call_trace_sock_send_length(struct sock *sk, int ret, int flags)
{
	trace_sock_send_length(sk, ret, 0);
}
|
||||||
|
|
||||||
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
|
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
|
||||||
{
|
{
|
||||||
int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
|
int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
|
||||||
inet_sendmsg, sock, msg,
|
inet_sendmsg, sock, msg,
|
||||||
msg_data_left(msg));
|
msg_data_left(msg));
|
||||||
BUG_ON(ret == -EIOCBQUEUED);
|
BUG_ON(ret == -EIOCBQUEUED);
|
||||||
|
|
||||||
|
if (trace_sock_send_length_enabled())
|
||||||
|
call_trace_sock_send_length(sock->sk, ret, 0);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -989,12 +1000,21 @@ INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
|
||||||
size_t, int));
|
size_t, int));
|
||||||
INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
|
INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
|
||||||
size_t, int));
|
size_t, int));
|
||||||
|
|
||||||
|
/*
 * Out-of-line (noinline) wrapper that emits the sock_recv_length tracepoint.
 *
 * @sk:    socket handed to the tracepoint
 * @ret:   value reported to the tracepoint as the receive result
 * @flags: message flags, forwarded to the tracepoint unchanged
 */
static noinline void call_trace_sock_recv_length(struct sock *sk, int ret,
						 int flags)
{
	trace_sock_recv_length(sk, ret, flags);
}
|
||||||
|
|
||||||
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
|
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
|
||||||
int flags)
|
int flags)
|
||||||
{
|
{
|
||||||
return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
|
int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
|
||||||
inet_recvmsg, sock, msg, msg_data_left(msg),
|
inet_recvmsg, sock, msg,
|
||||||
flags);
|
msg_data_left(msg), flags);
|
||||||
|
if (trace_sock_recv_length_enabled())
|
||||||
|
call_trace_sock_recv_length(sock->sk, ret, flags);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1044,6 +1064,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
|
||||||
{
|
{
|
||||||
struct socket *sock;
|
struct socket *sock;
|
||||||
int flags;
|
int flags;
|
||||||
|
int ret;
|
||||||
|
|
||||||
sock = file->private_data;
|
sock = file->private_data;
|
||||||
|
|
||||||
|
@ -1051,7 +1072,11 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
|
||||||
/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
|
/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
|
||||||
flags |= more;
|
flags |= more;
|
||||||
|
|
||||||
return kernel_sendpage(sock, page, offset, size, flags);
|
ret = kernel_sendpage(sock, page, offset, size, flags);
|
||||||
|
|
||||||
|
if (trace_sock_send_length_enabled())
|
||||||
|
call_trace_sock_send_length(sock->sk, ret, 0);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
|
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
|
||||||
|
|
Loading…
Reference in New Issue