517 lines
11 KiB
C
517 lines
11 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/* Net Latency Monitor based on the Quality Monitor Buffer.
 * Aims to provide network latency information on a long-running system.
 *
 * Author: mengensun <mengensun@tencent.com>
 * Author: yuehongwu <yuehongwu@tencent.com>
 * Copyright (C) 2024 Tencent, Inc
 */
|
|
|
|
#include<net/net_namespace.h>
|
|
#include<net/tcp.h>
|
|
#include<net/netns/generic.h>
|
|
#include<net/netns_mbuf.h>
|
|
#include "netlat.h"
|
|
|
|
/* Per-netns netlat state, looked up with net_generic(net, netlat_net_id). */
struct netlat_net_data {
	/* "ack" sysctl value; netlat_ack_check() does nothing while it is 0 */
	int ack;
	/* "pick" sysctl value; netlat_pick_check() does nothing while it is 0 */
	int pick;
	/* "queue" sysctl value; netlat_queue_check() does nothing while it is 0 */
	int queue;
	/* "enable" sysctl value; only wired to a sysctl entry in init_net */
	int enable;
	/* 65536-bit bitmap behind the "lports" sysctl, indexed by sk->sk_num */
	unsigned long *ports;
	/* handle returned by register_net_sysctl_sz() for this netns */
	struct ctl_table_header *netlat_hdr;
};
|
|
|
|
/* Key passed to net_generic() to reach struct netlat_net_data; handed to
 * the pernet core through netlat_net_ops.id.
 */
static unsigned int netlat_net_id __read_mostly;

/* Global switch tested first by every netlat hook; starts disabled and is
 * flipped by handle_netlat_enable().
 */
DEFINE_STATIC_KEY_FALSE(enable_netlat);
|
|
|
|
static inline int get_ack_lat(struct net *net)
|
|
{
|
|
struct netlat_net_data *pdata;
|
|
|
|
pdata = net_generic(net, netlat_net_id);
|
|
return pdata->ack;
|
|
}
|
|
|
|
static inline int get_pick_lat(struct net *net)
|
|
{
|
|
struct netlat_net_data *pdata;
|
|
|
|
pdata = net_generic(net, netlat_net_id);
|
|
return pdata->pick;
|
|
}
|
|
|
|
static inline int get_queue_lat(struct net *net)
|
|
{
|
|
struct netlat_net_data *pdata;
|
|
|
|
pdata = net_generic(net, netlat_net_id);
|
|
return pdata->queue;
|
|
}
|
|
|
|
static inline long *get_net_ports(struct net *net)
|
|
{
|
|
struct netlat_net_data *pdata;
|
|
|
|
pdata = net_generic(net, netlat_net_id);
|
|
return pdata->ports;
|
|
}
|
|
|
|
static inline u32 get_rtxq_skb_jiffies(struct sk_buff *skb)
|
|
{
|
|
return TCP_SKB_CB(skb)->first_xmit_time;
|
|
}
|
|
|
|
static inline void set_rtxq_skb_jiffies(struct sk_buff *skb)
|
|
{
|
|
TCP_SKB_CB(skb)->first_xmit_time = tcp_jiffies32;
|
|
}
|
|
|
|
/* sk is not used for now, but, may be used in the future
|
|
*/
|
|
void netlat_copy_rtxq_skb(struct sock *sk, struct sk_buff *dst,
|
|
struct sk_buff *src)
|
|
{
|
|
if (!static_branch_unlikely(&enable_netlat))
|
|
return;
|
|
TCP_SKB_CB(dst)->first_xmit_time = TCP_SKB_CB(src)->first_xmit_time;
|
|
}
|
|
EXPORT_SYMBOL(netlat_copy_rtxq_skb);
|
|
|
|
static inline u32 tcp_jiffies32_delt(struct sk_buff *skb)
|
|
{
|
|
u32 j1, j2;
|
|
|
|
j1 = tcp_jiffies32;
|
|
j2 = get_rtxq_skb_jiffies(skb);
|
|
|
|
/* here leave a small time windows
|
|
* when skb is alloced ack_num is inited to 0
|
|
* if we do not touch the time stamp in ack_num
|
|
* it is zero
|
|
*/
|
|
if (!j2)
|
|
return 0;
|
|
|
|
if (likely(j1 >= j2))
|
|
return j1 - j2;
|
|
/* when u32 is wrap around */
|
|
return U32_MAX - (j2 - j1) + 1;
|
|
}
|
|
|
|
/* sk is not used for now, but, may be used in the future
|
|
*/
|
|
void netlat_tcp_enrtxqueue(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
if (!static_branch_unlikely(&enable_netlat))
|
|
return;
|
|
set_rtxq_skb_jiffies(skb);
|
|
}
|
|
EXPORT_SYMBOL(netlat_tcp_enrtxqueue);
|
|
|
|
/* print msg to per net mbuf when ack latency is
|
|
* watched
|
|
*/
|
|
void netlat_ack_check(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
struct net *net;
|
|
s64 thresh;
|
|
s64 lat;
|
|
long *ports;
|
|
|
|
if (!static_branch_unlikely(&enable_netlat))
|
|
return;
|
|
|
|
net = sock_net(sk);
|
|
|
|
thresh = get_ack_lat(net);
|
|
if (!thresh)
|
|
return;
|
|
|
|
lat = tcp_jiffies32_delt(skb);
|
|
if (lat < thresh)
|
|
return;
|
|
|
|
ports = get_net_ports(net);
|
|
if (!test_bit(sk->sk_num, ports))
|
|
return;
|
|
|
|
net_mbuf_print(net, "TCP AC %u %pI4 %d %pI4 %d\n",
|
|
(unsigned int)(jiffies_to_msecs(lat)),
|
|
&sk->sk_rcv_saddr, (int)sk->sk_num,
|
|
&sk->sk_daddr, (int)ntohs(sk->sk_dport));
|
|
}
|
|
EXPORT_SYMBOL(netlat_ack_check);
|
|
|
|
/* netlat/enable is only visible in the root netns.
 *
 * handle_net_timestamp(), handle_netlat_enable() and
 * handle_net_timestamp_exit() must be called with `lock` held.
 * Under that lock the following rules apply:
 *
 * 1. When `enable` is switched off: if we had turned net_timestamp on,
 *    turn it off again.
 *
 * 2. When `enable` is switched on: if `pick`/`queue` need net_timestamp,
 *    turn it on.
 *
 * 3. When `pick`/`queue` is written and needs net_timestamp while
 *    `enable` is off: only record that net_timestamp is needed and do
 *    nothing else, leaving the rest to rule 2.
 *
 * 4. When `pick`/`queue` is written and needs net_timestamp while
 *    `enable` is on: turn net_timestamp on directly.
 */
static DEFINE_MUTEX(lock);

/* number of non-zero `pick`/`queue` settings, i.e. users of net_timestamp */
static unsigned long need_time_stamp;
|
|
|
|
/* for pick/queue write: see comment above */
|
|
static void handle_net_timestamp(bool closed)
|
|
{
|
|
/*!0->0*/
|
|
if (closed) {
|
|
need_time_stamp--;
|
|
if (need_time_stamp == 0 &&
|
|
static_branch_unlikely(&enable_netlat))
|
|
net_disable_timestamp();
|
|
return;
|
|
}
|
|
|
|
/*0->!0*/
|
|
need_time_stamp++;
|
|
if (need_time_stamp == 1 &&
|
|
static_branch_unlikely(&enable_netlat))
|
|
net_enable_timestamp();
|
|
}
|
|
|
|
/* for enable write: see comment above */
|
|
static void handle_netlat_enable(bool closed)
|
|
{
|
|
/*!0->0*/
|
|
if (closed) {
|
|
if (need_time_stamp)
|
|
net_disable_timestamp();
|
|
static_branch_disable(&enable_netlat);
|
|
return;
|
|
}
|
|
|
|
/*0->!0*/
|
|
if (need_time_stamp)
|
|
net_enable_timestamp();
|
|
static_branch_enable(&enable_netlat);
|
|
}
|
|
|
|
/* for netns exits: see comment above */
|
|
static void handle_net_timestamp_exit(bool queue, bool pick)
|
|
{
|
|
need_time_stamp -= queue;
|
|
need_time_stamp -= pick;
|
|
|
|
if (!static_branch_unlikely(&enable_netlat))
|
|
return;
|
|
/* if we dec the counter to zero and netlat enabled
|
|
* disable the timestamp
|
|
*/
|
|
if (!need_time_stamp && (queue || pick))
|
|
net_disable_timestamp();
|
|
}
|
|
|
|
static int proc_do_netlat_pick(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
int prev;
|
|
int ret;
|
|
struct netlat_net_data *pdata;
|
|
|
|
mutex_lock(&lock);
|
|
|
|
pdata = container_of(table->data, struct netlat_net_data, pick);
|
|
prev = pdata->pick;
|
|
|
|
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
|
|
|
/* only change timestamp from 0->!0 or !0->0 */
|
|
if (!!prev == !!pdata->pick)
|
|
goto unlock;
|
|
handle_net_timestamp(!!prev);
|
|
|
|
unlock:
|
|
mutex_unlock(&lock);
|
|
return ret;
|
|
}
|
|
|
|
static int proc_do_netlat_queue(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
int prev;
|
|
int ret;
|
|
struct netlat_net_data *pdata;
|
|
|
|
mutex_lock(&lock);
|
|
pdata = container_of(table->data, struct netlat_net_data, queue);
|
|
prev = pdata->queue;
|
|
|
|
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
|
|
|
/* only change timestamp from 0->!0 or !0->0 */
|
|
if (!!prev == !!pdata->queue)
|
|
goto unlock;
|
|
handle_net_timestamp(!!prev);
|
|
|
|
unlock:
|
|
mutex_unlock(&lock);
|
|
return ret;
|
|
}
|
|
|
|
static int proc_do_netlat_enable(struct ctl_table *table, int write,
|
|
void __user *buffer,
|
|
size_t *lenp, loff_t *ppos)
|
|
{
|
|
int prev;
|
|
int ret;
|
|
struct netlat_net_data *pdata;
|
|
|
|
mutex_lock(&lock);
|
|
|
|
pdata = container_of(table->data, struct netlat_net_data, enable);
|
|
prev = pdata->enable;
|
|
|
|
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
|
|
|
if (!!prev == !!pdata->enable)
|
|
goto unlock;
|
|
handle_netlat_enable(!!prev);
|
|
|
|
unlock:
|
|
mutex_unlock(&lock);
|
|
return ret;
|
|
}
|
|
|
|
static struct ctl_table ipv4_netlat[] = {
|
|
{
|
|
.procname = "lports",
|
|
.data = NULL,
|
|
.maxlen = 65536,
|
|
.mode = 0644,
|
|
.proc_handler = proc_do_large_bitmap,
|
|
},
|
|
{
|
|
.procname = "ack",
|
|
.data = NULL,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_INT_MAX,
|
|
},
|
|
{
|
|
.procname = "queue",
|
|
.data = NULL,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_do_netlat_queue,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_INT_MAX,
|
|
},
|
|
{
|
|
.procname = "pick",
|
|
.data = NULL,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_do_netlat_pick,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_INT_MAX,
|
|
},
|
|
{
|
|
.procname = "enable",
|
|
.data = NULL,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_do_netlat_enable,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_ONE,
|
|
},
|
|
{}
|
|
};
|
|
|
|
static int netlat_init_ipv4_ctl_table(struct net *net)
|
|
{
|
|
int ret;
|
|
struct netlat_net_data *pdata;
|
|
struct ctl_table *table;
|
|
|
|
table = ipv4_netlat;
|
|
pdata = net_generic(net, netlat_net_id);
|
|
|
|
ret = 0;
|
|
if (!net_eq(net, &init_net)) {
|
|
table = kmemdup(table, sizeof(ipv4_netlat), GFP_KERNEL);
|
|
if (!table) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
/* do not export enable to son netns */
|
|
memset(&table[4], 0, sizeof(struct ctl_table));
|
|
}
|
|
|
|
pdata->ports = kzalloc(65536 / 8, GFP_KERNEL);
|
|
if (!pdata->ports) {
|
|
ret = -ENOMEM;
|
|
goto free_table;
|
|
}
|
|
|
|
table[0].data = &pdata->ports;
|
|
table[1].data = &pdata->ack;
|
|
table[2].data = &pdata->queue;
|
|
table[3].data = &pdata->pick;
|
|
|
|
/* do not export enable to son netns*/
|
|
if (net_eq(net, &init_net))
|
|
table[4].data = &pdata->enable;
|
|
|
|
pdata->netlat_hdr = register_net_sysctl_sz(net, "net/ipv4/netlat",
|
|
table, ARRAY_SIZE(ipv4_netlat));
|
|
if (!pdata->netlat_hdr) {
|
|
ret = -ENOMEM;
|
|
goto free_ports;
|
|
}
|
|
return ret;
|
|
|
|
free_ports:
|
|
kfree(pdata->ports);
|
|
free_table:
|
|
if (!net_eq(net, &init_net))
|
|
kfree(table);
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
static void netlat_exit_ipv4_ctl_table(struct net *net)
|
|
{
|
|
struct netlat_net_data *pdata;
|
|
struct ctl_table *table;
|
|
|
|
pdata = net_generic(net, netlat_net_id);
|
|
|
|
table = pdata->netlat_hdr->ctl_table_arg;
|
|
unregister_net_sysctl_table(pdata->netlat_hdr);
|
|
|
|
/* root netns never exit*/
|
|
if (net_eq(net, &init_net))
|
|
return;
|
|
|
|
mutex_lock(&lock);
|
|
handle_net_timestamp_exit(!!pdata->queue, !!pdata->pick);
|
|
mutex_unlock(&lock);
|
|
|
|
kfree(table);
|
|
kfree(pdata->ports);
|
|
}
|
|
|
|
/* print msg to per net mbuf when latency from
|
|
* netif to queued on tcp receive queue
|
|
*/
|
|
void netlat_queue_check(struct sock *sk, struct sk_buff *skb, int flags)
|
|
{
|
|
struct net *net;
|
|
s64 lat;
|
|
int thresh;
|
|
long *ports;
|
|
|
|
if (!static_branch_unlikely(&enable_netlat))
|
|
return;
|
|
|
|
net = sock_net(sk);
|
|
if (!skb->tstamp)
|
|
return;
|
|
|
|
thresh = get_queue_lat(net);
|
|
if (!thresh)
|
|
return;
|
|
|
|
ports = get_net_ports(net);
|
|
if (!test_bit(sk->sk_num, ports))
|
|
return;
|
|
|
|
if (!skb->tstamp)
|
|
return;
|
|
|
|
lat = ktime_to_ms(net_timedelta(skb->tstamp));
|
|
lat = lat < 0 ? 0 : lat;
|
|
if (lat < thresh)
|
|
return;
|
|
if (flags & QUEUE_FLAG_RCV)
|
|
net_mbuf_print(net, "TCP QU %u %pI4 %d %pI4 %d\n",
|
|
(unsigned int)lat,
|
|
&sk->sk_rcv_saddr, (int)sk->sk_num,
|
|
&sk->sk_daddr, (int)ntohs(sk->sk_dport));
|
|
else /* QUEUE_FLAG_OFO for now */
|
|
net_mbuf_print(net, "TCP OO %u %pI4 %d %pI4 %d\n",
|
|
(unsigned int)lat,
|
|
&sk->sk_rcv_saddr, (int)sk->sk_num,
|
|
&sk->sk_daddr, (int)ntohs(sk->sk_dport));
|
|
}
|
|
EXPORT_SYMBOL(netlat_queue_check);
|
|
|
|
/* print msg to per net mbuf when latency from
|
|
* netif to pick by usr app
|
|
*/
|
|
void netlat_pick_check(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
struct net *net;
|
|
s64 lat;
|
|
int thresh;
|
|
long *ports;
|
|
|
|
if (!static_branch_unlikely(&enable_netlat))
|
|
return;
|
|
|
|
net = sock_net(sk);
|
|
if (!skb->tstamp)
|
|
return;
|
|
|
|
thresh = get_pick_lat(net);
|
|
if (!thresh)
|
|
return;
|
|
|
|
ports = get_net_ports(net);
|
|
if (!test_bit(sk->sk_num, ports))
|
|
return;
|
|
|
|
if (!skb->tstamp)
|
|
return;
|
|
|
|
lat = ktime_to_ms(net_timedelta(skb->tstamp));
|
|
lat = lat < 0 ? 0 : lat;
|
|
if (lat < thresh)
|
|
return;
|
|
|
|
net_mbuf_print(net, "TCP PI %u %pI4 %d %pI4 %d\n",
|
|
(unsigned int)lat, &sk->sk_rcv_saddr, (int)sk->sk_num,
|
|
&sk->sk_daddr, (int)ntohs(sk->sk_dport));
|
|
}
|
|
EXPORT_SYMBOL(netlat_pick_check);
|
|
|
|
static struct pernet_operations netlat_net_ops = {
|
|
.init = netlat_init_ipv4_ctl_table,
|
|
.exit = netlat_exit_ipv4_ctl_table,
|
|
.id = &netlat_net_id,
|
|
.size = sizeof(struct netlat_net_data),
|
|
};
|
|
|
|
/* add some config file in proc
|
|
*/
|
|
int netlat_net_init(void)
|
|
{
|
|
return register_pernet_subsys(&netlat_net_ops);
|
|
}
|
|
EXPORT_SYMBOL(netlat_net_init);
|
|
|
|
void netlat_net_exit(void)
|
|
{
|
|
unregister_pernet_subsys(&netlat_net_ops);
|
|
}
|
|
EXPORT_SYMBOL(netlat_net_exit);
|