net: csig toa patch

toa patch from csig luckyqiu@tencent.com

Signed-off-by: luckyqiu luckyqiu@tencent.com
This commit is contained in:
luckyqiu 2024-08-06 17:34:55 +08:00
parent aa493c7aa2
commit 91d9b56ad0
5 changed files with 716 additions and 97 deletions

View File

@ -245,6 +245,15 @@ struct sock_common {
/* public: */
};
/*
 * tvpc data: per-socket VPC/TOA metadata. It is embedded in struct sock as
 * sk_tvpc_info and copied to user space by the SO_TVPC_INFO socket option.
 * Address and port fields are stored as __be32/__be16, i.e. exactly as they
 * were copied from a TCP option or from a socket address.
 */
struct tvpc_info {
/* VPC id, filled from the TCPOPT_VM_VPCID TCP option */
u32 vpcid;
/* client (VM) address: from TCPOPT_REAL_CLIENTIP, else the peer address */
__be32 vmip;
/* virtual IP: from TCPOPT_VIP, else the local socket address */
__be32 vip;
/* client source port, set together with vmip */
__be16 sport;
/* virtual port, set together with vip */
__be16 vport;
};
struct bpf_local_storage;
struct sk_filter;
@ -547,6 +556,9 @@ struct sock {
netns_tracker ns_tracker;
struct hlist_node sk_bind2_node;
/*VPC INFO*/
struct tvpc_info sk_tvpc_info;
KABI_RESERVE(1);
KABI_RESERVE(2);
KABI_RESERVE(3);

View File

@ -160,4 +160,7 @@
#endif
/* get the tvpc_info that was stored on the socket by tcp_v4_syn_recv_sock_vtoa() */
#define SO_TVPC_INFO 5000
#endif /* __ASM_GENERIC_SOCKET_H */

View File

@ -2002,6 +2002,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
v.val = READ_ONCE(sk->sk_txrehash);
break;
case SO_TVPC_INFO:
if (len > sizeof(sk->sk_tvpc_info))
len = sizeof(sk->sk_tvpc_info);
if (copy_to_sockptr(optval, &sk->sk_tvpc_info, len))
return -EFAULT;
goto lenout;
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).

View File

@ -1,4 +1,17 @@
#include "toa.h"
#include <net/genetlink.h>
#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
/* printk-style format consuming the four arguments produced by NIPQUAD() */
#define NIPQUAD_FMT "%u.%u.%u.%u"
/*
 * Expand an lvalue holding an IPv4 address into its four bytes, in memory
 * order, for use with NIPQUAD_FMT. The argument is parenthesized so that
 * any lvalue expression (not just a plain identifier) expands correctly.
 */
#define NIPQUAD(addr) \
	((unsigned char *)&(addr))[0], \
	((unsigned char *)&(addr))[1], \
	((unsigned char *)&(addr))[2], \
	((unsigned char *)&(addr))[3]
/*
* TOA a new Tcp Option as Address,
@ -7,7 +20,7 @@
* with LVS FULLNAT model, the realservice are still able to receive real {IP,Port} info.
* So far, this module only supports IPv4 and IPv6 mapped IPv4.
*
* Authors:
* Authors:
* Wen Li <steel.mental@gmail.com>
* Yan Tian <tianyan.7c00@gmail.com>
* Jiaming Wu <pukong.wjm@taobao.com>
@ -19,13 +32,30 @@
* 2 of the License, or (at your option) any later version.
*
*/
/*
 * vtoa: when non-zero, hook_toa_functions() installs the *_vtoa hooks
 * instead of the *_toa ones.
 */
static int vtoa;
module_param(vtoa, int, 0600);
MODULE_PARM_DESC(vtoa, "vtoa module control.value can be 0 or 1 ;default is 0.");
/*
 * ali_cip: enables matching of the 0xfe (TCPOPT_TOA_ALI_CIP) option.
 * NOTE(review): confirm the checks that read this flag are still present.
 */
static int ali_cip;
module_param(ali_cip, int, 0600);
MODULE_PARM_DESC(ali_cip, "Enable ali cip option: 0xfe. Value could be 0 or 1, defaults to 0.");
/* Bits of disable_getpeername, tested by inet_getname_vtoa() / inet6_getname_vtoa(). */
#define VTOA_GETPEERNAME_IPV4_DISABLE (0x1 << 0)
#define VTOA_GETPEERNAME_IPV6_DISABLE (0x1 << 1)
/* Bitmask: when a bit is set the matching vtoa getname hook returns the unmodified result. */
static int disable_getpeername;
module_param(disable_getpeername, int, 0600);
MODULE_PARM_DESC(disable_getpeername, \
"vtoa module control support getpeername.disable 1 ipv4, 2 ipv6, 3 ipv4 & ipv6;default is 0.");
/* Debug switch: TOA_DBG() only prints when dbg == 0xff. */
static int dbg;
module_param(dbg, int, 0600);
MODULE_PARM_DESC(dbg, "debug log switch");
/*
 * Address of sock_def_readable, resolved in toa_init(); the getname hooks
 * compare it against sk->sk_data_ready before touching the result.
 */
unsigned long sk_data_ready_addr = 0;
/* kprobe used once by ksyms_lookup_init() to locate kallsyms_lookup_name. */
static struct kprobe kp_exec;
/* Pointer to kallsyms_lookup_name, filled in by ksyms_lookup_init(). */
static typeof(kallsyms_lookup_name) *ksyms_lookup_name = NULL;
/*
* Statistics of toa in proc /proc/net/toa_stats
* Statistics of toa in proc /proc/net/toa_stats
*/
struct toa_stats_entry toa_stats[] = {
@ -35,13 +65,349 @@ struct toa_stats_entry toa_stats[] = {
TOA_STAT_ITEM("getname_toa_mismatch", GETNAME_TOA_MISMATCH_CNT),
TOA_STAT_ITEM("getname_toa_bypass", GETNAME_TOA_BYPASS_CNT),
TOA_STAT_ITEM("getname_toa_empty", GETNAME_TOA_EMPTY_CNT),
TOA_STAT_ITEM("vtoa_get", GETNAME_VTOA_GET_CNT),
TOA_STAT_ITEM("vtoa_attr_err", GETNAME_VTOA_GET_ATTR_ERR_CNT),
TOA_STAT_ITEM("vtoa_lookup_err", GETNAME_VTOA_GET_LOCKUP_ERR_CNT),
TOA_STAT_ITEM("vtoa_netlink_err", GETNAME_VTOA_GET_NETLINK_ERR_CNT),
TOA_STAT_ITEM("getname_vtoa_ipv4_err", GETNAME_VTOA_GETPEERNAME_IPV4_ERR_CNT),
TOA_STAT_ITEM("getname_vtoa_ipv6_err", GETNAME_VTOA_GETPEERNAME_IPV6_ERR_CNT),
TOA_STAT_END
};
DEFINE_TOA_STAT(struct toa_stat_mib, ext_stats);
enum {
VTOA_CMD_UNSPEC,
VTOA_CMD_GET,
__VTOA_CMD_MAX
};
#define VTOA_CMD_MAX (__VTOA_CMD_MAX - 1)
/*
 * Generic netlink attributes of the "VPC_VTOA" family.
 * VTOA_ATTR_REQ is read from the request by vpc_vtoa_get(); the
 * VTOA_ATTR_I_* attributes are written to the reply by
 * vpc_vtoa_netlink_send().
 */
enum {
VTOA_ATTR_UNSPEC,
/* request payload: one struct vtoa_req (length is checked exactly) */
VTOA_ATTR_REQ,
/* reply: vtoa_info.i_vpcid */
VTOA_ATTR_I_VPCID,
/* reply: vtoa_info.i_vmip */
VTOA_ATTR_I_VMIP,
/* reply: vtoa_info.i_sport */
VTOA_ATTR_I_SPORT,
/* reply: vtoa_info.i_vip */
VTOA_ATTR_I_VIP,
/* reply: vtoa_info.i_vport */
VTOA_ATTR_I_VPORT,
__VTOA_ATTR_MAX
};
/* Highest valid attribute number; sizes vtoa_policy[] and genl_family.maxattr. */
#define VTOA_ATTR_MAX (__VTOA_ATTR_MAX - 1)
/*
 * Attribute validation policy of the "VPC_VTOA" family.
 *
 * The table is never modified at run time, so it is declared const
 * (genl_family.policy takes a pointer to const).
 * VTOA_ATTR_REQ carries a binary struct vtoa_req; its exact length is
 * verified separately in vpc_vtoa_get().
 */
static const struct nla_policy vtoa_policy[VTOA_ATTR_MAX + 1] = {
	[VTOA_ATTR_REQ]     = { .type = NLA_STRING },
	[VTOA_ATTR_I_VPCID] = { .type = NLA_U32 },
	[VTOA_ATTR_I_VMIP]  = { .type = NLA_U32 },
	[VTOA_ATTR_I_SPORT] = { .type = NLA_U16 },
	[VTOA_ATTR_I_VIP]   = { .type = NLA_U32 },
	[VTOA_ATTR_I_VPORT] = { .type = NLA_U16 },
};
/* Forward declaration: vpc_vtoa_netlink_send() needs the family before it is defined. */
static struct genl_family vtoa_genl_family;
/*
 * Payload of VTOA_ATTR_REQ: identifies the TCP connection to look up.
 * The fields are handed unchanged to inet_lookup() by vpc_inet_dump_one_sk().
 */
struct vtoa_req{
/* interface index argument of inet_lookup() */
u32 if_idx;
/* source address of the connection */
__be32 sip;
/* destination address of the connection */
__be32 dip;
/* source port of the connection */
__be16 sport;
/* destination port of the connection */
__be16 dport;
};
/*
 * Result of a VTOA_CMD_GET lookup: a copy of the socket's sk_tvpc_info,
 * filled by vpc_inet_dump_one_sk() and sent back by vpc_vtoa_netlink_send().
 */
struct vtoa_info{
/* copy of sk_tvpc_info.vpcid */
u32 i_vpcid;
/* copy of sk_tvpc_info.vmip */
__be32 i_vmip;
/* copy of sk_tvpc_info.vip */
__be32 i_vip;
/* copy of sk_tvpc_info.sport */
__be16 i_sport;
/* copy of sk_tvpc_info.vport */
__be16 i_vport;
};
/*
* Funcs for toa hooks
* Funcs for vtoa hooks
*/
/* Parse the TCP options in skb and store any vtoa data in sk->sk_tvpc_info.
 *
 * Three options are recognised, each matched on both kind and length:
 *   TCPOPT_REAL_CLIENTIP -> sk_tvpc_info.sport, sk_tvpc_info.vmip
 *   TCPOPT_VM_VPCID      -> sk_tvpc_info.vpcid
 *   TCPOPT_VIP           -> sk_tvpc_info.vport, sk_tvpc_info.vip
 * Values are copied verbatim from the option (no byte-order conversion).
 *
 * @param skb [in] received skb, it should be a ack/get-ack packet; may be NULL.
 * @param sk [out] socket whose sk_tvpc_info receives the parsed values.
 * @return number of vtoa options found (0 if none or skb is NULL);
 * -1 if the option list is malformed (values from options parsed before
 * the malformed one stay stored in sk).
 */
static int get_vtoa_data(struct sk_buff *skb, struct sock *sk)
{
	struct tcphdr *th;
	unsigned char *ptr;
	int length;
	int is_toa = 0;

	if (NULL == skb)
		return 0;

	th = tcp_hdr(skb);
	length = (th->doff * 4) - sizeof(struct tcphdr);
	ptr = (unsigned char *) (th + 1);

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		/* End of option list: stop parsing, keep what was found so far. */
		if (TCPOPT_EOL == opcode)
			break;

		if (TCPOPT_NOP == opcode) { /* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		}

		/* The length byte itself must still be inside the option area. */
		if (length < 2)
			return -1;
		opsize = *ptr++;
		if (opsize < 2) /* "silly options" */
			return -1;
		if (opsize > length) /* don't parse partial options */
			return -1;

		/*
		 * Option payloads have no alignment guarantee, so copy them
		 * out with memcpy() instead of dereferencing casted pointers.
		 */
		if (TCPOPT_REAL_CLIENTIP == opcode
		    && TCPOLEN_REAL_CLIENTIP == opsize) {
			memcpy(&sk->sk_tvpc_info.sport, ptr,
			       sizeof(sk->sk_tvpc_info.sport));
			memcpy(&sk->sk_tvpc_info.vmip, ptr + 2,
			       sizeof(sk->sk_tvpc_info.vmip));
			is_toa++;
		} else if (TCPOPT_VM_VPCID == opcode
			   && TCPOLEN_VM_VPCID == opsize) {
			memcpy(&sk->sk_tvpc_info.vpcid, ptr,
			       sizeof(sk->sk_tvpc_info.vpcid));
			is_toa++;
		} else if (TCPOPT_VIP == opcode
			   && TCPOLEN_VIP == opsize) {
			memcpy(&sk->sk_tvpc_info.vport, ptr,
			       sizeof(sk->sk_tvpc_info.vport));
			memcpy(&sk->sk_tvpc_info.vip, ptr + 2,
			       sizeof(sk->sk_tvpc_info.vip));
			is_toa++;
		}

		/* ptr already points past kind and length: skip the payload. */
		ptr += opsize - 2;
		length -= opsize;
	}

	TOA_DBG("%s tcp source:%u dest:%u vpcid:%u vmip:%pI4 sport:%u vip:%pI4 vport:%u\n",
		__func__, ntohs(th->source), ntohs(th->dest), sk->sk_tvpc_info.vpcid,
		&sk->sk_tvpc_info.vmip, ntohs(sk->sk_tvpc_info.sport),
		&sk->sk_tvpc_info.vip, ntohs(sk->sk_tvpc_info.vport));

	return is_toa;
}
/* get client ip from socket
* @param sock [in] the socket to getpeername() or getsockname()
* @param uaddr [out] the place to put client ip, port
* @param uaddr_len [out] lenth of @uaddr
* @peer [in] if(peer), try to get remote address; if(!peer), try to get local address
* @return return what the original inet_getname() returns.
*/
static int
inet_getname_vtoa(struct socket *sock,
struct sockaddr *uaddr,
int peer)
{
int retval = 0;
struct sock *sk = sock->sk;
struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
/* call orginal one */
retval = inet_getname(sock, uaddr, peer);
if (disable_getpeername & VTOA_GETPEERNAME_IPV4_DISABLE){
TOA_INC_STATS(ext_stats,
GETNAME_VTOA_GETPEERNAME_IPV4_ERR_CNT);
TOA_DBG("%s called, toa ipv getpeername disable\n", __func__);
return retval;
}
/* set our value if need */
if (retval == sizeof(struct sockaddr_in) && peer && sk) {
if (sk_data_ready_addr == (unsigned long) sk->sk_data_ready) {
/*syscall accept must go here,
*so change the client ip/port and vip/vport here
*/
if (0 != sk->sk_tvpc_info.vmip
&& 0 != sk->sk_tvpc_info.sport) {
TOA_INC_STATS(ext_stats, GETNAME_TOA_OK_CNT);
sin->sin_port = sk->sk_tvpc_info.sport;
sin->sin_addr.s_addr = sk->sk_tvpc_info.vmip;
} else if (0 == sk->sk_tvpc_info.vmip
&& 0 == sk->sk_tvpc_info.sport) {
TOA_INC_STATS(ext_stats,
SYN_RECV_SOCK_NO_TOA_CNT);
sk->sk_tvpc_info.sport = sin->sin_port;
sk->sk_tvpc_info.vmip = sin->sin_addr.s_addr;
} else
TOA_INFO("vmip or sport can not 0.\n");
/*do not get vip and vport,
*maybe vpcgw do not pass them,
*maybe it is a flow just for this host
*/
if (0 == sk->sk_tvpc_info.vip
&& 0 == sk->sk_tvpc_info.vport) {
struct sockaddr_in addr;
if (inet_getname(sock, (struct sockaddr *)&addr,
0) == sizeof(struct sockaddr_in)) {
sk->sk_tvpc_info.vport = addr.sin_port;
sk->sk_tvpc_info.vip =
addr.sin_addr.s_addr;
}
}
} else {
TOA_INC_STATS(ext_stats, GETNAME_TOA_BYPASS_CNT);
}
} else { /* no need to get client ip */
TOA_INC_STATS(ext_stats, GETNAME_TOA_EMPTY_CNT);
}
TOA_DBG("%s called, sk_data_ready:%px retval:%d peer:%d vmip:%08x sport:%u vip:%08x vport:%u\n",
__func__, sk?sk->sk_data_ready:NULL, retval, peer,
sk?ntohl(sk->sk_tvpc_info.vmip):0, sk?ntohs(sk->sk_tvpc_info.sport):0,
sk?ntohl(sk->sk_tvpc_info.vip):0, sk?ntohs(sk->sk_tvpc_info.vport):0);
return retval;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static int
inet6_getname_vtoa(struct socket *sock,
struct sockaddr *uaddr,
int peer)
{
int retval = 0;
struct sock *sk = sock->sk;
struct sockaddr_in6 *sin = (struct sockaddr_in6 *) uaddr;
/* call orginal one */
retval = inet6_getname(sock, uaddr, peer);
if (disable_getpeername & VTOA_GETPEERNAME_IPV6_DISABLE){
TOA_INC_STATS(ext_stats,
GETNAME_VTOA_GETPEERNAME_IPV6_ERR_CNT);
TOA_DBG("%s called, toa ipv6 getpeername disable\n", __func__);
return retval;
}
/* set our value if need */
if (retval == sizeof(struct sockaddr_in6) && peer && sk) {
if (sk_data_ready_addr == (unsigned long) sk->sk_data_ready) {
if (0 != sk->sk_tvpc_info.vmip
&& 0 != sk->sk_tvpc_info.sport) {
TOA_INC_STATS(ext_stats, GETNAME_TOA_OK_CNT);
sin->sin6_port = sk->sk_tvpc_info.sport;
ipv6_addr_set(&sin->sin6_addr, 0, 0,
htonl(0x0000FFFF),
sk->sk_tvpc_info.vmip);
} else if (0 == sk->sk_tvpc_info.vmip
&& 0 == sk->sk_tvpc_info.sport) {
TOA_INC_STATS(ext_stats,
SYN_RECV_SOCK_NO_TOA_CNT);
sk->sk_tvpc_info.sport = sin->sin6_port;
sk->sk_tvpc_info.vmip =
sin->sin6_addr.s6_addr32[3];
} else
TOA_INFO("vmip or sport can not 0.\n");
if (0 == sk->sk_tvpc_info.vip
&& 0 == sk->sk_tvpc_info.vport) {
struct sockaddr_in6 addr;
if (inet6_getname(sock,
(struct sockaddr *)&addr,
0) == sizeof(struct sockaddr_in6)) {
sk->sk_tvpc_info.vport =
addr.sin6_port;
sk->sk_tvpc_info.vip =
addr.sin6_addr.s6_addr32[3];
}
}
} else {
TOA_INC_STATS(ext_stats, GETNAME_TOA_BYPASS_CNT);
}
} else { /* no need to get client ip */
TOA_INC_STATS(ext_stats, GETNAME_TOA_EMPTY_CNT);
}
TOA_DBG("%s called, sk_data_ready:%px retval:%d peer:%d vmip:%08x sport:%u vip:%08x vport:%u\n",
__func__, sk?sk->sk_data_ready:NULL, retval, peer,
sk?ntohl(sk->sk_tvpc_info.vmip):0, sk?ntohs(sk->sk_tvpc_info.sport):0,
sk?ntohl(sk->sk_tvpc_info.vip):0, sk?ntohs(sk->sk_tvpc_info.vport):0);
return retval;
}
#endif
/* syn_recv_sock hook for IPv4 in vtoa mode.
 *
 * Lets tcp_v4_syn_recv_sock() create the child socket, then clears the
 * child's sk_tvpc_info and refills it from the TCP options of skb.
 * @param sk [in] the listening socket
 * @param skb [in] the ack/ack-get packet
 * @param req [in] the open request for this connection
 * @param dst [in] route cache entry, passed through
 * @param req_unhash [in] passed through to tcp_v4_syn_recv_sock()
 * @param own_req [out] passed through to tcp_v4_syn_recv_sock()
 * @return NULL if fail, new socket if succeed.
 */
static struct sock *
tcp_v4_syn_recv_sock_vtoa(const struct sock *sk,
			  struct sk_buff *skb,
			  struct request_sock *req,
			  struct dst_entry *dst,
			  struct request_sock *req_unhash,
			  bool *own_req)
{
	struct sock *child;

	TOA_DBG("%s called\n", __func__);

	/* the stock handler builds the child socket */
	child = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
	if (NULL == child)
		return NULL;

	/* start from a clean slate, then pick up whatever the options carry */
	memset(&child->sk_tvpc_info, 0, sizeof(child->sk_tvpc_info));
	if (get_vtoa_data(skb, child) > 0)
		TOA_INC_STATS(ext_stats, SYN_RECV_SOCK_TOA_CNT);
	else
		TOA_INC_STATS(ext_stats, SYN_RECV_SOCK_NO_TOA_CNT);

	return child;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/* syn_recv_sock hook for IPv6 in vtoa mode: same as the IPv4 variant, but
 * the child socket is created by tcp_v6_syn_recv_sock().
 * @return NULL if fail, new socket if succeed.
 */
static struct sock *
tcp_v6_syn_recv_sock_vtoa(const struct sock *sk,
			  struct sk_buff *skb,
			  struct request_sock *req,
			  struct dst_entry *dst,
			  struct request_sock *req_unhash,
			  bool *own_req)
{
	struct sock *child;

	TOA_DBG("%s called\n", __func__);

	/* the stock handler builds the child socket */
	child = tcp_v6_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
	if (NULL == child)
		return NULL;

	/* start from a clean slate, then pick up whatever the options carry */
	memset(&child->sk_tvpc_info, 0, sizeof(child->sk_tvpc_info));
	if (get_vtoa_data(skb, child) > 0)
		TOA_INC_STATS(ext_stats, SYN_RECV_SOCK_TOA_CNT);
	else
		TOA_INC_STATS(ext_stats, SYN_RECV_SOCK_NO_TOA_CNT);

	return child;
}
#endif
/*
* Funcs for toa hooks
*/
/* Parse TCP options in skb, try to get client ip, port
@ -49,7 +415,7 @@ DEFINE_TOA_STAT(struct toa_stat_mib, ext_stats);
* @return NULL if we don't get client ip/port;
* value of toa_data in ret_ptr if we get client ip/port.
*/
static void *get_toa_data(struct sk_buff *skb)
static void * get_toa_data(struct sk_buff *skb)
{
struct tcphdr *th;
int length;
@ -64,7 +430,7 @@ static void *get_toa_data(struct sk_buff *skb)
if (NULL != skb) {
th = tcp_hdr(skb);
length = (th->doff * 4) - sizeof (struct tcphdr);
ptr = (unsigned char *)(th + 1);
ptr = (unsigned char *) (th + 1);
while (length > 0) {
int opcode = *ptr++;
@ -81,13 +447,12 @@ static void *get_toa_data(struct sk_buff *skb)
return NULL;
if (opsize > length)
return NULL; /* don't parse partial options */
if ((TCPOPT_TOA == opcode && TCPOLEN_TOA == opsize) ||
(ali_cip == 1 && TCPOPT_TOA_ALI_CIP == opcode && TCPOLEN_TOA_ALI_CIP == opsize)) {
if (TCPOPT_REAL_CLIENTIP == opcode && TCPOLEN_REAL_CLIENTIP == opsize) {
memcpy(&tdata, ptr - 2, sizeof (tdata));
//TOA_DBG("find toa data: ip = %u.%u.%u.%u, port = %u\n", NIPQUAD(tdata.ip),
//ntohs(tdata.port));
TOA_DBG("find toa data: ip = %u.%u.%u.%u, port = %u\n", NIPQUAD(tdata.ip),
ntohs(tdata.port));
memcpy(&ret_ptr, &tdata, sizeof (ret_ptr));
//TOA_DBG("coded toa data: %p\n", ret_ptr);
TOA_DBG("coded toa data: %px\n", ret_ptr);
return ret_ptr;
}
ptr += opsize - 2;
@ -98,7 +463,7 @@ static void *get_toa_data(struct sk_buff *skb)
return NULL;
}
/* get client ip from socket
/* get client ip from socket
* @param sock [in] the socket to getpeername() or getsockname()
* @param uaddr [out] the place to put client ip, port
* @param uaddr_len [out] lenth of @uaddr
@ -110,20 +475,17 @@ inet_getname_toa(struct socket *sock, struct sockaddr *uaddr, int peer)
{
int retval = 0;
struct sock *sk = sock->sk;
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
struct toa_data tdata;
//TOA_DBG("inet_getname_toa called, sk->sk_user_data is %p\n", sk->sk_user_data);
/* call orginal one */
retval = inet_getname(sock, uaddr, peer);
/* set our value if need */
if (retval == 0 && NULL != sk->sk_user_data && peer) {
if (sock_def_readable == sk->sk_data_ready) {
if (sk && retval == sizeof(struct sockaddr_in) && NULL != sk->sk_user_data && peer) {
if (sk_data_ready_addr == (unsigned long) sk->sk_data_ready) {
memcpy(&tdata, &sk->sk_user_data, sizeof (tdata));
if ((TCPOPT_TOA == tdata.opcode && TCPOLEN_TOA == tdata.opsize) ||
(ali_cip == 1 && TCPOPT_TOA_ALI_CIP == tdata.opcode && TCPOLEN_TOA_ALI_CIP == tdata.opsize)) {
if (TCPOPT_REAL_CLIENTIP == tdata.opcode && TCPOLEN_REAL_CLIENTIP == tdata.opsize) {
TOA_INC_STATS(ext_stats, GETNAME_TOA_OK_CNT);
//TOA_DBG("inet_getname_toa: set new sockaddr, ip %u.%u.%u.%u -> %u.%u.%u.%u, port %u -> %u\n",
// NIPQUAD(sin->sin_addr.s_addr), NIPQUAD(tdata.ip), ntohs(sin->sin_port),
@ -140,7 +502,10 @@ inet_getname_toa(struct socket *sock, struct sockaddr *uaddr, int peer)
}
} else { /* no need to get client ip */
TOA_INC_STATS(ext_stats, GETNAME_TOA_EMPTY_CNT);
}
}
TOA_DBG("%s called, sk_user_data:%px sk_data_ready:%px retval:%d peer:%d\n",
__func__, sk?sk->sk_user_data:NULL, sk?sk->sk_data_ready:NULL, retval, peer);
return retval;
}
@ -154,17 +519,14 @@ inet6_getname_toa(struct socket *sock, struct sockaddr *uaddr, int peer)
struct sockaddr_in6 *sin = (struct sockaddr_in6 *) uaddr;
struct toa_data tdata;
//TOA_DBG("inet6_getname_toa called, sk->sk_user_data is %p\n", sk->sk_user_data);
/* call orginal one */
retval = inet6_getname(sock, uaddr, peer);
/* set our value if need */
if (retval == 0 && NULL != sk->sk_user_data && peer) {
if (sock_def_readable == sk->sk_data_ready) {
if (sk && retval == sizeof(struct sockaddr_in6) && NULL != sk->sk_user_data && peer) {
if (sk_data_ready_addr == (unsigned long) sk->sk_data_ready) {
memcpy(&tdata, &sk->sk_user_data, sizeof (tdata));
if ((TCPOPT_TOA == tdata.opcode && TCPOLEN_TOA == tdata.opsize) ||
(ali_cip == 1 && TCPOPT_TOA_ALI_CIP == tdata.opcode && TCPOLEN_TOA_ALI_CIP == tdata.opsize)) {
if (TCPOPT_REAL_CLIENTIP == tdata.opcode && TCPOLEN_REAL_CLIENTIP == tdata.opsize) {
TOA_INC_STATS(ext_stats, GETNAME_TOA_OK_CNT);
sin->sin6_port = tdata.port;
ipv6_addr_set(&sin->sin6_addr, 0, 0, htonl(0x0000FFFF), tdata.ip);
@ -176,7 +538,10 @@ inet6_getname_toa(struct socket *sock, struct sockaddr *uaddr, int peer)
}
} else { /* no need to get client ip */
TOA_INC_STATS(ext_stats, GETNAME_TOA_EMPTY_CNT);
}
}
TOA_DBG("%s called, sk_user_data:%px sk_data_ready:%px retval:%d peer:%d\n",
__func__, sk?sk->sk_user_data:NULL, sk?sk->sk_data_ready:NULL, retval, peer);
return retval;
}
@ -200,21 +565,22 @@ tcp_v4_syn_recv_sock_toa(const struct sock *sk, struct sk_buff *skb,
{
struct sock *newsock = NULL;
//TOA_DBG("tcp_v4_syn_recv_sock_toa called\n");
/* call orginal one */
newsock = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
/* set our value if need */
if (NULL != newsock && NULL == newsock->sk_user_data) {
newsock->sk_user_data = get_toa_data(skb);
if (NULL != newsock->sk_user_data) {
if(NULL != newsock->sk_user_data){
TOA_INC_STATS(ext_stats, SYN_RECV_SOCK_TOA_CNT);
} else {
TOA_INC_STATS(ext_stats, SYN_RECV_SOCK_NO_TOA_CNT);
}
//TOA_DBG("tcp_v4_syn_recv_sock_toa: set sk->sk_user_data to %p\n", newsock->sk_user_data);
}
TOA_DBG("%s called, newsock:%px sk_user_data:%px\n",
__func__, newsock, newsock?newsock->sk_user_data:NULL);
return newsock;
}
@ -228,30 +594,33 @@ tcp_v6_syn_recv_sock_toa(const struct sock *sk, struct sk_buff *skb,
{
struct sock *newsock = NULL;
//TOA_DBG("tcp_v4_syn_recv_sock_toa called\n");
/* call orginal one */
newsock = tcp_v6_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
/* set our value if need */
if (NULL != newsock && NULL == newsock->sk_user_data) {
newsock->sk_user_data = get_toa_data(skb);
if (NULL != newsock->sk_user_data) {
if(NULL != newsock->sk_user_data){
TOA_INC_STATS(ext_stats, SYN_RECV_SOCK_TOA_CNT);
} else {
TOA_INC_STATS(ext_stats, SYN_RECV_SOCK_NO_TOA_CNT);
}
}
TOA_DBG("%s called, newsock:%px sk_user_data:%px\n",
__func__, newsock, newsock?newsock->sk_user_data:NULL);
return newsock;
}
#endif
/*
* HOOK FUNCS
* HOOK FUNCS
*/
/* replace the functions with our functions */
static inline int hook_toa_functions(void)
static inline int
hook_toa_functions(void)
{
struct proto_ops *inet_stream_ops_p;
struct proto_ops *inet6_stream_ops_p;
@ -260,29 +629,45 @@ static inline int hook_toa_functions(void)
/* hook inet_getname for ipv4 */
inet_stream_ops_p = (struct proto_ops *)&inet_stream_ops;
inet_stream_ops_p->getname = inet_getname_toa;
TOA_INFO("CPU [%u] hooked inet_getname <%p> --> <%p>\n", smp_processor_id(), inet_getname,
if (vtoa)
inet_stream_ops_p->getname = inet_getname_vtoa;
else
inet_stream_ops_p->getname = inet_getname_toa;
TOA_INFO("CPU [%u] hooked inet_getname <%px> --> <%px>\n", smp_processor_id(), inet_getname,
inet_stream_ops_p->getname);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/* hook inet6_getname for ipv6 */
inet6_stream_ops_p = (struct proto_ops *)&inet6_stream_ops;
inet6_stream_ops_p->getname = inet6_getname_toa;
TOA_INFO("CPU [%u] hooked inet6_getname <%p> --> <%p>\n", smp_processor_id(), inet6_getname,
if (vtoa)
inet6_stream_ops_p->getname = inet6_getname_vtoa;
else
inet6_stream_ops_p->getname = inet6_getname_toa;
TOA_INFO("CPU [%u] hooked inet6_getname <%px> --> <%px>\n", smp_processor_id(), inet6_getname,
inet6_stream_ops_p->getname);
#endif
/* hook tcp_v4_syn_recv_sock for ipv4 */
ipv4_specific_p = (struct inet_connection_sock_af_ops *)&ipv4_specific;
ipv4_specific_p->syn_recv_sock = tcp_v4_syn_recv_sock_toa;
TOA_INFO("CPU [%u] hooked tcp_v4_syn_recv_sock <%p> --> <%p>\n", smp_processor_id(), tcp_v4_syn_recv_sock,
if (vtoa)
ipv4_specific_p->syn_recv_sock = tcp_v4_syn_recv_sock_vtoa;
else
ipv4_specific_p->syn_recv_sock = tcp_v4_syn_recv_sock_toa;
TOA_INFO("CPU [%u] hooked tcp_v4_syn_recv_sock <%px> --> <%px>\n", smp_processor_id(), tcp_v4_syn_recv_sock,
ipv4_specific_p->syn_recv_sock);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/* hook tcp_v6_syn_recv_sock for ipv6 */
ipv6_specific_p = (struct inet_connection_sock_af_ops *)&ipv6_specific;
ipv6_specific_p->syn_recv_sock = tcp_v6_syn_recv_sock_toa;
TOA_INFO("CPU [%u] hooked tcp_v6_syn_recv_sock <%p> --> <%p>\n", smp_processor_id(), tcp_v6_syn_recv_sock,
if (vtoa)
ipv6_specific_p->syn_recv_sock = tcp_v6_syn_recv_sock_vtoa;
else
ipv6_specific_p->syn_recv_sock = tcp_v6_syn_recv_sock_toa;
TOA_INFO("CPU [%u] hooked tcp_v6_syn_recv_sock <%px> --> <%px>\n", smp_processor_id(), tcp_v6_syn_recv_sock,
ipv6_specific_p->syn_recv_sock);
#endif
@ -293,10 +678,10 @@ static inline int hook_toa_functions(void)
static int
unhook_toa_functions(void)
{
struct proto_ops *inet_stream_ops_p;
struct proto_ops *inet6_stream_ops_p;
struct inet_connection_sock_af_ops *ipv4_specific_p;
struct inet_connection_sock_af_ops *ipv6_specific_p;
struct proto_ops *inet_stream_ops_p;
struct proto_ops *inet6_stream_ops_p;
struct inet_connection_sock_af_ops *ipv4_specific_p;
struct inet_connection_sock_af_ops *ipv6_specific_p;
/* unhook inet_getname for ipv4 */
inet_stream_ops_p = (struct proto_ops *)&inet_stream_ops;
@ -326,27 +711,25 @@ unhook_toa_functions(void)
}
/*
* Statistics of toa in proc /proc/net/toa_stats
* Statistics of toa in proc /proc/net/toa_stats
*/
static int toa_stats_show(struct seq_file *seq, void *v)
{
static int toa_stats_show(struct seq_file *seq, void *v){
int i, j;
/* print CPU first */
seq_printf(seq, " ");
for (i = 0; i < NR_CPUS; i++)
if (cpu_online(i))
seq_printf(seq, "CPU%d ", i);
for_each_online_cpu(i) {
seq_printf(seq, "CPU%d ", i);
}
seq_putc(seq, '\n');
i = 0;
while (NULL != toa_stats[i].name) {
seq_printf(seq, "%-25s:", toa_stats[i].name);
for (j = 0; j < NR_CPUS; j++) {
if (cpu_online(j)) {
seq_printf(seq, "%10lu ",
*(((unsigned long *) per_cpu_ptr(ext_stats, j)) + toa_stats[i].entry));
}
for_each_online_cpu(j) {
seq_printf(seq, "%10lu ",
*(((unsigned long *) per_cpu_ptr(ext_stats, j)) + toa_stats[i].entry));
}
seq_putc(seq, '\n');
i++;
@ -359,32 +742,232 @@ static int toa_stats_seq_open(struct inode *inode, struct file *file)
return single_open(file, toa_stats_show, NULL);
}
static const struct proc_ops toa_stats_ops = {
static const struct proc_ops toa_stats_fops = {
.proc_open = toa_stats_seq_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
/*
 * Look up the TCP socket described by req in net and copy its vtoa data
 * (sk_tvpc_info) into info.
 *
 * inet_lookup() may return a timewait or request minisock as well as a full
 * socket. Minisocks are smaller than struct sock and carry no sk_tvpc_info,
 * so they are rejected before that field is read, and the reference is
 * dropped with sock_gen_put(), which picks the right put routine for every
 * socket flavour.
 *
 * @param net [in] network namespace to search
 * @param req [in] connection 4-tuple plus interface index
 * @param info [out] filled on success only
 * @return 0 on success, -EINVAL if no usable (full, non-listening) socket
 * was found.
 */
static int vpc_inet_dump_one_sk(struct net *net, struct vtoa_req *req, struct vtoa_info *info)
{
	struct sock *sk;
	int err = -EINVAL;

	rcu_read_lock();

	sk = inet_lookup(net, &tcp_hashinfo, NULL, 0, req->sip,
			 req->sport, req->dip,
			 req->dport, req->if_idx);
	if (!sk) {
		TOA_INFO("inet_lookup fail(%d), if_idx %u sip %pI4 dip %pI4 sport %u dport %u.\n",
			 err, req->if_idx, &req->sip, &req->dip,
			 ntohs(req->sport), ntohs(req->dport));
		TOA_INC_STATS(ext_stats, GETNAME_VTOA_GET_LOCKUP_ERR_CNT);
		goto out_nosk;
	}

	/* sk_state is valid for every socket flavour, so it is safe to log here */
	if (!sk_fullsock(sk) || sk->sk_state == TCP_LISTEN) {
		TOA_INFO("socket listen fail(%d), if_idx %u sip %pI4 dip %pI4 sport %u dport %u sk_state %u.\n",
			 err, req->if_idx, &req->sip, &req->dip,
			 ntohs(req->sport), ntohs(req->dport), sk->sk_state);
		TOA_INC_STATS(ext_stats, GETNAME_VTOA_GET_LOCKUP_ERR_CNT);
		goto out_put;
	}

	info->i_vpcid = sk->sk_tvpc_info.vpcid;
	info->i_vmip = sk->sk_tvpc_info.vmip;
	info->i_sport = sk->sk_tvpc_info.sport;
	info->i_vip = sk->sk_tvpc_info.vip;
	info->i_vport = sk->sk_tvpc_info.vport;
	err = 0;

out_put:
	/* drop the reference taken by inet_lookup() */
	sock_gen_put(sk);
out_nosk:
	rcu_read_unlock();
	return err;
}
static int vpc_vtoa_netlink_send(struct net *net, struct nlmsghdr *nlh, struct vtoa_info *info)
{
struct sk_buff *skb;
void *hdr;
int err;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb) {
TOA_INFO("nlmsg_new fail, vpcid %d vmip %pI4 sport %u vip %pI4 vport %u.\n",
info->i_vpcid, &info->i_vmip, ntohs(info->i_sport),
&info->i_vip, ntohs(info->i_vport));
TOA_INC_STATS(ext_stats, GETNAME_VTOA_GET_NETLINK_ERR_CNT);
return -ENOMEM;
}
hdr = genlmsg_put(skb, nlh->nlmsg_pid, 0, &vtoa_genl_family, 0, VTOA_CMD_GET);
if (!hdr){
TOA_INFO("genlmsg_put fail, vpcid %d vmip %pI4 sport %u vip %pI4 vport %u.\n",
info->i_vpcid, &info->i_vmip, ntohs(info->i_sport),
&info->i_vip, ntohs(info->i_vport));
TOA_INC_STATS(ext_stats, GETNAME_VTOA_GET_NETLINK_ERR_CNT);
nlmsg_free(skb);
return -ENOMEM;
}
nla_put_u32(skb, VTOA_ATTR_I_VPCID, info->i_vpcid);
nla_put_u32(skb, VTOA_ATTR_I_VMIP, info->i_vmip);
nla_put_u16(skb, VTOA_ATTR_I_SPORT, info->i_sport);
nla_put_u32(skb, VTOA_ATTR_I_VIP, info->i_vip);
nla_put_u16(skb, VTOA_ATTR_I_VPORT, info->i_vport);
genlmsg_end(skb, hdr);
err = genlmsg_unicast(net, skb, nlh->nlmsg_pid);
if (err < 0){
TOA_INFO("genlmsg_unicast fail(%d), vpcid %d vmip %pI4 sport %u vip %pI4 vport %u.\n",
err,
info->i_vpcid, &info->i_vmip, ntohs(info->i_sport),
&info->i_vip, ntohs(info->i_vport));
TOA_INC_STATS(ext_stats, GETNAME_VTOA_GET_NETLINK_ERR_CNT);
return -EAGAIN;
}
return 0;
}
/*
 * doit handler of VTOA_CMD_GET.
 *
 * Validates that the request carries a VTOA_ATTR_REQ of exactly
 * sizeof(struct vtoa_req), looks the connection up in the namespace of
 * skb->sk and unicasts the socket's vtoa data back to the requester.
 *
 * @return 0 on success; -EINVAL for a missing/ill-sized attribute or a
 * missing namespace; otherwise the error of the lookup or of the send.
 */
static int vpc_vtoa_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlmsghdr *nlh = nlmsg_hdr(skb);
	struct nlattr **attrs = info->attrs;
	struct vtoa_info vinfo;
	struct vtoa_req *req;
	struct net *net;
	int err;

	TOA_INC_STATS(ext_stats, GETNAME_VTOA_GET_CNT);

	if (!attrs ||
	    !attrs[VTOA_ATTR_REQ]) {
		TOA_INFO("attrs null.\n");
		TOA_INC_STATS(ext_stats, GETNAME_VTOA_GET_ATTR_ERR_CNT);
		return -EINVAL;
	}

	/* the payload is a raw struct vtoa_req, so its size must match exactly */
	if (nla_len(attrs[VTOA_ATTR_REQ]) != sizeof(struct vtoa_req)) {
		TOA_INFO("attrs len(%u) err.\n", nla_len(attrs[VTOA_ATTR_REQ]));
		TOA_INC_STATS(ext_stats, GETNAME_VTOA_GET_ATTR_ERR_CNT);
		return -EINVAL;
	}
	req = nla_data(attrs[VTOA_ATTR_REQ]);

	/* the lookup runs in the namespace of the socket attached to skb */
	net = skb->sk ? sock_net(skb->sk) : NULL;
	if (!net) {
		TOA_INFO("get netns %u fail, if_idx:%u sip:%pI4 dip:%pI4 sport:%u dport:%u sk:%px.\n",
			 nlh->nlmsg_pid, req->if_idx, &req->sip, &req->dip,
			 ntohs(req->sport), ntohs(req->dport), skb->sk);
		return -EINVAL;
	}

	memset(&vinfo, 0, sizeof(vinfo));
	err = vpc_inet_dump_one_sk(net, req, &vinfo);
	if (err != 0)
		return err;

	err = vpc_vtoa_netlink_send(net, nlh, &vinfo);

	TOA_DBG("pid %u req[if_idx:%u sip:%pI4 dip:%pI4 sport:%u dport:%u] info["
		"vpcid:%u vmip:%pI4 sport:%u vip:%pI4 vport:%u] err:%d\n",
		nlh->nlmsg_pid, req->if_idx, &req->sip, &req->dip,
		ntohs(req->sport), ntohs(req->dport), vinfo.i_vpcid, &vinfo.i_vmip,
		ntohs(vinfo.i_sport), &vinfo.i_vip, ntohs(vinfo.i_vport), err);

	return err;
}
/*
 * Operations of the "VPC_VTOA" family: a single VTOA_CMD_GET request
 * handled by vpc_vtoa_get(). The table is read-only, so it is const
 * (genl_family.ops takes a pointer to const); members left out
 * (.flags, .dumpit) are zero-initialized.
 */
static const struct genl_ops vtoa_genl_ops[] = {
	{
		.cmd = VTOA_CMD_GET,
		.doit = vpc_vtoa_get,
	},
};
/*
 * Generic netlink family "VPC_VTOA". It is registered in toa_init() and
 * unregistered in toa_exit(); user space uses it to query the vtoa data
 * of a TCP connection (VTOA_CMD_GET).
 */
static struct genl_family vtoa_genl_family = {
/* no family-specific header after the genetlink header */
.hdrsize = 0,
.name = "VPC_VTOA",
.version = 1,
.maxattr = VTOA_ATTR_MAX,
/* the family may be used from network namespaces other than init_net */
.netnsok = 1,
.policy = vtoa_policy,
.module = THIS_MODULE,
.ops = vtoa_genl_ops,
.n_ops = ARRAY_SIZE(vtoa_genl_ops),
};
/*
 * Resolve the address of kallsyms_lookup_name and store it in
 * ksyms_lookup_name. A kprobe is registered on the symbol only to read the
 * address it resolves to (kp_exec.addr) and is unregistered right away.
 *
 * @return 0 on success, -1 if the kprobe cannot be registered.
 */
static int ksyms_lookup_init(void)
{
	kp_exec.symbol_name = "kallsyms_lookup_name";

	if (register_kprobe(&kp_exec) < 0) {
		TOA_INFO("cannot find kallsyms_lookup_name by kprobe.\n");
		return -1;
	}

	TOA_INFO("kallsyms_lookup_name at %p\n", (void *)kp_exec.addr);
	ksyms_lookup_name = (unsigned long (*)(const char *))kp_exec.addr;

	/* the probe was only needed to obtain the address */
	unregister_kprobe(&kp_exec);

	return 0;
}
/*
* TOA module init and destory
* TOA module init and destory
*/
/* module init */
static int __init toa_init(void)
static int __init
toa_init(void)
{
TOA_INFO("TOA " TOA_VERSION " by pukong.wjm\n");
if (ksyms_lookup_init())
return 1;
/* alloc statistics array for toa */
ext_stats = alloc_percpu(struct toa_stat_mib);
if (NULL == ext_stats)
if (NULL == (ext_stats = alloc_percpu(struct toa_stat_mib)))
return 1;
proc_create("toa_stats", 0, init_net.proc_net, &toa_stats_ops);
proc_create("toa_stats", 0, init_net.proc_net, &toa_stats_fops);
/* get the address of function sock_def_readable
* so later we can know whether the sock is for rpc, tux or others
*/
sk_data_ready_addr = ksyms_lookup_name("sock_def_readable");
TOA_INFO("CPU [%u] sk_data_ready_addr = kallsyms_lookup_name(sock_def_readable) = %lu\n",
smp_processor_id(), sk_data_ready_addr);
if(0 == sk_data_ready_addr) {
TOA_INFO("cannot find sock_def_readable.\n");
goto err;
}
if (genl_register_family(&vtoa_genl_family) != 0){
TOA_INFO("vtoa_genl_family register fail.\n");
goto err;
}
/* hook funcs for parse and get toa */
if (!hook_toa_functions())
goto err;
hook_toa_functions();
TOA_INFO("toa loaded\n");
return 0;
@ -400,11 +983,13 @@ err:
}
/* module cleanup*/
static void __exit toa_exit(void)
static void __exit
toa_exit(void)
{
unhook_toa_functions();
synchronize_net();
genl_unregister_family(&vtoa_genl_family);
remove_proc_entry("toa_stats", init_net.proc_net);
if (NULL != ext_stats) {
free_percpu(ext_stats);

View File

@ -20,27 +20,28 @@
#include <net/ipv6.h>
#include <net/transp_v6.h>
#define TOA_VERSION "1.0.0.0"
#define TOA_VERSION "2.0.0.0"
#define TOA_DBG(msg...) \
do { \
printk(KERN_DEBUG "[DEBUG] TOA: " msg); \
} while (0)
#define TOA_DBG(msg,...) \
do { \
if (0xff==dbg){printk(KERN_DEBUG "[DEBUG] TOA: comm:%s pid:%d " msg, current->comm, (int)current->pid, ##__VA_ARGS__);} \
} while (0)
#define TOA_INFO(msg...) \
do { \
if (net_ratelimit()) \
printk(KERN_INFO "TOA: " msg);\
} while (0)
do { \
if(net_ratelimit()) \
printk(KERN_INFO "TOA: " msg);\
} while(0)
#define TCPOPT_TOA 200
/* MUST be 4n !!!! */
/* |opcode|size|ip+port| = 1 + 1 + 6 */
#define TCPOLEN_TOA 8
#define TCPOPT_TOA_ALI_CIP 0xfe
/* |opcode|size|sport|sip| = 1 + 1 + 2 + 4 */
#define TCPOLEN_TOA_ALI_CIP 8
#define TCPOPT_REAL_CLIENTIP 200
/* |opcode|size|vmip+sport| = 1 + 1 + 6 */
#define TCPOLEN_REAL_CLIENTIP 8
#define TCPOPT_VM_VPCID 201
/* |opcode=1|opcode=1|opcode|size|vpcid| = 1 + 1 + 4 */
#define TCPOLEN_VM_VPCID 6
#define TCPOPT_VIP 202
/* |opcode|size|vip+vport| = 1 + 1 + 6 */
#define TCPOLEN_VIP 8
/* MUST be 4 bytes alignment */
struct toa_data {
@ -58,6 +59,12 @@ enum {
GETNAME_TOA_MISMATCH_CNT,
GETNAME_TOA_BYPASS_CNT,
GETNAME_TOA_EMPTY_CNT,
GETNAME_VTOA_GET_CNT,
GETNAME_VTOA_GET_ATTR_ERR_CNT,
GETNAME_VTOA_GET_LOCKUP_ERR_CNT,
GETNAME_VTOA_GET_NETLINK_ERR_CNT,
GETNAME_VTOA_GETPEERNAME_IPV4_ERR_CNT,
GETNAME_VTOA_GETPEERNAME_IPV6_ERR_CNT,
TOA_STAT_LAST
};
@ -67,13 +74,13 @@ struct toa_stats_entry {
};
#define TOA_STAT_ITEM(_name, _entry) { \
.name = _name, \
.entry = _entry, \
.name = _name, \
.entry = _entry, \
}
#define TOA_STAT_END { \
NULL, \
0, \
NULL, \
0, \
}
struct toa_stat_mib {
@ -81,7 +88,11 @@ struct toa_stat_mib {
};
#define DEFINE_TOA_STAT(type, name) \
__typeof__(type)(*name)
__typeof__(type) *name
#define TOA_INC_STATS(mib, field) \
(per_cpu_ptr(mib, smp_processor_id())->mibs[field]++)
(per_cpu_ptr(mib, smp_processor_id())->mibs[field]++)
#endif