bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock
This patch adds a helper function BPF_FUNC_tcp_sock and it is currently available for cg_skb and sched_(cls|act): struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk); int cg_skb_foo(struct __sk_buff *skb) { struct bpf_tcp_sock *tp; struct bpf_sock *sk; __u32 snd_cwnd; sk = skb->sk; if (!sk) return 1; tp = bpf_tcp_sock(sk); if (!tp) return 1; snd_cwnd = tp->snd_cwnd; /* ... */ return 1; } A 'struct bpf_tcp_sock' is also added to the uapi bpf.h to provide read-only access. bpf_tcp_sock has all the existing tcp_sock fields that have already been exposed by bpf_sock_ops, i.e. no new tcp_sock fields are exposed in bpf.h. This helper returns a pointer to the tcp_sock. If it is not a tcp_sock or it cannot be traced back to a tcp_sock by sk_to_full_sk(), it returns NULL. Hence, the caller needs to check for NULL before accessing it. The current use case is to expose members from tcp_sock to allow a cg_skb_bpf_prog to provide per cgroup traffic policing/shaping. Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
9b1f3d6e5a
commit
655a51e536
|
@ -204,6 +204,7 @@ enum bpf_return_type {
|
|||
RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
|
||||
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
|
||||
RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
|
||||
RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
|
||||
};
|
||||
|
||||
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
|
||||
|
@ -259,6 +260,8 @@ enum bpf_reg_type {
|
|||
PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
|
||||
PTR_TO_SOCK_COMMON, /* reg points to sock_common */
|
||||
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
|
||||
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
|
||||
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
|
||||
};
|
||||
|
||||
/* The information passed from prog-specific *_is_valid_access
|
||||
|
@ -956,4 +959,31 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_INET
|
||||
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
|
||||
struct bpf_insn_access_aux *info);
|
||||
|
||||
u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
|
||||
const struct bpf_insn *si,
|
||||
struct bpf_insn *insn_buf,
|
||||
struct bpf_prog *prog,
|
||||
u32 *target_size);
|
||||
#else
|
||||
static inline bool bpf_tcp_sock_is_valid_access(int off, int size,
|
||||
enum bpf_access_type type,
|
||||
struct bpf_insn_access_aux *info)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
|
||||
const struct bpf_insn *si,
|
||||
struct bpf_insn *insn_buf,
|
||||
struct bpf_prog *prog,
|
||||
u32 *target_size)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_INET */
|
||||
|
||||
#endif /* _LINUX_BPF_H */
|
||||
|
|
|
@ -2337,6 +2337,15 @@ union bpf_attr {
|
|||
* Return
|
||||
* A **struct bpf_sock** pointer on success, or NULL in
|
||||
* case of failure.
|
||||
*
|
||||
* struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
|
||||
* Description
|
||||
* This helper gets a **struct bpf_tcp_sock** pointer from a
|
||||
* **struct bpf_sock** pointer.
|
||||
*
|
||||
* Return
|
||||
* A **struct bpf_tcp_sock** pointer on success, or NULL in
|
||||
* case of failure.
|
||||
*/
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
|
@ -2434,7 +2443,8 @@ union bpf_attr {
|
|||
FN(rc_pointer_rel), \
|
||||
FN(spin_lock), \
|
||||
FN(spin_unlock), \
|
||||
FN(sk_fullsock),
|
||||
FN(sk_fullsock), \
|
||||
FN(tcp_sock),
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
* function eBPF program intends to call
|
||||
|
@ -2616,6 +2626,45 @@ struct bpf_sock {
|
|||
__u32 state;
|
||||
};
|
||||
|
||||
struct bpf_tcp_sock {
|
||||
__u32 snd_cwnd; /* Sending congestion window */
|
||||
__u32 srtt_us; /* smoothed round trip time << 3 in usecs */
|
||||
__u32 rtt_min;
|
||||
__u32 snd_ssthresh; /* Slow start size threshold */
|
||||
__u32 rcv_nxt; /* What we want to receive next */
|
||||
__u32 snd_nxt; /* Next sequence we send */
|
||||
__u32 snd_una; /* First byte we want an ack for */
|
||||
__u32 mss_cache; /* Cached effective mss, not including SACKS */
|
||||
__u32 ecn_flags; /* ECN status bits. */
|
||||
__u32 rate_delivered; /* saved rate sample: packets delivered */
|
||||
__u32 rate_interval_us; /* saved rate sample: time elapsed */
|
||||
__u32 packets_out; /* Packets which are "in flight" */
|
||||
__u32 retrans_out; /* Retransmitted packets out */
|
||||
__u32 total_retrans; /* Total retransmits for entire connection */
|
||||
__u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
|
||||
* total number of segments in.
|
||||
*/
|
||||
__u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
|
||||
* total number of data segments in.
|
||||
*/
|
||||
__u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
|
||||
* The total number of segments sent.
|
||||
*/
|
||||
__u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
|
||||
* total number of data segments sent.
|
||||
*/
|
||||
__u32 lost_out; /* Lost packets */
|
||||
__u32 sacked_out; /* SACK'd packets */
|
||||
__u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
|
||||
* sum(delta(rcv_nxt)), or how many bytes
|
||||
* were acked.
|
||||
*/
|
||||
__u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
|
||||
* sum(delta(snd_una)), or how many bytes
|
||||
* were acked.
|
||||
*/
|
||||
};
|
||||
|
||||
struct bpf_sock_tuple {
|
||||
union {
|
||||
struct {
|
||||
|
|
|
@ -334,14 +334,16 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
|
|||
static bool type_is_sk_pointer(enum bpf_reg_type type)
|
||||
{
|
||||
return type == PTR_TO_SOCKET ||
|
||||
type == PTR_TO_SOCK_COMMON;
|
||||
type == PTR_TO_SOCK_COMMON ||
|
||||
type == PTR_TO_TCP_SOCK;
|
||||
}
|
||||
|
||||
static bool reg_type_may_be_null(enum bpf_reg_type type)
|
||||
{
|
||||
return type == PTR_TO_MAP_VALUE_OR_NULL ||
|
||||
type == PTR_TO_SOCKET_OR_NULL ||
|
||||
type == PTR_TO_SOCK_COMMON_OR_NULL;
|
||||
type == PTR_TO_SOCK_COMMON_OR_NULL ||
|
||||
type == PTR_TO_TCP_SOCK_OR_NULL;
|
||||
}
|
||||
|
||||
static bool type_is_refcounted(enum bpf_reg_type type)
|
||||
|
@ -407,6 +409,8 @@ static const char * const reg_type_str[] = {
|
|||
[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
|
||||
[PTR_TO_SOCK_COMMON] = "sock_common",
|
||||
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
|
||||
[PTR_TO_TCP_SOCK] = "tcp_sock",
|
||||
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
|
||||
};
|
||||
|
||||
static char slot_type_char[] = {
|
||||
|
@ -1209,6 +1213,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
|
|||
case PTR_TO_SOCKET_OR_NULL:
|
||||
case PTR_TO_SOCK_COMMON:
|
||||
case PTR_TO_SOCK_COMMON_OR_NULL:
|
||||
case PTR_TO_TCP_SOCK:
|
||||
case PTR_TO_TCP_SOCK_OR_NULL:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -1662,6 +1668,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
|
|||
case PTR_TO_SOCKET:
|
||||
valid = bpf_sock_is_valid_access(off, size, t, &info);
|
||||
break;
|
||||
case PTR_TO_TCP_SOCK:
|
||||
valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
|
||||
break;
|
||||
default:
|
||||
valid = false;
|
||||
}
|
||||
|
@ -1823,6 +1832,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
|
|||
case PTR_TO_SOCK_COMMON:
|
||||
pointer_desc = "sock_common ";
|
||||
break;
|
||||
case PTR_TO_TCP_SOCK:
|
||||
pointer_desc = "tcp_sock ";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -3148,6 +3160,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
|
|||
/* For mark_ptr_or_null_reg() */
|
||||
regs[BPF_REG_0].id = ++env->id_gen;
|
||||
}
|
||||
} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
|
||||
mark_reg_known_zero(env, regs, BPF_REG_0);
|
||||
regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
|
||||
regs[BPF_REG_0].id = ++env->id_gen;
|
||||
} else {
|
||||
verbose(env, "unknown return type %d of func %s#%d\n",
|
||||
fn->ret_type, func_id_name(func_id), func_id);
|
||||
|
@ -3409,6 +3425,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
|
|||
case PTR_TO_SOCKET_OR_NULL:
|
||||
case PTR_TO_SOCK_COMMON:
|
||||
case PTR_TO_SOCK_COMMON_OR_NULL:
|
||||
case PTR_TO_TCP_SOCK:
|
||||
case PTR_TO_TCP_SOCK_OR_NULL:
|
||||
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
|
||||
dst, reg_type_str[ptr_reg->type]);
|
||||
return -EACCES;
|
||||
|
@ -4644,6 +4662,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
|
|||
reg->type = PTR_TO_SOCKET;
|
||||
} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
|
||||
reg->type = PTR_TO_SOCK_COMMON;
|
||||
} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
|
||||
reg->type = PTR_TO_TCP_SOCK;
|
||||
}
|
||||
if (is_null || !(reg_is_refcounted(reg) ||
|
||||
reg_may_point_to_spin_lock(reg))) {
|
||||
|
@ -5839,6 +5859,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
|
|||
case PTR_TO_SOCKET_OR_NULL:
|
||||
case PTR_TO_SOCK_COMMON:
|
||||
case PTR_TO_SOCK_COMMON_OR_NULL:
|
||||
case PTR_TO_TCP_SOCK:
|
||||
case PTR_TO_TCP_SOCK_OR_NULL:
|
||||
/* Only valid matches are exact, which memcmp() above
|
||||
* would have accepted
|
||||
*/
|
||||
|
@ -6161,6 +6183,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
|
|||
case PTR_TO_SOCKET_OR_NULL:
|
||||
case PTR_TO_SOCK_COMMON:
|
||||
case PTR_TO_SOCK_COMMON_OR_NULL:
|
||||
case PTR_TO_TCP_SOCK:
|
||||
case PTR_TO_TCP_SOCK_OR_NULL:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
|
@ -7166,6 +7190,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
|
|||
case PTR_TO_SOCK_COMMON:
|
||||
convert_ctx_access = bpf_sock_convert_ctx_access;
|
||||
break;
|
||||
case PTR_TO_TCP_SOCK:
|
||||
convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -5315,6 +5315,79 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
|
|||
.arg5_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
|
||||
struct bpf_insn_access_aux *info)
|
||||
{
|
||||
if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked))
|
||||
return false;
|
||||
|
||||
if (off % size != 0)
|
||||
return false;
|
||||
|
||||
switch (off) {
|
||||
case offsetof(struct bpf_tcp_sock, bytes_received):
|
||||
case offsetof(struct bpf_tcp_sock, bytes_acked):
|
||||
return size == sizeof(__u64);
|
||||
default:
|
||||
return size == sizeof(__u32);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Rewrite one load from struct bpf_tcp_sock (instruction @si) into a load
 * from the matching location inside struct tcp_sock.
 *
 * Replacement instructions are written to @insn_buf.  The return value is
 * the number of instructions written; it is 0 when si->off matches neither
 * a common field nor rtt_min.
 *
 * @type, @prog and @target_size are not referenced directly in this body.
 */
u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
				    const struct bpf_insn *si,
				    struct bpf_insn *insn_buf,
				    struct bpf_prog *prog, u32 *target_size)
{
	/* Write cursor; the macro below advances it, so keep the name. */
	struct bpf_insn *insn = insn_buf;

/*
 * Emit a single load of tcp_sock.FIELD, sized after the kernel member.
 * Compilation fails if that member is wider than its bpf_tcp_sock twin.
 */
#define BPF_TCP_SOCK_GET_COMMON(FIELD)					\
	do {								\
		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) >	\
			     FIELD_SIZEOF(struct bpf_tcp_sock, FIELD));	\
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
				      si->dst_reg, si->src_reg,		\
				      offsetof(struct tcp_sock, FIELD));\
	} while (0)

	/*
	 * NOTE(review): CONVERT_COMMON_TCP_SOCK_FIELDS is defined outside
	 * this block; presumably it applies the macro above to the field
	 * selected by si->off -- confirm against its definition.
	 */
	CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock,
				       BPF_TCP_SOCK_GET_COMMON);

	/* Something was emitted for a common field: done. */
	if (insn > insn_buf)
		return insn - insn_buf;

	/*
	 * rtt_min is a struct minmax inside tcp_sock, so it cannot use the
	 * plain same-name load: read the 'v' member of the minmax_sample
	 * located at the start of tcp_sock.rtt_min instead.
	 */
	if (si->off == offsetof(struct bpf_tcp_sock, rtt_min)) {
		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
			     sizeof(struct minmax));
		BUILD_BUG_ON(sizeof(struct minmax) <
			     sizeof(struct minmax_sample));

		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      offsetof(struct tcp_sock, rtt_min) +
				      offsetof(struct minmax_sample, v));
	}

	return insn - insn_buf;
}
|
||||
|
||||
/*
 * bpf_tcp_sock() helper body.
 *
 * @sk is first passed through sk_to_full_sk().  The resulting socket is
 * returned (as an unsigned long) only if sk_fullsock() accepts it and its
 * protocol is IPPROTO_TCP; in every other case NULL is returned.
 */
BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
{
	struct sock *full_sk = sk_to_full_sk(sk);

	if (!sk_fullsock(full_sk))
		return (unsigned long)NULL;

	if (full_sk->sk_protocol != IPPROTO_TCP)
		return (unsigned long)NULL;

	return (unsigned long)full_sk;
}
|
||||
|
||||
static const struct bpf_func_proto bpf_tcp_sock_proto = {
|
||||
.func = bpf_tcp_sock,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
|
||||
.arg1_type = ARG_PTR_TO_SOCK_COMMON,
|
||||
};
|
||||
|
||||
#endif /* CONFIG_INET */
|
||||
|
||||
bool bpf_helper_changes_pkt_data(void *func)
|
||||
|
@ -5470,6 +5543,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
|||
return &bpf_get_local_storage_proto;
|
||||
case BPF_FUNC_sk_fullsock:
|
||||
return &bpf_sk_fullsock_proto;
|
||||
#ifdef CONFIG_INET
|
||||
case BPF_FUNC_tcp_sock:
|
||||
return &bpf_tcp_sock_proto;
|
||||
#endif
|
||||
default:
|
||||
return sk_filter_func_proto(func_id, prog);
|
||||
}
|
||||
|
@ -5560,6 +5637,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
|||
return &bpf_sk_lookup_udp_proto;
|
||||
case BPF_FUNC_sk_release:
|
||||
return &bpf_sk_release_proto;
|
||||
case BPF_FUNC_tcp_sock:
|
||||
return &bpf_tcp_sock_proto;
|
||||
#endif
|
||||
default:
|
||||
return bpf_base_func_proto(func_id);
|
||||
|
|
Loading…
Reference in New Issue