bpf: Fix bpf_tcp_sock and bpf_sk_fullsock issue related to bpf_sk_release

Lorenz Bauer [thanks!] reported that a ptr returned by bpf_tcp_sock(sk)
can still be accessed after bpf_sk_release(sk).
Both bpf_tcp_sock() and bpf_sk_fullsock() have the same issue.
This patch addresses them together.

A simple reproducer looks like this:

	sk = bpf_sk_lookup_tcp();
	/* if (!sk) ... */
	tp = bpf_tcp_sock(sk);
	/* if (!tp) ... */
	bpf_sk_release(sk);
	snd_cwnd = tp->snd_cwnd; /* oops! The verifier does not complain. */

The problem is that the verifier did not scrub the register state of
the tcp_sock ptr (tp) after bpf_sk_release(sk).

[ Note that when calling bpf_tcp_sock(sk), the sk is not always
  refcount-acquired. e.g. bpf_tcp_sock(skb->sk). The verifier works
  fine for this case. ]

Currently, the verifier does not track if a helper's return ptr (in REG_0)
is "carry"-ing one of its argument's refcount status. To carry this info,
the reg1->id needs to be stored in reg0.

One approach was tried, like "reg0->id = reg1->id", when calling
"bpf_tcp_sock()".  The main idea was to avoid adding another "ref_obj_id"
for the same reg.  However, overlapping the NULL marking and ref
tracking purpose in one "id" does not work well:

	ref_sk = bpf_sk_lookup_tcp();
	fullsock = bpf_sk_fullsock(ref_sk);
	tp = bpf_tcp_sock(ref_sk);
	if (!fullsock) {
	     bpf_sk_release(ref_sk);
	     return 0;
	}
	/* fullsock_reg->id is marked for NOT-NULL.
	 * Same for tp_reg->id because they have the same id.
	 */

	/* oops. verifier did not complain about the missing !tp check */
	snd_cwnd = tp->snd_cwnd;

Hence, a new "ref_obj_id" is needed in "struct bpf_reg_state".
With a new ref_obj_id, when bpf_sk_release(sk) is called, the verifier can
scrub all reg states which have a matching ref_obj_id.  It is done with the
changes in release_reg_references() in this patch.

While fixing it, sk_to_full_sk() is removed from bpf_tcp_sock() and
bpf_sk_fullsock() to prevent these helpers from returning
a ptr other than the one passed in. It will make bpf_sk_release(tp) possible:

	sk = bpf_sk_lookup_tcp();
	/* if (!sk) ... */
	tp = bpf_tcp_sock(sk);
	/* if (!tp) ... */
	bpf_sk_release(tp);

A separate helper "bpf_get_listener_sock()" will be added in a later
patch to do sk_to_full_sk().

Misc change notes:
- To allow bpf_sk_release(tp), the arg of bpf_sk_release() is changed
  from ARG_PTR_TO_SOCKET to ARG_PTR_TO_SOCK_COMMON.  ARG_PTR_TO_SOCKET
  is removed from bpf.h since no helper is using it.

- arg_type_is_refcounted() is renamed to arg_type_may_be_refcounted()
  because ARG_PTR_TO_SOCK_COMMON is now the only arg type it matches, and
  that type also accepts ptrs that are not refcounted (e.g. skb->sk).
  bpf_sk_release(), bpf_sk_fullsock() and bpf_tcp_sock() all
  take ARG_PTR_TO_SOCK_COMMON.

- check_refcount_ok() ensures that a helper for which is_acquire_function()
  is true cannot take an arg for which arg_type_may_be_refcounted() is true.

- The check_func_arg() can only allow one refcount-ed arg.  It is
  guaranteed by check_refcount_ok() which ensures at most one arg can be
  refcounted.  Hence, it is a verifier internal error if >1 refcount arg
  found in check_func_arg().

- In release_reference(), release_reference_state() is called
  first to ensure a match on "reg->ref_obj_id" can be found before
  scrubbing the reg states with release_reg_references().

- reg_is_refcounted() is no longer needed.
  1. In mark_ptr_or_null_regs(), its usage is replaced by
     "ref_obj_id && ref_obj_id == id" because,
     when is_null == true, release_reference_state() should only be
     called on the ref_obj_id obtained by an acquire helper (i.e.
     is_acquire_function() == true).  Otherwise, the following
     would happen:

	sk = bpf_sk_lookup_tcp();
	/* if (!sk) { ... } */
	fullsock = bpf_sk_fullsock(sk);
	if (!fullsock) {
		/*
		 * release_reference_state(fullsock_reg->ref_obj_id)
		 * where fullsock_reg->ref_obj_id == sk_reg->ref_obj_id.
		 *
		 * Hence, the following bpf_sk_release(sk) will fail
		 * because the ref state has already been released in the
		 * earlier release_reference_state(fullsock_reg->ref_obj_id).
		 */
		bpf_sk_release(sk);
	}

  2. In release_reg_references(), the current reg_is_refcounted() call
     is unnecessary because the id check is enough.

- The type_is_refcounted() and type_is_refcounted_or_null()
  are also no longer needed because reg_is_refcounted() is removed.

Fixes: 655a51e536 ("bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock")
Reported-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Martin KaFai Lau 2019-03-12 10:23:02 -07:00 committed by Alexei Starovoitov
parent 6bf21b54a5
commit 1b98658968
4 changed files with 116 additions and 64 deletions

View File

@ -193,7 +193,6 @@ enum bpf_arg_type {
ARG_PTR_TO_CTX, /* pointer to context */ ARG_PTR_TO_CTX, /* pointer to context */
ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
}; };

View File

@ -66,6 +66,46 @@ struct bpf_reg_state {
* same reference to the socket, to determine proper reference freeing. * same reference to the socket, to determine proper reference freeing.
*/ */
u32 id; u32 id;
/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
* from a pointer-cast helper, bpf_sk_fullsock() and
* bpf_tcp_sock().
*
* Consider the following where "sk" is a reference counted
* pointer returned from "sk = bpf_sk_lookup_tcp();":
*
* 1: sk = bpf_sk_lookup_tcp();
* 2: if (!sk) { return 0; }
* 3: fullsock = bpf_sk_fullsock(sk);
* 4: if (!fullsock) { bpf_sk_release(sk); return 0; }
* 5: tp = bpf_tcp_sock(fullsock);
* 6: if (!tp) { bpf_sk_release(sk); return 0; }
* 7: bpf_sk_release(sk);
* 8: snd_cwnd = tp->snd_cwnd; // verifier will complain
*
* After bpf_sk_release(sk) at line 7, both "fullsock" ptr and
* "tp" ptr should be invalidated also. In order to do that,
* the reg holding "fullsock" and "sk" need to remember
* the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id
* such that the verifier can reset all regs which have
* ref_obj_id matching the sk_reg->id.
*
* sk_reg->ref_obj_id is set to sk_reg->id at line 1.
* sk_reg->id will stay as NULL-marking purpose only.
* After NULL-marking is done, sk_reg->id can be reset to 0.
*
* After "fullsock = bpf_sk_fullsock(sk);" at line 3,
* fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id.
*
* After "tp = bpf_tcp_sock(fullsock);" at line 5,
* tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id
* which is the same as sk_reg->ref_obj_id.
*
* From the verifier perspective, if sk, fullsock and tp
* are not NULL, they are the same ptr with different
* reg->type. In particular, bpf_sk_release(tp) is also
* allowed and has the same effect as bpf_sk_release(sk).
*/
u32 ref_obj_id;
/* For scalar types (SCALAR_VALUE), this represents our knowledge of /* For scalar types (SCALAR_VALUE), this represents our knowledge of
* the actual value. * the actual value.
* For pointer types, this represents the variable part of the offset * For pointer types, this represents the variable part of the offset

View File

@ -212,7 +212,7 @@ struct bpf_call_arg_meta {
int access_size; int access_size;
s64 msize_smax_value; s64 msize_smax_value;
u64 msize_umax_value; u64 msize_umax_value;
int ptr_id; int ref_obj_id;
int func_id; int func_id;
}; };
@ -346,35 +346,15 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
type == PTR_TO_TCP_SOCK_OR_NULL; type == PTR_TO_TCP_SOCK_OR_NULL;
} }
static bool type_is_refcounted(enum bpf_reg_type type)
{
return type == PTR_TO_SOCKET;
}
static bool type_is_refcounted_or_null(enum bpf_reg_type type)
{
return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
}
static bool reg_is_refcounted(const struct bpf_reg_state *reg)
{
return type_is_refcounted(reg->type);
}
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{ {
return reg->type == PTR_TO_MAP_VALUE && return reg->type == PTR_TO_MAP_VALUE &&
map_value_has_spin_lock(reg->map_ptr); map_value_has_spin_lock(reg->map_ptr);
} }
static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg) static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{ {
return type_is_refcounted_or_null(reg->type); return type == ARG_PTR_TO_SOCK_COMMON;
}
static bool arg_type_is_refcounted(enum bpf_arg_type type)
{
return type == ARG_PTR_TO_SOCKET;
} }
/* Determine whether the function releases some resources allocated by another /* Determine whether the function releases some resources allocated by another
@ -392,6 +372,12 @@ static bool is_acquire_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_sk_lookup_udp; func_id == BPF_FUNC_sk_lookup_udp;
} }
static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_tcp_sock ||
func_id == BPF_FUNC_sk_fullsock;
}
/* string representation of 'enum bpf_reg_type' */ /* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = { static const char * const reg_type_str[] = {
[NOT_INIT] = "?", [NOT_INIT] = "?",
@ -465,7 +451,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (t == PTR_TO_STACK) if (t == PTR_TO_STACK)
verbose(env, ",call_%d", func(env, reg)->callsite); verbose(env, ",call_%d", func(env, reg)->callsite);
} else { } else {
verbose(env, "(id=%d", reg->id); verbose(env, "(id=%d ref_obj_id=%d", reg->id,
reg->ref_obj_id);
if (t != SCALAR_VALUE) if (t != SCALAR_VALUE)
verbose(env, ",off=%d", reg->off); verbose(env, ",off=%d", reg->off);
if (type_is_pkt_pointer(t)) if (type_is_pkt_pointer(t))
@ -2414,16 +2401,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
if (!type_is_sk_pointer(type)) if (!type_is_sk_pointer(type))
goto err_type; goto err_type;
} else if (arg_type == ARG_PTR_TO_SOCKET) { if (reg->ref_obj_id) {
expected_type = PTR_TO_SOCKET; if (meta->ref_obj_id) {
if (type != expected_type) verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
goto err_type; regno, reg->ref_obj_id,
if (meta->ptr_id || !reg->id) { meta->ref_obj_id);
verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n", return -EFAULT;
meta->ptr_id, reg->id); }
return -EFAULT; meta->ref_obj_id = reg->ref_obj_id;
} }
meta->ptr_id = reg->id;
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) { if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true)) if (process_spin_lock(env, regno, true))
@ -2740,32 +2726,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
return true; return true;
} }
static bool check_refcount_ok(const struct bpf_func_proto *fn) static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
{ {
int count = 0; int count = 0;
if (arg_type_is_refcounted(fn->arg1_type)) if (arg_type_may_be_refcounted(fn->arg1_type))
count++; count++;
if (arg_type_is_refcounted(fn->arg2_type)) if (arg_type_may_be_refcounted(fn->arg2_type))
count++; count++;
if (arg_type_is_refcounted(fn->arg3_type)) if (arg_type_may_be_refcounted(fn->arg3_type))
count++; count++;
if (arg_type_is_refcounted(fn->arg4_type)) if (arg_type_may_be_refcounted(fn->arg4_type))
count++; count++;
if (arg_type_is_refcounted(fn->arg5_type)) if (arg_type_may_be_refcounted(fn->arg5_type))
count++; count++;
/* A reference acquiring function cannot acquire
* another refcounted ptr.
*/
if (is_acquire_function(func_id) && count)
return false;
/* We only support one arg being unreferenced at the moment, /* We only support one arg being unreferenced at the moment,
* which is sufficient for the helper functions we have right now. * which is sufficient for the helper functions we have right now.
*/ */
return count <= 1; return count <= 1;
} }
static int check_func_proto(const struct bpf_func_proto *fn) static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{ {
return check_raw_mode_ok(fn) && return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) && check_arg_pair_ok(fn) &&
check_refcount_ok(fn) ? 0 : -EINVAL; check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
} }
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@ -2799,19 +2791,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
} }
static void release_reg_references(struct bpf_verifier_env *env, static void release_reg_references(struct bpf_verifier_env *env,
struct bpf_func_state *state, int id) struct bpf_func_state *state,
int ref_obj_id)
{ {
struct bpf_reg_state *regs = state->regs, *reg; struct bpf_reg_state *regs = state->regs, *reg;
int i; int i;
for (i = 0; i < MAX_BPF_REG; i++) for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].id == id) if (regs[i].ref_obj_id == ref_obj_id)
mark_reg_unknown(env, regs, i); mark_reg_unknown(env, regs, i);
bpf_for_each_spilled_reg(i, state, reg) { bpf_for_each_spilled_reg(i, state, reg) {
if (!reg) if (!reg)
continue; continue;
if (reg_is_refcounted(reg) && reg->id == id) if (reg->ref_obj_id == ref_obj_id)
__mark_reg_unknown(reg); __mark_reg_unknown(reg);
} }
} }
@ -2820,15 +2813,20 @@ static void release_reg_references(struct bpf_verifier_env *env,
* resources. Identify all copies of the same pointer and clear the reference. * resources. Identify all copies of the same pointer and clear the reference.
*/ */
static int release_reference(struct bpf_verifier_env *env, static int release_reference(struct bpf_verifier_env *env,
struct bpf_call_arg_meta *meta) int ref_obj_id)
{ {
struct bpf_verifier_state *vstate = env->cur_state; struct bpf_verifier_state *vstate = env->cur_state;
int err;
int i; int i;
for (i = 0; i <= vstate->curframe; i++) err = release_reference_state(cur_func(env), ref_obj_id);
release_reg_references(env, vstate->frame[i], meta->ptr_id); if (err)
return err;
return release_reference_state(cur_func(env), meta->ptr_id); for (i = 0; i <= vstate->curframe; i++)
release_reg_references(env, vstate->frame[i], ref_obj_id);
return 0;
} }
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@ -3047,7 +3045,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
memset(&meta, 0, sizeof(meta)); memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access; meta.pkt_access = fn->pkt_access;
err = check_func_proto(fn); err = check_func_proto(fn, func_id);
if (err) { if (err) {
verbose(env, "kernel subsystem misconfigured func %s#%d\n", verbose(env, "kernel subsystem misconfigured func %s#%d\n",
func_id_name(func_id), func_id); func_id_name(func_id), func_id);
@ -3093,7 +3091,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return err; return err;
} }
} else if (is_release_function(func_id)) { } else if (is_release_function(func_id)) {
err = release_reference(env, &meta); err = release_reference(env, meta.ref_obj_id);
if (err) { if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n", verbose(env, "func %s#%d reference has not been acquired before\n",
func_id_name(func_id), func_id); func_id_name(func_id), func_id);
@ -3154,8 +3152,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
if (id < 0) if (id < 0)
return id; return id;
/* For release_reference() */ /* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = id; regs[BPF_REG_0].id = id;
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = id;
} else { } else {
/* For mark_ptr_or_null_reg() */ /* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = ++env->id_gen; regs[BPF_REG_0].id = ++env->id_gen;
@ -3170,6 +3170,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL; return -EINVAL;
} }
if (is_ptr_cast_function(func_id))
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
do_refine_retval_range(regs, fn->ret_type, func_id, &meta); do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
err = check_map_func_compatibility(env, meta.map_ptr, func_id); err = check_map_func_compatibility(env, meta.map_ptr, func_id);
@ -4665,11 +4669,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
reg->type = PTR_TO_TCP_SOCK; reg->type = PTR_TO_TCP_SOCK;
} }
if (is_null || !(reg_is_refcounted(reg) || if (is_null) {
reg_may_point_to_spin_lock(reg))) { /* We don't need id and ref_obj_id from this point
/* We don't need id from this point onwards anymore, * onwards anymore, thus we should better reset it,
* thus we should better reset it, so that state * so that state pruning has chances to take effect.
* pruning has chances to take effect. */
reg->id = 0;
reg->ref_obj_id = 0;
} else if (!reg_may_point_to_spin_lock(reg)) {
/* For not-NULL ptr, reg->ref_obj_id will be reset
* in release_reg_references().
*
* reg->id is still used by spin_lock ptr. Other
* than spin_lock ptr type, reg->id can be reset.
*/ */
reg->id = 0; reg->id = 0;
} }
@ -4684,11 +4696,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
{ {
struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_func_state *state = vstate->frame[vstate->curframe];
struct bpf_reg_state *reg, *regs = state->regs; struct bpf_reg_state *reg, *regs = state->regs;
u32 ref_obj_id = regs[regno].ref_obj_id;
u32 id = regs[regno].id; u32 id = regs[regno].id;
int i, j; int i, j;
if (reg_is_refcounted_or_null(&regs[regno]) && is_null) if (ref_obj_id && ref_obj_id == id && is_null)
release_reference_state(state, id); /* regs[regno] is in the " == NULL" branch.
* No one could have freed the reference state before
* doing the NULL check.
*/
WARN_ON_ONCE(release_reference_state(state, id));
for (i = 0; i < MAX_BPF_REG; i++) for (i = 0; i < MAX_BPF_REG; i++)
mark_ptr_or_null_reg(state, &regs[i], id, is_null); mark_ptr_or_null_reg(state, &regs[i], id, is_null);

View File

@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
{ {
sk = sk_to_full_sk(sk);
return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
} }
@ -5266,7 +5264,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.func = bpf_sk_release, .func = bpf_sk_release,
.gpl_only = false, .gpl_only = false,
.ret_type = RET_INTEGER, .ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_SOCKET, .arg1_type = ARG_PTR_TO_SOCK_COMMON,
}; };
BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@ -5407,8 +5405,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
{ {
sk = sk_to_full_sk(sk);
if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
return (unsigned long)sk; return (unsigned long)sk;