Merge branch 'bpf-sockmap-fixes'
John Fastabend says: ==================== When I added the test_sockmap to selftests I mistakenly changed the test logic a bit. The result of this was that, in redirect cases, we ended up choosing the wrong sock from the BPF program and ended up sending to a socket that had no receive handler. The result was that the actual receive handler, running on a different socket, timed out and closed the socket. This results in errors (-EPIPE to be specific) on the sending side. This typically happens if the sender does not complete the send before the receive side times out. So depending on timing and the size of the send we may get errors. This exposed some bugs in the sockmap error path handling. This series fixes the errors. The primary issue is that we did not do proper memory accounting in these cases, which resulted in missing a sk_mem_uncharge(). This happened in the redirect path and in one case on the normal send path. See the three patches for the details. The other take-away from this is that we need to fix the test_sockmap and also add more negative test cases. That will happen in bpf-next. Finally, I tested this using the existing test_sockmap program, the older sockmap sample test script, and a few real use cases with Cilium. All of these seem to be working correctly. v2: fix compiler warning, drop iterator variable 'i' that is no longer used in patch 3. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
commit
b5b6ff7302
|
@ -326,6 +326,9 @@ retry:
|
|||
if (ret > 0) {
|
||||
if (apply)
|
||||
apply_bytes -= ret;
|
||||
|
||||
sg->offset += ret;
|
||||
sg->length -= ret;
|
||||
size -= ret;
|
||||
offset += ret;
|
||||
if (uncharge)
|
||||
|
@ -333,8 +336,6 @@ retry:
|
|||
goto retry;
|
||||
}
|
||||
|
||||
sg->length = size;
|
||||
sg->offset = offset;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -392,7 +393,8 @@ static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
|
|||
} while (i != md->sg_end);
|
||||
}
|
||||
|
||||
static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
|
||||
static void free_bytes_sg(struct sock *sk, int bytes,
|
||||
struct sk_msg_buff *md, bool charge)
|
||||
{
|
||||
struct scatterlist *sg = md->sg_data;
|
||||
int i = md->sg_start, free;
|
||||
|
@ -402,11 +404,13 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
|
|||
if (bytes < free) {
|
||||
sg[i].length -= bytes;
|
||||
sg[i].offset += bytes;
|
||||
sk_mem_uncharge(sk, bytes);
|
||||
if (charge)
|
||||
sk_mem_uncharge(sk, bytes);
|
||||
break;
|
||||
}
|
||||
|
||||
sk_mem_uncharge(sk, sg[i].length);
|
||||
if (charge)
|
||||
sk_mem_uncharge(sk, sg[i].length);
|
||||
put_page(sg_page(&sg[i]));
|
||||
bytes -= sg[i].length;
|
||||
sg[i].length = 0;
|
||||
|
@ -417,6 +421,7 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
|
|||
if (i == MAX_SKB_FRAGS)
|
||||
i = 0;
|
||||
}
|
||||
md->sg_start = i;
|
||||
}
|
||||
|
||||
static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
|
||||
|
@ -575,10 +580,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
|
|||
struct sk_msg_buff *md,
|
||||
int flags)
|
||||
{
|
||||
bool ingress = !!(md->flags & BPF_F_INGRESS);
|
||||
struct smap_psock *psock;
|
||||
struct scatterlist *sg;
|
||||
int i, err, free = 0;
|
||||
bool ingress = !!(md->flags & BPF_F_INGRESS);
|
||||
int err = 0;
|
||||
|
||||
sg = md->sg_data;
|
||||
|
||||
|
@ -606,16 +611,8 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
|
|||
out_rcu:
|
||||
rcu_read_unlock();
|
||||
out:
|
||||
i = md->sg_start;
|
||||
while (sg[i].length) {
|
||||
free += sg[i].length;
|
||||
put_page(sg_page(&sg[i]));
|
||||
sg[i].length = 0;
|
||||
i++;
|
||||
if (i == MAX_SKB_FRAGS)
|
||||
i = 0;
|
||||
}
|
||||
return free;
|
||||
free_bytes_sg(NULL, send, md, false);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline void bpf_md_init(struct smap_psock *psock)
|
||||
|
@ -700,19 +697,26 @@ more_data:
|
|||
err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
|
||||
lock_sock(sk);
|
||||
|
||||
if (unlikely(err < 0)) {
|
||||
free_start_sg(sk, m);
|
||||
psock->sg_size = 0;
|
||||
if (!cork)
|
||||
*copied -= send;
|
||||
} else {
|
||||
psock->sg_size -= send;
|
||||
}
|
||||
|
||||
if (cork) {
|
||||
free_start_sg(sk, m);
|
||||
psock->sg_size = 0;
|
||||
kfree(m);
|
||||
m = NULL;
|
||||
err = 0;
|
||||
}
|
||||
if (unlikely(err))
|
||||
*copied -= err;
|
||||
else
|
||||
psock->sg_size -= send;
|
||||
break;
|
||||
case __SK_DROP:
|
||||
default:
|
||||
free_bytes_sg(sk, send, m);
|
||||
free_bytes_sg(sk, send, m, true);
|
||||
apply_bytes_dec(psock, send);
|
||||
*copied -= send;
|
||||
psock->sg_size -= send;
|
||||
|
|
Loading…
Reference in New Issue