Revert "defer call to mem_cgroup_sk_alloc()"
This patch effectively reverts commit 9f1c2674b3
("net: memcontrol: defer call to mem_cgroup_sk_alloc()"). Moving mem_cgroup_sk_alloc() to inet_csk_accept() completely breaks memcg socket memory accounting, as packets received before the memcg pointer is initialized are not accounted and cause a refcounting underflow on socket release. Actually, the use-after-free problem was fixed by commit c0576e3975
("net: call cgroup_sk_alloc() earlier in sk_clone_lock()") for the cgroup pointer. So, let's revert it and call mem_cgroup_sk_alloc() just before cgroup_sk_alloc(). This is safe, as we hold a reference to the socket we're cloning, and it holds a reference to the memcg. Also, let's drop the BUG_ON(mem_cgroup_is_root()) check from mem_cgroup_sk_alloc(). I see no reason why bumping the root memcg counter should be grounds for a panic, and there are no realistic ways to hit it. Signed-off-by: Roman Gushchin <guro@fb.com> Cc: Eric Dumazet <edumazet@google.com> Cc: David S. Miller <davem@davemloft.net> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
4db428a7c9
commit
edbe69ef2c
|
@ -5747,6 +5747,20 @@ void mem_cgroup_sk_alloc(struct sock *sk)
|
||||||
if (!mem_cgroup_sockets_enabled)
|
if (!mem_cgroup_sockets_enabled)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Socket cloning can throw us here with sk_memcg already
|
||||||
|
* filled. It won't however, necessarily happen from
|
||||||
|
* process context. So the test for root memcg given
|
||||||
|
* the current task's memcg won't help us in this case.
|
||||||
|
*
|
||||||
|
* Respecting the original socket's memcg is a better
|
||||||
|
* decision in this case.
|
||||||
|
*/
|
||||||
|
if (sk->sk_memcg) {
|
||||||
|
css_get(&sk->sk_memcg->css);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
memcg = mem_cgroup_from_task(current);
|
memcg = mem_cgroup_from_task(current);
|
||||||
if (memcg == root_mem_cgroup)
|
if (memcg == root_mem_cgroup)
|
||||||
|
|
|
@ -1683,16 +1683,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
|
||||||
newsk->sk_dst_pending_confirm = 0;
|
newsk->sk_dst_pending_confirm = 0;
|
||||||
newsk->sk_wmem_queued = 0;
|
newsk->sk_wmem_queued = 0;
|
||||||
newsk->sk_forward_alloc = 0;
|
newsk->sk_forward_alloc = 0;
|
||||||
|
|
||||||
/* sk->sk_memcg will be populated at accept() time */
|
|
||||||
newsk->sk_memcg = NULL;
|
|
||||||
|
|
||||||
atomic_set(&newsk->sk_drops, 0);
|
atomic_set(&newsk->sk_drops, 0);
|
||||||
newsk->sk_send_head = NULL;
|
newsk->sk_send_head = NULL;
|
||||||
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
|
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
|
||||||
atomic_set(&newsk->sk_zckey, 0);
|
atomic_set(&newsk->sk_zckey, 0);
|
||||||
|
|
||||||
sock_reset_flag(newsk, SOCK_DONE);
|
sock_reset_flag(newsk, SOCK_DONE);
|
||||||
|
mem_cgroup_sk_alloc(newsk);
|
||||||
cgroup_sk_alloc(&newsk->sk_cgrp_data);
|
cgroup_sk_alloc(&newsk->sk_cgrp_data);
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
|
|
@ -475,7 +475,6 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
|
||||||
}
|
}
|
||||||
spin_unlock_bh(&queue->fastopenq.lock);
|
spin_unlock_bh(&queue->fastopenq.lock);
|
||||||
}
|
}
|
||||||
mem_cgroup_sk_alloc(newsk);
|
|
||||||
out:
|
out:
|
||||||
release_sock(sk);
|
release_sock(sk);
|
||||||
if (req)
|
if (req)
|
||||||
|
|
Loading…
Reference in New Issue