net/tls: prevent skb_orphan() from leaking TLS plain text with offload
sk_validate_xmit_skb() and drivers depend on the sk member of
struct sk_buff to identify segments requiring encryption.
Any operation which removes or does not preserve the original TLS
socket such as skb_orphan() or skb_clone() will cause clear text
leaks.
Make the TCP socket underlying an offloaded TLS connection
mark all skbs as decrypted, if TLS TX is in offload mode.
Then in sk_validate_xmit_skb() catch skbs which have no socket
(or a socket with no validation) and decrypted flag set.
Note that CONFIG_SOCK_VALIDATE_XMIT, CONFIG_TLS_DEVICE and
sk->sk_validate_xmit_skb are slightly interchangeable right now,
they all imply TLS offload. The new checks are guarded by
CONFIG_TLS_DEVICE because that's the option guarding the
sk_buff->decrypted member.
A second, smaller issue with orphaning is that it breaks
the guarantee that packets will be delivered to device
queues in-order. All TLS offload drivers depend on that
scheduling property. This means skb_orphan_partial()'s
trick of preserving partial socket references will cause
issues in the drivers. We need a full orphan, and as a
result netem delay/throttling will cause all TLS offload
skbs to be dropped.
Reusing the sk_buff->decrypted flag also protects from
leaking clear text when incoming, decrypted skb is redirected
(e.g. by TC).
See commit 0608c69c9a ("bpf: sk_msg, sock{map|hash} redirect through ULP")
for justification of why the internal flag is safe.
The only location which could leak the flag in is tcp_bpf_sendmsg(),
which is taken care of by clearing the previously unused bit.
v2:
- remove superfluous decrypted mark copy (Willem);
- remove the stale doc entry (Boris);
- rely entirely on EOR marking to prevent coalescing (Boris);
- use an internal sendpages flag instead of marking the socket
(Boris).
v3 (Willem):
- reorganize the can_skb_orphan_partial() condition;
- fix the flag leak-in through tcp_bpf_sendmsg.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
0de94de180
commit
414776621d
|
@ -506,21 +506,3 @@ Drivers should ignore the changes to TLS the device feature flags.
|
|||
These flags will be acted upon accordingly by the core ``ktls`` code.
|
||||
TLS device feature flags only control adding of new TLS connection
|
||||
offloads, old connections will remain active after flags are cleared.
|
||||
|
||||
Known bugs
|
||||
==========
|
||||
|
||||
skb_orphan() leaks clear text
|
||||
-----------------------------
|
||||
|
||||
Currently drivers depend on the :c:member:`sk` member of
|
||||
:c:type:`struct sk_buff <sk_buff>` to identify segments requiring
|
||||
encryption. Any operation which removes or does not preserve the socket
|
||||
association such as :c:func:`skb_orphan` or :c:func:`skb_clone`
|
||||
will cause the driver to miss the packets and lead to clear text leaks.
|
||||
|
||||
Redirects leak clear text
|
||||
-------------------------
|
||||
|
||||
In the RX direction, if segment has already been decrypted by the device
|
||||
and it gets redirected or mirrored - clear text will be transmitted out.
|
||||
|
|
|
@ -1374,6 +1374,14 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
|
|||
to->l4_hash = from->l4_hash;
|
||||
};
|
||||
|
||||
/* skb_copy_decrypted - propagate the "decrypted" mark from one skb to another.
 * @to:   skb that receives the mark
 * @from: skb whose mark is copied
 *
 * The decrypted member is guarded by CONFIG_TLS_DEVICE, so without that
 * option this helper has an empty body.
 */
static inline void skb_copy_decrypted(struct sk_buff *to,
|
||||
const struct sk_buff *from)
|
||||
{
|
||||
#ifdef CONFIG_TLS_DEVICE
|
||||
to->decrypted = from->decrypted;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef NET_SKBUFF_DATA_USES_OFFSET
|
||||
static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
|
||||
{
|
||||
|
|
|
@ -292,6 +292,9 @@ struct ucred {
|
|||
#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */
|
||||
#define MSG_EOF MSG_FIN
|
||||
#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
|
||||
#define MSG_SENDPAGE_DECRYPTED 0x100000 /* sendpage() internal : page may carry
|
||||
* plain text and require encryption
|
||||
*/
|
||||
|
||||
#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
|
||||
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
|
||||
|
|
|
@ -2482,6 +2482,7 @@ static inline bool sk_fullsock(const struct sock *sk)
|
|||
|
||||
/* Checks if this SKB belongs to an HW offloaded socket
|
||||
* and whether any SW fallbacks are required based on dev.
|
||||
* Check decrypted mark in case skb_orphan() cleared socket.
|
||||
*/
|
||||
static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
|
||||
struct net_device *dev)
|
||||
|
@ -2489,8 +2490,15 @@ static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
|
|||
#ifdef CONFIG_SOCK_VALIDATE_XMIT
|
||||
struct sock *sk = skb->sk;
|
||||
|
||||
if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb)
|
||||
if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) {
|
||||
skb = sk->sk_validate_xmit_skb(sk, dev, skb);
|
||||
#ifdef CONFIG_TLS_DEVICE
|
||||
} else if (unlikely(skb->decrypted)) {
|
||||
pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n");
|
||||
kfree_skb(skb);
|
||||
skb = NULL;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
return skb;
|
||||
|
|
|
@ -1992,6 +1992,19 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
|
|||
}
|
||||
EXPORT_SYMBOL(skb_set_owner_w);
|
||||
|
||||
/* Tell skb_orphan_partial() whether @skb may be partially orphaned.
 * With CONFIG_TLS_DEVICE, an skb marked decrypted is always refused.
 * Otherwise the answer is true only when the skb destructor is sock_wfree,
 * or tcp_wfree when CONFIG_INET is enabled.
 */
static bool can_skb_orphan_partial(const struct sk_buff *skb)
|
||||
{
|
||||
#ifdef CONFIG_TLS_DEVICE
|
||||
/* Drivers depend on in-order delivery for crypto offload,
|
||||
* partial orphan breaks out-of-order-OK logic.
|
||||
*/
|
||||
if (skb->decrypted)
|
||||
return false;
|
||||
#endif
|
||||
return (skb->destructor == sock_wfree ||
|
||||
(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
|
||||
}
|
||||
|
||||
/* This helper is used by netem, as it can hold packets in its
|
||||
* delay queue. We want to allow the owner socket to send more
|
||||
* packets, as if they were already TX completed by a typical driver.
|
||||
|
@ -2003,11 +2016,7 @@ void skb_orphan_partial(struct sk_buff *skb)
|
|||
if (skb_is_tcp_pure_ack(skb))
|
||||
return;
|
||||
|
||||
if (skb->destructor == sock_wfree
|
||||
#ifdef CONFIG_INET
|
||||
|| skb->destructor == tcp_wfree
|
||||
#endif
|
||||
) {
|
||||
if (can_skb_orphan_partial(skb)) {
|
||||
struct sock *sk = skb->sk;
|
||||
|
||||
if (refcount_inc_not_zero(&sk->sk_refcnt)) {
|
||||
|
|
|
@ -984,6 +984,9 @@ new_segment:
|
|||
if (!skb)
|
||||
goto wait_for_memory;
|
||||
|
||||
#ifdef CONFIG_TLS_DEVICE
|
||||
skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
|
||||
#endif
|
||||
skb_entail(sk, skb);
|
||||
copy = size_goal;
|
||||
}
|
||||
|
|
|
@ -398,10 +398,14 @@ more_data:
|
|||
static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
|
||||
{
|
||||
struct sk_msg tmp, *msg_tx = NULL;
|
||||
int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
|
||||
int copied = 0, err = 0;
|
||||
struct sk_psock *psock;
|
||||
long timeo;
|
||||
int flags;
|
||||
|
||||
/* Don't let internal do_tcp_sendpages() flags through */
|
||||
flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED);
|
||||
flags |= MSG_NO_SHARED_FRAGS;
|
||||
|
||||
psock = sk_psock_get(sk);
|
||||
if (unlikely(!psock))
|
||||
|
|
|
@ -1320,6 +1320,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
|
|||
buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
|
||||
if (!buff)
|
||||
return -ENOMEM; /* We'll just try again later. */
|
||||
skb_copy_decrypted(buff, skb);
|
||||
|
||||
sk->sk_wmem_queued += buff->truesize;
|
||||
sk_mem_charge(sk, buff->truesize);
|
||||
|
@ -1874,6 +1875,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
|
|||
buff = sk_stream_alloc_skb(sk, 0, gfp, true);
|
||||
if (unlikely(!buff))
|
||||
return -ENOMEM;
|
||||
skb_copy_decrypted(buff, skb);
|
||||
|
||||
sk->sk_wmem_queued += buff->truesize;
|
||||
sk_mem_charge(sk, buff->truesize);
|
||||
|
@ -2143,6 +2145,7 @@ static int tcp_mtu_probe(struct sock *sk)
|
|||
sk_mem_charge(sk, nskb->truesize);
|
||||
|
||||
skb = tcp_send_head(sk);
|
||||
skb_copy_decrypted(nskb, skb);
|
||||
|
||||
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
|
||||
TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
|
||||
|
|
|
@ -373,9 +373,9 @@ static int tls_push_data(struct sock *sk,
|
|||
struct tls_context *tls_ctx = tls_get_ctx(sk);
|
||||
struct tls_prot_info *prot = &tls_ctx->prot_info;
|
||||
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
|
||||
int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
|
||||
int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
|
||||
struct tls_record_info *record = ctx->open_record;
|
||||
int tls_push_record_flags;
|
||||
struct page_frag *pfrag;
|
||||
size_t orig_size = size;
|
||||
u32 max_open_record_len;
|
||||
|
@ -390,6 +390,9 @@ static int tls_push_data(struct sock *sk,
|
|||
if (sk->sk_err)
|
||||
return -sk->sk_err;
|
||||
|
||||
flags |= MSG_SENDPAGE_DECRYPTED;
|
||||
tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
|
||||
|
||||
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
|
||||
if (tls_is_partially_sent_record(tls_ctx)) {
|
||||
rc = tls_push_partial_record(sk, tls_ctx, flags);
|
||||
|
@ -576,7 +579,9 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
|
|||
gfp_t sk_allocation = sk->sk_allocation;
|
||||
|
||||
sk->sk_allocation = GFP_ATOMIC;
|
||||
tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL);
|
||||
tls_push_partial_record(sk, ctx,
|
||||
MSG_DONTWAIT | MSG_NOSIGNAL |
|
||||
MSG_SENDPAGE_DECRYPTED);
|
||||
sk->sk_allocation = sk_allocation;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue