Commit 41477662 authored by Jakub Kicinski, committed by David S. Miller

net/tls: prevent skb_orphan() from leaking TLS plain text with offload

sk_validate_xmit_skb() and drivers depend on the sk member of
struct sk_buff to identify segments requiring encryption.
Any operation which removes or does not preserve the original TLS
socket, such as skb_orphan() or skb_clone(), will cause clear text
leaks.

Make the TCP socket underlying an offloaded TLS connection
mark all skbs as decrypted if TLS TX is in offload mode.
Then, in sk_validate_xmit_skb(), catch skbs which have no socket
(or a socket with no validation callback) and the decrypted flag set.
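
For reference, a condensed sketch of the resulting check (the complete
hunk against sk_validate_xmit_skb() is in the diff below); the comments
are editorial:

    static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
                                                        struct net_device *dev)
    {
    #ifdef CONFIG_SOCK_VALIDATE_XMIT
            struct sock *sk = skb->sk;

            if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) {
                    /* normal path: the socket's validate callback handles it */
                    skb = sk->sk_validate_xmit_skb(sk, dev, skb);
    #ifdef CONFIG_TLS_DEVICE
            } else if (unlikely(skb->decrypted)) {
                    /* no socket (or no callback) but payload is plain text:
                     * drop rather than transmit clear text
                     */
                    pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n");
                    kfree_skb(skb);
                    skb = NULL;
    #endif
            }
    #endif
            return skb;
    }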

Note that CONFIG_SOCK_VALIDATE_XMIT, CONFIG_TLS_DEVICE and
sk->sk_validate_xmit_skb are slightly interchangeable right now;
they all imply TLS offload. The new checks are guarded by
CONFIG_TLS_DEVICE because that's the option guarding the
sk_buff->decrypted member.

A second, smaller issue with orphaning is that it breaks
the guarantee that packets will be delivered to device
queues in order. All TLS offload drivers depend on that
scheduling property. This means skb_orphan_partial()'s
trick of preserving partial socket references will cause
issues in the drivers, so we need a full orphan; as a
result, netem delay/throttling will cause all TLS offload
skbs to be dropped.
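
Condensed, the new helper that gates the partial orphan looks like this
(the full version is in the diff below); the comment wording is editorial:

    static bool can_skb_orphan_partial(const struct sk_buff *skb)
    {
    #ifdef CONFIG_TLS_DEVICE
            /* Crypto offload needs in-order delivery; a partial orphan
             * would let netem reorder skbs that still carry plain text.
             */
            if (skb->decrypted)
                    return false;
    #endif
            return (skb->destructor == sock_wfree ||
                    (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
    }

When this returns false, skb_orphan_partial() falls back to a full
orphan, which is why netem delay/throttling ends up dropping offloaded
TLS skbs.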

Reusing the sk_buff->decrypted flag also protects from
leaking clear text when an incoming, decrypted skb is redirected
(e.g. by TC).

See commit 0608c69c ("bpf: sk_msg, sock{map|hash} redirect
through ULP") for justification why the internal flag is safe.
The only location through which the flag could leak in is
tcp_bpf_sendmsg(); that path is handled by clearing the previously
unused bit.
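
A condensed sketch of that fix in tcp_bpf_sendmsg() (the full hunk is in
the diff below): the internal sendpage flag is masked out of the
user-supplied msg_flags before they are reused:

    /* Don't let internal do_tcp_sendpages() flags through */
    flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED);
    flags |= MSG_NO_SHARED_FRAGS;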

v2:
 - remove superfluous decrypted mark copy (Willem);
 - remove the stale doc entry (Boris);
 - rely entirely on EOR marking to prevent coalescing (Boris);
 - use an internal sendpages flag instead of marking the socket
   (Boris).
v3 (Willem):
 - reorganize the can_skb_orphan_partial() condition;
 - fix the flag leak-in through tcp_bpf_sendmsg.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0de94de1
@@ -506,21 +506,3 @@ Drivers should ignore the changes to TLS the device feature flags.
 These flags will be acted upon accordingly by the core ``ktls`` code.
 TLS device feature flags only control adding of new TLS connection
 offloads, old connections will remain active after flags are cleared.
-
-Known bugs
-==========
-
-skb_orphan() leaks clear text
------------------------------
-
-Currently drivers depend on the :c:member:`sk` member of
-:c:type:`struct sk_buff <sk_buff>` to identify segments requiring
-encryption. Any operation which removes or does not preserve the socket
-association such as :c:func:`skb_orphan` or :c:func:`skb_clone`
-will cause the driver to miss the packets and lead to clear text leaks.
-
-Redirects leak clear text
--------------------------
-
-In the RX direction, if segment has already been decrypted by the device
-and it gets redirected or mirrored - clear text will be transmitted out.
@@ -1374,6 +1374,14 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
         to->l4_hash = from->l4_hash;
 };
 
+static inline void skb_copy_decrypted(struct sk_buff *to,
+                                      const struct sk_buff *from)
+{
+#ifdef CONFIG_TLS_DEVICE
+        to->decrypted = from->decrypted;
+#endif
+}
+
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
 {
...
@@ -292,6 +292,9 @@ struct ucred {
 #define MSG_BATCH       0x40000 /* sendmmsg(): more messages coming */
 #define MSG_EOF         MSG_FIN
 #define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
+#define MSG_SENDPAGE_DECRYPTED  0x100000 /* sendpage() internal : page may carry
+                                          * plain text and require encryption
+                                          */
 
 #define MSG_ZEROCOPY    0x4000000       /* Use user data in kernel path */
 #define MSG_FASTOPEN    0x20000000      /* Send data in TCP SYN */
...
@@ -2482,6 +2482,7 @@ static inline bool sk_fullsock(const struct sock *sk)
 
 /* Checks if this SKB belongs to an HW offloaded socket
  * and whether any SW fallbacks are required based on dev.
+ * Check decrypted mark in case skb_orphan() cleared socket.
  */
 static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
                                                    struct net_device *dev)
@@ -2489,8 +2490,15 @@ static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
 #ifdef CONFIG_SOCK_VALIDATE_XMIT
         struct sock *sk = skb->sk;
 
-        if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb)
+        if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) {
                 skb = sk->sk_validate_xmit_skb(sk, dev, skb);
+#ifdef CONFIG_TLS_DEVICE
+        } else if (unlikely(skb->decrypted)) {
+                pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n");
+                kfree_skb(skb);
+                skb = NULL;
+#endif
+        }
 #endif
 
         return skb;
...
@@ -1992,6 +1992,19 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 }
 EXPORT_SYMBOL(skb_set_owner_w);
 
+static bool can_skb_orphan_partial(const struct sk_buff *skb)
+{
+#ifdef CONFIG_TLS_DEVICE
+        /* Drivers depend on in-order delivery for crypto offload,
+         * partial orphan breaks out-of-order-OK logic.
+         */
+        if (skb->decrypted)
+                return false;
+#endif
+        return (skb->destructor == sock_wfree ||
+                (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
+}
+
 /* This helper is used by netem, as it can hold packets in its
  * delay queue. We want to allow the owner socket to send more
  * packets, as if they were already TX completed by a typical driver.
@@ -2003,11 +2016,7 @@ void skb_orphan_partial(struct sk_buff *skb)
         if (skb_is_tcp_pure_ack(skb))
                 return;
 
-        if (skb->destructor == sock_wfree
-#ifdef CONFIG_INET
-            || skb->destructor == tcp_wfree
-#endif
-            ) {
+        if (can_skb_orphan_partial(skb)) {
                 struct sock *sk = skb->sk;
 
                 if (refcount_inc_not_zero(&sk->sk_refcnt)) {
...
@@ -984,6 +984,9 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
                         if (!skb)
                                 goto wait_for_memory;
 
+#ifdef CONFIG_TLS_DEVICE
+                        skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+#endif
                         skb_entail(sk, skb);
                         copy = size_goal;
                 }
...
@@ -398,10 +398,14 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
 static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
         struct sk_msg tmp, *msg_tx = NULL;
-        int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
         int copied = 0, err = 0;
         struct sk_psock *psock;
         long timeo;
+        int flags;
+
+        /* Don't let internal do_tcp_sendpages() flags through */
+        flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED);
+        flags |= MSG_NO_SHARED_FRAGS;
 
         psock = sk_psock_get(sk);
         if (unlikely(!psock))
...
@@ -1320,6 +1320,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
         buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
         if (!buff)
                 return -ENOMEM; /* We'll just try again later. */
+        skb_copy_decrypted(buff, skb);
 
         sk->sk_wmem_queued += buff->truesize;
         sk_mem_charge(sk, buff->truesize);
@@ -1874,6 +1875,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
         buff = sk_stream_alloc_skb(sk, 0, gfp, true);
         if (unlikely(!buff))
                 return -ENOMEM;
+        skb_copy_decrypted(buff, skb);
 
         sk->sk_wmem_queued += buff->truesize;
         sk_mem_charge(sk, buff->truesize);
@@ -2143,6 +2145,7 @@ static int tcp_mtu_probe(struct sock *sk)
         sk_mem_charge(sk, nskb->truesize);
 
         skb = tcp_send_head(sk);
+        skb_copy_decrypted(nskb, skb);
 
         TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
         TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
...
@@ -373,9 +373,9 @@ static int tls_push_data(struct sock *sk,
         struct tls_context *tls_ctx = tls_get_ctx(sk);
         struct tls_prot_info *prot = &tls_ctx->prot_info;
         struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
-        int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
         int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
         struct tls_record_info *record = ctx->open_record;
+        int tls_push_record_flags;
         struct page_frag *pfrag;
         size_t orig_size = size;
         u32 max_open_record_len;
@@ -390,6 +390,9 @@ static int tls_push_data(struct sock *sk,
         if (sk->sk_err)
                 return -sk->sk_err;
 
+        flags |= MSG_SENDPAGE_DECRYPTED;
+        tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
+
         timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
         if (tls_is_partially_sent_record(tls_ctx)) {
                 rc = tls_push_partial_record(sk, tls_ctx, flags);
@@ -576,7 +579,9 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
                 gfp_t sk_allocation = sk->sk_allocation;
 
                 sk->sk_allocation = GFP_ATOMIC;
-                tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL);
+                tls_push_partial_record(sk, ctx,
+                                        MSG_DONTWAIT | MSG_NOSIGNAL |
+                                        MSG_SENDPAGE_DECRYPTED);
                 sk->sk_allocation = sk_allocation;
         }
 }
...