Commit 7303524e authored by Liu Jian, committed by Daniel Borkmann

skmsg: Lose offset info in sk_psock_skb_ingress

If sockmap enables strparser, the offset info is lost in
sk_psock_skb_ingress(). If the length determined by the parse_msg function is
not skb->len, the skb will be converted to sk_msg multiple times, and the
userspace app will get the data multiple times.

Fix this by getting the offset and length from strp_msg. And as Cong suggested,
add one bit in skb->_sk_redir to distinguish whether strparser is enabled.

Fixes: 604326b4 ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Cong Wang <cong.wang@bytedance.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20211029141216.211899-1-liujian56@huawei.com
parent 0133c204
...@@ -508,8 +508,22 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) ...@@ -508,8 +508,22 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
#if IS_ENABLED(CONFIG_NET_SOCK_MSG) #if IS_ENABLED(CONFIG_NET_SOCK_MSG)
/* We only have one bit so far. */ #define BPF_F_STRPARSER (1UL << 1)
#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)
/* We only have two bits so far. */
#define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER)
/*
 * Test the BPF_F_STRPARSER flag stored in the skb's _sk_redir word.
 *
 * Returns true when the flag bit is set, false otherwise.
 */
static inline bool skb_bpf_strparser(const struct sk_buff *skb)
{
	return (skb->_sk_redir & BPF_F_STRPARSER) != 0;
}
/*
 * Set the BPF_F_STRPARSER flag in the skb's _sk_redir word, leaving every
 * other bit of that word as it was.
 */
static inline void skb_bpf_set_strparser(struct sk_buff *skb)
{
	skb->_sk_redir = skb->_sk_redir | BPF_F_STRPARSER;
}
static inline bool skb_bpf_ingress(const struct sk_buff *skb) static inline bool skb_bpf_ingress(const struct sk_buff *skb)
{ {
......
...@@ -494,6 +494,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, ...@@ -494,6 +494,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
} }
static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
u32 off, u32 len,
struct sk_psock *psock, struct sk_psock *psock,
struct sock *sk, struct sock *sk,
struct sk_msg *msg) struct sk_msg *msg)
...@@ -507,11 +508,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, ...@@ -507,11 +508,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
*/ */
if (skb_linearize(skb)) if (skb_linearize(skb))
return -EAGAIN; return -EAGAIN;
num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
if (unlikely(num_sge < 0)) if (unlikely(num_sge < 0))
return num_sge; return num_sge;
copied = skb->len; copied = len;
msg->sg.start = 0; msg->sg.start = 0;
msg->sg.size = copied; msg->sg.size = copied;
msg->sg.end = num_sge; msg->sg.end = num_sge;
...@@ -522,9 +523,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, ...@@ -522,9 +523,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
return copied; return copied;
} }
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb); static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len);
static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
{ {
struct sock *sk = psock->sk; struct sock *sk = psock->sk;
struct sk_msg *msg; struct sk_msg *msg;
...@@ -535,7 +538,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) ...@@ -535,7 +538,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* correctly. * correctly.
*/ */
if (unlikely(skb->sk == sk)) if (unlikely(skb->sk == sk))
return sk_psock_skb_ingress_self(psock, skb); return sk_psock_skb_ingress_self(psock, skb, off, len);
msg = sk_psock_create_ingress_msg(sk, skb); msg = sk_psock_create_ingress_msg(sk, skb);
if (!msg) if (!msg)
return -EAGAIN; return -EAGAIN;
...@@ -547,7 +550,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) ...@@ -547,7 +550,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* into user buffers. * into user buffers.
*/ */
skb_set_owner_r(skb, sk); skb_set_owner_r(skb, sk);
err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0) if (err < 0)
kfree(msg); kfree(msg);
return err; return err;
...@@ -557,7 +560,8 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) ...@@ -557,7 +560,8 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* skb. In this case we do not need to check memory limits or skb_set_owner_r * skb. In this case we do not need to check memory limits or skb_set_owner_r
* because the skb is already accounted for here. * because the skb is already accounted for here.
*/ */
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb) static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
{ {
struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
struct sock *sk = psock->sk; struct sock *sk = psock->sk;
...@@ -567,7 +571,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb ...@@ -567,7 +571,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
return -EAGAIN; return -EAGAIN;
sk_msg_init(msg); sk_msg_init(msg);
skb_set_owner_r(skb, sk); skb_set_owner_r(skb, sk);
err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0) if (err < 0)
kfree(msg); kfree(msg);
return err; return err;
...@@ -581,7 +585,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, ...@@ -581,7 +585,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
return -EAGAIN; return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len); return skb_send_sock(psock->sk, skb, off, len);
} }
return sk_psock_skb_ingress(psock, skb); return sk_psock_skb_ingress(psock, skb, off, len);
} }
static void sk_psock_skb_state(struct sk_psock *psock, static void sk_psock_skb_state(struct sk_psock *psock,
...@@ -624,6 +628,12 @@ static void sk_psock_backlog(struct work_struct *work) ...@@ -624,6 +628,12 @@ static void sk_psock_backlog(struct work_struct *work)
while ((skb = skb_dequeue(&psock->ingress_skb))) { while ((skb = skb_dequeue(&psock->ingress_skb))) {
len = skb->len; len = skb->len;
off = 0; off = 0;
if (skb_bpf_strparser(skb)) {
struct strp_msg *stm = strp_msg(skb);
off = stm->offset;
len = stm->full_len;
}
start: start:
ingress = skb_bpf_ingress(skb); ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb); skb_bpf_redirect_clear(skb);
...@@ -863,6 +873,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) ...@@ -863,6 +873,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
* return code, but then didn't set a redirect interface. * return code, but then didn't set a redirect interface.
*/ */
if (unlikely(!sk_other)) { if (unlikely(!sk_other)) {
skb_bpf_redirect_clear(skb);
sock_drop(from->sk, skb); sock_drop(from->sk, skb);
return -EIO; return -EIO;
} }
...@@ -930,6 +941,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, ...@@ -930,6 +941,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
{ {
struct sock *sk_other; struct sock *sk_other;
int err = 0; int err = 0;
u32 len, off;
switch (verdict) { switch (verdict) {
case __SK_PASS: case __SK_PASS:
...@@ -937,6 +949,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, ...@@ -937,6 +949,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
sk_other = psock->sk; sk_other = psock->sk;
if (sock_flag(sk_other, SOCK_DEAD) || if (sock_flag(sk_other, SOCK_DEAD) ||
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
skb_bpf_redirect_clear(skb);
goto out_free; goto out_free;
} }
...@@ -949,7 +962,15 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, ...@@ -949,7 +962,15 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
* retrying later from workqueue. * retrying later from workqueue.
*/ */
if (skb_queue_empty(&psock->ingress_skb)) { if (skb_queue_empty(&psock->ingress_skb)) {
err = sk_psock_skb_ingress_self(psock, skb); len = skb->len;
off = 0;
if (skb_bpf_strparser(skb)) {
struct strp_msg *stm = strp_msg(skb);
off = stm->offset;
len = stm->full_len;
}
err = sk_psock_skb_ingress_self(psock, skb, off, len);
} }
if (err < 0) { if (err < 0) {
spin_lock_bh(&psock->ingress_lock); spin_lock_bh(&psock->ingress_lock);
...@@ -1015,6 +1036,8 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) ...@@ -1015,6 +1036,8 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
skb_dst_drop(skb); skb_dst_drop(skb);
skb_bpf_redirect_clear(skb); skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb); ret = bpf_prog_run_pin_on_cpu(prog, skb);
if (ret == SK_PASS)
skb_bpf_set_strparser(skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL; skb->sk = NULL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment