Commit 02c558b2, authored by John Fastabend, committed by Daniel Borkmann

bpf: sockmap, support for msg_peek in sk_msg with redirect ingress

This adds support for the MSG_PEEK flag when redirecting to ingress
and receiving on the sk_msg psock queue. Previously the flag was
ignored, which could confuse applications that expected it to
work as normal.
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent 8734a162
...@@ -2089,7 +2089,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, ...@@ -2089,7 +2089,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len); int nonblock, int flags, int *addr_len);
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
struct msghdr *msg, int len); struct msghdr *msg, int len, int flags);
/* Call BPF_SOCK_OPS program that returns an int. If the return value /* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF * is < 0, then the BPF op failed (for example if the loaded BPF
......
...@@ -39,17 +39,19 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, ...@@ -39,17 +39,19 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
} }
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
struct msghdr *msg, int len) struct msghdr *msg, int len, int flags)
{ {
struct iov_iter *iter = &msg->msg_iter; struct iov_iter *iter = &msg->msg_iter;
int peek = flags & MSG_PEEK;
int i, ret, copied = 0; int i, ret, copied = 0;
while (copied != len) {
struct scatterlist *sge;
struct sk_msg *msg_rx; struct sk_msg *msg_rx;
msg_rx = list_first_entry_or_null(&psock->ingress_msg, msg_rx = list_first_entry_or_null(&psock->ingress_msg,
struct sk_msg, list); struct sk_msg, list);
while (copied != len) {
struct scatterlist *sge;
if (unlikely(!msg_rx)) if (unlikely(!msg_rx))
break; break;
...@@ -70,22 +72,30 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, ...@@ -70,22 +72,30 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
} }
copied += copy; copied += copy;
if (likely(!peek)) {
sge->offset += copy; sge->offset += copy;
sge->length -= copy; sge->length -= copy;
sk_mem_uncharge(sk, copy); sk_mem_uncharge(sk, copy);
msg_rx->sg.size -= copy; msg_rx->sg.size -= copy;
if (!sge->length) { if (!sge->length) {
i++; sk_msg_iter_var_next(i);
if (i == MAX_SKB_FRAGS)
i = 0;
if (!msg_rx->skb) if (!msg_rx->skb)
put_page(page); put_page(page);
} }
} else {
sk_msg_iter_var_next(i);
}
if (copied == len) if (copied == len)
break; break;
} while (i != msg_rx->sg.end); } while (i != msg_rx->sg.end);
if (unlikely(peek)) {
msg_rx = list_next_entry(msg_rx, list);
continue;
}
msg_rx->sg.start = i; msg_rx->sg.start = i;
if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
list_del(&msg_rx->list); list_del(&msg_rx->list);
...@@ -93,6 +103,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, ...@@ -93,6 +103,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
consume_skb(msg_rx->skb); consume_skb(msg_rx->skb);
kfree(msg_rx); kfree(msg_rx);
} }
msg_rx = list_first_entry_or_null(&psock->ingress_msg,
struct sk_msg, list);
} }
return copied; return copied;
...@@ -115,7 +127,7 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -115,7 +127,7 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
lock_sock(sk); lock_sock(sk);
msg_bytes_ready: msg_bytes_ready:
copied = __tcp_bpf_recvmsg(sk, psock, msg, len); copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
if (!copied) { if (!copied) {
int data, err = 0; int data, err = 0;
long timeo; long timeo;
......
...@@ -1478,7 +1478,8 @@ int tls_sw_recvmsg(struct sock *sk, ...@@ -1478,7 +1478,8 @@ int tls_sw_recvmsg(struct sock *sk,
skb = tls_wait_data(sk, psock, flags, timeo, &err); skb = tls_wait_data(sk, psock, flags, timeo, &err);
if (!skb) { if (!skb) {
if (psock) { if (psock) {
int ret = __tcp_bpf_recvmsg(sk, psock, msg, len); int ret = __tcp_bpf_recvmsg(sk, psock,
msg, len, flags);
if (ret > 0) { if (ret > 0) {
copied += ret; copied += ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment