Commit 3981f955 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2020-01-15

The following pull-request contains BPF updates for your *net* tree.

We've added 12 non-merge commits during the last 9 day(s) which contain
a total of 13 files changed, 95 insertions(+), 43 deletions(-).

The main changes are:

1) Fix refcount leak for TCP time wait and request sockets for socket lookup
   related BPF helpers, from Lorenz Bauer.

2) Fix wrong verification of ARSH instruction under ALU32, from Daniel Borkmann.

3) Batch of several sockmap and related TLS fixes found while operating
   more complex BPF programs with Cilium and OpenSSL, from John Fastabend.

4) Fix sockmap to read psock's ingress_msg queue before regular sk_receive_queue()
   to avoid purging data upon teardown, from Lingpeng Chen.

5) Fix printing incorrect pointer in bpftool's btf_dump_ptr() in order to properly
   dump a BPF map's value with BTF, from Martin KaFai Lau.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 567110f1 85ddd9c3
......@@ -358,17 +358,22 @@ static inline void sk_psock_update_proto(struct sock *sk,
static inline void sk_psock_restore_proto(struct sock *sk,
struct sk_psock *psock)
{
sk->sk_write_space = psock->saved_write_space;
sk->sk_prot->unhash = psock->saved_unhash;
if (psock->sk_proto) {
struct inet_connection_sock *icsk = inet_csk(sk);
bool has_ulp = !!icsk->icsk_ulp_data;
if (has_ulp)
tcp_update_ulp(sk, psock->sk_proto);
else
if (has_ulp) {
tcp_update_ulp(sk, psock->sk_proto,
psock->saved_write_space);
} else {
sk->sk_prot = psock->sk_proto;
sk->sk_write_space = psock->saved_write_space;
}
psock->sk_proto = NULL;
} else {
sk->sk_write_space = psock->saved_write_space;
}
}
......
......@@ -30,7 +30,7 @@ struct tnum tnum_lshift(struct tnum a, u8 shift);
/* Shift (rsh) a tnum right (by a fixed shift) */
struct tnum tnum_rshift(struct tnum a, u8 shift);
/* Shift (arsh) a tnum right (by a fixed min_shift) */
struct tnum tnum_arshift(struct tnum a, u8 min_shift);
struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness);
/* Add two tnums, return @a + @b */
struct tnum tnum_add(struct tnum a, struct tnum b);
/* Subtract two tnums, return @a - @b */
......
......@@ -2147,7 +2147,8 @@ struct tcp_ulp_ops {
/* initialize ulp */
int (*init)(struct sock *sk);
/* update ulp */
void (*update)(struct sock *sk, struct proto *p);
void (*update)(struct sock *sk, struct proto *p,
void (*write_space)(struct sock *sk));
/* cleanup ulp */
void (*release)(struct sock *sk);
/* diagnostic */
......@@ -2162,7 +2163,8 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type);
int tcp_set_ulp(struct sock *sk, const char *name);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);
void tcp_update_ulp(struct sock *sk, struct proto *p);
void tcp_update_ulp(struct sock *sk, struct proto *p,
void (*write_space)(struct sock *sk));
#define MODULE_ALIAS_TCP_ULP(name) \
__MODULE_INFO(alias, alias_userspace, name); \
......
......@@ -44,14 +44,19 @@ struct tnum tnum_rshift(struct tnum a, u8 shift)
return TNUM(a.value >> shift, a.mask >> shift);
}
struct tnum tnum_arshift(struct tnum a, u8 min_shift)
struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness)
{
/* if a.value is negative, arithmetic shifting by minimum shift
* will have larger negative offset compared to more shifting.
* If a.value is nonnegative, arithmetic shifting by minimum shift
* will have larger positive offset compare to more shifting.
*/
return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift);
if (insn_bitness == 32)
return TNUM((u32)(((s32)a.value) >> min_shift),
(u32)(((s32)a.mask) >> min_shift));
else
return TNUM((s64)a.value >> min_shift,
(s64)a.mask >> min_shift);
}
struct tnum tnum_add(struct tnum a, struct tnum b)
......
......@@ -5049,9 +5049,16 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
/* Upon reaching here, src_known is true and
* umax_val is equal to umin_val.
*/
if (insn_bitness == 32) {
dst_reg->smin_value = (u32)(((s32)dst_reg->smin_value) >> umin_val);
dst_reg->smax_value = (u32)(((s32)dst_reg->smax_value) >> umin_val);
} else {
dst_reg->smin_value >>= umin_val;
dst_reg->smax_value >>= umin_val;
dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
}
dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val,
insn_bitness);
/* blow away the dst_reg umin_value/umax_value and rely on
* dst_reg var_off to refine the result.
......
......@@ -2231,10 +2231,10 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
offset += len;
len = sk_msg_elem(msg, i)->length;
if (start < offset + len)
break;
offset += len;
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
......@@ -2346,7 +2346,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
u32, len, u64, flags)
{
struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
u32 new, i = 0, l, space, copy = 0, offset = 0;
u32 new, i = 0, l = 0, space, copy = 0, offset = 0;
u8 *raw, *to, *from;
struct page *page;
......@@ -2356,11 +2356,11 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
offset += l;
l = sk_msg_elem(msg, i)->length;
if (start < offset + l)
break;
offset += l;
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
......@@ -2415,6 +2415,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
sk_msg_iter_var_next(i);
sg_unmark_end(psge);
sg_unmark_end(&rsge);
sk_msg_iter_next(msg, end);
}
......@@ -2506,7 +2507,7 @@ static void sk_msg_shift_right(struct sk_msg *msg, int i)
BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
u32, len, u64, flags)
{
u32 i = 0, l, space, offset = 0;
u32 i = 0, l = 0, space, offset = 0;
u64 last = start + len;
int pop;
......@@ -2516,11 +2517,11 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
offset += l;
l = sk_msg_elem(msg, i)->length;
if (start < offset + l)
break;
offset += l;
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
......@@ -5318,7 +5319,6 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
if (sk) {
sk = sk_to_full_sk(sk);
if (!sk_fullsock(sk)) {
if (!sock_flag(sk, SOCK_RCU_FREE))
sock_gen_put(sk);
return NULL;
}
......@@ -5356,7 +5356,6 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
if (sk) {
sk = sk_to_full_sk(sk);
if (!sk_fullsock(sk)) {
if (!sock_flag(sk, SOCK_RCU_FREE))
sock_gen_put(sk);
return NULL;
}
......@@ -5424,7 +5423,8 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
BPF_CALL_1(bpf_sk_release, struct sock *, sk)
{
if (!sock_flag(sk, SOCK_RCU_FREE))
/* Only full sockets have sk->sk_flags. */
if (!sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE))
sock_gen_put(sk);
return 0;
}
......
......@@ -594,6 +594,8 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
sock_owned_by_me(sk);
sk_psock_cork_free(psock);
sk_psock_zap_ingress(psock);
......
......@@ -241,8 +241,11 @@ static void sock_map_free(struct bpf_map *map)
struct sock *sk;
sk = xchg(psk, NULL);
if (sk)
if (sk) {
lock_sock(sk);
sock_map_unref(sk, psk);
release_sock(sk);
}
}
raw_spin_unlock_bh(&stab->lock);
rcu_read_unlock();
......@@ -862,7 +865,9 @@ static void sock_hash_free(struct bpf_map *map)
raw_spin_lock_bh(&bucket->lock);
hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
hlist_del_rcu(&elem->node);
lock_sock(elem->sk);
sock_map_unref(elem->sk, elem);
release_sock(elem->sk);
}
raw_spin_unlock_bh(&bucket->lock);
}
......
......@@ -121,14 +121,14 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct sk_psock *psock;
int copied, ret;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
if (!skb_queue_empty(&sk->sk_receive_queue))
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
psock = sk_psock_get(sk);
if (unlikely(!psock))
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
if (!skb_queue_empty(&sk->sk_receive_queue) &&
sk_psock_queue_empty(psock))
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
lock_sock(sk);
msg_bytes_ready:
copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
......@@ -139,7 +139,7 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
timeo = sock_rcvtimeo(sk, nonblock);
data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
if (data) {
if (skb_queue_empty(&sk->sk_receive_queue))
if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
release_sock(sk);
sk_psock_put(sk, psock);
......@@ -315,10 +315,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
*/
delta = msg->sg.size;
psock->eval = sk_psock_msg_verdict(sk, psock, msg);
if (msg->sg.size < delta)
delta -= msg->sg.size;
else
delta = 0;
}
if (msg->cork_bytes &&
......
......@@ -99,17 +99,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen)
rcu_read_unlock();
}
void tcp_update_ulp(struct sock *sk, struct proto *proto)
void tcp_update_ulp(struct sock *sk, struct proto *proto,
void (*write_space)(struct sock *sk))
{
struct inet_connection_sock *icsk = inet_csk(sk);
if (!icsk->icsk_ulp_ops) {
sk->sk_write_space = write_space;
sk->sk_prot = proto;
return;
}
if (icsk->icsk_ulp_ops->update)
icsk->icsk_ulp_ops->update(sk, proto);
icsk->icsk_ulp_ops->update(sk, proto, write_space);
}
void tcp_cleanup_ulp(struct sock *sk)
......
......@@ -732,15 +732,19 @@ static int tls_init(struct sock *sk)
return rc;
}
static void tls_update(struct sock *sk, struct proto *p)
static void tls_update(struct sock *sk, struct proto *p,
void (*write_space)(struct sock *sk))
{
struct tls_context *ctx;
ctx = tls_get_ctx(sk);
if (likely(ctx))
if (likely(ctx)) {
ctx->sk_write_space = write_space;
ctx->sk_proto = p;
else
} else {
sk->sk_prot = p;
sk->sk_write_space = write_space;
}
}
static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
......
......@@ -680,12 +680,32 @@ static int tls_push_record(struct sock *sk, int flags,
split_point = msg_pl->apply_bytes;
split = split_point && split_point < msg_pl->sg.size;
if (unlikely((!split &&
msg_pl->sg.size +
prot->overhead_size > msg_en->sg.size) ||
(split &&
split_point +
prot->overhead_size > msg_en->sg.size))) {
split = true;
split_point = msg_en->sg.size;
}
if (split) {
rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en,
split_point, prot->overhead_size,
&orig_end);
if (rc < 0)
return rc;
/* This can happen if above tls_split_open_record allocates
* a single large encryption buffer instead of two smaller
* ones. In this case adjust pointers and continue without
* split.
*/
if (!msg_pl->sg.size) {
tls_merge_open_record(sk, rec, tmp, orig_end);
msg_pl = &rec->msg_plaintext;
msg_en = &rec->msg_encrypted;
split = false;
}
sk_msg_trim(sk, msg_en, msg_pl->sg.size +
prot->overhead_size);
}
......@@ -707,6 +727,12 @@ static int tls_push_record(struct sock *sk, int flags,
sg_mark_end(sk_msg_elem(msg_pl, i));
}
if (msg_pl->sg.end < msg_pl->sg.start) {
sg_chain(&msg_pl->sg.data[msg_pl->sg.start],
MAX_SKB_FRAGS - msg_pl->sg.start + 1,
msg_pl->sg.data);
}
i = msg_pl->sg.start;
sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]);
......@@ -781,10 +807,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
if (psock->eval == __SK_NONE) {
delta = msg->sg.size;
psock->eval = sk_psock_msg_verdict(sk, psock, msg);
if (delta < msg->sg.size)
delta -= msg->sg.size;
else
delta = 0;
}
if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
!enospc && !full_record) {
......
......@@ -26,7 +26,7 @@ static void btf_dumper_ptr(const void *data, json_writer_t *jw,
bool is_plain_text)
{
if (is_plain_text)
jsonw_printf(jw, "%p", data);
jsonw_printf(jw, "%p", *(void **)data);
else
jsonw_printf(jw, "%lu", *(unsigned long *)data);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment