Commit 81110384 authored by John Fastabend, committed by Daniel Borkmann

bpf: sockmap, add hash map support

Sockmap is currently backed by an array and requires keys to be
four bytes. This works well for many use cases and was originally
modeled after devmap, which also uses four-byte keys. However,
this has become limiting in larger use cases where a hash would
be more appropriate. For example, users may want to use the 5-tuple
of the socket as the lookup key.

To support this, add hash support.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent e5cd3abc
...@@ -668,6 +668,7 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) ...@@ -668,6 +668,7 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET) #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
#else #else
static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
...@@ -675,6 +676,12 @@ static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) ...@@ -675,6 +676,12 @@ static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
return NULL; return NULL;
} }
/* Stub selected by the #else branch of the CONFIG_STREAM_PARSER /
 * CONFIG_BPF_SYSCALL / CONFIG_INET guard: it ignores both arguments and
 * always returns NULL.
 */
static inline struct sock *
__sock_hash_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}
static inline int sock_map_prog(struct bpf_map *map, static inline int sock_map_prog(struct bpf_map *map,
struct bpf_prog *prog, struct bpf_prog *prog,
u32 type) u32 type)
...@@ -724,6 +731,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto; ...@@ -724,6 +731,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto; extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
/* Shared helpers among cBPF and eBPF. */ /* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void); void bpf_user_rnd_init_once(void);
......
...@@ -47,6 +47,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) ...@@ -47,6 +47,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET) #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
#endif #endif
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS) #if defined(CONFIG_XDP_SOCKETS)
......
...@@ -118,6 +118,7 @@ enum bpf_map_type { ...@@ -118,6 +118,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_SOCKMAP, BPF_MAP_TYPE_SOCKMAP,
BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_CPUMAP,
BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
}; };
enum bpf_prog_type { enum bpf_prog_type {
...@@ -1828,7 +1829,6 @@ union bpf_attr { ...@@ -1828,7 +1829,6 @@ union bpf_attr {
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
* *
*
* int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
* Description * Description
* Do FIB lookup in kernel tables using parameters in *params*. * Do FIB lookup in kernel tables using parameters in *params*.
...@@ -1855,6 +1855,53 @@ union bpf_attr { ...@@ -1855,6 +1855,53 @@ union bpf_attr {
* Egress device index on success, 0 if packet needs to continue * Egress device index on success, 0 if packet needs to continue
* up the stack for further processing or a negative error in case * up the stack for further processing or a negative error in case
* of failure. * of failure.
*
* int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* Add an entry to, or update a sockhash *map* referencing sockets.
* The *skops* is used as a new value for the entry associated to
* *key*. *flags* is one of:
*
* **BPF_NOEXIST**
* The entry for *key* must not exist in the map.
* **BPF_EXIST**
* The entry for *key* must already exist in the map.
* **BPF_ANY**
* No condition on the existence of the entry for *key*.
*
* If the *map* has eBPF programs (parser and verdict), those will
* be inherited by the socket being added. If the socket is
* already attached to eBPF programs, this results in an error.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
* Description
* This helper is used in programs implementing policies at the
* socket level. If the message *msg* is allowed to pass (i.e. if
* the verdict eBPF program returns **SK_PASS**), redirect it to
* the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
* egress interfaces can be used for redirection. The
* **BPF_F_INGRESS** value in *flags* is used to make the
* distinction (ingress path is selected if the flag is present,
* egress path otherwise). This is the only flag supported for now.
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
* int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
* Description
* This helper is used in programs implementing policies at the
* skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
* if the verdict eBPF program returns **SK_PASS**), redirect it
* to the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
* egress interfaces can be used for redirection. The
* **BPF_F_INGRESS** value in *flags* is used to make the
* distinction (ingress path is selected if the flag is present,
* egress path otherwise). This is the only flag supported for now.
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -1926,7 +1973,10 @@ union bpf_attr { ...@@ -1926,7 +1973,10 @@ union bpf_attr {
FN(skb_get_xfrm_state), \ FN(skb_get_xfrm_state), \
FN(get_stack), \ FN(get_stack), \
FN(skb_load_bytes_relative), \ FN(skb_load_bytes_relative), \
FN(fib_lookup), FN(fib_lookup), \
FN(sock_hash_update), \
FN(msg_redirect_hash), \
FN(sk_redirect_hash),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -1707,6 +1707,7 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; ...@@ -1707,6 +1707,7 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak; const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_sock_map_update_proto __weak; const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{ {
......
This diff is collapsed.
...@@ -2093,6 +2093,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, ...@@ -2093,6 +2093,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_msg_redirect_map) func_id != BPF_FUNC_msg_redirect_map)
goto error; goto error;
break; break;
case BPF_MAP_TYPE_SOCKHASH:
if (func_id != BPF_FUNC_sk_redirect_hash &&
func_id != BPF_FUNC_sock_hash_update &&
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_hash)
goto error;
break;
default: default:
break; break;
} }
...@@ -2130,11 +2137,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, ...@@ -2130,11 +2137,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
break; break;
case BPF_FUNC_sk_redirect_map: case BPF_FUNC_sk_redirect_map:
case BPF_FUNC_msg_redirect_map: case BPF_FUNC_msg_redirect_map:
case BPF_FUNC_sock_map_update:
if (map->map_type != BPF_MAP_TYPE_SOCKMAP) if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
goto error; goto error;
break; break;
case BPF_FUNC_sock_map_update: case BPF_FUNC_sk_redirect_hash:
if (map->map_type != BPF_MAP_TYPE_SOCKMAP) case BPF_FUNC_msg_redirect_hash:
case BPF_FUNC_sock_hash_update:
if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
goto error; goto error;
break; break;
default: default:
......
...@@ -2074,6 +2074,33 @@ static const struct bpf_func_proto bpf_redirect_proto = { ...@@ -2074,6 +2074,33 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING, .arg2_type = ARG_ANYTHING,
}; };
/* bpf_sk_redirect_hash - pick a redirect target for @skb from a sockhash.
 *
 * Rejects any @flags bit other than BPF_F_INGRESS. Otherwise records
 * @flags and the socket found for @key in @map in the control block
 * obtained via TCP_SKB_CB(@skb).
 *
 * Returns SK_PASS when a socket was found, SK_DROP on invalid flags or
 * when the lookup yields no socket.
 */
BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
	   struct bpf_map *, map, void *, key, u64, flags)
{
	struct tcp_skb_cb *cb = TCP_SKB_CB(skb);
	struct sock *target;

	/* Unknown flag bits are treated as invalid input: drop. */
	if (unlikely(flags & ~(BPF_F_INGRESS)))
		return SK_DROP;

	cb->bpf.flags = flags;

	/* The lookup result is stored even when it is NULL. */
	target = __sock_hash_lookup_elem(map, key);
	cb->bpf.sk_redir = target;

	return target ? SK_PASS : SK_DROP;
}
/* Prototype descriptor for bpf_sk_redirect_hash(): not GPL-only, integer
 * return, arguments are (context pointer, constant map pointer, pointer to
 * a map key, unconstrained value).
 */
static const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
	.func		= bpf_sk_redirect_hash,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_PTR_TO_MAP_KEY,
	.arg4_type	= ARG_ANYTHING,
};
BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct bpf_map *, map, u32, key, u64, flags) struct bpf_map *, map, u32, key, u64, flags)
{ {
...@@ -2108,6 +2135,31 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = { ...@@ -2108,6 +2135,31 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.arg4_type = ARG_ANYTHING, .arg4_type = ARG_ANYTHING,
}; };
/* bpf_msg_redirect_hash - pick a redirect target for @msg from a sockhash.
 *
 * Rejects any @flags bit other than BPF_F_INGRESS. Otherwise records
 * @flags and the socket found for @key in @map directly in @msg.
 *
 * Returns SK_PASS when a socket was found, SK_DROP on invalid flags or
 * when the lookup yields no socket.
 */
BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg_buff *, msg,
	   struct bpf_map *, map, void *, key, u64, flags)
{
	struct sock *target;

	/* Unknown flag bits are treated as invalid input: drop. */
	if (unlikely(flags & ~(BPF_F_INGRESS)))
		return SK_DROP;

	msg->flags = flags;

	/* The lookup result is stored even when it is NULL. */
	target = __sock_hash_lookup_elem(map, key);
	msg->sk_redir = target;

	return target ? SK_PASS : SK_DROP;
}
/* Prototype descriptor for bpf_msg_redirect_hash(): not GPL-only, integer
 * return, arguments are (context pointer, constant map pointer, pointer to
 * a map key, unconstrained value).
 */
static const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
	.func		= bpf_msg_redirect_hash,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_PTR_TO_MAP_KEY,
	.arg4_type	= ARG_ANYTHING,
};
BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg, BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
struct bpf_map *, map, u32, key, u64, flags) struct bpf_map *, map, u32, key, u64, flags)
{ {
...@@ -4502,6 +4554,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -4502,6 +4554,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sock_ops_cb_flags_set_proto; return &bpf_sock_ops_cb_flags_set_proto;
case BPF_FUNC_sock_map_update: case BPF_FUNC_sock_map_update:
return &bpf_sock_map_update_proto; return &bpf_sock_map_update_proto;
case BPF_FUNC_sock_hash_update:
return &bpf_sock_hash_update_proto;
default: default:
return bpf_base_func_proto(func_id); return bpf_base_func_proto(func_id);
} }
...@@ -4513,6 +4567,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -4513,6 +4567,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
switch (func_id) { switch (func_id) {
case BPF_FUNC_msg_redirect_map: case BPF_FUNC_msg_redirect_map:
return &bpf_msg_redirect_map_proto; return &bpf_msg_redirect_map_proto;
case BPF_FUNC_msg_redirect_hash:
return &bpf_msg_redirect_hash_proto;
case BPF_FUNC_msg_apply_bytes: case BPF_FUNC_msg_apply_bytes:
return &bpf_msg_apply_bytes_proto; return &bpf_msg_apply_bytes_proto;
case BPF_FUNC_msg_cork_bytes: case BPF_FUNC_msg_cork_bytes:
...@@ -4544,6 +4600,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -4544,6 +4600,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_uid_proto; return &bpf_get_socket_uid_proto;
case BPF_FUNC_sk_redirect_map: case BPF_FUNC_sk_redirect_map:
return &bpf_sk_redirect_map_proto; return &bpf_sk_redirect_map_proto;
case BPF_FUNC_sk_redirect_hash:
return &bpf_sk_redirect_hash_proto;
default: default:
return bpf_base_func_proto(func_id); return bpf_base_func_proto(func_id);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment