Commit 089b19a9 authored by Stanislav Fomichev's avatar Stanislav Fomichev Committed by Daniel Borkmann

flow_dissector: switch kernel context to struct bpf_flow_dissector

struct bpf_flow_dissector has a small subset of sk_buff fields that
flow dissector BPF program is allowed to access and an optional
pointer to real skb. Real skb is used only in bpf_skb_load_bytes
helper to read non-linear data.

The real motivation for this is to be able to call flow dissector
from eth_get_headlen context where we don't have an skb and need
to dissect raw bytes.
Signed-off-by: default avatarStanislav Fomichev <sdf@google.com>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parent 7e6e185c
...@@ -1275,6 +1275,10 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) ...@@ -1275,6 +1275,10 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
} }
#endif #endif
struct bpf_flow_dissector;
bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
__be16 proto, int nhoff, int hlen);
struct bpf_flow_keys; struct bpf_flow_keys;
bool __skb_flow_bpf_dissect(struct bpf_prog *prog, bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
const struct sk_buff *skb, const struct sk_buff *skb,
......
...@@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec ...@@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec
return ((char *)target_container) + flow_dissector->offset[key_id]; return ((char *)target_container) + flow_dissector->offset[key_id];
} }
struct bpf_flow_dissector {
struct bpf_flow_keys *flow_keys;
const struct sk_buff *skb;
void *data;
void *data_end;
};
#endif #endif
...@@ -364,13 +364,10 @@ struct tcf_proto { ...@@ -364,13 +364,10 @@ struct tcf_proto {
}; };
struct qdisc_skb_cb { struct qdisc_skb_cb {
union { struct {
struct { unsigned int pkt_len;
unsigned int pkt_len; u16 slave_dev_queue_mapping;
u16 slave_dev_queue_mapping; u16 tc_classid;
u16 tc_classid;
};
struct bpf_flow_keys *flow_keys;
}; };
#define QDISC_CB_PRIV_LEN 20 #define QDISC_CB_PRIV_LEN 20
unsigned char data[QDISC_CB_PRIV_LEN]; unsigned char data[QDISC_CB_PRIV_LEN];
......
...@@ -382,7 +382,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, ...@@ -382,7 +382,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
u32 repeat = kattr->test.repeat; u32 repeat = kattr->test.repeat;
struct bpf_flow_keys flow_keys; struct bpf_flow_keys flow_keys;
u64 time_start, time_spent = 0; u64 time_start, time_spent = 0;
struct bpf_skb_data_end *cb;
u32 retval, duration; u32 retval, duration;
struct sk_buff *skb; struct sk_buff *skb;
struct sock *sk; struct sock *sk;
...@@ -423,9 +422,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, ...@@ -423,9 +422,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
current->nsproxy->net_ns->loopback_dev); current->nsproxy->net_ns->loopback_dev);
skb_reset_network_header(skb); skb_reset_network_header(skb);
cb = (struct bpf_skb_data_end *)skb->cb;
cb->qdisc_cb.flow_keys = &flow_keys;
if (!repeat) if (!repeat)
repeat = 1; repeat = 1;
......
...@@ -1730,6 +1730,40 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { ...@@ -1730,6 +1730,40 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE, .arg4_type = ARG_CONST_SIZE,
}; };
BPF_CALL_4(bpf_flow_dissector_load_bytes,
const struct bpf_flow_dissector *, ctx, u32, offset,
void *, to, u32, len)
{
void *ptr;
if (unlikely(offset > 0xffff))
goto err_clear;
if (unlikely(!ctx->skb))
goto err_clear;
ptr = skb_header_pointer(ctx->skb, offset, len, to);
if (unlikely(!ptr))
goto err_clear;
if (ptr != to)
memcpy(to, ptr, len);
return 0;
err_clear:
memset(to, 0, len);
return -EFAULT;
}
static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
.func = bpf_flow_dissector_load_bytes,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_UNINIT_MEM,
.arg4_type = ARG_CONST_SIZE,
};
BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
u32, offset, void *, to, u32, len, u32, start_header) u32, offset, void *, to, u32, len, u32, start_header)
{ {
...@@ -6121,7 +6155,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -6121,7 +6155,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{ {
switch (func_id) { switch (func_id) {
case BPF_FUNC_skb_load_bytes: case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto; return &bpf_flow_dissector_load_bytes_proto;
default: default:
return bpf_base_func_proto(func_id); return bpf_base_func_proto(func_id);
} }
...@@ -6248,9 +6282,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type ...@@ -6248,9 +6282,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
return false; return false;
break; break;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
if (size != sizeof(__u64)) return false;
return false;
break;
case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, tstamp):
if (size != sizeof(__u64)) if (size != sizeof(__u64))
return false; return false;
...@@ -6285,7 +6317,6 @@ static bool sk_filter_is_valid_access(int off, int size, ...@@ -6285,7 +6317,6 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end): case bpf_ctx_range(struct __sk_buff, data_end):
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len): case bpf_ctx_range(struct __sk_buff, wire_len):
...@@ -6312,7 +6343,6 @@ static bool cg_skb_is_valid_access(int off, int size, ...@@ -6312,7 +6343,6 @@ static bool cg_skb_is_valid_access(int off, int size,
switch (off) { switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, wire_len): case bpf_ctx_range(struct __sk_buff, wire_len):
return false; return false;
case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data):
...@@ -6358,7 +6388,6 @@ static bool lwt_is_valid_access(int off, int size, ...@@ -6358,7 +6388,6 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len): case bpf_ctx_range(struct __sk_buff, wire_len):
return false; return false;
...@@ -6601,7 +6630,6 @@ static bool tc_cls_act_is_valid_access(int off, int size, ...@@ -6601,7 +6630,6 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end): case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END; info->reg_type = PTR_TO_PACKET_END;
break; break;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false; return false;
} }
...@@ -6803,7 +6831,6 @@ static bool sk_skb_is_valid_access(int off, int size, ...@@ -6803,7 +6831,6 @@ static bool sk_skb_is_valid_access(int off, int size,
switch (off) { switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len): case bpf_ctx_range(struct __sk_buff, wire_len):
return false; return false;
...@@ -6877,24 +6904,65 @@ static bool flow_dissector_is_valid_access(int off, int size, ...@@ -6877,24 +6904,65 @@ static bool flow_dissector_is_valid_access(int off, int size,
const struct bpf_prog *prog, const struct bpf_prog *prog,
struct bpf_insn_access_aux *info) struct bpf_insn_access_aux *info)
{ {
const int size_default = sizeof(__u32);
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
if (type == BPF_WRITE) if (type == BPF_WRITE)
return false; return false;
switch (off) { switch (off) {
case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data):
if (size != size_default)
return false;
info->reg_type = PTR_TO_PACKET; info->reg_type = PTR_TO_PACKET;
break; return true;
case bpf_ctx_range(struct __sk_buff, data_end): case bpf_ctx_range(struct __sk_buff, data_end):
if (size != size_default)
return false;
info->reg_type = PTR_TO_PACKET_END; info->reg_type = PTR_TO_PACKET_END;
break; return true;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
if (size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_FLOW_KEYS; info->reg_type = PTR_TO_FLOW_KEYS;
break; return true;
default: default:
return false; return false;
} }
}
return bpf_skb_is_valid_access(off, size, type, prog, info); static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
switch (si->off) {
case offsetof(struct __sk_buff, data):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
si->dst_reg, si->src_reg,
offsetof(struct bpf_flow_dissector, data));
break;
case offsetof(struct __sk_buff, data_end):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
si->dst_reg, si->src_reg,
offsetof(struct bpf_flow_dissector, data_end));
break;
case offsetof(struct __sk_buff, flow_keys):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys),
si->dst_reg, si->src_reg,
offsetof(struct bpf_flow_dissector, flow_keys));
break;
}
return insn - insn_buf;
} }
static u32 bpf_convert_ctx_access(enum bpf_access_type type, static u32 bpf_convert_ctx_access(enum bpf_access_type type,
...@@ -7201,15 +7269,6 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, ...@@ -7201,15 +7269,6 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
skc_num, 2, target_size)); skc_num, 2, target_size));
break; break;
case offsetof(struct __sk_buff, flow_keys):
off = si->off;
off -= offsetof(struct __sk_buff, flow_keys);
off += offsetof(struct sk_buff, cb);
off += offsetof(struct qdisc_skb_cb, flow_keys);
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
si->src_reg, off);
break;
case offsetof(struct __sk_buff, tstamp): case offsetof(struct __sk_buff, tstamp):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
...@@ -8214,7 +8273,7 @@ const struct bpf_prog_ops sk_msg_prog_ops = { ...@@ -8214,7 +8273,7 @@ const struct bpf_prog_ops sk_msg_prog_ops = {
const struct bpf_verifier_ops flow_dissector_verifier_ops = { const struct bpf_verifier_ops flow_dissector_verifier_ops = {
.get_func_proto = flow_dissector_func_proto, .get_func_proto = flow_dissector_func_proto,
.is_valid_access = flow_dissector_is_valid_access, .is_valid_access = flow_dissector_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access, .convert_ctx_access = flow_dissector_convert_ctx_access,
}; };
const struct bpf_prog_ops flow_dissector_prog_ops = { const struct bpf_prog_ops flow_dissector_prog_ops = {
......
...@@ -688,39 +688,34 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, ...@@ -688,39 +688,34 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
struct flow_dissector *flow_dissector, struct flow_dissector *flow_dissector,
struct bpf_flow_keys *flow_keys) struct bpf_flow_keys *flow_keys)
{ {
struct bpf_skb_data_end cb_saved; struct bpf_flow_dissector ctx = {
struct bpf_skb_data_end *cb; .flow_keys = flow_keys,
u32 result; .skb = skb,
.data = skb->data,
/* Note that even though the const qualifier is discarded .data_end = skb->data + skb_headlen(skb),
* throughout the execution of the BPF program, all changes(the };
* control block) are reverted after the BPF program returns.
* Therefore, __skb_flow_dissect does not alter the skb. return bpf_flow_dissect(prog, &ctx, skb->protocol,
*/ skb_network_offset(skb), skb_headlen(skb));
}
cb = (struct bpf_skb_data_end *)skb->cb;
/* Save Control Block */ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
memcpy(&cb_saved, cb, sizeof(cb_saved)); __be16 proto, int nhoff, int hlen)
memset(cb, 0, sizeof(*cb)); {
struct bpf_flow_keys *flow_keys = ctx->flow_keys;
u32 result;
/* Pass parameters to the BPF program */ /* Pass parameters to the BPF program */
memset(flow_keys, 0, sizeof(*flow_keys)); memset(flow_keys, 0, sizeof(*flow_keys));
cb->qdisc_cb.flow_keys = flow_keys; flow_keys->n_proto = proto;
flow_keys->n_proto = skb->protocol; flow_keys->nhoff = nhoff;
flow_keys->nhoff = skb_network_offset(skb);
flow_keys->thoff = flow_keys->nhoff; flow_keys->thoff = flow_keys->nhoff;
bpf_compute_data_pointers((struct sk_buff *)skb); result = BPF_PROG_RUN(prog, ctx);
result = BPF_PROG_RUN(prog, skb);
/* Restore state */
memcpy(cb, &cb_saved, sizeof(cb_saved));
flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
skb_network_offset(skb), skb->len);
flow_keys->thoff = clamp_t(u16, flow_keys->thoff, flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
flow_keys->nhoff, skb->len); flow_keys->nhoff, hlen);
return result == BPF_OK; return result == BPF_OK;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment