Commit 545c321b authored by David S. Miller

Merge branch 'bpf-helper-improvements'

Daniel Borkmann says:

====================
BPF helper improvements

This set adds various BPF helper improvements, that is, cleaning
up and adding BPF_F_CURRENT_CPU flag for tracing helper, allowing
for preemption checks on bpf_get_smp_processor_id() helper, and
adding two new helpers bpf_skb_change_{proto, type} for tc related
programs. For further details please see individual patches.

Note, this set requires -net to be merged into -net-next tree first.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ee58b571 d2485c42
......@@ -313,6 +313,29 @@ enum bpf_func_id {
*/
BPF_FUNC_skb_get_tunnel_opt,
BPF_FUNC_skb_set_tunnel_opt,
/**
* bpf_skb_change_proto(skb, proto, flags)
* Change protocol of the skb. Currently supported is
* v4 -> v6, v6 -> v4 transitions. The helper will also
* resize the skb. eBPF program is expected to fill the
* new headers via skb_store_bytes and lX_csum_replace.
* @skb: pointer to skb
* @proto: new skb->protocol type
* @flags: reserved
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_proto,
/**
* bpf_skb_change_type(skb, type)
* Change packet type of skb.
* @skb: pointer to skb
* @type: new skb->pkt_type type
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_type,
__BPF_FUNC_MAX_ID,
};
......@@ -347,7 +370,7 @@ enum bpf_func_id {
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
/* BPF_FUNC_perf_event_output flags. */
/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
......
......@@ -719,14 +719,13 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
if (unlikely(index >= array->map.max_entries))
goto out;
if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
goto out;
tail_call_cnt++;
prog = READ_ONCE(array->ptrs[index]);
if (unlikely(!prog))
if (!prog)
goto out;
/* ARG1 at this point is guaranteed to point to CTX from
......
......@@ -101,7 +101,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = {
static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
return raw_smp_processor_id();
return smp_processor_id();
}
const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
......
......@@ -188,30 +188,35 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
return &bpf_trace_printk_proto;
}
static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5)
{
struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
struct bpf_event_entry *ee;
struct perf_event *event;
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
return -EINVAL;
if (index == BPF_F_CURRENT_CPU)
index = cpu;
if (unlikely(index >= array->map.max_entries))
return -E2BIG;
ee = READ_ONCE(array->ptrs[index]);
if (unlikely(!ee))
if (!ee)
return -ENOENT;
event = ee->event;
/* make sure event is local and doesn't have pmu::count */
if (event->oncpu != smp_processor_id() ||
event->pmu->count)
return -EINVAL;
if (unlikely(event->attr.type != PERF_TYPE_HARDWARE &&
event->attr.type != PERF_TYPE_RAW))
return -EINVAL;
/* make sure event is local and doesn't have pmu::count */
if (unlikely(event->oncpu != cpu || event->pmu->count))
return -EINVAL;
/*
* we don't know if the function is run successfully by the
* return value. It can be judged in other places, such as
......@@ -233,6 +238,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
struct pt_regs *regs = (struct pt_regs *) (long) r1;
struct bpf_map *map = (struct bpf_map *) (long) r2;
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
void *data = (void *) (long) r4;
struct perf_sample_data sample_data;
......@@ -246,12 +252,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
return -EINVAL;
if (index == BPF_F_CURRENT_CPU)
index = raw_smp_processor_id();
index = cpu;
if (unlikely(index >= array->map.max_entries))
return -E2BIG;
ee = READ_ONCE(array->ptrs[index]);
if (unlikely(!ee))
if (!ee)
return -ENOENT;
event = ee->event;
......@@ -259,7 +265,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
return -EINVAL;
if (unlikely(event->oncpu != smp_processor_id()))
if (unlikely(event->oncpu != cpu))
return -EOPNOTSUPP;
perf_sample_data_init(&sample_data, 0, 0);
......@@ -354,18 +360,12 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
/* Decide whether a kprobe program may access its context at byte
 * offset @off with an access width of @size bytes. Offsets are
 * validated against the layout of struct pt_regs.
 *
 * @off:      byte offset of the access
 * @size:     width of the access in bytes
 * @type:     kind of access; only BPF_READ is accepted
 * @reg_type: not used by this check
 *
 * Return: true when the access is allowed, false otherwise.
 */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
					enum bpf_reg_type *reg_type)
{
	/* a non-positive width is never valid and would make the
	 * alignment modulo below undefined
	 */
	if (size <= 0)
		return false;
	/* check bounds of the first accessed byte */
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;
	/* only read is allowed */
	if (type != BPF_READ)
		return false;
	/* disallow misaligned access */
	if (off % size != 0)
		return false;
	/* The last accessed byte must be inside struct pt_regs as well.
	 * Alignment alone does not guarantee this when the structure
	 * size is not a multiple of the access width. Written as a
	 * subtraction (off < sizeof is known here) to avoid overflow.
	 */
	if (size > sizeof(struct pt_regs) - off)
		return false;
	return true;
}
......
......@@ -150,6 +150,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return raw_smp_processor_id();
}
/* Helper prototype that dispatches to __get_raw_cpu_id(): usable by
 * non-GPL programs, takes no typed arguments and yields an integer.
 */
static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= __get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
struct bpf_insn *insn_buf)
{
......@@ -1777,6 +1783,224 @@ const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
};
EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
/* Open a zero-filled gap of @len bytes at offset @off from the start
 * of the packet data: the data area is extended at the front by @len,
 * the first @off bytes are shifted to the new front, and the bytes
 * they vacated are cleared.
 *
 * Return: always 0.
 */
static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
{
	/* The caller has already run skb_cow() with len bytes of
	 * headroom, so the push can be done directly.
	 */
	skb_push(skb, len);

	/* Slide the leading off bytes down to the new data start ... */
	memmove(skb->data, skb->data + len, off);
	/* ... and blank the hole that opened up behind them. */
	memset(skb->data + off, 0, len);

	/* skb_postpush_rcsum(skb, skb->data + off, len) is deliberately
	 * left out: summing over a zeroed block does not alter the
	 * skb->csum result for checksum complete.
	 */
	return 0;
}
/* Remove @len bytes located at offset @off from the start of the
 * packet data: the removed range is subtracted from the receive
 * checksum, the leading @off bytes are moved up by @len, and the
 * front of the data area is then pulled by @len.
 *
 * Return: 0 on success, -ENOMEM if pskb_may_pull() fails for
 * off + len bytes.
 */
static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
{
	unsigned char *start;

	/* No skb_ensure_writable() here; the skb we operate on is
	 * already uncloned.
	 */
	if (unlikely(!pskb_may_pull(skb, off + len)))
		return -ENOMEM;

	/* Read the data pointer only after the pull check above. */
	start = skb->data;

	skb_postpull_rcsum(skb, start + off, len);
	memmove(start + len, start, off);
	__skb_pull(skb, len);

	return 0;
}
/* Insert @len zeroed bytes at offset @off and move the mac and network
 * header offsets back by @len. If the transport header coincided with
 * the network header on entry, it is kept coinciding afterwards.
 *
 * Return: result of bpf_skb_generic_push().
 */
static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
{
	const bool l4_tracks_l3 = skb->transport_header == skb->network_header;
	int err;

	/* Under eBPF we are guaranteed to start at the mac header, so no
	 * __skb_push()/__skb_pull() pair is needed to get there first.
	 */
	err = bpf_skb_generic_push(skb, off, len);
	if (unlikely(err))
		return err;

	skb->mac_header -= len;
	skb->network_header -= len;
	if (l4_tracks_l3)
		skb->transport_header = skb->network_header;

	return err;
}
/* Remove @len bytes at offset @off and advance the mac and network
 * header offsets by @len. If the transport header coincided with the
 * network header on entry, it is kept coinciding afterwards.
 *
 * Return: result of bpf_skb_generic_pop().
 */
static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
{
	const bool l4_tracks_l3 = skb->transport_header == skb->network_header;
	int err;

	/* As in the push variant, no __skb_push()/__skb_pull() pair
	 * is required here.
	 */
	err = bpf_skb_generic_pop(skb, off, len);
	if (unlikely(err))
		return err;

	skb->mac_header += len;
	skb->network_header += len;
	if (l4_tracks_l3)
		skb->transport_header = skb->network_header;

	return err;
}
static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
{
const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
u32 off = skb->network_header - skb->mac_header;
int ret;
ret = skb_cow(skb, len_diff);
if (unlikely(ret < 0))
return ret;
ret = bpf_skb_net_hdr_push(skb, off, len_diff);
if (unlikely(ret < 0))
return ret;
if (skb_is_gso(skb)) {
/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
* be changed into SKB_GSO_TCPV6.
*/
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
}
/* Due to IPv6 header, MSS needs to be downgraded. */
skb_shinfo(skb)->gso_size -= len_diff;
/* Header must be checked, and gso_segs recomputed. */
skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
skb_shinfo(skb)->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IPV6);
skb_clear_hash(skb);
return 0;
}
static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
{
const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
u32 off = skb->network_header - skb->mac_header;
int ret;
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
return ret;
ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
if (unlikely(ret < 0))
return ret;
if (skb_is_gso(skb)) {
/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
* be changed into SKB_GSO_TCPV4.
*/
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
}
/* Due to IPv4 header, MSS can be upgraded. */
skb_shinfo(skb)->gso_size += len_diff;
/* Header must be checked, and gso_segs recomputed. */
skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
skb_shinfo(skb)->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IP);
skb_clear_hash(skb);
return 0;
}
/* Dispatch a protocol change based on the skb's current protocol and
 * the requested @to_proto. Only IPv4 -> IPv6 and IPv6 -> IPv4 are
 * handled.
 *
 * Return: result of the matching conversion routine, or -ENOTSUPP for
 * any other combination.
 */
static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		if (to_proto == htons(ETH_P_IPV6))
			return bpf_skb_proto_4_to_6(skb);
		break;
	case htons(ETH_P_IPV6):
		if (to_proto == htons(ETH_P_IP))
			return bpf_skb_proto_6_to_4(skb);
		break;
	default:
		break;
	}

	return -ENOTSUPP;
}
/* eBPF helper: change the protocol of an skb.
 *
 * @r1:    the skb, passed as a 64-bit register value
 * @r2:    requested protocol, interpreted as a __be16
 * @flags: reserved; any non-zero value is rejected with -EINVAL
 * @r4:    unused
 * @r5:    unused
 *
 * Return: result of bpf_skb_proto_xlat(), or -EINVAL on non-zero flags.
 */
static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *) (long) r1;
	__be16 to_proto = (__force __be16) r2;
	int err;

	if (unlikely(flags))
		return -EINVAL;

	/* This helper only lays the basic groundwork for a protocol
	 * change. The eBPF program is expected to fill in the rest via
	 * bpf_skb_store_bytes(), bpf_lX_csum_replace() and other
	 * helpers instead of handing a raw buffer in here.
	 *
	 * That keeps the helper minimal and free of raw packet data
	 * handling: even with a buffer argument the program would
	 * still have to call bpf_lX_csum_replace(), and concerns stay
	 * separated since e.g. bpf_skb_store_bytes() should only deal
	 * with stores.
	 *
	 * Additional options and extension header space are not
	 * supported at present; the flags register is reserved so that
	 * this can be adapted. For offloads the packet is marked as
	 * dodgy, so headers need to be verified first.
	 */
	err = bpf_skb_proto_xlat(skb, to_proto);

	/* Done unconditionally, regardless of the translation result. */
	bpf_compute_data_end(skb);

	return err;
}
/* Helper prototype for bpf_skb_change_proto(): not GPL-only, returns
 * an integer, takes the program context followed by two ARG_ANYTHING
 * arguments.
 */
static const struct bpf_func_proto bpf_skb_change_proto_proto = {
	.func		= bpf_skb_change_proto,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};
/* eBPF helper: overwrite skb->pkt_type.
 *
 * @r1: the skb, passed as a 64-bit register value
 * @r2: new packet type (truncated to u32)
 * @r3: unused
 * @r4: unused
 * @r5: unused
 *
 * Return: 0 on success, -EINVAL if either the current or the requested
 * packet type is greater than PACKET_OTHERHOST.
 */
static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *) (long) r1;
	u32 new_type = r2;

	/* For now only a restricted subset of types may be changed;
	 * this applies to the type being replaced ...
	 */
	if (unlikely(skb->pkt_type > PACKET_OTHERHOST))
		return -EINVAL;
	/* ... as well as to the type being installed. */
	if (unlikely(new_type > PACKET_OTHERHOST))
		return -EINVAL;

	skb->pkt_type = new_type;

	return 0;
}
/* Helper prototype for bpf_skb_change_type(): not GPL-only, returns an
 * integer, takes the program context followed by one ARG_ANYTHING
 * argument.
 */
static const struct bpf_func_proto bpf_skb_change_type_proto = {
	.func		= bpf_skb_change_type,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
bool bpf_helper_changes_skb_data(void *func)
{
if (func == bpf_skb_vlan_push)
......@@ -1785,6 +2009,8 @@ bool bpf_helper_changes_skb_data(void *func)
return true;
if (func == bpf_skb_store_bytes)
return true;
if (func == bpf_skb_change_proto)
return true;
if (func == bpf_l3_csum_replace)
return true;
if (func == bpf_l4_csum_replace)
......@@ -2037,7 +2263,7 @@ sk_filter_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
return &bpf_get_raw_smp_processor_id_proto;
case BPF_FUNC_tail_call:
return &bpf_tail_call_proto;
case BPF_FUNC_ktime_get_ns:
......@@ -2072,6 +2298,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_vlan_push_proto;
case BPF_FUNC_skb_vlan_pop:
return &bpf_skb_vlan_pop_proto;
case BPF_FUNC_skb_change_proto:
return &bpf_skb_change_proto_proto;
case BPF_FUNC_skb_change_type:
return &bpf_skb_change_type_proto;
case BPF_FUNC_skb_get_tunnel_key:
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
......@@ -2086,6 +2316,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_get_route_realm_proto;
case BPF_FUNC_perf_event_output:
return bpf_get_event_output_proto();
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
default:
return sk_filter_func_proto(func_id);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment