Commit 940656fb authored by Daniel Borkmann

Merge branch 'bpf-big-map-entries'

Jakub Kicinski says:

====================
This series makes the control message parsing for interacting
with BPF maps more flexible.  Up until now we had a hard limit
in the ABI for key and value size to be 64B at most.  Using
TLV capability allows us to support large map entries.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents cb86d0f8 0c9864c0
...@@ -89,15 +89,32 @@ nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size) ...@@ -89,15 +89,32 @@ nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size)
return skb; return skb;
} }
/*
 * Compute the size in bytes of a map operation request carrying @n elements:
 * the fixed struct cmsg_req_map_op header plus @n times the per-element size
 * (bpf->cmsg_key_sz + bpf->cmsg_val_sz).
 */
static unsigned int
nfp_bpf_cmsg_map_req_size(struct nfp_app_bpf *bpf, unsigned int n)
{
	unsigned int elem_sz = bpf->cmsg_key_sz + bpf->cmsg_val_sz;

	return sizeof(struct cmsg_req_map_op) + elem_sz * n;
}
static struct sk_buff * static struct sk_buff *
nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n) nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n)
{
return nfp_bpf_cmsg_alloc(bpf, nfp_bpf_cmsg_map_req_size(bpf, n));
}
static unsigned int
nfp_bpf_cmsg_map_reply_size(struct nfp_app_bpf *bpf, unsigned int n)
{ {
unsigned int size; unsigned int size;
size = sizeof(struct cmsg_req_map_op); size = sizeof(struct cmsg_reply_map_op);
size += sizeof(struct cmsg_key_value_pair) * n; size += (bpf->cmsg_key_sz + bpf->cmsg_val_sz) * n;
return nfp_bpf_cmsg_alloc(bpf, size); return size;
} }
static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb) static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb)
...@@ -338,6 +355,34 @@ void nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map) ...@@ -338,6 +355,34 @@ void nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map)
dev_consume_skb_any(skb); dev_consume_skb_any(skb);
} }
/*
 * Return a pointer to the key of element @n inside the data area of the map
 * operation request @req. Elements are laid out back to back, each one being
 * bpf->cmsg_key_sz bytes of key followed by bpf->cmsg_val_sz bytes of value.
 */
static void *
nfp_bpf_ctrl_req_key(struct nfp_app_bpf *bpf, struct cmsg_req_map_op *req,
		     unsigned int n)
{
	unsigned int offset = (bpf->cmsg_key_sz + bpf->cmsg_val_sz) * n;

	return req->data + offset;
}
/*
 * Return a pointer to the value of element @n inside the data area of the map
 * operation request @req. The value sits bpf->cmsg_key_sz bytes past the
 * start of the element, i.e. right after that element's key.
 */
static void *
nfp_bpf_ctrl_req_val(struct nfp_app_bpf *bpf, struct cmsg_req_map_op *req,
		     unsigned int n)
{
	unsigned int offset;

	/* Skip the n preceding key/value elements, then this element's key */
	offset = (bpf->cmsg_key_sz + bpf->cmsg_val_sz) * n;
	offset += bpf->cmsg_key_sz;

	return req->data + offset;
}
/*
 * Return a pointer to the key of element @n inside the data area of the map
 * operation reply @reply. Elements are laid out back to back, each one being
 * bpf->cmsg_key_sz bytes of key followed by bpf->cmsg_val_sz bytes of value.
 */
static void *
nfp_bpf_ctrl_reply_key(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
		       unsigned int n)
{
	unsigned int offset = (bpf->cmsg_key_sz + bpf->cmsg_val_sz) * n;

	return reply->data + offset;
}
/*
 * Return a pointer to the value of element @n inside the data area of the map
 * operation reply @reply. The value sits bpf->cmsg_key_sz bytes past the
 * start of the element, i.e. right after that element's key.
 */
static void *
nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
		       unsigned int n)
{
	unsigned int offset;

	/* Skip the n preceding key/value elements, then this element's key */
	offset = (bpf->cmsg_key_sz + bpf->cmsg_val_sz) * n;
	offset += bpf->cmsg_key_sz;

	return reply->data + offset;
}
static int static int
nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
enum nfp_bpf_cmsg_type op, enum nfp_bpf_cmsg_type op,
...@@ -366,12 +411,13 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, ...@@ -366,12 +411,13 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
/* Copy inputs */ /* Copy inputs */
if (key) if (key)
memcpy(&req->elem[0].key, key, map->key_size); memcpy(nfp_bpf_ctrl_req_key(bpf, req, 0), key, map->key_size);
if (value) if (value)
memcpy(&req->elem[0].value, value, map->value_size); memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value,
map->value_size);
skb = nfp_bpf_cmsg_communicate(bpf, skb, op, skb = nfp_bpf_cmsg_communicate(bpf, skb, op,
sizeof(*reply) + sizeof(*reply->elem)); nfp_bpf_cmsg_map_reply_size(bpf, 1));
if (IS_ERR(skb)) if (IS_ERR(skb))
return PTR_ERR(skb); return PTR_ERR(skb);
...@@ -382,9 +428,11 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, ...@@ -382,9 +428,11 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
/* Copy outputs */ /* Copy outputs */
if (out_key) if (out_key)
memcpy(out_key, &reply->elem[0].key, map->key_size); memcpy(out_key, nfp_bpf_ctrl_reply_key(bpf, reply, 0),
map->key_size);
if (out_value) if (out_value)
memcpy(out_value, &reply->elem[0].value, map->value_size); memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, 0),
map->value_size);
dev_consume_skb_any(skb); dev_consume_skb_any(skb);
...@@ -428,6 +476,13 @@ int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, ...@@ -428,6 +476,13 @@ int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
key, NULL, 0, next_key, NULL); key, NULL, 0, next_key, NULL);
} }
/*
 * Return the largest of NFP_NET_DEFAULT_MTU, the size of a single-element map
 * request and the size of a single-element map reply.
 */
unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf)
{
	unsigned int req_sz = nfp_bpf_cmsg_map_req_size(bpf, 1);
	unsigned int reply_sz = nfp_bpf_cmsg_map_reply_size(bpf, 1);

	return max3((unsigned int)NFP_NET_DEFAULT_MTU, req_sz, reply_sz);
}
void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb) void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
{ {
struct nfp_app_bpf *bpf = app->priv; struct nfp_app_bpf *bpf = app->priv;
......
...@@ -52,6 +52,7 @@ enum bpf_cap_tlv_type { ...@@ -52,6 +52,7 @@ enum bpf_cap_tlv_type {
NFP_BPF_CAP_TYPE_RANDOM = 4, NFP_BPF_CAP_TYPE_RANDOM = 4,
NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5, NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5,
NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6, NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6,
NFP_BPF_CAP_TYPE_ABI_VERSION = 7,
}; };
struct nfp_bpf_cap_tlv_func { struct nfp_bpf_cap_tlv_func {
...@@ -98,6 +99,7 @@ enum nfp_bpf_cmsg_type { ...@@ -98,6 +99,7 @@ enum nfp_bpf_cmsg_type {
#define CMSG_TYPE_MAP_REPLY_BIT 7 #define CMSG_TYPE_MAP_REPLY_BIT 7
#define __CMSG_REPLY(req) (BIT(CMSG_TYPE_MAP_REPLY_BIT) | (req)) #define __CMSG_REPLY(req) (BIT(CMSG_TYPE_MAP_REPLY_BIT) | (req))
/* BPF ABIv2 fixed-length control message fields */
#define CMSG_MAP_KEY_LW 16 #define CMSG_MAP_KEY_LW 16
#define CMSG_MAP_VALUE_LW 16 #define CMSG_MAP_VALUE_LW 16
...@@ -147,24 +149,19 @@ struct cmsg_reply_map_free_tbl { ...@@ -147,24 +149,19 @@ struct cmsg_reply_map_free_tbl {
__be32 count; __be32 count;
}; };
struct cmsg_key_value_pair {
__be32 key[CMSG_MAP_KEY_LW];
__be32 value[CMSG_MAP_VALUE_LW];
};
struct cmsg_req_map_op { struct cmsg_req_map_op {
struct cmsg_hdr hdr; struct cmsg_hdr hdr;
__be32 tid; __be32 tid;
__be32 count; __be32 count;
__be32 flags; __be32 flags;
struct cmsg_key_value_pair elem[0]; u8 data[0];
}; };
struct cmsg_reply_map_op { struct cmsg_reply_map_op {
struct cmsg_reply_map_simple reply_hdr; struct cmsg_reply_map_simple reply_hdr;
__be32 count; __be32 count;
__be32 resv; __be32 resv;
struct cmsg_key_value_pair elem[0]; u8 data[0];
}; };
struct cmsg_bpf_event { struct cmsg_bpf_event {
......
...@@ -54,11 +54,14 @@ const struct rhashtable_params nfp_bpf_maps_neutral_params = { ...@@ -54,11 +54,14 @@ const struct rhashtable_params nfp_bpf_maps_neutral_params = {
static bool nfp_net_ebpf_capable(struct nfp_net *nn) static bool nfp_net_ebpf_capable(struct nfp_net *nn)
{ {
#ifdef __LITTLE_ENDIAN #ifdef __LITTLE_ENDIAN
if (nn->cap & NFP_NET_CFG_CTRL_BPF && struct nfp_app_bpf *bpf = nn->app->priv;
nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
return true; return nn->cap & NFP_NET_CFG_CTRL_BPF &&
#endif bpf->abi_version &&
nn_readb(nn, NFP_NET_CFG_BPF_ABI) == bpf->abi_version;
#else
return false; return false;
#endif
} }
static int static int
...@@ -342,6 +345,26 @@ nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value, ...@@ -342,6 +345,26 @@ nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value,
return 0; return 0;
} }
/*
 * Parse the BPF ABI version capability TLV.
 *
 * @bpf:	BPF app private data; bpf->abi_version is updated
 * @value:	pointer to the TLV payload in device memory
 * @length:	payload length in bytes
 *
 * The version is read as a 32-bit word from @value.  Only versions 2 and 3
 * are accepted; any other value is reported with a warning and recorded as 0,
 * without failing the parse.
 *
 * Return: 0 on success (including the unsupported-version case), -EINVAL if
 * the TLV is shorter than 4 bytes.
 */
static int
nfp_bpf_parse_cap_abi_version(struct nfp_app_bpf *bpf, void __iomem *value,
			      u32 length)
{
	if (length < 4) {
		/* length is u32, print it unsigned */
		nfp_err(bpf->app->cpp, "truncated ABI version TLV: %u\n",
			length);
		return -EINVAL;
	}

	bpf->abi_version = readl(value);
	if (bpf->abi_version < 2 || bpf->abi_version > 3) {
		/* abi_version is u32, print it unsigned */
		nfp_warn(bpf->app->cpp, "unsupported BPF ABI version: %u\n",
			 bpf->abi_version);
		bpf->abi_version = 0;
	}

	return 0;
}
static int nfp_bpf_parse_capabilities(struct nfp_app *app) static int nfp_bpf_parse_capabilities(struct nfp_app *app)
{ {
struct nfp_cpp *cpp = app->pf->cpp; struct nfp_cpp *cpp = app->pf->cpp;
...@@ -393,6 +416,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app) ...@@ -393,6 +416,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
length)) length))
goto err_release_free; goto err_release_free;
break; break;
case NFP_BPF_CAP_TYPE_ABI_VERSION:
if (nfp_bpf_parse_cap_abi_version(app->priv, value,
length))
goto err_release_free;
break;
default: default:
nfp_dbg(cpp, "unknown BPF capability: %d\n", type); nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
break; break;
...@@ -414,6 +442,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app) ...@@ -414,6 +442,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
return -EINVAL; return -EINVAL;
} }
/* Set the capability defaults assumed when no overriding TLV is present. */
static void nfp_bpf_init_capabilities(struct nfp_app_bpf *bpf)
{
	/* Original BPF ABI version */
	bpf->abi_version = 2;
}
static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev) static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev)
{ {
struct nfp_app_bpf *bpf = app->priv; struct nfp_app_bpf *bpf = app->priv;
...@@ -447,10 +480,21 @@ static int nfp_bpf_init(struct nfp_app *app) ...@@ -447,10 +480,21 @@ static int nfp_bpf_init(struct nfp_app *app)
if (err) if (err)
goto err_free_bpf; goto err_free_bpf;
nfp_bpf_init_capabilities(bpf);
err = nfp_bpf_parse_capabilities(app); err = nfp_bpf_parse_capabilities(app);
if (err) if (err)
goto err_free_neutral_maps; goto err_free_neutral_maps;
if (bpf->abi_version < 3) {
bpf->cmsg_key_sz = CMSG_MAP_KEY_LW * 4;
bpf->cmsg_val_sz = CMSG_MAP_VALUE_LW * 4;
} else {
bpf->cmsg_key_sz = bpf->maps.max_key_sz;
bpf->cmsg_val_sz = bpf->maps.max_val_sz;
app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf);
}
bpf->bpf_dev = bpf_offload_dev_create(); bpf->bpf_dev = bpf_offload_dev_create();
err = PTR_ERR_OR_ZERO(bpf->bpf_dev); err = PTR_ERR_OR_ZERO(bpf->bpf_dev);
if (err) if (err)
......
...@@ -121,12 +121,17 @@ enum pkt_vec { ...@@ -121,12 +121,17 @@ enum pkt_vec {
* @cmsg_replies: received cmsg replies waiting to be consumed * @cmsg_replies: received cmsg replies waiting to be consumed
* @cmsg_wq: work queue for waiting for cmsg replies * @cmsg_wq: work queue for waiting for cmsg replies
* *
* @cmsg_key_sz: size of key in cmsg element array
* @cmsg_val_sz: size of value in cmsg element array
*
* @map_list: list of offloaded maps * @map_list: list of offloaded maps
* @maps_in_use: number of currently offloaded maps * @maps_in_use: number of currently offloaded maps
* @map_elems_in_use: number of elements allocated to offloaded maps * @map_elems_in_use: number of elements allocated to offloaded maps
* *
* @maps_neutral: hash table of offload-neutral maps (on pointer) * @maps_neutral: hash table of offload-neutral maps (on pointer)
* *
* @abi_version: global BPF ABI version
*
* @adjust_head: adjust head capability * @adjust_head: adjust head capability
* @adjust_head.flags: extra flags for adjust head * @adjust_head.flags: extra flags for adjust head
* @adjust_head.off_min: minimal packet offset within buffer required * @adjust_head.off_min: minimal packet offset within buffer required
...@@ -164,12 +169,17 @@ struct nfp_app_bpf { ...@@ -164,12 +169,17 @@ struct nfp_app_bpf {
struct sk_buff_head cmsg_replies; struct sk_buff_head cmsg_replies;
struct wait_queue_head cmsg_wq; struct wait_queue_head cmsg_wq;
unsigned int cmsg_key_sz;
unsigned int cmsg_val_sz;
struct list_head map_list; struct list_head map_list;
unsigned int maps_in_use; unsigned int maps_in_use;
unsigned int map_elems_in_use; unsigned int map_elems_in_use;
struct rhashtable maps_neutral; struct rhashtable maps_neutral;
u32 abi_version;
struct nfp_bpf_cap_adjust_head { struct nfp_bpf_cap_adjust_head {
u32 flags; u32 flags;
int off_min; int off_min;
...@@ -492,6 +502,7 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, ...@@ -492,6 +502,7 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv); void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv);
unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf);
long long int long long int
nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map); nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map);
void void
......
...@@ -40,6 +40,8 @@ ...@@ -40,6 +40,8 @@
#include "nfp_net_repr.h" #include "nfp_net_repr.h"
#define NFP_APP_CTRL_MTU_MAX U32_MAX
struct bpf_prog; struct bpf_prog;
struct net_device; struct net_device;
struct netdev_bpf; struct netdev_bpf;
...@@ -178,6 +180,7 @@ struct nfp_app_type { ...@@ -178,6 +180,7 @@ struct nfp_app_type {
* @ctrl: pointer to ctrl vNIC struct * @ctrl: pointer to ctrl vNIC struct
* @reprs: array of pointers to representors * @reprs: array of pointers to representors
* @type: pointer to const application ops and info * @type: pointer to const application ops and info
* @ctrl_mtu: MTU to set on the control vNIC (set in .init())
* @priv: app-specific priv data * @priv: app-specific priv data
*/ */
struct nfp_app { struct nfp_app {
...@@ -189,6 +192,7 @@ struct nfp_app { ...@@ -189,6 +192,7 @@ struct nfp_app {
struct nfp_reprs __rcu *reprs[NFP_REPR_TYPE_MAX + 1]; struct nfp_reprs __rcu *reprs[NFP_REPR_TYPE_MAX + 1];
const struct nfp_app_type *type; const struct nfp_app_type *type;
unsigned int ctrl_mtu;
void *priv; void *priv;
}; };
......
...@@ -3877,10 +3877,20 @@ int nfp_net_init(struct nfp_net *nn) ...@@ -3877,10 +3877,20 @@ int nfp_net_init(struct nfp_net *nn)
return err; return err;
/* Set default MTU and Freelist buffer size */ /* Set default MTU and Freelist buffer size */
if (nn->max_mtu < NFP_NET_DEFAULT_MTU) if (!nfp_net_is_data_vnic(nn) && nn->app->ctrl_mtu) {
if (nn->app->ctrl_mtu <= nn->max_mtu) {
nn->dp.mtu = nn->app->ctrl_mtu;
} else {
if (nn->app->ctrl_mtu != NFP_APP_CTRL_MTU_MAX)
nn_warn(nn, "app requested MTU above max supported %u > %u\n",
nn->app->ctrl_mtu, nn->max_mtu);
nn->dp.mtu = nn->max_mtu; nn->dp.mtu = nn->max_mtu;
else }
} else if (nn->max_mtu < NFP_NET_DEFAULT_MTU) {
nn->dp.mtu = nn->max_mtu;
} else {
nn->dp.mtu = NFP_NET_DEFAULT_MTU; nn->dp.mtu = NFP_NET_DEFAULT_MTU;
}
nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp); nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
if (nfp_app_ctrl_uses_data_vnics(nn->app)) if (nfp_app_ctrl_uses_data_vnics(nn->app))
......
...@@ -264,7 +264,6 @@ ...@@ -264,7 +264,6 @@
* %NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code * %NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code
*/ */
#define NFP_NET_CFG_BPF_ABI 0x0080 #define NFP_NET_CFG_BPF_ABI 0x0080
#define NFP_NET_BPF_ABI 2
#define NFP_NET_CFG_BPF_CAP 0x0081 #define NFP_NET_CFG_BPF_CAP 0x0081
#define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */ #define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */
#define NFP_NET_CFG_BPF_MAX_LEN 0x0082 #define NFP_NET_CFG_BPF_MAX_LEN 0x0082
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment