Commit 2a01a889 authored by Paolo Abeni

Merge branch 'net-bpf_net_context-cleanups'

Sebastian Andrzej Siewior says:

====================
net: bpf_net_context cleanups.

A small series with bpf_net_context cleanups/improvements.
Jakub asked for #1 and #2, and while looking around I made #3.
====================

Link: https://patch.msgid.link/20240628103020.1766241-1-bigeasy@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents e27d7168 e3d69f58
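For context, the driver-facing contract is unchanged by this series: a driver that may have queued frames via XDP_REDIRECT from its napi->poll() routine must still call xdp_do_flush() before returning, and that is what the reworked xdp_do_check_flushed() verifies under CONFIG_DEBUG_NET. The sketch below illustrates that pattern; it is not part of this commit, and the my_* driver identifiers are hypothetical.

/* Minimal sketch of the NAPI-side contract checked by xdp_do_check_flushed()
 * under CONFIG_DEBUG_NET. Not part of this commit; my_napi_poll, my_rx_ring
 * and my_clean_rx() are made-up driver names.
 */
static int my_napi_poll(struct napi_struct *napi, int budget)
{
	struct my_rx_ring *ring = container_of(napi, struct my_rx_ring, napi);
	bool xdp_redirect = false;
	int done;

	/* my_clean_rx() runs the XDP program on received frames; frames the
	 * program redirects are queued on the per-bpf_net_context
	 * dev/cpu/xsk flush lists rather than transmitted immediately.
	 */
	done = my_clean_rx(ring, budget, &xdp_redirect);

	/* Flush everything queued by XDP_REDIRECT before leaving NAPI;
	 * with CONFIG_DEBUG_NET, xdp_do_check_flushed() warns (and flushes)
	 * if a driver forgets this.
	 */
	if (xdp_redirect)
		xdp_do_flush();

	if (done < budget)
		napi_complete_done(napi, done);

	return done;
}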
@@ -2494,7 +2494,7 @@ struct sk_buff;
 struct bpf_dtab_netdev;
 struct bpf_cpu_map_entry;
-void __dev_flush(void);
+void __dev_flush(struct list_head *flush_list);
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
		    struct net_device *dev_rx);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
@@ -2507,7 +2507,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
			   struct bpf_prog *xdp_prog, struct bpf_map *map,
			   bool exclude_ingress);
-void __cpu_map_flush(void);
+void __cpu_map_flush(struct list_head *flush_list);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
		    struct net_device *dev_rx);
 int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
@@ -2644,8 +2644,6 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
 void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
 void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
-bool dev_check_flush(void);
-bool cpu_map_check_flush(void);
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -2738,7 +2736,7 @@ static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd)
	return ERR_PTR(-EOPNOTSUPP);
 }
-static inline void __dev_flush(void)
+static inline void __dev_flush(struct list_head *flush_list)
 {
 }
@@ -2784,7 +2782,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
	return 0;
 }
-static inline void __cpu_map_flush(void)
+static inline void __cpu_map_flush(struct list_head *flush_list)
 {
 }
...
@@ -829,6 +829,33 @@ static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void)
	return &bpf_net_ctx->xskmap_map_flush_list;
 }
+static inline void bpf_net_ctx_get_all_used_flush_lists(struct list_head **lh_map,
+							 struct list_head **lh_dev,
+							 struct list_head **lh_xsk)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+	u32 kern_flags = bpf_net_ctx->ri.kern_flags;
+	struct list_head *lh;
+	*lh_map = *lh_dev = *lh_xsk = NULL;
+	if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
+		return;
+	lh = &bpf_net_ctx->dev_map_flush_list;
+	if (kern_flags & BPF_RI_F_DEV_MAP_INIT && !list_empty(lh))
+		*lh_dev = lh;
+	lh = &bpf_net_ctx->cpu_map_flush_list;
+	if (kern_flags & BPF_RI_F_CPU_MAP_INIT && !list_empty(lh))
+		*lh_map = lh;
+	lh = &bpf_net_ctx->xskmap_map_flush_list;
+	if (IS_ENABLED(CONFIG_XDP_SOCKETS) &&
+	    kern_flags & BPF_RI_F_XSK_MAP_INIT && !list_empty(lh))
+		*lh_xsk = lh;
+}
 /* Compute the linear packet data range [data, data_end) which
  * will be accessed by various program types (cls_bpf, act_bpf,
  * lwt, ...). Subsystems allowing direct data access must (!)
...
@@ -121,7 +121,7 @@ struct xsk_tx_metadata_ops {
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
-void __xsk_map_flush(void);
+void __xsk_map_flush(struct list_head *flush_list);
 /**
  * xsk_tx_metadata_to_compl - Save enough relevant metadata information
@@ -206,7 +206,7 @@ static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
	return -EOPNOTSUPP;
 }
-static inline void __xsk_map_flush(void)
+static inline void __xsk_map_flush(struct list_head *flush_list)
 {
 }
@@ -228,14 +228,4 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
 }
 #endif /* CONFIG_XDP_SOCKETS */
-#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET)
-bool xsk_map_check_flush(void);
-#else
-static inline bool xsk_map_check_flush(void)
-{
-	return false;
-}
-#endif
 #endif /* _LINUX_XDP_SOCK_H */
...
@@ -707,7 +707,6 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
  */
 static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 {
-	struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
@@ -724,8 +723,11 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
	 */
	bq->q[bq->count++] = xdpf;
-	if (!bq->flush_node.prev)
+	if (!bq->flush_node.prev) {
+		struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
		list_add(&bq->flush_node, flush_list);
+	}
 }
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
@@ -757,9 +759,8 @@ int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
	return ret;
 }
-void __cpu_map_flush(void)
+void __cpu_map_flush(struct list_head *flush_list)
 {
-	struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
	struct xdp_bulk_queue *bq, *tmp;
	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -769,13 +770,3 @@ void __cpu_map_flush(void)
		wake_up_process(bq->obj->kthread);
	}
 }
-#ifdef CONFIG_DEBUG_NET
-bool cpu_map_check_flush(void)
-{
-	if (list_empty(bpf_net_ctx_get_cpu_map_flush_list()))
-		return false;
-	__cpu_map_flush();
-	return true;
-}
-#endif
...
@@ -412,9 +412,8 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
  * driver before returning from its napi->poll() routine. See the comment above
  * xdp_do_flush() in filter.c.
  */
-void __dev_flush(void)
+void __dev_flush(struct list_head *flush_list)
 {
-	struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
	struct xdp_dev_bulk_queue *bq, *tmp;
	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -425,16 +424,6 @@ void __dev_flush(void)
	}
 }
-#ifdef CONFIG_DEBUG_NET
-bool dev_check_flush(void)
-{
-	if (list_empty(bpf_net_ctx_get_dev_flush_list()))
-		return false;
-	__dev_flush();
-	return true;
-}
-#endif
 /* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
  * by local_bh_disable() (from XDP calls inside NAPI). The
  * rcu_read_lock_bh_held() below makes lockdep accept both.
@@ -459,7 +448,6 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
		       struct net_device *dev_rx, struct bpf_prog *xdp_prog)
 {
-	struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
@@ -473,6 +461,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
	 * are only ever modified together.
	 */
	if (!bq->dev_rx) {
+		struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
		bq->dev_rx = dev_rx;
		bq->xdp_prog = xdp_prog;
		list_add(&bq->flush_node, flush_list);
...
@@ -2355,7 +2355,6 @@ __latent_entropy struct task_struct *copy_process(
	RCU_INIT_POINTER(p->bpf_storage, NULL);
	p->bpf_ctx = NULL;
 #endif
-	p->bpf_net_context = NULL;
	/* Perform scheduler related setup. Assign this task to a CPU. */
	retval = sched_fork(clone_flags, p);
...
@@ -4277,22 +4277,39 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
  */
 void xdp_do_flush(void)
 {
-	__dev_flush();
-	__cpu_map_flush();
-	__xsk_map_flush();
+	struct list_head *lh_map, *lh_dev, *lh_xsk;
+	bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk);
+	if (lh_dev)
+		__dev_flush(lh_dev);
+	if (lh_map)
+		__cpu_map_flush(lh_map);
+	if (lh_xsk)
+		__xsk_map_flush(lh_xsk);
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush);
 #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
 void xdp_do_check_flushed(struct napi_struct *napi)
 {
-	bool ret;
+	struct list_head *lh_map, *lh_dev, *lh_xsk;
+	bool missed = false;
-	ret = dev_check_flush();
-	ret |= cpu_map_check_flush();
-	ret |= xsk_map_check_flush();
+	bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk);
+	if (lh_dev) {
+		__dev_flush(lh_dev);
+		missed = true;
+	}
+	if (lh_map) {
+		__cpu_map_flush(lh_map);
+		missed = true;
+	}
+	if (lh_xsk) {
+		__xsk_map_flush(lh_xsk);
+		missed = true;
+	}
-	WARN_ONCE(ret, "Missing xdp_do_flush() invocation after NAPI by %ps\n",
+	WARN_ONCE(missed, "Missing xdp_do_flush() invocation after NAPI by %ps\n",
		  napi->poll);
 }
 #endif
...
@@ -370,22 +370,23 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
-	struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
	int err;
	err = xsk_rcv(xs, xdp);
	if (err)
		return err;
-	if (!xs->flush_node.prev)
+	if (!xs->flush_node.prev) {
+		struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
		list_add(&xs->flush_node, flush_list);
+	}
	return 0;
 }
-void __xsk_map_flush(void)
+void __xsk_map_flush(struct list_head *flush_list)
 {
-	struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
	struct xdp_sock *xs, *tmp;
	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
@@ -394,16 +395,6 @@ void __xsk_map_flush(void)
	}
 }
-#ifdef CONFIG_DEBUG_NET
-bool xsk_map_check_flush(void)
-{
-	if (list_empty(bpf_net_ctx_get_xskmap_flush_list()))
-		return false;
-	__xsk_map_flush();
-	return true;
-}
-#endif
 void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
 {
	xskq_prod_submit_n(pool->cq, nb_entries);
...