Commit 6bb218b5 authored by David S. Miller

Merge branch 'napi_threaded_poll-enhancements'

Eric Dumazet says:

====================
net: give napi_threaded_poll() some love

There is interest in reverting commit 4cd13c21
("softirq: Let ksoftirqd do its job") and using the
napi_threaded_poll() mode instead.

https://lore.kernel.org/netdev/140f61e2e1fcb8cf53619709046e312e343b53ca.camel@redhat.com/T/#m8a8f5b09844adba157ad0d22fc1233d97013de50

Before doing so, make sure napi_threaded_poll() benefits
from recent core stack improvements, to further reduce
softirq triggers.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 19c60fde 87eff2ec
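
For reference, a minimal userspace sketch of how the napi_threaded_poll() mode mentioned
above is typically enabled per device, via the "threaded" sysfs attribute; the interface
name eth0 below is a placeholder, not something taken from this series:

/* Sketch, not part of this series: switch one device's NAPI processing
 * to threaded mode by writing "1" to its "threaded" sysfs attribute.
 * "eth0" is a placeholder interface name.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/class/net/eth0/threaded";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "1", 1) != 1) {
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}

The shell equivalent is simply writing 1 to /sys/class/net/eth0/threaded.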
@@ -3194,7 +3194,10 @@ struct softnet_data {
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
 #endif
 	bool			in_net_rx_action;
+	bool			in_napi_threaded_poll;
 #ifdef CONFIG_NET_FLOW_LIMIT
 	struct sd_flow_limit __rcu *flow_limit;
 #endif
@@ -4603,10 +4603,10 @@ static void napi_schedule_rps(struct softnet_data *sd)
 		sd->rps_ipi_next = mysd->rps_ipi_list;
 		mysd->rps_ipi_list = sd;
 
-		/* If not called from net_rx_action()
+		/* If not called from net_rx_action() or napi_threaded_poll()
 		 * we have to raise NET_RX_SOFTIRQ.
 		 */
-		if (!mysd->in_net_rx_action)
+		if (!mysd->in_net_rx_action && !mysd->in_napi_threaded_poll)
 			__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 		return;
 	}
@@ -6598,9 +6598,31 @@ static int napi_thread_wait(struct napi_struct *napi)
 	return -1;
 }
 
+static void skb_defer_free_flush(struct softnet_data *sd)
+{
+	struct sk_buff *skb, *next;
+
+	/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
+	if (!READ_ONCE(sd->defer_list))
+		return;
+
+	spin_lock(&sd->defer_lock);
+	skb = sd->defer_list;
+	sd->defer_list = NULL;
+	sd->defer_count = 0;
+	spin_unlock(&sd->defer_lock);
+
+	while (skb != NULL) {
+		next = skb->next;
+		napi_consume_skb(skb, 1);
+		skb = next;
+	}
+}
+
 static int napi_threaded_poll(void *data)
 {
 	struct napi_struct *napi = data;
+	struct softnet_data *sd;
 	void *have;
 
 	while (!napi_thread_wait(napi)) {
@@ -6608,11 +6630,21 @@ static int napi_threaded_poll(void *data)
 			bool repoll = false;
 
 			local_bh_disable();
+			sd = this_cpu_ptr(&softnet_data);
+			sd->in_napi_threaded_poll = true;
 
 			have = netpoll_poll_lock(napi);
 			__napi_poll(napi, &repoll);
 			netpoll_poll_unlock(have);
 
+			sd->in_napi_threaded_poll = false;
+			barrier();
+
+			if (sd_has_rps_ipi_waiting(sd)) {
+				local_irq_disable();
+				net_rps_action_and_irq_enable(sd);
+			}
+			skb_defer_free_flush(sd);
 			local_bh_enable();
 
 			if (!repoll)
@@ -6624,27 +6656,6 @@ static int napi_threaded_poll(void *data)
 	return 0;
 }
 
-static void skb_defer_free_flush(struct softnet_data *sd)
-{
-	struct sk_buff *skb, *next;
-
-	/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
-	if (!READ_ONCE(sd->defer_list))
-		return;
-
-	spin_lock_irq(&sd->defer_lock);
-	skb = sd->defer_list;
-	sd->defer_list = NULL;
-	sd->defer_count = 0;
-	spin_unlock_irq(&sd->defer_lock);
-
-	while (skb != NULL) {
-		next = skb->next;
-		napi_consume_skb(skb, 1);
-		skb = next;
-	}
-}
-
 static __latent_entropy void net_rx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -6870,7 +6870,6 @@ void skb_attempt_defer_free(struct sk_buff *skb)
 {
 	int cpu = skb->alloc_cpu;
 	struct softnet_data *sd;
-	unsigned long flags;
 	unsigned int defer_max;
 	bool kick;
@@ -6881,12 +6880,15 @@ nodefer: __kfree_skb(skb);
 		return;
 	}
 
+	DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
+	DEBUG_NET_WARN_ON_ONCE(skb->destructor);
+
 	sd = &per_cpu(softnet_data, cpu);
 	defer_max = READ_ONCE(sysctl_skb_defer_max);
 	if (READ_ONCE(sd->defer_count) >= defer_max)
 		goto nodefer;
 
-	spin_lock_irqsave(&sd->defer_lock, flags);
+	spin_lock_bh(&sd->defer_lock);
 	/* Send an IPI every time queue reaches half capacity. */
 	kick = sd->defer_count == (defer_max >> 1);
 	/* Paired with the READ_ONCE() few lines above */
@@ -6895,7 +6897,7 @@ nodefer: __kfree_skb(skb);
 	skb->next = sd->defer_list;
 	/* Paired with READ_ONCE() in skb_defer_free_flush() */
 	WRITE_ONCE(sd->defer_list, skb);
-	spin_unlock_irqrestore(&sd->defer_lock, flags);
+	spin_unlock_bh(&sd->defer_lock);
 
 	/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
 	 * if we are unlucky enough (this seems very unlikely).