Commit 6bb218b5 authored by David S. Miller

Merge branch 'napi_threaded_poll-enhancements'

Eric Dumazet says:

====================
net: give napi_threaded_poll() some love

There is interest in reverting commit 4cd13c21
("softirq: Let ksoftirqd do its job") and using
napi_threaded_poll() mode instead.

https://lore.kernel.org/netdev/140f61e2e1fcb8cf53619709046e312e343b53ca.camel@redhat.com/T/#m8a8f5b09844adba157ad0d22fc1233d97013de50

Before doing so, make sure napi_threaded_poll() benefits
from recent core stack improvements, to further reduce
softirq triggers.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 19c60fde 87eff2ec
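
For context: threaded NAPI polling is an opt-in, per-device mode in which each NAPI instance is serviced by a dedicated kernel thread ("napi/<dev>-<id>") instead of the NET_RX_SOFTIRQ handler. A minimal sketch of how a driver would opt in (the driver open handler below is hypothetical; dev_set_threaded() is the real in-kernel API, and userspace can toggle the same state via /sys/class/net/<dev>/threaded):

#include <linux/netdevice.h>

static int example_open(struct net_device *dev)
{
	/* Hand all of this device's NAPI polling over to the
	 * per-NAPI kernel threads instead of NET_RX_SOFTIRQ.
	 */
	return dev_set_threaded(dev, true);
}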
include/linux/netdevice.h
@@ -3194,7 +3194,10 @@ struct softnet_data {
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
 #endif
 	bool			in_net_rx_action;
+	bool			in_napi_threaded_poll;
 #ifdef CONFIG_NET_FLOW_LIMIT
 	struct sd_flow_limit __rcu *flow_limit;
 #endif
...
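Note that both in_net_rx_action and in_napi_threaded_poll are plain bools rather than atomics: softnet_data is per-CPU and the flags are only touched with bottom halves disabled on the owning CPU, so plain stores plus a compiler barrier() suffice. A reduced illustration of the pattern (hypothetical helper; the real users are the net_rx_action()/napi_threaded_poll() hunks below):

static void poll_loop_marker(void)
{
	struct softnet_data *sd = this_cpu_ptr(&softnet_data);

	sd->in_napi_threaded_poll = true;	/* plain store: local CPU, BH off */
	/* ... poll ... */
	sd->in_napi_threaded_poll = false;
	barrier();	/* order the clear against the re-check of queued work */
}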
net/core/dev.c
@@ -4603,10 +4603,10 @@ static void napi_schedule_rps(struct softnet_data *sd)
 		sd->rps_ipi_next = mysd->rps_ipi_list;
 		mysd->rps_ipi_list = sd;
 
-		/* If not called from net_rx_action()
+		/* If not called from net_rx_action() or napi_threaded_poll()
 		 * we have to raise NET_RX_SOFTIRQ.
 		 */
-		if (!mysd->in_net_rx_action)
+		if (!mysd->in_net_rx_action && !mysd->in_napi_threaded_poll)
 			__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 		return;
 	}
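The skip is safe because a caller running inside either loop commits to flushing the pending RPS IPIs itself before exiting. The check used in the napi_threaded_poll() hunk below relies on the pre-existing sd_has_rps_ipi_waiting() helper, which reduces to a NULL test on the per-CPU IPI list (simplified sketch from memory, not part of this diff):

static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	return sd->rps_ipi_list != NULL;
#else
	return false;
#endif
}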
@@ -6598,9 +6598,31 @@ static int napi_thread_wait(struct napi_struct *napi)
 	return -1;
 }
 
+static void skb_defer_free_flush(struct softnet_data *sd)
+{
+	struct sk_buff *skb, *next;
+
+	/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
+	if (!READ_ONCE(sd->defer_list))
+		return;
+
+	spin_lock(&sd->defer_lock);
+	skb = sd->defer_list;
+	sd->defer_list = NULL;
+	sd->defer_count = 0;
+	spin_unlock(&sd->defer_lock);
+
+	while (skb != NULL) {
+		next = skb->next;
+		napi_consume_skb(skb, 1);
+		skb = next;
+	}
+}
+
 static int napi_threaded_poll(void *data)
 {
 	struct napi_struct *napi = data;
+	struct softnet_data *sd;
 	void *have;
 
 	while (!napi_thread_wait(napi)) {
@@ -6608,11 +6630,21 @@ static int napi_threaded_poll(void *data)
 		bool repoll = false;
 
 		local_bh_disable();
+		sd = this_cpu_ptr(&softnet_data);
+		sd->in_napi_threaded_poll = true;
+
 		have = netpoll_poll_lock(napi);
 		__napi_poll(napi, &repoll);
 		netpoll_poll_unlock(have);
+
+		sd->in_napi_threaded_poll = false;
+		barrier();
+
+		if (sd_has_rps_ipi_waiting(sd)) {
+			local_irq_disable();
+			net_rps_action_and_irq_enable(sd);
+		}
+		skb_defer_free_flush(sd);
 		local_bh_enable();
 
 		if (!repoll)
@@ -6624,27 +6656,6 @@ static int napi_threaded_poll(void *data)
 	return 0;
 }
 
-static void skb_defer_free_flush(struct softnet_data *sd)
-{
-	struct sk_buff *skb, *next;
-
-	/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
-	if (!READ_ONCE(sd->defer_list))
-		return;
-
-	spin_lock_irq(&sd->defer_lock);
-	skb = sd->defer_list;
-	sd->defer_list = NULL;
-	sd->defer_count = 0;
-	spin_unlock_irq(&sd->defer_lock);
-
-	while (skb != NULL) {
-		next = skb->next;
-		napi_consume_skb(skb, 1);
-		skb = next;
-	}
-}
-
 static __latent_entropy void net_rx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
...
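The lines added to napi_threaded_poll() mirror what net_rx_action() already does at softirq time. A heavily condensed sketch of that pre-existing pattern, from memory and omitting the poll-list handling, only to show the parallel:

static __latent_entropy void net_rx_action(struct softirq_action *h)
{
	struct softnet_data *sd = this_cpu_ptr(&softnet_data);

	sd->in_net_rx_action = true;
	/* ... poll the NAPI instances queued on sd->poll_list ... */
	sd->in_net_rx_action = false;
	barrier();

	/* Flush deferred-free skbs and send any pending RPS IPIs before
	 * returning, so nobody had to re-raise NET_RX_SOFTIRQ for them.
	 */
	skb_defer_free_flush(sd);
	net_rps_action_and_irq_enable(sd);
}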
@@ -6870,7 +6870,6 @@ void skb_attempt_defer_free(struct sk_buff *skb)
 {
 	int cpu = skb->alloc_cpu;
 	struct softnet_data *sd;
-	unsigned long flags;
 	unsigned int defer_max;
 	bool kick;
 
@@ -6881,12 +6880,15 @@ nodefer: __kfree_skb(skb);
 		return;
 	}
 
+	DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
+	DEBUG_NET_WARN_ON_ONCE(skb->destructor);
+
 	sd = &per_cpu(softnet_data, cpu);
 	defer_max = READ_ONCE(sysctl_skb_defer_max);
 	if (READ_ONCE(sd->defer_count) >= defer_max)
 		goto nodefer;
 
-	spin_lock_irqsave(&sd->defer_lock, flags);
+	spin_lock_bh(&sd->defer_lock);
 	/* Send an IPI every time queue reaches half capacity. */
 	kick = sd->defer_count == (defer_max >> 1);
 	/* Paired with the READ_ONCE() few lines above */
@@ -6895,7 +6897,7 @@ nodefer: __kfree_skb(skb);
 	skb->next = sd->defer_list;
 	/* Paired with READ_ONCE() in skb_defer_free_flush() */
 	WRITE_ONCE(sd->defer_list, skb);
-	spin_unlock_irqrestore(&sd->defer_lock, flags);
+	spin_unlock_bh(&sd->defer_lock);
 
 	/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
 	 * if we are unlucky enough (this seems very unlikely).
...
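The _bh variants are now sufficient because, after this series, sd->defer_lock is only taken from contexts that run with bottom halves disabled and never from hard interrupt context, so masking hard IRQs (irqsave/irqrestore) was wasted work. A minimal illustration of that locking rule (hypothetical lock and functions, not kernel code paths):

static DEFINE_SPINLOCK(example_lock);

static void example_producer(void)	/* process context */
{
	spin_lock_bh(&example_lock);	/* disables BH, leaves hard IRQs on */
	/* ... queue an item ... */
	spin_unlock_bh(&example_lock);
}

static void example_consumer(void)	/* BH (softirq / threaded-NAPI) context */
{
	spin_lock(&example_lock);	/* BH already disabled here */
	/* ... drain items ... */
	spin_unlock(&example_lock);
}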