Commit b26ef81c authored by Eric Dumazet, committed by David S. Miller

drop_monitor: remove quadratic behavior

drop_monitor uses a single global list on which every netdevice in
the host has an element, regardless of its netns.

This scales poorly, not only at device unregister time (which is what
I caught during my netns dismantle stress tests), but also at packet
processing time, whenever trace_napi_poll_hit() is called.

If the intent was to avoid adding one pointer to 'struct net_device',
then surely we prefer O(1) behavior.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 503310a5
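
The core of the change is visible in the trace_napi_poll_hit() fast path: instead of walking one global list to find the entry matching napi->dev, the per-device state now hangs directly off the device. A condensed before/after view of that path (taken from the hunks below, delta checks elided) looks like this:

	/* Before: every trace_napi_poll_hit() walks one global list that
	 * has an entry for every netdevice in the host, i.e. O(n) work
	 * per napi poll event (and O(n^2) across a netns dismantle). */
	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
		if (READ_ONCE(new_stat->dev) == napi->dev) {
			/* ... last_rx / rx_dropped delta checks ... */
			break;
		}
	}

	/* After: the state hangs off the device itself, so the lookup is
	 * a single RCU-protected pointer dereference, O(1). */
	stat = rcu_dereference(napi->dev->dm_private);
	if (stat) {
		/* ... same delta checks ... */
	}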
include/linux/netdevice.h

@@ -2236,7 +2236,9 @@ struct net_device {
 #if IS_ENABLED(CONFIG_MRP)
	struct mrp_port __rcu	*mrp_port;
 #endif
+#if IS_ENABLED(CONFIG_NET_DROP_MONITOR)
+	struct dm_hw_stat_delta __rcu *dm_private;
+#endif
	struct device		dev;
	const struct attribute_group *sysfs_groups[4];
	const struct attribute_group *sysfs_rx_queue_group;

net/core/drop_monitor.c

@@ -64,7 +64,6 @@ static const char * const drop_reasons[] = {
 /* net_dm_mutex
  *
  * An overall lock guarding every operation coming from userspace.
- * It also guards the global 'hw_stats_list' list.
  */
 static DEFINE_MUTEX(net_dm_mutex);

@@ -100,11 +99,9 @@ struct per_cpu_dm_data {
 };

 struct dm_hw_stat_delta {
-	struct net_device *dev;
	unsigned long last_rx;
-	struct list_head list;
-	struct rcu_head rcu;
	unsigned long last_drop_val;
+	struct rcu_head rcu;
 };

 static struct genl_family net_drop_monitor_family;

@@ -115,7 +112,6 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
 static int dm_hit_limit = 64;
 static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
-static LIST_HEAD(hw_stats_list);

 static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
 static u32 net_dm_trunc_len;

@@ -287,33 +283,27 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
 static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
				int work, int budget)
 {
-	struct dm_hw_stat_delta *new_stat;
+	struct net_device *dev = napi->dev;
+	struct dm_hw_stat_delta *stat;
	/*
	 * Don't check napi structures with no associated device
	 */
-	if (!napi->dev)
+	if (!dev)
		return;

	rcu_read_lock();
-	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
-		struct net_device *dev;
-
+	stat = rcu_dereference(dev->dm_private);
+	if (stat) {
		/*
		 * only add a note to our monitor buffer if:
-		 * 1) this is the dev we received on
-		 * 2) its after the last_rx delta
-		 * 3) our rx_dropped count has gone up
+		 * 1) its after the last_rx delta
+		 * 2) our rx_dropped count has gone up
		 */
-		/* Paired with WRITE_ONCE() in dropmon_net_event() */
-		dev = READ_ONCE(new_stat->dev);
-		if ((dev == napi->dev) &&
-		    (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
-		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
+		if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) &&
+		    (dev->stats.rx_dropped != stat->last_drop_val)) {
			trace_drop_common(NULL, NULL);
-			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
-			new_stat->last_rx = jiffies;
-			break;
+			stat->last_drop_val = dev->stats.rx_dropped;
+			stat->last_rx = jiffies;
		}
	}
	rcu_read_unlock();

@@ -1198,7 +1188,6 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)

 static void net_dm_trace_off_set(void)
 {
-	struct dm_hw_stat_delta *new_stat, *temp;
	const struct net_dm_alert_ops *ops;
	int cpu;

@@ -1222,13 +1211,6 @@ static void net_dm_trace_off_set(void)
		consume_skb(skb);
	}

-	list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
-		if (new_stat->dev == NULL) {
-			list_del_rcu(&new_stat->list);
-			kfree_rcu(new_stat, rcu);
-		}
-	}
-
	module_put(THIS_MODULE);
 }

@@ -1589,41 +1571,28 @@ static int dropmon_net_event(struct notifier_block *ev_block,
			     unsigned long event, void *ptr)
 {
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct dm_hw_stat_delta *new_stat = NULL;
-	struct dm_hw_stat_delta *tmp;
+	struct dm_hw_stat_delta *stat;

	switch (event) {
	case NETDEV_REGISTER:
-		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
+		if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private)))
+			break;
+		stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+		if (!stat)
+			break;

-		if (!new_stat)
-			goto out;
-
-		new_stat->dev = dev;
-		new_stat->last_rx = jiffies;
-		mutex_lock(&net_dm_mutex);
-		list_add_rcu(&new_stat->list, &hw_stats_list);
-		mutex_unlock(&net_dm_mutex);
+		stat->last_rx = jiffies;
+		rcu_assign_pointer(dev->dm_private, stat);
		break;
	case NETDEV_UNREGISTER:
-		mutex_lock(&net_dm_mutex);
-		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
-			if (new_stat->dev == dev) {
-
-				/* Paired with READ_ONCE() in trace_napi_poll_hit() */
-				WRITE_ONCE(new_stat->dev, NULL);
-
-				if (trace_state == TRACE_OFF) {
-					list_del_rcu(&new_stat->list);
-					kfree_rcu(new_stat, rcu);
-					break;
-				}
-			}
+		stat = rtnl_dereference(dev->dm_private);
+		if (stat) {
+			rcu_assign_pointer(dev->dm_private, NULL);
+			kfree_rcu(stat, rcu);
		}
-		mutex_unlock(&net_dm_mutex);
		break;
	}
-out:
	return NOTIFY_DONE;
 }
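
The replacement relies on the standard RCU publish/retire idiom, which is also why dropmon_net_event() no longer needs net_dm_mutex: the state is published with rcu_assign_pointer() at NETDEV_REGISTER and retired with kfree_rcu() at NETDEV_UNREGISTER, while trace_napi_poll_hit() only reads it under rcu_read_lock(). A condensed view of that lifecycle (allocation-failure and WARN handling trimmed, identifiers as in the patch):

	case NETDEV_REGISTER:
		stat = kzalloc(sizeof(*stat), GFP_KERNEL);
		if (stat) {
			stat->last_rx = jiffies;
			/* publish: readers may see the pointer from now on */
			rcu_assign_pointer(dev->dm_private, stat);
		}
		break;
	case NETDEV_UNREGISTER:
		/* notifier runs under RTNL, hence rtnl_dereference() */
		stat = rtnl_dereference(dev->dm_private);
		if (stat) {
			/* unpublish, then free only after a grace period so a
			 * concurrent trace_napi_poll_hit() under rcu_read_lock()
			 * never touches freed memory */
			rcu_assign_pointer(dev->dm_private, NULL);
			kfree_rcu(stat, rcu);
		}
		break;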