Commit 13cabc47, authored by Alexander Lobakin, committed by Jakub Kicinski

netdevice: define and allocate &net_device _properly_

In fact, this structure contains a flexible array at the end, but
historically its size, alignment, etc. have been calculated manually.
There are several instances of the structure embedded into other
structures, but there is also an ongoing effort to remove them, so in
the meantime we can declare &net_device properly.
Declare the array explicitly, use struct_size() and store the array
size inside the structure, so that __counted_by() can be applied.
Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the
allocated buffer is aligned to what the user expects.
Also, change its alignment from %NETDEV_ALIGN to the cacheline size
as per several suggestions on the netdev ML.

bloat-o-meter for vmlinux:

free_netdev                                  445     440      -5
netdev_freemem                                24       -     -24
alloc_netdev_mqs                            1481    1450     -31

On x86_64 with several NICs of different vendors, I was never able to
get a &net_device pointer not aligned to the cacheline size after the
change.
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20240710113036.2125584-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 8341eee8
...@@ -1819,7 +1819,8 @@ enum netdev_reg_state { ...@@ -1819,7 +1819,8 @@ enum netdev_reg_state {
* @priv_flags: Like 'flags' but invisible to userspace, * @priv_flags: Like 'flags' but invisible to userspace,
* see if.h for the definitions * see if.h for the definitions
* @gflags: Global flags ( kept as legacy ) * @gflags: Global flags ( kept as legacy )
* @padded: How much padding added by alloc_netdev() * @priv_len: Size of the ->priv flexible array
* @priv: Flexible array containing private data
* @operstate: RFC2863 operstate * @operstate: RFC2863 operstate
* @link_mode: Mapping policy to operstate * @link_mode: Mapping policy to operstate
* @if_port: Selectable AUI, TP, ... * @if_port: Selectable AUI, TP, ...
...@@ -2199,10 +2200,10 @@ struct net_device { ...@@ -2199,10 +2200,10 @@ struct net_device {
unsigned short neigh_priv_len; unsigned short neigh_priv_len;
unsigned short dev_id; unsigned short dev_id;
unsigned short dev_port; unsigned short dev_port;
unsigned short padded; int irq;
u32 priv_len;
spinlock_t addr_list_lock; spinlock_t addr_list_lock;
int irq;
struct netdev_hw_addr_list uc; struct netdev_hw_addr_list uc;
struct netdev_hw_addr_list mc; struct netdev_hw_addr_list mc;
...@@ -2406,7 +2407,10 @@ struct net_device { ...@@ -2406,7 +2407,10 @@ struct net_device {
/** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
struct dim_irq_moder *irq_moder; struct dim_irq_moder *irq_moder;
};
u8 priv[] ____cacheline_aligned
__counted_by(priv_len);
} ____cacheline_aligned;
#define to_net_dev(d) container_of(d, struct net_device, dev) #define to_net_dev(d) container_of(d, struct net_device, dev)
/* /*
...@@ -2596,7 +2600,7 @@ void dev_net_set(struct net_device *dev, struct net *net) ...@@ -2596,7 +2600,7 @@ void dev_net_set(struct net_device *dev, struct net *net)
*/ */
static inline void *netdev_priv(const struct net_device *dev) static inline void *netdev_priv(const struct net_device *dev)
{ {
return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN); return (void *)dev->priv;
} }
/* Set the sysfs physical device reference for the network logical device /* Set the sysfs physical device reference for the network logical device
...@@ -3127,7 +3131,6 @@ static inline void unregister_netdevice(struct net_device *dev) ...@@ -3127,7 +3131,6 @@ static inline void unregister_netdevice(struct net_device *dev)
int netdev_refcnt_read(const struct net_device *dev); int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev); void free_netdev(struct net_device *dev);
void netdev_freemem(struct net_device *dev);
void init_dummy_netdev(struct net_device *dev); void init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct net_device *netdev_get_xmit_slave(struct net_device *dev,
......
...@@ -11006,13 +11006,6 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev) ...@@ -11006,13 +11006,6 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
} }
EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on); EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
void netdev_freemem(struct net_device *dev)
{
char *addr = (char *)dev - dev->padded;
kvfree(addr);
}
/** /**
* alloc_netdev_mqs - allocate network device * alloc_netdev_mqs - allocate network device
* @sizeof_priv: size of private data to allocate space for * @sizeof_priv: size of private data to allocate space for
...@@ -11032,8 +11025,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, ...@@ -11032,8 +11025,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned int txqs, unsigned int rxqs) unsigned int txqs, unsigned int rxqs)
{ {
struct net_device *dev; struct net_device *dev;
unsigned int alloc_size;
struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name)); BUG_ON(strlen(name) >= sizeof(dev->name));
...@@ -11047,21 +11038,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, ...@@ -11047,21 +11038,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
return NULL; return NULL;
} }
alloc_size = sizeof(struct net_device); dev = kvzalloc(struct_size(dev, priv, sizeof_priv),
if (sizeof_priv) { GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
/* ensure 32-byte alignment of private area */ if (!dev)
alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
alloc_size += sizeof_priv;
}
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!p)
return NULL; return NULL;
dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->priv_len = sizeof_priv;
dev->padded = (char *)dev - (char *)p;
ref_tracker_dir_init(&dev->refcnt_tracker, 128, name); ref_tracker_dir_init(&dev->refcnt_tracker, 128, name);
#ifdef CONFIG_PCPU_DEV_REFCNT #ifdef CONFIG_PCPU_DEV_REFCNT
...@@ -11148,7 +11130,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, ...@@ -11148,7 +11130,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
free_percpu(dev->pcpu_refcnt); free_percpu(dev->pcpu_refcnt);
free_dev: free_dev:
#endif #endif
netdev_freemem(dev); kvfree(dev);
return NULL; return NULL;
} }
EXPORT_SYMBOL(alloc_netdev_mqs); EXPORT_SYMBOL(alloc_netdev_mqs);
...@@ -11203,7 +11185,7 @@ void free_netdev(struct net_device *dev) ...@@ -11203,7 +11185,7 @@ void free_netdev(struct net_device *dev)
/* Compatibility with error handling in drivers */ /* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED || if (dev->reg_state == NETREG_UNINITIALIZED ||
dev->reg_state == NETREG_DUMMY) { dev->reg_state == NETREG_DUMMY) {
netdev_freemem(dev); kvfree(dev);
return; return;
} }
......
...@@ -2028,7 +2028,7 @@ static void netdev_release(struct device *d) ...@@ -2028,7 +2028,7 @@ static void netdev_release(struct device *d)
* device is dead and about to be freed. * device is dead and about to be freed.
*/ */
kfree(rcu_access_pointer(dev->ifalias)); kfree(rcu_access_pointer(dev->ifalias));
netdev_freemem(dev); kvfree(dev);
} }
static const void *net_namespace(const struct device *d) static const void *net_namespace(const struct device *d)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment