Commit 69b08f62 authored by Eric Dumazet, committed by David S. Miller

net: use bigger pages in __netdev_alloc_frag

We currently use percpu order-0 pages in __netdev_alloc_frag
to deliver fragments used by __netdev_alloc_skb()

Depending on the NIC driver and on whether the arch is 32 or 64 bit, this
allows a page to be split into several fragments (between 1 and 8),
assuming PAGE_SIZE=4096.

Switching to bigger pages (32768 bytes in the PAGE_SIZE=4096 case) allows:

- Better filling of space (the ending hole overhead is less of an issue)

- Fewer calls to the page allocator and fewer accesses to page->_count

- Could allow future changes to struct skb_shared_info without major
  performance impact.

This patch implements a transparent fallback to smaller
pages in case of memory pressure.

It also uses a standard "struct page_frag" instead of a custom one.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 5dff747b
...@@ -340,43 +340,57 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) ...@@ -340,43 +340,57 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
EXPORT_SYMBOL(build_skb); EXPORT_SYMBOL(build_skb);
struct netdev_alloc_cache { struct netdev_alloc_cache {
struct page *page; struct page_frag frag;
unsigned int offset; /* we maintain a pagecount bias, so that we dont dirty cache line
* containing page->_count every time we allocate a fragment.
*/
unsigned int pagecnt_bias; unsigned int pagecnt_bias;
}; };
static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES) #define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{ {
struct netdev_alloc_cache *nc; struct netdev_alloc_cache *nc;
void *data = NULL; void *data = NULL;
int order;
unsigned long flags; unsigned long flags;
local_irq_save(flags); local_irq_save(flags);
nc = &__get_cpu_var(netdev_alloc_cache); nc = &__get_cpu_var(netdev_alloc_cache);
if (unlikely(!nc->page)) { if (unlikely(!nc->frag.page)) {
refill: refill:
nc->page = alloc_page(gfp_mask); for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
if (unlikely(!nc->page)) gfp_t gfp = gfp_mask;
if (order)
gfp |= __GFP_COMP | __GFP_NOWARN;
nc->frag.page = alloc_pages(gfp, order);
if (likely(nc->frag.page))
break;
if (--order < 0)
goto end; goto end;
}
nc->frag.size = PAGE_SIZE << order;
recycle: recycle:
atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS); atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
nc->pagecnt_bias = NETDEV_PAGECNT_BIAS; nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
nc->offset = 0; nc->frag.offset = 0;
} }
if (nc->offset + fragsz > PAGE_SIZE) { if (nc->frag.offset + fragsz > nc->frag.size) {
/* avoid unnecessary locked operations if possible */ /* avoid unnecessary locked operations if possible */
if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) || if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count)) atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
goto recycle; goto recycle;
goto refill; goto refill;
} }
data = page_address(nc->page) + nc->offset; data = page_address(nc->frag.page) + nc->frag.offset;
nc->offset += fragsz; nc->frag.offset += fragsz;
nc->pagecnt_bias--; nc->pagecnt_bias--;
end: end:
local_irq_restore(flags); local_irq_restore(flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment