Commit ffde7328 authored by Alexander Duyck, committed by David S. Miller

net: Split netdev_alloc_frag into __alloc_page_frag and add __napi_alloc_frag

This patch splits the netdev_alloc_frag function up so that it can be used
on one of two page frag pools instead of being fixed on the
netdev_alloc_cache.  By doing this we can add a NAPI specific function
__napi_alloc_frag that accesses a pool that is only used from softirq
context.  The advantage to this is that we do not need to call
local_irq_save/restore which can be a significant savings.

I also took the opportunity to refactor the core bits that were placed in
__alloc_page_frag.  First I updated the allocation to do either a 32K
allocation or an order 0 page.  This is based on the changes in commit
d9b2938a where it was found that latencies could be reduced in case of
failures.  Then I also rewrote the logic to work from the end of the page to
the start.  By doing this the size value doesn't have to be used unless we
have run out of space for page fragments.  Finally I cleaned up the atomic
bits so that we just do an atomic_sub_and_test and if that returns true then
we set the page->_count via an atomic_set.  This way we can remove the extra
conditional for the atomic_read since it would have led to an atomic_inc in
the case of success anyway.
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6e5f59aa
...@@ -2164,6 +2164,8 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, ...@@ -2164,6 +2164,8 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
} }
void *napi_alloc_frag(unsigned int fragsz);
/** /**
* __dev_alloc_pages - allocate page for network Rx * __dev_alloc_pages - allocate page for network Rx
* @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx
......
...@@ -336,59 +336,85 @@ struct netdev_alloc_cache { ...@@ -336,59 +336,85 @@ struct netdev_alloc_cache {
unsigned int pagecnt_bias; unsigned int pagecnt_bias;
}; };
static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
gfp_t gfp_mask)
{ {
struct netdev_alloc_cache *nc; const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
void *data = NULL; struct page *page = NULL;
int order;
unsigned long flags;
local_irq_save(flags);
nc = this_cpu_ptr(&netdev_alloc_cache);
if (unlikely(!nc->frag.page)) {
refill:
for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
gfp_t gfp = gfp_mask; gfp_t gfp = gfp_mask;
if (order) if (order) {
gfp |= __GFP_COMP | __GFP_NOWARN; gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
nc->frag.page = alloc_pages(gfp, order); page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
if (likely(nc->frag.page)) nc->frag.size = PAGE_SIZE << (page ? order : 0);
break;
if (--order < 0)
goto end;
} }
nc->frag.size = PAGE_SIZE << order;
if (unlikely(!page))
page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
nc->frag.page = page;
return page;
}
static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
unsigned int fragsz, gfp_t gfp_mask)
{
struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
struct page *page = nc->frag.page;
unsigned int size;
int offset;
if (unlikely(!page)) {
refill:
page = __page_frag_refill(nc, gfp_mask);
if (!page)
return NULL;
/* if size can vary use frag.size else just use PAGE_SIZE */
size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
/* Even if we own the page, we do not use atomic_set(). /* Even if we own the page, we do not use atomic_set().
* This would break get_page_unless_zero() users. * This would break get_page_unless_zero() users.
*/ */
atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1, atomic_add(size - 1, &page->_count);
&nc->frag.page->_count);
nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS; /* reset page count bias and offset to start of new frag */
nc->frag.offset = 0; nc->pagecnt_bias = size;
nc->frag.offset = size;
} }
if (nc->frag.offset + fragsz > nc->frag.size) { offset = nc->frag.offset - fragsz;
if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) { if (unlikely(offset < 0)) {
if (!atomic_sub_and_test(nc->pagecnt_bias, if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
&nc->frag.page->_count))
goto refill; goto refill;
/* if size can vary use frag.size else just use PAGE_SIZE */
size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
/* OK, page count is 0, we can safely set it */ /* OK, page count is 0, we can safely set it */
atomic_set(&nc->frag.page->_count, atomic_set(&page->_count, size);
NETDEV_PAGECNT_MAX_BIAS);
} else { /* reset page count bias and offset to start of new frag */
atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias, nc->pagecnt_bias = size;
&nc->frag.page->_count); offset = size - fragsz;
}
nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
nc->frag.offset = 0;
} }
data = page_address(nc->frag.page) + nc->frag.offset;
nc->frag.offset += fragsz;
nc->pagecnt_bias--; nc->pagecnt_bias--;
end: nc->frag.offset = offset;
return page_address(page) + offset;
}
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
unsigned long flags;
void *data;
local_irq_save(flags);
data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
local_irq_restore(flags); local_irq_restore(flags);
return data; return data;
} }
...@@ -406,6 +432,17 @@ void *netdev_alloc_frag(unsigned int fragsz) ...@@ -406,6 +432,17 @@ void *netdev_alloc_frag(unsigned int fragsz)
} }
EXPORT_SYMBOL(netdev_alloc_frag); EXPORT_SYMBOL(netdev_alloc_frag);
/*
 * Carve a fragment of @fragsz bytes out of the per-CPU napi_alloc_cache,
 * passing @gfp_mask through to __alloc_page_frag() for any page refill.
 *
 * Unlike __netdev_alloc_frag(), the call is not wrapped in
 * local_irq_save()/local_irq_restore().
 * NOTE(review): this presumably relies on the caller running in softirq
 * (NAPI) context only - confirm against callers.
 *
 * Returns whatever __alloc_page_frag() returns (NULL when the refill fails).
 */
static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
	void *frag;

	frag = __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);

	return frag;
}
void *napi_alloc_frag(unsigned int fragsz)
{
return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
}
EXPORT_SYMBOL(napi_alloc_frag);
/** /**
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
* @dev: network device to receive on * @dev: network device to receive on
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment