Commit ebf2e337 authored by Darrick J. Wong


Merge tag 'xfs-buf-bulk-alloc-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs into xfs-5.14-merge2

xfs: buffer cache bulk page allocation

This patchset makes use of the new bulk page allocation interface to
reduce the overhead of allocating large numbers of pages in a
loop.

The first two patches are refactoring buffer memory allocation and
converting the uncached buffer path to use the same page allocation
path, followed by converting the page allocation path to use bulk
allocation.

The rest of the patches are then consolidation of the page
allocation and freeing code to simplify the code and remove a chunk
of unnecessary abstraction. This is largely based on a series of
changes made by Christoph Hellwig.
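For reference, alloc_pages_bulk_array() only populates the NULL slots of the page array it is given and returns the total number of populated entries, so callers loop until the array is full and treat a pass that makes no forward progress as the real failure. A minimal sketch of that calling pattern is below (illustrative only; the helper name and the simple -ENOMEM fallback are assumptions, not code from this series):

        #include <linux/gfp.h>
        #include <linux/mm.h>

        /* Hypothetical helper: fill @nr slots of @pages via the bulk allocator. */
        static int example_fill_pages(struct page **pages, unsigned long nr,
                                      gfp_t gfp)
        {
                unsigned long filled = 0;

                for (;;) {
                        unsigned long last = filled;

                        /*
                         * Only NULL slots are filled; the return value is the
                         * total number of populated entries, so a partial fill
                         * still counts as progress.
                         */
                        filled = alloc_pages_bulk_array(gfp, nr, pages);
                        if (filled == nr)
                                return 0;
                        if (filled == last)
                                return -ENOMEM; /* no progress this pass */
                }
        }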

* tag 'xfs-buf-bulk-alloc-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: merge xfs_buf_allocate_memory
  xfs: cleanup error handling in xfs_buf_get_map
  xfs: get rid of xb_to_gfp()
  xfs: simplify the b_page_count calculation
  xfs: remove ->b_offset handling for page backed buffers
  xfs: move page freeing into _xfs_buf_free_pages()
  xfs: merge _xfs_buf_get_pages()
  xfs: use alloc_pages_bulk_array() for buffers
  xfs: use xfs_buf_alloc_pages for uncached buffers
  xfs: split up xfs_buf_allocate_memory
parents 8124c8a6 8bcac744
@@ -43,7 +43,6 @@ xfs_get_aghdr_buf(
 	if (error)
 		return error;
 
-	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
 	bp->b_bn = blkno;
 	bp->b_maps[0].bm_bn = blkno;
 	bp->b_ops = ops;
@@ -22,9 +22,6 @@
 
 static kmem_zone_t *xfs_buf_zone;
 
-#define xb_to_gfp(flags) \
-	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
-
 /*
  * Locking orders
  *
@@ -79,7 +76,7 @@ static inline int
 xfs_buf_vmap_len(
 	struct xfs_buf *bp)
 {
-	return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
+	return (bp->b_page_count * PAGE_SIZE);
 }
 
 /*
@@ -272,51 +269,30 @@ _xfs_buf_alloc(
 	return 0;
 }
 
-/*
- * Allocate a page array capable of holding a specified number
- * of pages, and point the page buf at it.
- */
-STATIC int
-_xfs_buf_get_pages(
-	struct xfs_buf *bp,
-	int page_count)
-{
-	/* Make sure that we have a page list */
-	if (bp->b_pages == NULL) {
-		bp->b_page_count = page_count;
-		if (page_count <= XB_PAGES) {
-			bp->b_pages = bp->b_page_array;
-		} else {
-			bp->b_pages = kmem_alloc(sizeof(struct page *) *
-						 page_count, KM_NOFS);
-			if (bp->b_pages == NULL)
-				return -ENOMEM;
-		}
-		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
-	}
-	return 0;
-}
-
-/*
- * Frees b_pages if it was allocated.
- */
-STATIC void
-_xfs_buf_free_pages(
-	struct xfs_buf *bp)
-{
-	if (bp->b_pages != bp->b_page_array) {
-		kmem_free(bp->b_pages);
-		bp->b_pages = NULL;
-	}
-}
-
-/*
- * Releases the specified buffer.
- *
- * The modification state of any associated pages is left unchanged.
- * The buffer must not be on any hash - use xfs_buf_rele instead for
- * hashed and refcounted buffers
- */
+static void
+xfs_buf_free_pages(
+	struct xfs_buf *bp)
+{
+	uint i;
+
+	ASSERT(bp->b_flags & _XBF_PAGES);
+
+	if (xfs_buf_is_vmapped(bp))
+		vm_unmap_ram(bp->b_addr, bp->b_page_count);
+
+	for (i = 0; i < bp->b_page_count; i++) {
+		if (bp->b_pages[i])
+			__free_page(bp->b_pages[i]);
+	}
+	if (current->reclaim_state)
+		current->reclaim_state->reclaimed_slab += bp->b_page_count;
+
+	if (bp->b_pages != bp->b_page_array)
+		kmem_free(bp->b_pages);
+	bp->b_pages = NULL;
+	bp->b_flags &= ~_XBF_PAGES;
+}
+
 static void
 xfs_buf_free(
 	struct xfs_buf *bp)
@@ -325,73 +301,38 @@ xfs_buf_free(
 	ASSERT(list_empty(&bp->b_lru));
 
-	if (bp->b_flags & _XBF_PAGES) {
-		uint i;
-
-		if (xfs_buf_is_vmapped(bp))
-			vm_unmap_ram(bp->b_addr - bp->b_offset,
-					bp->b_page_count);
-
-		for (i = 0; i < bp->b_page_count; i++) {
-			struct page *page = bp->b_pages[i];
-
-			__free_page(page);
-		}
-		if (current->reclaim_state)
-			current->reclaim_state->reclaimed_slab +=
-							bp->b_page_count;
-	} else if (bp->b_flags & _XBF_KMEM)
+	if (bp->b_flags & _XBF_PAGES)
+		xfs_buf_free_pages(bp);
+	else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
-	_xfs_buf_free_pages(bp);
+
 	xfs_buf_free_maps(bp);
 	kmem_cache_free(xfs_buf_zone, bp);
 }
 
-/*
- * Allocates all the pages for buffer in question and builds it's page list.
- */
-STATIC int
-xfs_buf_allocate_memory(
+static int
+xfs_buf_alloc_kmem(
 	struct xfs_buf *bp,
-	uint flags)
+	xfs_buf_flags_t flags)
 {
-	size_t size;
-	size_t nbytes, offset;
-	gfp_t gfp_mask = xb_to_gfp(flags);
-	unsigned short page_count, i;
-	xfs_off_t start, end;
-	int error;
-	xfs_km_flags_t kmflag_mask = 0;
+	int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
+	xfs_km_flags_t kmflag_mask = KM_NOFS;
+	size_t size = BBTOB(bp->b_length);
 
-	/*
-	 * assure zeroed buffer for non-read cases.
-	 */
-	if (!(flags & XBF_READ)) {
+	/* Assure zeroed buffer for non-read cases. */
+	if (!(flags & XBF_READ))
 		kmflag_mask |= KM_ZERO;
-		gfp_mask |= __GFP_ZERO;
-	}
 
-	/*
-	 * for buffers that are contained within a single page, just allocate
-	 * the memory from the heap - there's no need for the complexity of
-	 * page arrays to keep allocation down to order 0.
-	 */
-	size = BBTOB(bp->b_length);
-	if (size < PAGE_SIZE) {
-		int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
-		bp->b_addr = kmem_alloc_io(size, align_mask,
-					   KM_NOFS | kmflag_mask);
-		if (!bp->b_addr) {
-			/* low memory - use alloc_page loop instead */
-			goto use_alloc_page;
-		}
+	bp->b_addr = kmem_alloc_io(size, align_mask, kmflag_mask);
+	if (!bp->b_addr)
+		return -ENOMEM;
 
-		if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
-		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
-			/* b_addr spans two pages - use alloc_page instead */
-			kmem_free(bp->b_addr);
-			bp->b_addr = NULL;
-			goto use_alloc_page;
-		}
-		bp->b_offset = offset_in_page(bp->b_addr);
-		bp->b_pages = bp->b_page_array;
+	if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
+	    ((unsigned long)bp->b_addr & PAGE_MASK)) {
+		/* b_addr spans two pages - use alloc_page instead */
+		kmem_free(bp->b_addr);
+		bp->b_addr = NULL;
+		return -ENOMEM;
+	}
+	bp->b_offset = offset_in_page(bp->b_addr);
+	bp->b_pages = bp->b_page_array;
@@ -399,63 +340,64 @@ xfs_buf_allocate_memory(
 	bp->b_page_count = 1;
 	bp->b_flags |= _XBF_KMEM;
 	return 0;
 }
 
-use_alloc_page:
-	start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
-	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
-								>> PAGE_SHIFT;
-	page_count = end - start;
-	error = _xfs_buf_get_pages(bp, page_count);
-	if (unlikely(error))
-		return error;
-
-	offset = bp->b_offset;
-	bp->b_flags |= _XBF_PAGES;
-
-	for (i = 0; i < bp->b_page_count; i++) {
-		struct page *page;
-		uint retries = 0;
-retry:
-		page = alloc_page(gfp_mask);
-		if (unlikely(page == NULL)) {
-			if (flags & XBF_READ_AHEAD) {
-				bp->b_page_count = i;
-				error = -ENOMEM;
-				goto out_free_pages;
-			}
-
-			/*
-			 * This could deadlock.
-			 *
-			 * But until all the XFS lowlevel code is revamped to
-			 * handle buffer allocation failures we can't do much.
-			 */
-			if (!(++retries % 100))
-				xfs_err(NULL,
-		"%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
-					current->comm, current->pid,
-					__func__, gfp_mask);
-
-			XFS_STATS_INC(bp->b_mount, xb_page_retries);
-			congestion_wait(BLK_RW_ASYNC, HZ/50);
-			goto retry;
-		}
-
-		XFS_STATS_INC(bp->b_mount, xb_page_found);
-
-		nbytes = min_t(size_t, size, PAGE_SIZE - offset);
-		size -= nbytes;
-		bp->b_pages[i] = page;
-		offset = 0;
-	}
-	return 0;
-
-out_free_pages:
-	for (i = 0; i < bp->b_page_count; i++)
-		__free_page(bp->b_pages[i]);
-	bp->b_flags &= ~_XBF_PAGES;
-	return error;
+static int
+xfs_buf_alloc_pages(
+	struct xfs_buf *bp,
+	xfs_buf_flags_t flags)
+{
+	gfp_t gfp_mask = __GFP_NOWARN;
+	long filled = 0;
+
+	if (flags & XBF_READ_AHEAD)
+		gfp_mask |= __GFP_NORETRY;
+	else
+		gfp_mask |= GFP_NOFS;
+
+	/* Make sure that we have a page list */
+	bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
+	if (bp->b_page_count <= XB_PAGES) {
+		bp->b_pages = bp->b_page_array;
+	} else {
+		bp->b_pages = kzalloc(sizeof(struct page *) * bp->b_page_count,
+					gfp_mask);
+		if (!bp->b_pages)
+			return -ENOMEM;
+	}
+	bp->b_flags |= _XBF_PAGES;
+
+	/* Assure zeroed buffer for non-read cases. */
+	if (!(flags & XBF_READ))
+		gfp_mask |= __GFP_ZERO;
+
+	/*
+	 * Bulk filling of pages can take multiple calls. Not filling the entire
+	 * array is not an allocation failure, so don't back off if we get at
+	 * least one extra page.
+	 */
+	for (;;) {
+		long last = filled;
+
+		filled = alloc_pages_bulk_array(gfp_mask, bp->b_page_count,
+						bp->b_pages);
+		if (filled == bp->b_page_count) {
+			XFS_STATS_INC(bp->b_mount, xb_page_found);
+			break;
+		}
+
+		if (filled != last)
+			continue;
+
+		if (flags & XBF_READ_AHEAD) {
+			xfs_buf_free_pages(bp);
+			return -ENOMEM;
+		}
+
+		XFS_STATS_INC(bp->b_mount, xb_page_retries);
+		congestion_wait(BLK_RW_ASYNC, HZ / 50);
+	}
+	return 0;
 }
 
 /*
@@ -469,7 +411,7 @@ _xfs_buf_map_pages(
 	ASSERT(bp->b_flags & _XBF_PAGES);
 
 	if (bp->b_page_count == 1) {
 		/* A single page buffer is always mappable */
-		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
+		bp->b_addr = page_address(bp->b_pages[0]);
 	} else if (flags & XBF_UNMAPPED) {
 		bp->b_addr = NULL;
 	} else {
@@ -496,7 +438,6 @@
 		if (!bp->b_addr)
 			return -ENOMEM;
-		bp->b_addr += bp->b_offset;
 	}
 
 	return 0;
@@ -720,17 +661,22 @@ xfs_buf_get_map(
 	if (error)
 		return error;
 
-	error = xfs_buf_allocate_memory(new_bp, flags);
-	if (error) {
-		xfs_buf_free(new_bp);
-		return error;
+	/*
+	 * For buffers that fit entirely within a single page, first attempt to
+	 * allocate the memory from the heap to minimise memory usage. If we
+	 * can't get heap memory for these small buffers, we fall back to using
+	 * the page allocator.
+	 */
+	if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
+	    xfs_buf_alloc_kmem(new_bp, flags) < 0) {
+		error = xfs_buf_alloc_pages(new_bp, flags);
+		if (error)
+			goto out_free_buf;
 	}
 
 	error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
-	if (error) {
-		xfs_buf_free(new_bp);
-		return error;
-	}
+	if (error)
+		goto out_free_buf;
 
 	if (bp != new_bp)
 		xfs_buf_free(new_bp);
@@ -758,6 +704,9 @@ xfs_buf_get_map(
 	trace_xfs_buf_get(bp, flags, _RET_IP_);
 	*bpp = bp;
 	return 0;
+out_free_buf:
+	xfs_buf_free(new_bp);
+	return error;
 }
 
 int
@@ -950,8 +899,7 @@ xfs_buf_get_uncached(
 	int flags,
 	struct xfs_buf **bpp)
 {
-	unsigned long page_count;
-	int error, i;
+	int error;
 	struct xfs_buf *bp;
 	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
@@ -960,41 +908,25 @@ xfs_buf_get_uncached(
 	/* flags might contain irrelevant bits, pass only what we care about */
 	error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
 	if (error)
-		goto fail;
+		return error;
 
-	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
-	error = _xfs_buf_get_pages(bp, page_count);
+	error = xfs_buf_alloc_pages(bp, flags);
 	if (error)
 		goto fail_free_buf;
 
-	for (i = 0; i < page_count; i++) {
-		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
-		if (!bp->b_pages[i]) {
-			error = -ENOMEM;
-			goto fail_free_mem;
-		}
-	}
-	bp->b_flags |= _XBF_PAGES;
-
 	error = _xfs_buf_map_pages(bp, 0);
 	if (unlikely(error)) {
 		xfs_warn(target->bt_mount,
 			"%s: failed to map pages", __func__);
-		goto fail_free_mem;
+		goto fail_free_buf;
 	}
 
 	trace_xfs_buf_get_uncached(bp, _RET_IP_);
 	*bpp = bp;
 	return 0;
 
-fail_free_mem:
-	while (--i >= 0)
-		__free_page(bp->b_pages[i]);
-	_xfs_buf_free_pages(bp);
-fail_free_buf:
-	xfs_buf_free_maps(bp);
-	kmem_cache_free(xfs_buf_zone, bp);
-fail:
+fail_free_buf:
+	xfs_buf_free(bp);
 	return error;
 }
@@ -1722,7 +1654,6 @@ xfs_buf_offset(
 	if (bp->b_addr)
 		return bp->b_addr + offset;
 
-	offset += bp->b_offset;
 	page = bp->b_pages[offset >> PAGE_SHIFT];
 	return page_address(page) + (offset & (PAGE_SIZE-1));
 }
@@ -167,7 +167,8 @@ struct xfs_buf {
 	atomic_t b_pin_count;		/* pin count */
 	atomic_t b_io_remaining;	/* #outstanding I/O requests */
 	unsigned int b_page_count;	/* size of page array */
-	unsigned int b_offset;		/* page offset in first page */
+	unsigned int b_offset;		/* page offset of b_addr,
+					   only for _XBF_KMEM buffers */
 	int b_error;			/* error code on I/O */
 
 	/*