Commit 13531422 authored by Nathan Scott

[XFS] Fix up memory allocators to be more resilient.

SGI Modid: xfs-linux:xfs-kern:173571a
Signed-off-by: nathans@sgi.com
parent 86634e47
...@@ -126,6 +126,7 @@ xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o ...@@ -126,6 +126,7 @@ xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o
# Objects in linux-2.6/ # Objects in linux-2.6/
xfs-y += $(addprefix linux-2.6/, \ xfs-y += $(addprefix linux-2.6/, \
kmem.o \
xfs_aops.o \ xfs_aops.o \
xfs_buf.o \ xfs_buf.o \
xfs_file.o \ xfs_file.o \
......
/*
* Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* Further, this software is distributed without any warranty that it is
* free of the rightful claim of any third person regarding infringement
* or the like. Any license provided herein, whether implied or
* otherwise, applies only to this software file. Patent licenses, if
* any, provided herein do not apply to combinations of this program with
* other software, or any other product whatsoever.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston MA 02111-1307, USA.
*
* Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
* Mountain View, CA 94043, or:
*
* http://www.sgi.com
*
* For further information regarding this notice, see:
*
* http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
*/
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include "time.h"
#include "kmem.h"
#define MAX_VMALLOCS 6
#define MAX_SLAB_SIZE 0x20000
/*
 * Allocate size bytes using the XFS KM_* flags, which are translated
 * once up front by kmem_flags_convert().
 *
 * Requests smaller than MAX_SLAB_SIZE are served by kmalloc().  Larger
 * requests go to __vmalloc() until the failure count exceeds
 * MAX_VMALLOCS, after which kmalloc() is tried instead.
 *
 * NULL is returned only when the caller passed KM_MAYFAIL or KM_NOSLEEP.
 * Without either flag the function keeps retrying until an allocation
 * succeeds, logging a message on every 100th failed attempt.
 */
void *
kmem_alloc(size_t size, int flags)
{
	int	lflags = kmem_flags_convert(flags);
	int	attempts = 0;
	void	*mem;

	for (;;) {
		if (size >= MAX_SLAB_SIZE && attempts <= MAX_VMALLOCS)
			mem = __vmalloc(size, lflags, PAGE_KERNEL);
		else
			mem = kmalloc(size, lflags);

		if (mem)
			return mem;

		/* Caller can cope with failure: hand back the NULL. */
		if (flags & (KM_MAYFAIL|KM_NOSLEEP))
			return mem;

		attempts++;
		if ((attempts % 100) == 0)
			printk(KERN_ERR "possible deadlock in %s (mode:0x%x)\n",
					__FUNCTION__, lflags);
	}
}
/*
 * Allocate size bytes via kmem_alloc() and clear them to zero.
 *
 * flags are passed through to kmem_alloc() unchanged.  Returns the
 * zeroed buffer, or NULL if kmem_alloc() returned NULL.
 */
void *
kmem_zalloc(size_t size, int flags)
{
	void	*ptr;

	ptr = kmem_alloc(size, flags);
	if (ptr) {
		/*
		 * Pass size through as a size_t.  Narrowing it to int would
		 * turn any value above INT_MAX into a negative number, which
		 * memset() would then reinterpret as an enormous length.
		 */
		memset(ptr, 0, size);
	}
	return ptr;
}
/*
 * Release memory at ptr, choosing the release routine by address:
 * pointers inside [VMALLOC_START, VMALLOC_END) are handed to vfree(),
 * everything else to kfree().
 *
 * The size argument is not used by this implementation.
 */
void
kmem_free(void *ptr, size_t size)
{
	unsigned long	addr = (unsigned long)ptr;

	if (addr >= VMALLOC_START && addr < VMALLOC_END)
		vfree(ptr);
	else
		kfree(ptr);
}
/*
 * Allocate a new buffer of newsize bytes with kmem_alloc(), copy over
 * the smaller of oldsize and newsize bytes from ptr, and free ptr.
 *
 * If ptr is NULL this is just an allocation.  Note that a non-NULL ptr
 * is freed even when the new allocation fails, so after a NULL return
 * the old buffer is gone as well.
 *
 * Returns the new buffer, or NULL if kmem_alloc() returned NULL.
 */
void *
kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
{
	void	*fresh = kmem_alloc(newsize, flags);
	size_t	ncopy;

	if (!ptr)
		return fresh;

	if (fresh) {
		ncopy = (newsize > oldsize) ? oldsize : newsize;
		memcpy(fresh, ptr, ncopy);
	}
	kmem_free(ptr, oldsize);

	return fresh;
}
/*
 * Allocate one object from zone with kmem_cache_alloc(), using the
 * XFS KM_* flags as translated by kmem_flags_convert().
 *
 * NULL is returned only when the caller passed KM_MAYFAIL or KM_NOSLEEP.
 * Without either flag the function keeps retrying until an allocation
 * succeeds, logging a message on every 100th failed attempt.
 */
void *
kmem_zone_alloc(kmem_zone_t *zone, int flags)
{
	int	lflags = kmem_flags_convert(flags);
	int	attempts = 0;
	void	*obj;

	for (;;) {
		obj = kmem_cache_alloc(zone, lflags);
		if (obj)
			return obj;

		/* Caller can cope with failure: hand back the NULL. */
		if (flags & (KM_MAYFAIL|KM_NOSLEEP))
			return obj;

		attempts++;
		if ((attempts % 100) == 0)
			printk(KERN_ERR "possible deadlock in %s (mode:0x%x)\n",
					__FUNCTION__, lflags);
	}
}
/*
 * Allocate one object from zone via kmem_zone_alloc() and clear it.
 * The number of bytes zeroed is whatever kmem_cache_size(zone) reports.
 *
 * Returns the zeroed object, or NULL if kmem_zone_alloc() returned NULL.
 */
void *
kmem_zone_zalloc(kmem_zone_t *zone, int flags)
{
	void	*obj = kmem_zone_alloc(zone, flags);

	if (!obj)
		return NULL;

	memset(obj, 0, kmem_cache_size(zone));
	return obj;
}
/* /*
* Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify it * This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License as * under the terms of version 2 of the GNU General Public License as
...@@ -32,32 +32,34 @@ ...@@ -32,32 +32,34 @@
#ifndef __XFS_SUPPORT_KMEM_H__ #ifndef __XFS_SUPPORT_KMEM_H__
#define __XFS_SUPPORT_KMEM_H__ #define __XFS_SUPPORT_KMEM_H__
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/vmalloc.h> #include <linux/sched.h>
#include <linux/mm.h>
/*
* Cutoff point to use vmalloc instead of kmalloc.
*/
#define MAX_SLAB_SIZE 0x20000
/* /*
* XFS uses slightly different names for these due to the * memory management routines
* IRIX heritage.
*/ */
#define kmem_zone kmem_cache_s
#define kmem_zone_t kmem_cache_t
#define KM_SLEEP 0x0001 #define KM_SLEEP 0x0001
#define KM_NOSLEEP 0x0002 #define KM_NOSLEEP 0x0002
#define KM_NOFS 0x0004 #define KM_NOFS 0x0004
#define KM_MAYFAIL 0x0005 #define KM_MAYFAIL 0x0008
#define kmem_zone kmem_cache_s
#define kmem_zone_t kmem_cache_t
typedef unsigned long xfs_pflags_t; typedef unsigned long xfs_pflags_t;
#define PFLAGS_TEST_NOIO() (current->flags & PF_NOIO)
#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS) #define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS)
#define PFLAGS_SET_NOIO() do { \
current->flags |= PF_NOIO; \
} while (0)
#define PFLAGS_CLEAR_NOIO() do { \
current->flags &= ~PF_NOIO; \
} while (0)
/* these could be nested, so we save state */ /* these could be nested, so we save state */
#define PFLAGS_SET_FSTRANS(STATEP) do { \ #define PFLAGS_SET_FSTRANS(STATEP) do { \
*(STATEP) = current->flags; \ *(STATEP) = current->flags; \
...@@ -79,8 +81,7 @@ typedef unsigned long xfs_pflags_t; ...@@ -79,8 +81,7 @@ typedef unsigned long xfs_pflags_t;
*(NSTATEP) = *(OSTATEP); \ *(NSTATEP) = *(OSTATEP); \
} while (0) } while (0)
static __inline unsigned int static __inline unsigned int kmem_flags_convert(int flags)
kmem_flags_convert(int flags)
{ {
int lflags; int lflags;
...@@ -100,83 +101,44 @@ kmem_flags_convert(int flags) ...@@ -100,83 +101,44 @@ kmem_flags_convert(int flags)
/* avoid recursive callbacks to filesystem during transactions */ /* avoid recursive callbacks to filesystem during transactions */
if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS)) if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS))
lflags &= ~__GFP_FS; lflags &= ~__GFP_FS;
if (!(flags & KM_MAYFAIL))
lflags |= __GFP_NOFAIL;
} }
return lflags; return lflags;
} }
static __inline void *
kmem_alloc(size_t size, int flags)
{
if (unlikely(MAX_SLAB_SIZE < size))
/* Avoid doing filesystem sensitive stuff to get this */
return __vmalloc(size, kmem_flags_convert(flags), PAGE_KERNEL);
return kmalloc(size, kmem_flags_convert(flags));
}
static __inline void *
kmem_zalloc(size_t size, int flags)
{
void *ptr = kmem_alloc(size, flags);
if (likely(ptr != NULL))
memset(ptr, 0, size);
return ptr;
}
static __inline void
kmem_free(void *ptr, size_t size)
{
if (unlikely((unsigned long)ptr < VMALLOC_START ||
(unsigned long)ptr >= VMALLOC_END))
kfree(ptr);
else
vfree(ptr);
}
static __inline void *
kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
{
void *new = kmem_alloc(newsize, flags);
if (likely(ptr != NULL)) {
if (likely(new != NULL))
memcpy(new, ptr, min(oldsize, newsize));
kmem_free(ptr, oldsize);
}
return new;
}
static __inline kmem_zone_t * static __inline kmem_zone_t *
kmem_zone_init(int size, char *zone_name) kmem_zone_init(int size, char *zone_name)
{ {
return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL); return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL);
} }
static __inline void * static __inline void
kmem_zone_alloc(kmem_zone_t *zone, int flags) kmem_zone_free(kmem_zone_t *zone, void *ptr)
{ {
return kmem_cache_alloc(zone, kmem_flags_convert(flags)); kmem_cache_free(zone, ptr);
} }
static __inline void * static __inline void
kmem_zone_zalloc(kmem_zone_t *zone, int flags) kmem_zone_destroy(kmem_zone_t *zone)
{ {
void *ptr = kmem_zone_alloc(zone, flags); if (zone && kmem_cache_destroy(zone))
if (likely(ptr != NULL)) BUG();
memset(ptr, 0, kmem_cache_size(zone));
return ptr;
} }
static __inline void static __inline int
kmem_zone_free(kmem_zone_t *zone, void *ptr) kmem_zone_shrink(kmem_zone_t *zone)
{ {
kmem_cache_free(zone, ptr); return kmem_cache_shrink(zone);
} }
extern void *kmem_zone_zalloc(kmem_zone_t *, int);
extern void *kmem_zone_alloc(kmem_zone_t *, int);
extern void *kmem_alloc(size_t, int);
extern void *kmem_realloc(void *, size_t, size_t, int);
extern void *kmem_zalloc(size_t, int);
extern void kmem_free(void *, size_t);
typedef struct shrinker *kmem_shaker_t; typedef struct shrinker *kmem_shaker_t;
typedef int (*kmem_shake_func_t)(int, unsigned int); typedef int (*kmem_shake_func_t)(int, unsigned int);
......
...@@ -65,7 +65,8 @@ ...@@ -65,7 +65,8 @@
*/ */
STATIC kmem_cache_t *pagebuf_cache; STATIC kmem_cache_t *pagebuf_cache;
STATIC void pagebuf_daemon_wakeup(void); STATIC kmem_shaker_t pagebuf_shake;
STATIC int pagebuf_daemon_wakeup(int, unsigned int);
STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
STATIC struct workqueue_struct *pagebuf_logio_workqueue; STATIC struct workqueue_struct *pagebuf_logio_workqueue;
STATIC struct workqueue_struct *pagebuf_dataio_workqueue; STATIC struct workqueue_struct *pagebuf_dataio_workqueue;
...@@ -384,13 +385,13 @@ _pagebuf_lookup_pages( ...@@ -384,13 +385,13 @@ _pagebuf_lookup_pages(
* But until all the XFS lowlevel code is revamped to * But until all the XFS lowlevel code is revamped to
* handle buffer allocation failures we can't do much. * handle buffer allocation failures we can't do much.
*/ */
if (!(++retries % 100)) { if (!(++retries % 100))
printk(KERN_ERR "possibly deadlocking in %s\n", printk(KERN_ERR
__FUNCTION__); "possible deadlock in %s (mode:0x%x)\n",
} __FUNCTION__, gfp_mask);
XFS_STATS_INC(pb_page_retries); XFS_STATS_INC(pb_page_retries);
pagebuf_daemon_wakeup(); pagebuf_daemon_wakeup(0, gfp_mask);
set_current_state(TASK_UNINTERRUPTIBLE); set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(10); schedule_timeout(10);
goto retry; goto retry;
...@@ -1595,12 +1596,16 @@ STATIC struct task_struct *pagebuf_daemon_task; ...@@ -1595,12 +1596,16 @@ STATIC struct task_struct *pagebuf_daemon_task;
STATIC int pagebuf_daemon_active; STATIC int pagebuf_daemon_active;
STATIC int force_flush; STATIC int force_flush;
STATIC void
pagebuf_daemon_wakeup(void) STATIC int
pagebuf_daemon_wakeup(
int priority,
unsigned int mask)
{ {
force_flush = 1; force_flush = 1;
barrier(); barrier();
wake_up_process(pagebuf_daemon_task); wake_up_process(pagebuf_daemon_task);
return 0;
} }
STATIC int STATIC int
...@@ -1784,21 +1789,28 @@ pagebuf_init(void) ...@@ -1784,21 +1789,28 @@ pagebuf_init(void)
pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0, pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,
SLAB_HWCACHE_ALIGN, NULL, NULL); SLAB_HWCACHE_ALIGN, NULL, NULL);
if (pagebuf_cache == NULL) { if (pagebuf_cache == NULL) {
printk("pagebuf: couldn't init pagebuf cache\n"); printk("XFS: couldn't init xfs_buf_t cache\n");
pagebuf_terminate(); pagebuf_terminate();
return -ENOMEM; return -ENOMEM;
} }
for (i = 0; i < NHASH; i++) {
spin_lock_init(&pbhash[i].pb_hash_lock);
INIT_LIST_HEAD(&pbhash[i].pb_hash);
}
#ifdef PAGEBUF_TRACE #ifdef PAGEBUF_TRACE
pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
#endif #endif
pagebuf_daemon_start(); pagebuf_daemon_start();
pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup);
if (pagebuf_shake == NULL) {
pagebuf_terminate();
return -ENOMEM;
}
for (i = 0; i < NHASH; i++) {
spin_lock_init(&pbhash[i].pb_hash_lock);
INIT_LIST_HEAD(&pbhash[i].pb_hash);
}
return 0; return 0;
} }
...@@ -1817,5 +1829,6 @@ pagebuf_terminate(void) ...@@ -1817,5 +1829,6 @@ pagebuf_terminate(void)
ktrace_free(pagebuf_trace_buf); ktrace_free(pagebuf_trace_buf);
#endif #endif
kmem_cache_destroy(pagebuf_cache); kmem_zone_destroy(pagebuf_cache);
kmem_shake_deregister(pagebuf_shake);
} }
...@@ -76,7 +76,8 @@ ...@@ -76,7 +76,8 @@
STATIC struct quotactl_ops linvfs_qops; STATIC struct quotactl_ops linvfs_qops;
STATIC struct super_operations linvfs_sops; STATIC struct super_operations linvfs_sops;
STATIC struct export_operations linvfs_export_ops; STATIC struct export_operations linvfs_export_ops;
STATIC kmem_cache_t * linvfs_inode_cachep; STATIC kmem_zone_t *linvfs_inode_zone;
STATIC kmem_shaker_t xfs_inode_shaker;
STATIC struct xfs_mount_args * STATIC struct xfs_mount_args *
xfs_args_allocate( xfs_args_allocate(
...@@ -289,7 +290,7 @@ linvfs_alloc_inode( ...@@ -289,7 +290,7 @@ linvfs_alloc_inode(
{ {
vnode_t *vp; vnode_t *vp;
vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_cachep, vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone,
kmem_flags_convert(KM_SLEEP)); kmem_flags_convert(KM_SLEEP));
if (!vp) if (!vp)
return NULL; return NULL;
...@@ -300,7 +301,20 @@ STATIC void ...@@ -300,7 +301,20 @@ STATIC void
linvfs_destroy_inode( linvfs_destroy_inode(
struct inode *inode) struct inode *inode)
{ {
kmem_cache_free(linvfs_inode_cachep, LINVFS_GET_VP(inode)); kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode));
}
int
xfs_inode_shake(
int priority,
unsigned int gfp_mask)
{
int pages;
pages = kmem_zone_shrink(linvfs_inode_zone);
pages += kmem_zone_shrink(xfs_inode_zone);
return pages;
} }
STATIC void STATIC void
...@@ -319,12 +333,12 @@ init_once( ...@@ -319,12 +333,12 @@ init_once(
STATIC int STATIC int
init_inodecache( void ) init_inodecache( void )
{ {
linvfs_inode_cachep = kmem_cache_create("linvfs_icache", linvfs_inode_zone = kmem_cache_create("linvfs_icache",
sizeof(vnode_t), 0, sizeof(vnode_t), 0,
SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
init_once, NULL); init_once, NULL);
if (linvfs_inode_cachep == NULL) if (linvfs_inode_zone == NULL)
return -ENOMEM; return -ENOMEM;
return 0; return 0;
} }
...@@ -332,7 +346,7 @@ init_inodecache( void ) ...@@ -332,7 +346,7 @@ init_inodecache( void )
STATIC void STATIC void
destroy_inodecache( void ) destroy_inodecache( void )
{ {
if (kmem_cache_destroy(linvfs_inode_cachep)) if (kmem_cache_destroy(linvfs_inode_zone))
printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__);
} }
...@@ -837,6 +851,8 @@ init_xfs_fs( void ) ...@@ -837,6 +851,8 @@ init_xfs_fs( void )
uuid_init(); uuid_init();
vfs_initquota(); vfs_initquota();
xfs_inode_shaker = kmem_shake_register(xfs_inode_shake);
error = register_filesystem(&xfs_fs_type); error = register_filesystem(&xfs_fs_type);
if (error) if (error)
goto undo_register; goto undo_register;
...@@ -859,6 +875,7 @@ exit_xfs_fs( void ) ...@@ -859,6 +875,7 @@ exit_xfs_fs( void )
vfs_exitquota(); vfs_exitquota();
XFS_DM_EXIT(&xfs_fs_type); XFS_DM_EXIT(&xfs_fs_type);
unregister_filesystem(&xfs_fs_type); unregister_filesystem(&xfs_fs_type);
kmem_shake_deregister(xfs_inode_shaker);
xfs_cleanup(); xfs_cleanup();
pagebuf_terminate(); pagebuf_terminate();
destroy_inodecache(); destroy_inodecache();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment