Commit ec559266 authored by Anton Blanchard's avatar Anton Blanchard

Merge samba.org:/scratch/anton/linux-2.5

into samba.org:/scratch/anton/linux-2.5_ppc64_new
parents 5bab5ff4 6865038a
......@@ -171,6 +171,11 @@ void show_stack(unsigned long *sp)
dik_show_trace(sp);
}
void dump_stack(void)
{
show_stack(NULL);
}
void
die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15)
{
......
......@@ -230,8 +230,12 @@ watchdog_bite_hook(struct pt_regs *regs)
#endif
}
/* This is normally the 'Oops' routine */
void dump_stack(void)
{
show_stack(NULL);
}
/* This is normally the 'Oops' routine */
void
die_if_kernel(const char * str, struct pt_regs * regs, long err)
{
......
......@@ -25,6 +25,15 @@ CONFIG_SMP
If you don't know what to do here, say N.
CONFIG_HUGETLB_PAGE
This enables support for huge pages. User space applications
can make use of this support with the sys_alloc_hugepages and
sys_free_hugepages system calls. If your applications are
huge page aware and your processor (Pentium or later for x86)
supports this, then say Y here.
Otherwise, say N.
CONFIG_PREEMPT
This option reduces the latency of the kernel when reacting to
real-time or interactive events by allowing a low priority process to
......
......@@ -154,6 +154,8 @@ if [ "$CONFIG_MWINCHIP3D" = "y" ]; then
define_bool CONFIG_X86_OOSTORE y
fi
bool 'IA-32 Huge TLB Page Support (if available on processor)' CONFIG_HUGETLB_PAGE
bool 'Symmetric multi-processing support' CONFIG_SMP
bool 'Preemptible Kernel' CONFIG_PREEMPT
if [ "$CONFIG_SMP" != "y" ]; then
......
......@@ -759,8 +759,8 @@ ENTRY(sys_call_table)
.long sys_io_getevents
.long sys_io_submit
.long sys_io_cancel
.long sys_ni_syscall /* 250 */ /* sys_alloc_hugepages */
.long sys_ni_syscall /* sys_free_hugepages */
.long sys_alloc_hugepages /* 250 */
.long sys_free_hugepages
.long sys_exit_group
.rept NR_syscalls-(.-sys_call_table)/4
......
......@@ -246,3 +246,94 @@ asmlinkage int sys_olduname(struct oldold_utsname * name)
return error;
}
#ifdef CONFIG_HUGETLB_PAGE
#define HPAGE_ALIGN(x) (((unsigned long)x + (HPAGE_SIZE -1)) & HPAGE_MASK)
extern long sys_munmap(unsigned long, size_t);
/* get_addr function gets the currently unused virtaul range in
* current process's address space. It returns the LARGE_PAGE_SIZE
* aligned address (in cases of success). Other kernel generic
* routines only could gurantee that allocated address is PAGE_SIZSE aligned.
*/
static unsigned long
get_addr(unsigned long addr, unsigned long len)
{
struct vm_area_struct *vma;
if (addr) {
addr = HPAGE_ALIGN(addr);
vma = find_vma(current->mm, addr);
if (((TASK_SIZE - len) >= addr) &&
(!vma || addr + len <= vma->vm_start))
goto found_addr;
}
addr = HPAGE_ALIGN(TASK_UNMAPPED_BASE);
for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
if (TASK_SIZE - len < addr)
return -ENOMEM;
if (!vma || ((addr + len) < vma->vm_start))
goto found_addr;
addr = vma->vm_end;
}
found_addr:
addr = HPAGE_ALIGN(addr);
return addr;
}
asmlinkage unsigned long
sys_alloc_hugepages(int key, unsigned long addr, unsigned long len, int prot, int flag)
{
struct mm_struct *mm = current->mm;
unsigned long raddr;
int retval = 0;
extern int alloc_hugetlb_pages(int, unsigned long, unsigned long, int, int);
if (!(cpu_has_pse))
return -EINVAL;
if (key < 0)
return -EINVAL;
if (len & (HPAGE_SIZE - 1))
return -EINVAL;
down_write(&mm->mmap_sem);
raddr = get_addr(addr, len);
if (raddr == -ENOMEM)
goto raddr_out;
retval = alloc_hugetlb_pages(key, raddr, len, prot, flag);
raddr_out: up_write(&mm->mmap_sem);
if (retval < 0)
return (unsigned long) retval;
return raddr;
}
asmlinkage int
sys_free_hugepages(unsigned long addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int retval;
extern int free_hugepages(struct vm_area_struct *);
vma = find_vma(current->mm, addr);
if ((!vma) || (!is_vm_hugetlb_page(vma)) || (vma->vm_start!=addr))
return -EINVAL;
down_write(&mm->mmap_sem);
spin_lock(&mm->page_table_lock);
retval = free_hugepages(vma);
spin_unlock(&mm->page_table_lock);
up_write(&mm->mmap_sem);
return retval;
}
#else
asmlinkage unsigned long
sys_alloc_hugepages(int key, unsigned long addr, size_t len, int prot, int flag)
{
return -ENOSYS;
}
asmlinkage int
sys_free_hugepages(unsigned long addr)
{
return -ENOSYS;
}
#endif
......@@ -189,6 +189,14 @@ void show_stack(unsigned long * esp)
show_trace(esp);
}
/*
* The architecture-independent dump_stack generator
*/
void dump_stack(void)
{
show_stack(0);
}
void show_registers(struct pt_regs *regs)
{
int i;
......
......@@ -12,5 +12,6 @@ O_TARGET := mm.o
obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o
obj-$(CONFIG_DISCONTIGMEM) += discontig.o
export-objs := pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
include $(TOPDIR)/Rules.make
This diff is collapsed.
......@@ -215,19 +215,14 @@ void __init permanent_kmaps_init(pgd_t *pgd_base)
void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
{
if (!page_is_ram(pfn)) {
SetPageReserved(page);
return;
}
if (bad_ppro && page_kills_ppro(pfn)) {
SetPageReserved(page);
return;
}
if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
ClearPageReserved(page);
set_bit(PG_highmem, &page->flags);
atomic_set(&page->count, 1);
set_page_count(page, 1);
__free_page(page);
totalhigh_pages++;
} else
SetPageReserved(page);
}
#ifndef CONFIG_DISCONTIGMEM
......@@ -431,6 +426,13 @@ static void __init set_max_mapnr_init(void)
extern void set_max_mapnr_init(void);
#endif /* !CONFIG_DISCONTIGMEM */
#ifdef CONFIG_HUGETLB_PAGE
long htlbpagemem = 0;
int htlbpage_max;
long htlbzone_pages;
extern struct list_head htlbpage_freelist;
#endif
void __init mem_init(void)
{
extern int ppro_with_ram_bug(void);
......@@ -493,6 +495,30 @@ void __init mem_init(void)
#ifndef CONFIG_SMP
zap_low_mappings();
#endif
#ifdef CONFIG_HUGETLB_PAGE
{
long i, j;
struct page *page, *map;
/*For now reserve quarter for hugetlb_pages.*/
htlbzone_pages = (max_low_pfn >> ((HPAGE_SHIFT - PAGE_SHIFT) + 2)) ;
/*Will make this kernel command line. */
INIT_LIST_HEAD(&htlbpage_freelist);
for (i=0; i<htlbzone_pages; i++) {
page = alloc_pages(GFP_ATOMIC, HUGETLB_PAGE_ORDER);
if (page == NULL)
break;
map = page;
for (j=0; j<(HPAGE_SIZE/PAGE_SIZE); j++) {
SetPageReserved(map);
map++;
}
list_add(&page->list, &htlbpage_freelist);
}
printk("Total Huge_TLB_Page memory pages allocated %ld\n", i);
htlbzone_pages = htlbpagemem = i;
htlbpage_max = i;
}
#endif
}
#if CONFIG_X86_PAE
......
......@@ -2345,22 +2345,19 @@ static int
check_task_state(struct task_struct *task)
{
int ret = 0;
#ifdef CONFIG_SMP
/* We must wait until the state has been completely
* saved. There can be situations where the reader arrives before
* after the task is marked as STOPPED but before pfm_save_regs()
* is completed.
*/
if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) return -EBUSY;
DBprintk(("before wait_task_inactive [%d] state %ld\n", task->pid, task->state));
wait_task_inactive(task);
DBprintk(("after wait_task_inactive [%d] state %ld\n", task->pid, task->state));
#else
if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) {
DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state));
ret = -EBUSY;
}
#endif
DBprintk(("before wait_task_inactive [%d] state %ld\n", task->pid, task->state));
wait_task_inactive(task);
DBprintk(("after wait_task_inactive [%d] state %ld\n", task->pid, task->state));
return ret;
}
......
......@@ -332,6 +332,29 @@ struct block_device *bdget(dev_t dev)
return bdev;
}
long nr_blockdev_pages(void)
{
long ret = 0;
int i;
spin_lock(&bdev_lock);
for (i = 0; i < ARRAY_SIZE(bdev_hashtable); i++) {
struct list_head *head = &bdev_hashtable[i];
struct list_head *lh;
if (head == NULL)
continue;
list_for_each(lh, head) {
struct block_device *bdev;
bdev = list_entry(lh, struct block_device, bd_hash);
ret += bdev->bd_inode->i_mapping->nrpages;
}
}
spin_unlock(&bdev_lock);
return ret;
}
static inline void __bd_forget(struct inode *inode)
{
list_del_init(&inode->i_devices);
......
......@@ -61,10 +61,8 @@ void __buffer_error(char *file, int line)
return;
enough++;
printk("buffer layer error at %s:%d\n", file, line);
#ifdef CONFIG_X86
printk("Pass this trace through ksymoops for reporting\n");
show_stack(0);
#endif
dump_stack();
}
EXPORT_SYMBOL(__buffer_error);
......
......@@ -46,18 +46,18 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
unsigned long desc;
struct ext3_group_desc * gdp;
if (block_group >= sb->u.ext3_sb.s_groups_count) {
if (block_group >= EXT3_SB(sb)->s_groups_count) {
ext3_error (sb, "ext3_get_group_desc",
"block_group >= groups_count - "
"block_group = %d, groups_count = %lu",
block_group, sb->u.ext3_sb.s_groups_count);
block_group, EXT3_SB(sb)->s_groups_count);
return NULL;
}
group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
desc = block_group % EXT3_DESC_PER_BLOCK(sb);
if (!sb->u.ext3_sb.s_group_desc[group_desc]) {
if (!EXT3_SB(sb)->s_group_desc[group_desc]) {
ext3_error (sb, "ext3_get_group_desc",
"Group descriptor not loaded - "
"block_group = %d, group_desc = %lu, desc = %lu",
......@@ -66,9 +66,9 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
}
gdp = (struct ext3_group_desc *)
sb->u.ext3_sb.s_group_desc[group_desc]->b_data;
EXT3_SB(sb)->s_group_desc[group_desc]->b_data;
if (bh)
*bh = sb->u.ext3_sb.s_group_desc[group_desc];
*bh = EXT3_SB(sb)->s_group_desc[group_desc];
return gdp + desc;
}
......@@ -119,7 +119,7 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
return;
}
lock_super (sb);
es = sb->u.ext3_sb.s_es;
es = EXT3_SB(sb)->s_es;
if (block < le32_to_cpu(es->s_first_data_block) ||
(block + count) > le32_to_cpu(es->s_blocks_count)) {
ext3_error (sb, "ext3_free_blocks",
......@@ -155,9 +155,9 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
in_range (block, le32_to_cpu(gdp->bg_inode_table),
sb->u.ext3_sb.s_itb_per_group) ||
EXT3_SB(sb)->s_itb_per_group) ||
in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
sb->u.ext3_sb.s_itb_per_group))
EXT3_SB(sb)->s_itb_per_group))
ext3_error (sb, "ext3_free_blocks",
"Freeing blocks in system zones - "
"Block = %lu, count = %lu",
......@@ -183,8 +183,8 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
if (err)
goto error_return;
BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
if (err)
goto error_return;
......@@ -253,8 +253,8 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
if (!err) err = ret;
/* And the superblock */
BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock");
ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "dirtied superblock");
ret = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
if (!err) err = ret;
if (overflow && !err) {
......@@ -408,12 +408,12 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
}
lock_super(sb);
es = sb->u.ext3_sb.s_es;
es = EXT3_SB(sb)->s_es;
if (le32_to_cpu(es->s_free_blocks_count) <=
le32_to_cpu(es->s_r_blocks_count) &&
((sb->u.ext3_sb.s_resuid != current->fsuid) &&
(sb->u.ext3_sb.s_resgid == 0 ||
!in_group_p(sb->u.ext3_sb.s_resgid)) &&
((EXT3_SB(sb)->s_resuid != current->fsuid) &&
(EXT3_SB(sb)->s_resgid == 0 ||
!in_group_p(EXT3_SB(sb)->s_resgid)) &&
!capable(CAP_SYS_RESOURCE)))
goto out;
......@@ -464,9 +464,9 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
* Now search the rest of the groups. We assume that
* i and gdp correctly point to the last group visited.
*/
for (bit = 0; bit < sb->u.ext3_sb.s_groups_count; bit++) {
for (bit = 0; bit < EXT3_SB(sb)->s_groups_count; bit++) {
group_no++;
if (group_no >= sb->u.ext3_sb.s_groups_count)
if (group_no >= EXT3_SB(sb)->s_groups_count)
group_no = 0;
gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
if (!gdp) {
......@@ -518,8 +518,8 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
if (fatal)
goto out;
BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
if (fatal)
goto out;
......@@ -529,7 +529,7 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
if (target_block == le32_to_cpu(gdp->bg_block_bitmap) ||
target_block == le32_to_cpu(gdp->bg_inode_bitmap) ||
in_range(target_block, le32_to_cpu(gdp->bg_inode_table),
sb->u.ext3_sb.s_itb_per_group))
EXT3_SB(sb)->s_itb_per_group))
ext3_error(sb, "ext3_new_block",
"Allocating block in system zone - "
"block = %u", target_block);
......@@ -594,9 +594,9 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
if (!fatal)
fatal = err;
BUFFER_TRACE(sb->u.ext3_sb.s_sbh,
BUFFER_TRACE(EXT3_SB(sb)->s_sbh,
"journal_dirty_metadata for superblock");
err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
if (!fatal)
fatal = err;
......@@ -637,11 +637,11 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
int i;
lock_super(sb);
es = sb->u.ext3_sb.s_es;
es = EXT3_SB(sb)->s_es;
desc_count = 0;
bitmap_count = 0;
gdp = NULL;
for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
gdp = ext3_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
......@@ -662,7 +662,7 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
unlock_super(sb);
return bitmap_count;
#else
return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count);
return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count);
#endif
}
......@@ -671,7 +671,7 @@ static inline int block_in_use(unsigned long block,
unsigned char * map)
{
return ext3_test_bit ((block -
le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) %
le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) %
EXT3_BLOCKS_PER_GROUP(sb), map);
}
......@@ -738,11 +738,11 @@ void ext3_check_blocks_bitmap (struct super_block * sb)
struct ext3_group_desc *gdp;
int i;
es = sb->u.ext3_sb.s_es;
es = EXT3_SB(sb)->s_es;
desc_count = 0;
bitmap_count = 0;
gdp = NULL;
for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
gdp = ext3_get_group_desc (sb, i, NULL);
if (!gdp)
continue;
......@@ -776,7 +776,7 @@ void ext3_check_blocks_bitmap (struct super_block * sb)
"Inode bitmap for group %d is marked free",
i);
for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++)
for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++)
if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
sb, bitmap_bh->b_data))
ext3_error (sb, "ext3_check_blocks_bitmap",
......
......@@ -54,7 +54,7 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
error_msg = "directory entry across blocks";
else if (le32_to_cpu(de->inode) >
le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count))
le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
error_msg = "inode out of bounds";
if (error_msg != NULL)
......
......@@ -127,7 +127,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
clear_inode (inode);
lock_super (sb);
es = sb->u.ext3_sb.s_es;
es = EXT3_SB(sb)->s_es;
if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
ext3_error (sb, "ext3_free_inode",
"reserved or nonexistent inode %lu", ino);
......@@ -155,8 +155,8 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
fatal = ext3_journal_get_write_access(handle, bh2);
if (fatal) goto error_return;
BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get write access");
fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get write access");
fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
if (fatal) goto error_return;
if (gdp) {
......@@ -171,9 +171,9 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
if (!fatal) fatal = err;
es->s_free_inodes_count =
cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
BUFFER_TRACE(sb->u.ext3_sb.s_sbh,
BUFFER_TRACE(EXT3_SB(sb)->s_sbh,
"call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
if (!fatal) fatal = err;
}
BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
......@@ -222,16 +222,16 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
ei = EXT3_I(inode);
lock_super (sb);
es = sb->u.ext3_sb.s_es;
es = EXT3_SB(sb)->s_es;
repeat:
gdp = NULL;
i = 0;
if (S_ISDIR(mode)) {
avefreei = le32_to_cpu(es->s_free_inodes_count) /
sb->u.ext3_sb.s_groups_count;
EXT3_SB(sb)->s_groups_count;
if (!gdp) {
for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) {
for (j = 0; j < EXT3_SB(sb)->s_groups_count; j++) {
struct buffer_head *temp_buffer;
tmp = ext3_get_group_desc (sb, j, &temp_buffer);
if (tmp &&
......@@ -261,10 +261,10 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
* Use a quadratic hash to find a group with a
* free inode
*/
for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) {
for (j = 1; j < EXT3_SB(sb)->s_groups_count; j <<= 1) {
i += j;
if (i >= sb->u.ext3_sb.s_groups_count)
i -= sb->u.ext3_sb.s_groups_count;
if (i >= EXT3_SB(sb)->s_groups_count)
i -= EXT3_SB(sb)->s_groups_count;
tmp = ext3_get_group_desc (sb, i, &bh2);
if (tmp &&
le16_to_cpu(tmp->bg_free_inodes_count)) {
......@@ -278,8 +278,8 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
* That failed: try linear search for a free inode
*/
i = EXT3_I(dir)->i_block_group + 1;
for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) {
if (++i >= sb->u.ext3_sb.s_groups_count)
for (j = 2; j < EXT3_SB(sb)->s_groups_count; j++) {
if (++i >= EXT3_SB(sb)->s_groups_count)
i = 0;
tmp = ext3_get_group_desc (sb, i, &bh2);
if (tmp &&
......@@ -357,13 +357,13 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
err = ext3_journal_dirty_metadata(handle, bh2);
if (err) goto fail;
BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
if (err) goto fail;
es->s_free_inodes_count =
cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
sb->s_dirt = 1;
if (err) goto fail;
......@@ -417,7 +417,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
if (IS_DIRSYNC(inode))
handle->h_sync = 1;
insert_inode_hash(inode);
inode->i_generation = sb->u.ext3_sb.s_next_generation++;
inode->i_generation = EXT3_SB(sb)->s_next_generation++;
ei->i_state = EXT3_STATE_NEW;
err = ext3_mark_inode_dirty(handle, inode);
......@@ -512,11 +512,11 @@ unsigned long ext3_count_free_inodes (struct super_block * sb)
int i;
lock_super (sb);
es = sb->u.ext3_sb.s_es;
es = EXT3_SB(sb)->s_es;
desc_count = 0;
bitmap_count = 0;
gdp = NULL;
for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
gdp = ext3_get_group_desc (sb, i, NULL);
if (!gdp)
continue;
......@@ -537,7 +537,7 @@ unsigned long ext3_count_free_inodes (struct super_block * sb)
unlock_super(sb);
return desc_count;
#else
return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_inodes_count);
return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count);
#endif
}
......@@ -551,11 +551,11 @@ void ext3_check_inodes_bitmap (struct super_block * sb)
struct ext3_group_desc * gdp;
int i;
es = sb->u.ext3_sb.s_es;
es = EXT3_SB(sb)->s_es;
desc_count = 0;
bitmap_count = 0;
gdp = NULL;
for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
gdp = ext3_get_group_desc (sb, i, NULL);
if (!gdp)
continue;
......
......@@ -471,7 +471,7 @@ static inline unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
* the same cylinder group then.
*/
return (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
le32_to_cpu(inode->i_sb->u.ext3_sb.s_es->s_first_data_block);
le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
}
/**
......@@ -2141,20 +2141,20 @@ int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc)
inode->i_ino != EXT3_JOURNAL_INO &&
inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
inode->i_ino > le32_to_cpu(
inode->i_sb->u.ext3_sb.s_es->s_inodes_count)) {
EXT3_SB(inode->i_sb)->s_es->s_inodes_count)) {
ext3_error (inode->i_sb, "ext3_get_inode_loc",
"bad inode number: %lu", inode->i_ino);
goto bad_inode;
}
block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb);
if (block_group >= inode->i_sb->u.ext3_sb.s_groups_count) {
if (block_group >= EXT3_SB(inode->i_sb)->s_groups_count) {
ext3_error (inode->i_sb, "ext3_get_inode_loc",
"group >= groups count");
goto bad_inode;
}
group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb);
desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1);
bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc];
bh = EXT3_SB(inode->i_sb)->s_group_desc[group_desc];
if (!bh) {
ext3_error (inode->i_sb, "ext3_get_inode_loc",
"Descriptor not loaded");
......@@ -2224,7 +2224,7 @@ void ext3_read_inode(struct inode * inode)
*/
if (inode->i_nlink == 0) {
if (inode->i_mode == 0 ||
!(inode->i_sb->u.ext3_sb.s_mount_state & EXT3_ORPHAN_FS)) {
!(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
/* this inode is deleted */
brelse (bh);
goto bad_inode;
......@@ -2394,7 +2394,7 @@ static int ext3_do_update_inode(handle_t *handle,
* created, add a flag to the superblock.
*/
err = ext3_journal_get_write_access(handle,
sb->u.ext3_sb.s_sbh);
EXT3_SB(sb)->s_sbh);
if (err)
goto out_brelse;
ext3_update_dynamic_rev(sb);
......@@ -2403,7 +2403,7 @@ static int ext3_do_update_inode(handle_t *handle,
sb->s_dirt = 1;
handle->h_sync = 1;
err = ext3_journal_dirty_metadata(handle,
sb->u.ext3_sb.s_sbh);
EXT3_SB(sb)->s_sbh);
}
}
}
......
......@@ -159,12 +159,12 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
int ret = 0;
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait);
if (timer_pending(&sb->u.ext3_sb.turn_ro_timer)) {
add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
schedule();
ret = 1;
}
remove_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait);
remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
return ret;
}
#endif
......
......@@ -729,8 +729,8 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
if (err)
goto out_unlock;
......@@ -741,7 +741,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
/* Insert this inode at the head of the on-disk orphan list... */
NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan);
EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
rc = ext3_mark_iloc_dirty(handle, inode, &iloc);
if (!err)
err = rc;
......
......@@ -120,7 +120,7 @@ static int ext3_error_behaviour(struct super_block *sb)
/* If no overrides were specified on the mount, then fall back
* to the default behaviour set in the filesystem's superblock
* on disk. */
switch (le16_to_cpu(sb->u.ext3_sb.s_es->s_errors)) {
switch (le16_to_cpu(EXT3_SB(sb)->s_es->s_errors)) {
case EXT3_ERRORS_PANIC:
return EXT3_ERRORS_PANIC;
case EXT3_ERRORS_RO:
......@@ -268,9 +268,9 @@ void ext3_abort (struct super_block * sb, const char * function,
return;
printk (KERN_CRIT "Remounting filesystem read-only\n");
sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS;
EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
sb->s_flags |= MS_RDONLY;
sb->u.ext3_sb.s_mount_opt |= EXT3_MOUNT_ABORT;
EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
journal_abort(EXT3_SB(sb)->s_journal, -EIO);
}
......@@ -439,7 +439,8 @@ void ext3_put_super (struct super_block * sb)
ext3_blkdev_remove(sbi);
}
clear_ro_after(sb);
sb->u.generic_sbp = NULL;
kfree(sbi);
return;
}
......@@ -877,7 +878,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
sb->s_flags &= ~MS_RDONLY;
}
if (sb->u.ext3_sb.s_mount_state & EXT3_ERROR_FS) {
if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
if (es->s_last_orphan)
jbd_debug(1, "Errors on filesystem, "
"clearing orphan list.\n");
......@@ -949,7 +950,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
{
struct buffer_head * bh;
struct ext3_super_block *es = 0;
struct ext3_sb_info *sbi = EXT3_SB(sb);
struct ext3_sb_info *sbi;
unsigned long sb_block = 1;
unsigned long logic_sb_block = 1;
unsigned long offset = 0;
......@@ -970,7 +971,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
* This is important for devices that have a hardware
* sectorsize that is larger than the default.
*/
sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->u.generic_sbp = sbi;
memset(sbi, 0, sizeof(*sbi));
sbi->s_mount_opt = 0;
sbi->s_resuid = EXT3_DEF_RESUID;
sbi->s_resgid = EXT3_DEF_RESGID;
......@@ -1266,6 +1271,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
ext3_blkdev_remove(sbi);
brelse(bh);
out_fail:
sb->u.generic_sbp = NULL;
kfree(sbi);
return -EINVAL;
}
......@@ -1520,11 +1527,11 @@ static void ext3_commit_super (struct super_block * sb,
int sync)
{
es->s_wtime = cpu_to_le32(CURRENT_TIME);
BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "marking dirty");
mark_buffer_dirty(sb->u.ext3_sb.s_sbh);
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "marking dirty");
mark_buffer_dirty(EXT3_SB(sb)->s_sbh);
if (sync) {
ll_rw_block(WRITE, 1, &sb->u.ext3_sb.s_sbh);
wait_on_buffer(sb->u.ext3_sb.s_sbh);
ll_rw_block(WRITE, 1, &EXT3_SB(sb)->s_sbh);
wait_on_buffer(EXT3_SB(sb)->s_sbh);
}
}
......@@ -1575,7 +1582,7 @@ static void ext3_clear_journal_err(struct super_block * sb,
ext3_warning(sb, __FUNCTION__, "Marking fs in need of "
"filesystem check.");
sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS;
EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
ext3_commit_super (sb, es, 1);
......
......@@ -487,7 +487,18 @@ int proc_pid_statm(struct task_struct *task, char * buffer)
while (vma) {
pgd_t *pgd = pgd_offset(mm, vma->vm_start);
int pages = 0, shared = 0, dirty = 0, total = 0;
if (is_vm_hugetlb_page(vma)) {
int num_pages = ((vma->vm_end - vma->vm_start)/PAGE_SIZE);
resident += num_pages;
if (!(vma->vm_flags & VM_DONTCOPY))
share += num_pages;
if (vma->vm_flags & VM_WRITE)
dt += num_pages;
drs += num_pages;
vma = vma->vm_next;
continue;
}
statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total);
resident += pages;
share += shared;
......
......@@ -136,16 +136,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
struct sysinfo i;
int len, committed;
struct page_state ps;
int cpu;
unsigned long inactive;
unsigned long active;
unsigned long flushes = 0;
unsigned long non_flushes = 0;
for (cpu = 0; cpu < NR_CPUS; cpu++) {
flushes += mmu_gathers[cpu].flushes;
non_flushes += mmu_gathers[cpu].avoided_flushes;
}
get_page_state(&ps);
get_zone_counts(&active, &inactive);
......@@ -165,6 +157,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
"MemTotal: %8lu kB\n"
"MemFree: %8lu kB\n"
"MemShared: %8lu kB\n"
"Buffers: %8lu kB\n"
"Cached: %8lu kB\n"
"SwapCached: %8lu kB\n"
"Active: %8lu kB\n"
......@@ -177,15 +170,15 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
"SwapFree: %8lu kB\n"
"Dirty: %8lu kB\n"
"Writeback: %8lu kB\n"
"Mapped: %8lu kB\n"
"Committed_AS: %8u kB\n"
"PageTables: %8lu kB\n"
"ReverseMaps: %8lu\n"
"TLB flushes: %8lu\n"
"non flushes: %8lu\n",
"ReverseMaps: %8lu\n",
K(i.totalram),
K(i.freeram),
K(i.sharedram),
K(ps.nr_pagecache-swapper_space.nrpages),
K(i.bufferram),
K(ps.nr_pagecache-swapper_space.nrpages-i.bufferram),
K(swapper_space.nrpages),
K(active),
K(inactive),
......@@ -197,13 +190,25 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
K(i.freeswap),
K(ps.nr_dirty),
K(ps.nr_writeback),
K(ps.nr_mapped),
K(committed),
K(ps.nr_page_table_pages),
ps.nr_reverse_maps,
flushes,
non_flushes
ps.nr_reverse_maps
);
#ifdef CONFIG_HUGETLB_PAGE
{
extern unsigned long htlbpagemem, htlbzone_pages;
len += sprintf(page + len,
"HugePages: %8lu\n"
"Available: %8lu\n"
"Size: %8lu kB\n",
htlbzone_pages,
htlbpagemem,
HPAGE_SIZE/1024);
}
#endif
return proc_calc_metrics(page, start, off, count, eof, len);
#undef K
}
......
......@@ -21,7 +21,7 @@
* and page free order so much..
*/
#ifdef CONFIG_SMP
#define FREE_PTE_NR 507
#define FREE_PTE_NR 506
#define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
#else
#define FREE_PTE_NR 1
......@@ -40,8 +40,6 @@ typedef struct free_pte_ctx {
unsigned int fullmm; /* non-zero means full mm flush */
unsigned long freed;
struct page * pages[FREE_PTE_NR];
unsigned long flushes;/* stats: count avoided flushes */
unsigned long avoided_flushes;
} mmu_gather_t;
/* Users of the generic TLB shootdown code must declare this storage space. */
......@@ -67,17 +65,10 @@ static inline mmu_gather_t *tlb_gather_mmu(struct mm_struct *mm, unsigned int fu
static inline void tlb_flush_mmu(mmu_gather_t *tlb, unsigned long start, unsigned long end)
{
unsigned long nr;
if (!tlb->need_flush) {
tlb->avoided_flushes++;
if (!tlb->need_flush)
return;
}
tlb->need_flush = 0;
tlb->flushes++;
tlb_flush(tlb);
nr = tlb->nr;
if (!tlb_fast_mode(tlb)) {
free_pages_and_swap_cache(tlb->pages, tlb->nr);
tlb->nr = 0;
......
......@@ -44,14 +44,22 @@ typedef struct { unsigned long pte_low, pte_high; } pte_t;
typedef struct { unsigned long long pmd; } pmd_t;
typedef struct { unsigned long long pgd; } pgd_t;
#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
#define HPAGE_SHIFT 21
#else
typedef struct { unsigned long pte_low; } pte_t;
typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pgd; } pgd_t;
#define pte_val(x) ((x).pte_low)
#define HPAGE_SHIFT 22
#endif
#define PTE_MASK PAGE_MASK
#ifdef CONFIG_HUGETLB_PAGE
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#endif
typedef struct { unsigned long pgprot; } pgprot_t;
#define pmd_val(x) ((x).pmd)
......
......@@ -327,7 +327,7 @@ extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bd
extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
extern void blk_dump_rq_flags(struct request *, char *);
extern void generic_unplug_device(void *);
extern long nr_blockdev_pages(void);
/*
* tag stuff
......
......@@ -97,9 +97,9 @@
# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
#endif
#ifdef __KERNEL__
#define EXT3_ADDR_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_addr_per_block_bits)
#define EXT3_INODE_SIZE(s) ((s)->u.ext3_sb.s_inode_size)
#define EXT3_FIRST_INO(s) ((s)->u.ext3_sb.s_first_ino)
#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits)
#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size)
#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino)
#else
#define EXT3_INODE_SIZE(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \
EXT3_GOOD_OLD_INODE_SIZE : \
......@@ -116,8 +116,8 @@
#define EXT3_MAX_FRAG_SIZE 4096
#define EXT3_MIN_FRAG_LOG_SIZE 10
#ifdef __KERNEL__
# define EXT3_FRAG_SIZE(s) ((s)->u.ext3_sb.s_frag_size)
# define EXT3_FRAGS_PER_BLOCK(s) ((s)->u.ext3_sb.s_frags_per_block)
# define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size)
# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block)
#else
# define EXT3_FRAG_SIZE(s) (EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size)
# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s))
......@@ -164,10 +164,10 @@ struct ext3_group_desc
* Macro-instructions used to manage group descriptors
*/
#ifdef __KERNEL__
# define EXT3_BLOCKS_PER_GROUP(s) ((s)->u.ext3_sb.s_blocks_per_group)
# define EXT3_DESC_PER_BLOCK(s) ((s)->u.ext3_sb.s_desc_per_block)
# define EXT3_INODES_PER_GROUP(s) ((s)->u.ext3_sb.s_inodes_per_group)
# define EXT3_DESC_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_desc_per_block_bits)
# define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group)
# define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block)
# define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group)
# define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits)
#else
# define EXT3_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
# define EXT3_DESC_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc))
......@@ -346,7 +346,7 @@ struct ext3_inode {
#ifndef _LINUX_EXT2_FS_H
#define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt
#define set_opt(o, opt) o |= EXT3_MOUNT_##opt
#define test_opt(sb, opt) ((sb)->u.ext3_sb.s_mount_opt & \
#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \
EXT3_MOUNT_##opt)
#else
#define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD
......@@ -444,7 +444,10 @@ struct ext3_super_block {
};
#ifdef __KERNEL__
#define EXT3_SB(sb) (&((sb)->u.ext3_sb))
static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb)
{
return sb->u.generic_sbp;
}
static inline struct ext3_inode_info *EXT3_I(struct inode *inode)
{
return container_of(inode, struct ext3_inode_info, vfs_inode);
......
......@@ -96,6 +96,8 @@ extern const char *print_tainted(void);
#define TAINT_FORCED_MODULE (1<<1)
#define TAINT_UNSAFE_SMP (1<<2)
extern void dump_stack(void);
#if DEBUG
#define pr_debug(fmt,arg...) \
printk(KERN_DEBUG fmt,##arg)
......
......@@ -19,9 +19,6 @@ extern unsigned long max_mapnr;
extern unsigned long num_physpages;
extern void * high_memory;
extern int page_cluster;
/* The inactive_clean lists are per zone. */
extern struct list_head active_list;
extern struct list_head inactive_list;
#include <asm/page.h>
#include <asm/pgtable.h>
......@@ -104,6 +101,7 @@ struct vm_area_struct {
#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_STACK_FLAGS (0x00000100 | VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT)
......@@ -377,6 +375,20 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long
int __set_page_dirty_buffers(struct page *page);
int __set_page_dirty_nobuffers(struct page *page);
#ifdef CONFIG_HUGETLB_PAGE
#define is_vm_hugetlb_page(vma) (vma->vm_flags & VM_HUGETLB)
extern int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
extern int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
extern int free_hugepages(struct vm_area_struct *);
#else
#define is_vm_hugetlb_page(vma) (0)
#define follow_hugetlb_page(mm, vma, pages, vmas, start, len, i) (0)
#define copy_hugetlb_page_range(dst, src, vma) (0)
#define free_hugepages(mpnt) do { } while(0)
#endif
/*
* If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads.
......
......@@ -16,7 +16,7 @@
*/
#ifndef CONFIG_FORCE_MAX_ZONEORDER
#define MAX_ORDER 10
#define MAX_ORDER 11
#else
#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
#endif
......@@ -151,8 +151,8 @@ struct zonelist {
* On NUMA machines, each NUMA node would have a pg_data_t to describe
* it's memory layout.
*
* XXX: we need to move the global memory statistics (active_list, ...)
* into the pg_data_t to properly support NUMA.
* Memory statistics and page replacement data structures are maintained on a
* per-zone basis.
*/
struct bootmem_data;
typedef struct pglist_data {
......
......@@ -78,6 +78,7 @@ extern struct page_state {
unsigned long nr_pagecache;
unsigned long nr_page_table_pages;
unsigned long nr_reverse_maps;
unsigned long nr_mapped;
} ____cacheline_aligned_in_smp page_states[NR_CPUS];
extern void get_page_state(struct page_state *ret);
......
......@@ -690,7 +690,11 @@ extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait));
extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
#ifdef CONFIG_SMP
extern void wait_task_inactive(task_t * p);
#else
#define wait_task_inactive(p) do { } while (0)
#endif
extern void kick_if_running(task_t * p);
#define __wait_event(wq, condition) \
......@@ -956,6 +960,34 @@ static inline void cond_resched(void)
__cond_resched();
}
#ifdef CONFIG_PREEMPT
/*
* cond_resched_lock() - if a reschedule is pending, drop the given lock,
* call schedule, and on return reacquire the lock.
*
* Note: this does not assume the given lock is the _only_ lock held.
* The kernel preemption counter gives us "free" checking that we are
* atomic -- let's use it.
*/
static inline void cond_resched_lock(spinlock_t * lock)
{
if (need_resched() && preempt_count() == 1) {
_raw_spin_unlock(lock);
preempt_enable_no_resched();
__cond_resched();
spin_lock(lock);
}
}
#else
static inline void cond_resched_lock(spinlock_t * lock)
{
}
#endif
/* Reevaluate whether the task has signals pending delivery.
This is required every time the blocked sigset_t changes.
Athread cathreaders should have t->sigmask_lock. */
......
......@@ -128,6 +128,7 @@ enum
KERN_TAINTED=53, /* int: various kernel tainted flags */
KERN_CADPID=54, /* int: PID of the process to notify on CAD */
KERN_PIDMAX=55, /* int: PID # limit */
KERN_HUGETLB_PAGE_NUM=56, /* int: Number of available Huge Pages */
};
......
......@@ -55,10 +55,8 @@ void release_task(struct task_struct * p)
if (p->state != TASK_ZOMBIE)
BUG();
#ifdef CONFIG_SMP
if (p != current)
wait_task_inactive(p);
#endif
atomic_dec(&p->user->processes);
security_ops->task_free_security(p);
free_uid(p->user);
......
......@@ -605,3 +605,6 @@ EXPORT_SYMBOL(pidhash);
#if defined(CONFIG_SMP) && defined(__GENERIC_PER_CPU)
EXPORT_SYMBOL(__per_cpu_offset);
#endif
/* debug */
EXPORT_SYMBOL(dump_stack);
......@@ -69,9 +69,7 @@ int ptrace_check_attach(struct task_struct *child, int kill)
if (!kill) {
if (child->state != TASK_STOPPED)
return -ESRCH;
#ifdef CONFIG_SMP
wait_task_inactive(child);
#endif
}
/* All systems go.. */
......
......@@ -98,6 +98,11 @@ int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
extern int acct_parm[];
#endif
#ifdef CONFIG_HUGETLB_PAGE
extern int htlbpage_max;
extern int set_hugetlb_mem_size(int);
#endif
static int parse_table(int *, int, void *, size_t *, void *, size_t,
ctl_table *, void **);
static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
......@@ -258,6 +263,10 @@ static ctl_table kern_table[] = {
#endif
{KERN_PIDMAX, "pid_max", &pid_max, sizeof (int),
0600, NULL, &proc_dointvec},
#ifdef CONFIG_HUGETLB_PAGE
{KERN_HUGETLB_PAGE_NUM, "numhugepages", &htlbpage_max, sizeof(int), 0644, NULL,
&proc_dointvec},
#endif
{0}
};
......@@ -897,6 +906,10 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
val = -val;
buffer += len;
left -= len;
#ifdef CONFIG_HUGETLB_PAGE
if (i == &htlbpage_max)
val = set_hugetlb_mem_size(val);
#endif
switch(op) {
case OP_SET: *i = val; break;
case OP_AND: *i &= val; break;
......
......@@ -12,7 +12,7 @@ export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o \
crc32.o rbtree.o radix-tree.o
obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \
bust_spinlocks.o rbtree.o radix-tree.o
bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
......
/*
* Provide a default dump_stack() function for architectures
* which don't implement their own.
*/
#include <linux/kernel.h>
#include <linux/module.h>
void dump_stack(void)
{
printk(KERN_NOTICE
"This architecture does not implement dump_stack()\n");
}
......@@ -208,6 +208,9 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
unsigned long end = vma->vm_end;
unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst, src, vma);
src_pgd = pgd_offset(src, address)-1;
dst_pgd = pgd_offset(dst, address)-1;
......@@ -389,8 +392,8 @@ void unmap_page_range(mmu_gather_t *tlb, struct vm_area_struct *vma, unsigned lo
{
pgd_t * dir;
if (address >= end)
BUG();
BUG_ON(address >= end);
dir = pgd_offset(vma->vm_mm, address);
tlb_start_vma(tlb, vma);
do {
......@@ -401,30 +404,56 @@ void unmap_page_range(mmu_gather_t *tlb, struct vm_area_struct *vma, unsigned lo
tlb_end_vma(tlb, vma);
}
/*
* remove user pages in a given range.
/* Dispose of an entire mmu_gather_t per rescheduling point */
#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
#define ZAP_BLOCK_SIZE (FREE_PTE_NR * PAGE_SIZE)
#endif
/* For UP, 256 pages at a time gives nice low latency */
#if !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
#define ZAP_BLOCK_SIZE (256 * PAGE_SIZE)
#endif
/* No preempt: go for the best straight-line efficiency */
#if !defined(CONFIG_PREEMPT)
#define ZAP_BLOCK_SIZE (~(0UL))
#endif
/**
* zap_page_range - remove user pages in a given range
* @vma: vm_area_struct holding the applicable pages
* @address: starting address of pages to zap
* @size: number of bytes to zap
*/
void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size)
{
struct mm_struct *mm = vma->vm_mm;
mmu_gather_t *tlb;
unsigned long start = address, end = address + size;
unsigned long end, block;
spin_lock(&mm->page_table_lock);
/*
* This is a long-lived spinlock. That's fine.
* There's no contention, because the page table
* lock only protects against kswapd anyway, and
* even if kswapd happened to be looking at this
* process we _want_ it to get stuck.
* This was once a long-held spinlock. Now we break the
* work up into ZAP_BLOCK_SIZE units and relinquish the
* lock after each interation. This drastically lowers
* lock contention and allows for a preemption point.
*/
if (address >= end)
BUG();
spin_lock(&mm->page_table_lock);
flush_cache_range(vma, address, end);
while (size) {
block = (size > ZAP_BLOCK_SIZE) ? ZAP_BLOCK_SIZE : size;
end = address + block;
flush_cache_range(vma, address, end);
tlb = tlb_gather_mmu(mm, 0);
unmap_page_range(tlb, vma, address, end);
tlb_finish_mmu(tlb, start, end);
tlb_finish_mmu(tlb, address, end);
cond_resched_lock(&mm->page_table_lock);
address += block;
size -= block;
}
spin_unlock(&mm->page_table_lock);
}
......@@ -504,6 +533,11 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|| !(flags & vma->vm_flags))
return i ? : -EFAULT;
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &len, i);
continue;
}
spin_lock(&mm->page_table_lock);
do {
struct page *map;
......
......@@ -196,10 +196,11 @@ void * mempool_alloc(mempool_t *pool, int gfp_mask)
return element;
/*
* If the pool is less than 50% full then try harder
* to allocate an element:
* If the pool is less than 50% full and we can perform effective
* page reclaim then try harder to allocate an element.
*/
if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) {
if ((gfp_mask & __GFP_FS) && (gfp_mask != gfp_nowait) &&
(pool->curr_nr <= pool->min_nr/2)) {
element = pool->alloc(gfp_mask, pool->pool_data);
if (likely(element != NULL))
return element;
......
......@@ -1031,10 +1031,14 @@ static struct vm_area_struct *touched_by_munmap(struct mm_struct *mm,
touched = NULL;
do {
struct vm_area_struct *next = mpnt->vm_next;
if (!(is_vm_hugetlb_page(mpnt))) {
mpnt->vm_next = touched;
touched = mpnt;
mm->map_count--;
rb_erase(&mpnt->vm_rb, &mm->mm_rb);
mm->map_count--;
}
else
free_hugepages(mpnt);
mpnt = next;
} while (mpnt && mpnt->vm_start < end);
*npp = mpnt;
......@@ -1273,7 +1277,10 @@ void exit_mmap(struct mm_struct * mm)
vm_unacct_memory((end - start) >> PAGE_SHIFT);
mm->map_count--;
if (!(is_vm_hugetlb_page(mpnt)))
unmap_page_range(tlb, mpnt, start, end);
else
mpnt->vm_ops->close(mpnt);
mpnt = mpnt->vm_next;
}
......
......@@ -321,6 +321,11 @@ asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot
/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
if (is_vm_hugetlb_page(vma)) {
error = -EACCES;
goto out;
}
newflags = prot | (vma->vm_flags & ~(PROT_READ | PROT_WRITE | PROT_EXEC));
if ((newflags & ~(newflags >> 4)) & 0xf) {
error = -EACCES;
......
......@@ -311,6 +311,10 @@ unsigned long do_mremap(unsigned long addr,
vma = find_vma(current->mm, addr);
if (!vma || vma->vm_start > addr)
goto out;
if (is_vm_hugetlb_page(vma)) {
ret = -EINVAL;
goto out;
}
/* We can't remap across vm area boundaries */
if (old_len > vma->vm_end - addr)
goto out;
......
......@@ -23,6 +23,7 @@
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
unsigned long totalram_pages;
unsigned long totalhigh_pages;
......@@ -561,6 +562,7 @@ void get_page_state(struct page_state *ret)
ret->nr_pagecache += ps->nr_pagecache;
ret->nr_page_table_pages += ps->nr_page_table_pages;
ret->nr_reverse_maps += ps->nr_reverse_maps;
ret->nr_mapped += ps->nr_mapped;
}
}
......@@ -589,7 +591,7 @@ void si_meminfo(struct sysinfo *val)
val->totalram = totalram_pages;
val->sharedram = 0;
val->freeram = nr_free_pages();
val->bufferram = get_page_cache_size();
val->bufferram = nr_blockdev_pages();
#ifdef CONFIG_HIGHMEM
val->totalhigh = totalhigh_pages;
val->freehigh = nr_free_highpages();
......
......@@ -48,9 +48,9 @@ read_pages(struct file *file, struct address_space *mapping,
struct page *page = list_entry(pages->prev, struct page, list);
list_del(&page->list);
if (!add_to_page_cache(page, mapping, page->index)) {
mapping->a_ops->readpage(file, page);
if (!pagevec_add(&lru_pvec, page))
__pagevec_lru_add(&lru_pvec);
mapping->a_ops->readpage(file, page);
} else {
page_cache_release(page);
}
......
......@@ -214,6 +214,7 @@ void page_add_rmap(struct page * page, pte_t * ptep)
if (page->pte.direct == 0) {
page->pte.direct = pte_paddr;
SetPageDirect(page);
inc_page_state(nr_mapped);
goto out;
}
......@@ -336,6 +337,8 @@ void page_remove_rmap(struct page * page, pte_t * ptep)
out:
pte_chain_unlock(page);
if (!page_mapped(page))
dec_page_state(nr_mapped);
return;
}
......@@ -447,6 +450,7 @@ int try_to_unmap(struct page * page)
ret = try_to_unmap_one(page, page->pte.direct);
if (ret == SWAP_SUCCESS) {
page->pte.direct = 0;
dec_page_state(nr_reverse_maps);
ClearPageDirect(page);
}
goto out;
......@@ -500,6 +504,8 @@ int try_to_unmap(struct page * page)
}
}
out:
if (!page_mapped(page))
dec_page_state(nr_mapped);
return ret;
}
......
......@@ -487,7 +487,7 @@ void __init kmem_cache_sizes_init(void)
/* Inc off-slab bufctl limit until the ceiling is hit. */
if (!(OFF_SLAB(sizes->cs_cachep))) {
offslab_limit = sizes->cs_size-sizeof(slab_t);
offslab_limit /= 2;
offslab_limit /= sizeof(kmem_bufctl_t);
}
sizes->cs_dmacachep = kmem_cache_create(
cache_names[sizes-cache_sizes].name_dma,
......
......@@ -11,6 +11,8 @@
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/vmalloc.h>
#include <asm/uaccess.h>
......@@ -309,6 +311,7 @@ void __vunmap(void *addr, int deallocate_pages)
*/
void vfree(void *addr)
{
BUG_ON(in_interrupt());
__vunmap(addr, 1);
}
......@@ -324,6 +327,7 @@ void vfree(void *addr)
*/
void vunmap(void *addr)
{
BUG_ON(in_interrupt());
__vunmap(addr, 0);
}
......
......@@ -536,6 +536,20 @@ shrink_caches(struct zone *classzone, int priority,
/*
* This is the main entry point to page reclaim.
*
* If a full scan of the inactive list fails to free enough memory then we
* are "out of memory" and something needs to be killed.
*
* If the caller is !__GFP_FS then the probability of a failure is reasonably
* high - the zone may be full of dirty or under-writeback pages, which this
* caller can't do much about. So for !__GFP_FS callers, we just perform a
* small LRU walk and if that didn't work out, fail the allocation back to the
* caller. GFP_NOFS allocators need to know how to deal with it. Kicking
* bdflush, waiting and retrying will work.
*
* This is a fairly lame algorithm - it can result in excessive CPU burning and
* excessive rotation of the inactive list, which is _supposed_ to be an LRU,
* yes?
*/
int
try_to_free_pages(struct zone *classzone,
......@@ -546,12 +560,15 @@ try_to_free_pages(struct zone *classzone,
KERNEL_STAT_INC(pageoutrun);
do {
for (priority = DEF_PRIORITY; priority; priority--) {
nr_pages = shrink_caches(classzone, priority,
gfp_mask, nr_pages);
if (nr_pages <= 0)
return 1;
} while (--priority);
if (!(gfp_mask & __GFP_FS))
break;
}
if (gfp_mask & __GFP_FS)
out_of_memory();
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment