Commit ef01a983 authored by Linus Torvalds

Import 2.3.8pre1

parent cbf5d468
VERSION = 2
PATCHLEVEL = 3
SUBLEVEL = 7
SUBLEVEL = 8
EXTRAVERSION =
ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
@@ -125,7 +125,7 @@ ssize_t block_write(struct file * filp, const char * buf,
buffercount=0;
}
balance_dirty(dev);
if(write_error)
if (write_error)
break;
}
if ( buffercount ){
This diff is collapsed.
@@ -512,7 +512,9 @@ void locks_remove_flock(struct file *filp)
while ((fl = *before) != NULL) {
if ((fl->fl_flags & FL_FLOCK) && fl->fl_file == filp) {
int (*lock)(struct file *, int, struct file_lock *);
lock = filp->f_op->lock;
lock = NULL;
if (filp->f_op)
lock = filp->f_op->lock;
if (lock) {
file_lock = *fl;
file_lock.fl_type = F_UNLCK;
@@ -50,7 +50,7 @@
#endif
#define spin_lock_init(lock) do { } while(0)
#define spin_lock(lock) do { } while(0)
#define spin_lock(lock) (void)(lock) /* Not "unused variable". */
#define spin_trylock(lock) (1)
#define spin_unlock_wait(lock) do { } while(0)
#define spin_unlock(lock) do { } while(0)
@@ -109,9 +109,9 @@ typedef struct {
#define RW_LOCK_UNLOCKED (rwlock_t) { 0 }
#endif
#define read_lock(lock) do { } while(0)
#define read_lock(lock) (void)(lock) /* Not "unused variable". */
#define read_unlock(lock) do { } while(0)
#define write_lock(lock) do { } while(0)
#define write_lock(lock) (void)(lock) /* Not "unused variable". */
#define write_unlock(lock) do { } while(0)
#else /* __SMP__ */
@@ -85,7 +85,7 @@ extern void swap_setup (void);
extern int try_to_free_pages(unsigned int gfp_mask);
/* linux/mm/page_io.c */
extern void rw_swap_page(int, unsigned long, char *, int);
extern void rw_swap_page(int, struct page *, int);
extern void rw_swap_page_nocache(int, unsigned long, char *);
extern void rw_swap_page_nolock(int, unsigned long, char *, int);
extern void swap_after_unlock_page (unsigned long entry);
@@ -146,13 +146,6 @@ extern unsigned long swap_cache_find_total;
extern unsigned long swap_cache_find_success;
#endif
extern inline unsigned long in_swap_cache(struct page *page)
{
if (PageSwapCache(page))
return page->offset;
return 0;
}
/*
* Work out if there are any other processes sharing this page, ignoring
* any page reference coming from the swap cache, or from outstanding
@@ -69,7 +69,6 @@ extern int console_loglevel;
static int init(void *);
extern int bdflush(void *);
extern int kswapd(void *);
extern int kpiod(void *);
extern void kswapd_setup(void);
extern void init_IRQ(void);
@@ -1304,7 +1303,6 @@ static void __init do_basic_setup(void)
kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
/* Start the background pageout daemon. */
kswapd_setup();
kernel_thread(kpiod, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
#if CONFIG_AP1000
@@ -37,28 +37,9 @@
atomic_t page_cache_size = ATOMIC_INIT(0);
struct page * page_hash_table[PAGE_HASH_SIZE];
/*
* Define a request structure for outstanding page write requests
* to the background page io daemon
*/
struct pio_request
{
struct pio_request * next;
struct file * file;
unsigned long offset;
unsigned long page;
};
static struct pio_request *pio_first = NULL, **pio_last = &pio_first;
static kmem_cache_t *pio_request_cache;
static DECLARE_WAIT_QUEUE_HEAD(pio_wait);
spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
static inline void
make_pio_request(struct file *, unsigned long, unsigned long);
void __add_page_to_hash_queue(struct page * page, struct page **p){
atomic_inc(&page_cache_size);
if((page->next_hash = *p) != NULL)
@@ -308,15 +289,11 @@ int shrink_mmap(int priority, int gfp_mask)
/* Is it a buffer page? */
if (page->buffers) {
kdev_t dev = page->buffers->b_dev;
spin_unlock(&pagecache_lock);
count--;
if (try_to_free_buffers(page))
goto made_progress;
if (!atomic_read(&too_many_dirty_buffers)) {
atomic_set(&too_many_dirty_buffers, 1);
balance_dirty(dev);
}
goto unlock_continue;
spin_lock(&pagecache_lock);
}
/* We can't free pages unless there's just one user */
@@ -646,7 +623,6 @@ struct page * __find_get_page (struct inode * inode,
struct page * __find_lock_page (struct inode * inode,
unsigned long offset, struct page **hash)
{
int locked;
struct page *page;
/*
@@ -656,16 +632,12 @@ struct page * __find_lock_page (struct inode * inode,
repeat:
spin_lock(&pagecache_lock);
page = __find_page_nolock(inode, offset, *hash);
locked = 0;
if (page) {
if (page)
get_page(page);
if (TryLockPage(page))
locked = 1;
}
spin_unlock(&pagecache_lock);
/* Found the page, sleep if locked. */
if (page && locked) {
if (page && TryLockPage(page)) {
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
@@ -1460,7 +1432,6 @@ static inline int do_write_page(struct inode * inode, struct file * file,
{
int retval;
unsigned long size;
loff_t loff = offset;
int (*writepage) (struct file *, struct page *);
struct page * page;
@@ -1479,15 +1450,8 @@ static inline int do_write_page(struct inode * inode, struct file * file,
page = mem_map + MAP_NR(page_addr);
lock_page(page);
if (writepage) {
retval = writepage(file, page);
} else {
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
if (size == file->f_op->write(file, page_addr, size, &loff))
retval = 0;
set_fs(old_fs);
}
retval = writepage(file, page);
UnlockPage(page);
return retval;
}
@@ -1505,25 +1469,12 @@ static int filemap_write_page(struct vm_area_struct * vma,
file = vma->vm_file;
dentry = file->f_dentry;
inode = dentry->d_inode;
if (!file->f_op->write)
return -EIO;
/*
* If a task terminates while we're swapping the page, the vma and
* and file could be released ... increment the count to be safe.
*/
file->f_count++;
/*
* If this is a swapping operation rather than msync(), then
* leave the actual IO, and the restoration of the file count,
* to the kpiod thread. Just queue the request for now.
*/
if (!wait) {
make_pio_request(file, offset, page);
return 0;
}
result = do_write_page(inode, file, (const char *) page, offset);
fput(file);
return result;
@@ -1535,9 +1486,12 @@ static int filemap_write_page(struct vm_area_struct * vma,
* trying to swap something out and swap something in
* at the same time..
*/
extern void wakeup_bdflush(int);
int filemap_swapout(struct vm_area_struct * vma, struct page * page)
{
return filemap_write_page(vma, page->offset, page_address(page), 0);
int retval = filemap_write_page(vma, page->offset, page_address(page), 0);
wakeup_bdflush(0);
return retval;
}
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
@@ -1712,8 +1666,11 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
struct inode *inode = file->f_dentry->d_inode;
ops = &file_private_mmap;
if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
if (!inode->i_op || !inode->i_op->writepage)
return -EINVAL;
ops = &file_shared_mmap;
}
if (!inode->i_sb || !S_ISREG(inode->i_mode))
return -EACCES;
if (!inode->i_op || !inode->i_op->readpage)
@@ -1950,126 +1907,3 @@ void put_cached_page(unsigned long addr)
page_count(page));
page_cache_release(page);
}
/* Add request for page IO to the queue */
static inline void put_pio_request(struct pio_request *p)
{
*pio_last = p;
p->next = NULL;
pio_last = &p->next;
}
/* Take the first page IO request off the queue */
static inline struct pio_request * get_pio_request(void)
{
struct pio_request * p = pio_first;
pio_first = p->next;
if (!pio_first)
pio_last = &pio_first;
return p;
}
/* Make a new page IO request and queue it to the kpiod thread */
static inline void make_pio_request(struct file *file,
unsigned long offset,
unsigned long pageaddr)
{
struct pio_request *p;
struct page *page;
page = page_cache_entry(pageaddr);
get_page(page);
/*
* We need to allocate without causing any recursive IO in the
* current thread's context. We might currently be swapping out
* as a result of an allocation made while holding a critical
* filesystem lock. To avoid deadlock, we *MUST* not reenter
* the filesystem in this thread.
*
* We can wait for kswapd to free memory, or we can try to free
* pages without actually performing further IO, without fear of
* deadlock. --sct
*/
while ((p = kmem_cache_alloc(pio_request_cache, GFP_BUFFER)) == NULL) {
if (try_to_free_pages(__GFP_WAIT))
continue;
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(HZ/10);
}
p->file = file;
p->offset = offset;
p->page = pageaddr;
put_pio_request(p);
wake_up(&pio_wait);
}
/*
* This is the only thread which is allowed to write out filemap pages
* while swapping.
*
* To avoid deadlock, it is important that we never reenter this thread.
* Although recursive memory allocations within this thread may result
* in more page swapping, that swapping will always be done by queuing
* another IO request to the same thread: we will never actually start
* that IO request until we have finished with the current one, and so
* we will not deadlock.
*/
int kpiod(void * unused)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
struct inode * inode;
struct dentry * dentry;
struct pio_request * p;
tsk->session = 1;
tsk->pgrp = 1;
strcpy(tsk->comm, "kpiod");
sigfillset(&tsk->blocked);
/*
* Mark this task as a memory allocator - we don't want to get caught
* up in the regular mm freeing frenzy if we have to allocate memory
* in order to write stuff out.
*/
tsk->flags |= PF_MEMALLOC;
lock_kernel();
pio_request_cache = kmem_cache_create("pio_request",
sizeof(struct pio_request),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
if (!pio_request_cache)
panic ("Could not create pio_request slab cache");
while (1) {
tsk->state = TASK_INTERRUPTIBLE;
add_wait_queue(&pio_wait, &wait);
if (!pio_first)
schedule();
remove_wait_queue(&pio_wait, &wait);
tsk->state = TASK_RUNNING;
while (pio_first) {
p = get_pio_request();
dentry = p->file->f_dentry;
inode = dentry->d_inode;
do_write_page(inode, p->file,
(const char *) p->page, p->offset);
fput(p->file);
page_cache_free(p->page);
kmem_cache_free(pio_request_cache, p);
}
}
}
@@ -124,6 +124,9 @@ int __free_page(struct page *page)
if (!PageReserved(page) && put_page_testzero(page)) {
if (PageSwapCache(page))
PAGE_BUG(page);
if (PageLocked(page))
PAGE_BUG(page);
page->flags &= ~(1 << PG_referenced);
free_pages_ok(page - mem_map, 0);
return 1;
@@ -140,6 +143,8 @@ int free_pages(unsigned long addr, unsigned long order)
if (!PageReserved(map) && put_page_testzero(map)) {
if (PageSwapCache(map))
PAGE_BUG(map);
if (PageLocked(map))
PAGE_BUG(map);
map->flags &= ~(1 << PG_referenced);
free_pages_ok(map_nr, order);
return 1;
@@ -35,7 +35,7 @@ static DECLARE_WAIT_QUEUE_HEAD(lock_queue);
* that shared pages stay shared while being swapped.
*/
static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait)
static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait, int dolock)
{
unsigned long type, offset;
struct swap_info_struct * p;
@@ -84,25 +84,12 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
return;
}
if (PageSwapCache(page)) {
if (dolock) {
/* Make sure we are the only process doing I/O with this swap page. */
while (test_and_set_bit(offset,p->swap_lockmap)) {
run_task_queue(&tq_disk);
sleep_on(&lock_queue);
}
/*
* Make sure that we have a swap cache association for this
* page. We need this to find which swap page to unlock once
* the swap IO has completed to the physical page. If the page
* is not already in the cache, just overload the offset entry
* as if it were: we are not allowed to manipulate the inode
* hashing for locked pages.
*/
if (page->offset != entry) {
printk ("swap entry mismatch");
return;
}
}
if (rw == READ) {
ClearPageUptodate(page);
@@ -162,9 +149,9 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
/* Do some cleaning up so if this ever happens we can hopefully
* trigger controlled shutdown.
*/
if (PageSwapCache(page)) {
if (dolock) {
if (!test_and_clear_bit(offset,p->swap_lockmap))
printk("swap_after_unlock_page: lock already cleared\n");
printk("rw_swap_page_base: lock already cleared\n");
wake_up(&lock_queue);
}
put_page(page);
@@ -174,7 +161,7 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
set_bit(PG_decr_after, &page->flags);
atomic_inc(&nr_async_pages);
}
if (PageSwapCache(page)) {
if (dolock) {
/* only lock/unlock swap cache pages! */
set_bit(PG_swap_unlock_after, &page->flags);
}
@@ -210,7 +197,7 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
* just before it removes the page from the page cache.
*/
/* This is run when asynchronous page I/O has completed. */
void swap_after_unlock_page (unsigned long entry)
void swap_after_unlock_page(unsigned long entry)
{
unsigned long type, offset;
struct swap_info_struct * p;
@@ -231,65 +218,42 @@ void swap_after_unlock_page (unsigned long entry)
wake_up(&lock_queue);
}
/* A simple wrapper so the base function doesn't need to enforce
* that all swap pages go through the swap cache!
/*
* A simple wrapper so the base function doesn't need to enforce
* that all swap pages go through the swap cache! We verify that:
* - the page is locked
* - it's marked as being swap-cache
* - it's associated with the swap inode
*/
void rw_swap_page(int rw, unsigned long entry, char *buf, int wait)
void rw_swap_page(int rw, struct page *page, int wait)
{
struct page *page = mem_map + MAP_NR(buf);
unsigned long entry = page->offset;
if (page->inode && page->inode != &swapper_inode)
if (!PageLocked(page))
PAGE_BUG(page);
/*
* Make sure that we have a swap cache association for this
* page. We need this to find which swap page to unlock once
* the swap IO has completed to the physical page. If the page
* is not already in the cache, just overload the offset entry
* as if it were: we are not allowed to manipulate the inode
* hashing for locked pages.
*/
if (!PageSwapCache(page)) {
printk("VM: swap page is not in swap cache\n");
return;
}
if (page->offset != entry) {
printk ("swap entry mismatch");
return;
}
rw_swap_page_base(rw, entry, page, wait);
if (!PageSwapCache(page))
PAGE_BUG(page);
if (page->inode != &swapper_inode)
PAGE_BUG(page);
rw_swap_page_base(rw, entry, page, wait, 1);
}
/*
* Setting up a new swap file needs a simple wrapper just to read the
* swap signature. SysV shared memory also needs a simple wrapper.
*/
void rw_swap_page_nocache(int rw, unsigned long entry, char *buffer)
void rw_swap_page_nocache(int rw, unsigned long entry, char *buf)
{
struct page *page;
struct page *page = mem_map + MAP_NR(buf);
page = mem_map + MAP_NR((unsigned long) buffer);
if (TryLockPage(page))
PAGE_BUG(page);
if (test_and_set_bit(PG_swap_cache, &page->flags))
if (PageSwapCache(page))
PAGE_BUG(page);
if (page->inode)
PAGE_BUG(page);
get_page(page); /* Protect from shrink_mmap() */
page->inode = &swapper_inode;
page->offset = entry;
rw_swap_page(rw, entry, buffer, 1);
/*
* and now remove it from the pagecache ...
*/
if (TryLockPage(page))
PAGE_BUG(page);
PageClearSwapCache(page);
remove_inode_page(page);
page_cache_release(page);
UnlockPage(page);
rw_swap_page_base(rw, entry, page, 1, 1);
}
/*
@@ -298,17 +262,13 @@ void rw_swap_page_nocache(int rw, unsigned long entry, char *buffer)
* Therefore we can't use it. Later when we can remove the need for the
* lock map and we can reduce the number of functions exported.
*/
void rw_swap_page_nolock(int rw, unsigned long entry, char *buffer, int wait)
void rw_swap_page_nolock(int rw, unsigned long entry, char *buf, int wait)
{
struct page *page = mem_map + MAP_NR((unsigned long) buffer);
struct page *page = mem_map + MAP_NR(buf);
if (!PageLocked(page)) {
printk("VM: rw_swap_page_nolock: page not locked!\n");
return;
}
if (PageSwapCache(page)) {
printk ("VM: rw_swap_page_nolock: page in swap cache!\n");
return;
}
rw_swap_page_base(rw, entry, page, wait);
if (!PageLocked(page))
PAGE_BUG(page);
if (PageSwapCache(page))
PAGE_BUG(page);
rw_swap_page_base(rw, entry, page, wait, 0);
}
@@ -202,21 +202,27 @@ int swap_count(unsigned long entry)
static inline void remove_from_swap_cache(struct page *page)
{
if (!page->inode) {
struct inode *inode = page->inode;
if (!inode) {
printk ("VM: Removing swap cache page with zero inode hash "
"on page %08lx\n", page_address(page));
return;
}
if (page->inode != &swapper_inode) {
if (inode != &swapper_inode) {
printk ("VM: Removing swap cache page with wrong inode hash "
"on page %08lx\n", page_address(page));
}
if (!PageSwapCache(page))
PAGE_BUG(page);
#ifdef DEBUG_SWAP
printk("DebugVM: remove_from_swap_cache(%08lx count %d)\n",
page_address(page), page_count(page));
#endif
PageClearSwapCache(page);
if (inode->i_op->flushpage)
inode->i_op->flushpage(inode, page, 0);
remove_inode_page(page);
}
@@ -266,8 +272,14 @@ void free_page_and_swap_cache(unsigned long addr)
/*
* If we are the only user, then free up the swap cache.
*/
if (PageSwapCache(page) && !is_page_shared(page))
delete_from_swap_cache(page);
lock_page(page);
if (PageSwapCache(page) && !is_page_shared(page)) {
long entry = page->offset;
remove_from_swap_cache(page);
swap_free(entry);
page_cache_release(page);
}
UnlockPage(page);
__free_page(page);
}
@@ -355,7 +367,7 @@ struct page * read_swap_cache_async(unsigned long entry, int wait)
goto out_free_page;
LockPage(new_page);
rw_swap_page(READ, entry, (char *) new_page_addr, wait);
rw_swap_page(READ, new_page, wait);
#ifdef DEBUG_SWAP
printk("DebugVM: read_swap_cache_async created "
"entry %08lx at %p\n",
@@ -370,4 +382,3 @@ struct page * read_swap_cache_async(unsigned long entry, int wait)
out:
return found_page;
}
@@ -36,31 +36,35 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
{
pte_t pte;
unsigned long entry;
unsigned long page;
struct page * page_map;
unsigned long page_addr;
struct page * page;
pte = *page_table;
if (!pte_present(pte))
return 0;
page = pte_page(pte);
if (MAP_NR(page) >= max_mapnr)
return 0;
page_map = mem_map + MAP_NR(page);
goto out_failed;
page_addr = pte_page(pte);
if (MAP_NR(page_addr) >= max_mapnr)
goto out_failed;
page = mem_map + MAP_NR(page_addr);
if (pte_young(pte)) {
/*
* Dont be too eager to get aging right if
* memory is dangerously low.
*/
if (!low_on_memory && pte_young(pte)) {
/*
* Transfer the "accessed" bit from the page
* tables to the global page map.
*/
set_pte(page_table, pte_mkold(pte));
set_bit(PG_referenced, &page_map->flags);
return 0;
set_bit(PG_referenced, &page->flags);
goto out_failed;
}
if (PageReserved(page_map)
|| PageLocked(page_map)
|| ((gfp_mask & __GFP_DMA) && !PageDMA(page_map)))
return 0;
if (PageReserved(page)
|| PageLocked(page)
|| ((gfp_mask & __GFP_DMA) && !PageDMA(page)))
goto out_failed;
/*
* Is the page already in the swap cache? If so, then
@@ -70,15 +74,15 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
* Return 0, as we didn't actually free any real
* memory, and we should just continue our scan.
*/
if (PageSwapCache(page_map)) {
entry = page_map->offset;
if (PageSwapCache(page)) {
entry = page->offset;
swap_duplicate(entry);
set_pte(page_table, __pte(entry));
drop_pte:
vma->vm_mm->rss--;
flush_tlb_page(vma, address);
__free_page(page_map);
return 0;
__free_page(page);
goto out_failed;
}
/*
@@ -105,7 +109,7 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
* locks etc.
*/
if (!(gfp_mask & __GFP_IO))
return 0;
goto out_failed;
/*
* Ok, it's really dirty. That means that
@@ -120,7 +124,7 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
* assume we free'd something.
*
* NOTE NOTE NOTE! This should just set a
* dirty bit in page_map, and just drop the
* dirty bit in 'page', and just drop the
* pte. All the hard work would be done by
* shrink_mmap().
*
@@ -133,10 +137,9 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
flush_tlb_page(vma, address);
vma->vm_mm->rss--;
if (vma->vm_ops->swapout(vma, page_map))
if (vma->vm_ops->swapout(vma, page))
kill_proc(pid, SIGBUS, 1);
__free_page(page_map);
return 1;
goto out_free_success;
}
/*
@@ -147,23 +150,26 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
*/
entry = get_swap_page();
if (!entry)
return 0; /* No swap space left */
goto out_failed; /* No swap space left */
vma->vm_mm->rss--;
tsk->nswap++;
set_pte(page_table, __pte(entry));
flush_tlb_page(vma, address);
swap_duplicate(entry); /* One for the process, one for the swap cache */
add_to_swap_cache(page_map, entry);
add_to_swap_cache(page, entry);
/* We checked we were unlocked way up above, and we
have been careful not to stall until here */
LockPage(page_map);
LockPage(page);
/* OK, do a physical asynchronous write to swap. */
rw_swap_page(WRITE, entry, (char *) page, 0);
rw_swap_page(WRITE, page, 0);
__free_page(page_map);
out_free_success:
__free_page(page);
return 1;
out_failed:
return 0;
}
/*
@@ -490,8 +496,8 @@ int kswapd(void *unused)
if (!do_try_to_free_pages(GFP_KSWAPD))
break;
run_task_queue(&tq_disk);
} while (!tsk->need_resched);
run_task_queue(&tq_disk);
tsk->state = TASK_INTERRUPTIBLE;
schedule_timeout(HZ);
}