Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
linux
Commits
e7144e64
Commit
e7144e64
authored
Sep 22, 2002
by
Linus Torvalds
Browse files
Options
Browse Files
Download
Plain Diff
Merge master.kernel.org:/home/davem/BK/net-2.5
into home.transmeta.com:/home/torvalds/v2.5/linux
parents
da29f6a8
407ee6c8
Changes
18
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
397 additions
and
126 deletions
+397
-126
drivers/block/ll_rw_blk.c
drivers/block/ll_rw_blk.c
+135
-25
drivers/char/sysrq.c
drivers/char/sysrq.c
+4
-3
fs/buffer.c
fs/buffer.c
+4
-6
fs/ext2/ialloc.c
fs/ext2/ialloc.c
+8
-1
fs/ext3/inode.c
fs/ext3/inode.c
+2
-2
fs/fs-writeback.c
fs/fs-writeback.c
+22
-18
fs/mpage.c
fs/mpage.c
+13
-0
include/linux/backing-dev.h
include/linux/backing-dev.h
+14
-0
include/linux/blkdev.h
include/linux/blkdev.h
+1
-0
include/linux/buffer_head.h
include/linux/buffer_head.h
+0
-1
include/linux/sched.h
include/linux/sched.h
+2
-0
include/linux/writeback.h
include/linux/writeback.h
+4
-0
kernel/suspend.c
kernel/suspend.c
+0
-1
mm/filemap.c
mm/filemap.c
+4
-0
mm/mempool.c
mm/mempool.c
+2
-2
mm/page-writeback.c
mm/page-writeback.c
+40
-15
mm/swapfile.c
mm/swapfile.c
+20
-0
mm/vmscan.c
mm/vmscan.c
+122
-52
No files found.
drivers/block/ll_rw_blk.c
View file @
e7144e64
...
...
@@ -46,13 +46,76 @@ static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
struct
blk_dev_struct
blk_dev
[
MAX_BLKDEV
];
/* initialized by blk_dev_init() */
/*
*
How many reqeusts do we allocate per queue,
*
and how many do we "batch" on freeing them?
*
Number of requests per queue. This many for reads and for writes (twice
*
this number, total).
*/
int
queue_nr_requests
,
batch_requests
;
static
int
queue_nr_requests
;
/*
* How many free requests must be available before we wake a process which
* is waiting for a request?
*/
static
int
batch_requests
;
unsigned
long
blk_max_low_pfn
,
blk_max_pfn
;
int
blk_nohighio
=
0
;
/*
 * Congestion bookkeeping, one slot per transfer direction.  The array is
 * indexed with the READ/WRITE value that callers pass around as `rw'.
 */
static struct congestion_state {
	/* Tasks sleeping in blk_congestion_wait() for this direction */
	wait_queue_head_t wqh;
	/* Count of queues currently flagged congested in this direction */
	atomic_t nr_congested_queues;
} congestion_states[2];
/*
 * Free-request count below which a queue is treated as congested: one less
 * than a quarter of queue_nr_requests.  Together with the "off" threshold
 * this includes a little hysteresis to keep the context switch rate down.
 */
static inline int queue_congestion_on_threshold(void)
{
	int threshold = queue_nr_requests / 4 - 1;

	/* A negative result is replaced by 1 */
	return (threshold < 0) ? 1 : threshold;
}
/*
 * Free-request count at which a queue is considered uncongested again:
 * a quarter of queue_nr_requests plus one, capped at queue_nr_requests.
 */
static inline int queue_congestion_off_threshold(void)
{
	int threshold = queue_nr_requests / 4 + 1;

	if (threshold > queue_nr_requests)
		return queue_nr_requests;
	return threshold;
}
/*
 * Clear the congestion bit for direction @rw in @q's backing_dev_info.
 * If the bit was actually set, the per-direction count of congested queues
 * is decremented.  Any task sleeping on that direction's wait queue is
 * woken regardless of whether the bit changed.
 */
static void clear_queue_congested(request_queue_t *q, int rw)
{
	struct congestion_state *state = &congestion_states[rw];
	enum bdi_state flag = BDI_read_congested;

	if (rw == WRITE)
		flag = BDI_write_congested;

	if (test_and_clear_bit(flag, &q->backing_dev_info.state))
		atomic_dec(&state->nr_congested_queues);

	if (waitqueue_active(&state->wqh))
		wake_up(&state->wqh);
}
/*
 * Set the congestion bit for direction @rw in @q's backing_dev_info.  The
 * per-direction count of congested queues is incremented only when the bit
 * was previously clear.
 */
static void set_queue_congested(request_queue_t *q, int rw)
{
	enum bdi_state flag;

	if (rw == WRITE)
		flag = BDI_write_congested;
	else
		flag = BDI_read_congested;

	/* Bit already set: the queue has been counted, nothing more to do */
	if (test_and_set_bit(flag, &q->backing_dev_info.state))
		return;

	atomic_inc(&congestion_states[rw].nr_congested_queues);
}
/**
* bdev_get_queue: - return the queue that matches the given device
* @bdev: device
...
...
@@ -360,8 +423,8 @@ int blk_queue_init_tags(request_queue_t *q, int depth)
struct
blk_queue_tag
*
tags
;
int
bits
,
i
;
if
(
depth
>
queue_nr_requests
)
{
depth
=
queue_nr_requests
;
if
(
depth
>
(
queue_nr_requests
*
2
)
)
{
depth
=
(
queue_nr_requests
*
2
)
;
printk
(
"blk_queue_init_tags: adjusted depth to %d
\n
"
,
depth
);
}
...
...
@@ -1019,7 +1082,7 @@ static int __blk_cleanup_queue(struct request_list *list)
**/
void
blk_cleanup_queue
(
request_queue_t
*
q
)
{
int
count
=
queue_nr_requests
;
int
count
=
(
queue_nr_requests
*
2
)
;
count
-=
__blk_cleanup_queue
(
&
q
->
rq
[
READ
]);
count
-=
__blk_cleanup_queue
(
&
q
->
rq
[
WRITE
]);
...
...
@@ -1050,7 +1113,7 @@ static int blk_init_free_list(request_queue_t *q)
* Divide requests in half between read and write
*/
rl
=
&
q
->
rq
[
READ
];
for
(
i
=
0
;
i
<
queue_nr_requests
;
i
++
)
{
for
(
i
=
0
;
i
<
(
queue_nr_requests
*
2
)
;
i
++
)
{
rq
=
kmem_cache_alloc
(
request_cachep
,
SLAB_KERNEL
);
if
(
!
rq
)
goto
nomem
;
...
...
@@ -1058,7 +1121,7 @@ static int blk_init_free_list(request_queue_t *q)
/*
* half way through, switch to WRITE list
*/
if
(
i
==
queue_nr_requests
/
2
)
if
(
i
==
queue_nr_requests
)
rl
=
&
q
->
rq
[
WRITE
];
memset
(
rq
,
0
,
sizeof
(
struct
request
));
...
...
@@ -1144,7 +1207,7 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
* Get a free request. queue lock must be held and interrupts
* disabled on the way in.
*/
static
inline
struct
request
*
get_request
(
request_queue_t
*
q
,
int
rw
)
static
struct
request
*
get_request
(
request_queue_t
*
q
,
int
rw
)
{
struct
request
*
rq
=
NULL
;
struct
request_list
*
rl
=
q
->
rq
+
rw
;
...
...
@@ -1153,6 +1216,8 @@ static inline struct request *get_request(request_queue_t *q, int rw)
rq
=
blkdev_free_rq
(
&
rl
->
free
);
list_del
(
&
rq
->
queuelist
);
rl
->
count
--
;
if
(
rl
->
count
<
queue_congestion_on_threshold
())
set_queue_congested
(
q
,
rw
);
rq
->
flags
=
0
;
rq
->
rq_status
=
RQ_ACTIVE
;
rq
->
special
=
NULL
;
...
...
@@ -1365,13 +1430,50 @@ void blk_put_request(struct request *req)
* it didn't come out of our reserved rq pools
*/
if
(
rl
)
{
int
rw
=
0
;
list_add
(
&
req
->
queuelist
,
&
rl
->
free
);
if
(
++
rl
->
count
>=
batch_requests
&&
waitqueue_active
(
&
rl
->
wait
))
if
(
rl
==
&
q
->
rq
[
WRITE
])
rw
=
WRITE
;
else
if
(
rl
==
&
q
->
rq
[
READ
])
rw
=
READ
;
else
BUG
();
rl
->
count
++
;
if
(
rl
->
count
>=
queue_congestion_off_threshold
())
clear_queue_congested
(
q
,
rw
);
if
(
rl
->
count
>=
batch_requests
&&
waitqueue_active
(
&
rl
->
wait
))
wake_up
(
&
rl
->
wait
);
}
}
/**
* blk_congestion_wait - wait for a queue to become uncongested
* @rw: READ or WRITE
* @timeout: timeout in jiffies
*
* Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.
* If no queues are congested then just return, in the hope that the caller
* will submit some more IO.
*/
void
blk_congestion_wait
(
int
rw
,
long
timeout
)
{
DECLARE_WAITQUEUE
(
wait
,
current
);
struct
congestion_state
*
cs
=
&
congestion_states
[
rw
];
if
(
atomic_read
(
&
cs
->
nr_congested_queues
)
==
0
)
return
;
blk_run_queues
();
set_current_state
(
TASK_UNINTERRUPTIBLE
);
add_wait_queue
(
&
cs
->
wqh
,
&
wait
);
if
(
atomic_read
(
&
cs
->
nr_congested_queues
)
!=
0
)
schedule_timeout
(
timeout
);
set_current_state
(
TASK_RUNNING
);
remove_wait_queue
(
&
cs
->
wqh
,
&
wait
);
}
/*
* Has to be called with the request spinlock acquired
*/
...
...
@@ -1868,6 +1970,7 @@ void end_that_request_last(struct request *req)
int
__init
blk_dev_init
(
void
)
{
int
total_ram
=
nr_free_pages
()
<<
(
PAGE_SHIFT
-
10
);
int
i
;
request_cachep
=
kmem_cache_create
(
"blkdev_requests"
,
sizeof
(
struct
request
),
0
,
...
...
@@ -1876,26 +1979,33 @@ int __init blk_dev_init(void)
panic
(
"Can't create request pool slab cache
\n
"
);
/*
* Free request slots per queue.
* (Half for reads, half for writes)
*/
queue_nr_requests
=
(
total_ram
>>
8
)
&
~
15
;
/* One per quarter-megabyte */
if
(
queue_nr_requests
<
32
)
queue_nr_requests
=
32
;
if
(
queue_nr_requests
>
256
)
queue_nr_requests
=
256
;
/*
* Batch frees according to queue length
* Free request slots per queue. One per quarter-megabyte.
* We use this many requests for reads, and this many for writes.
*/
if
((
batch_requests
=
queue_nr_requests
/
4
)
>
32
)
batch_requests
=
32
;
printk
(
"block: %d slots per queue, batch=%d
\n
"
,
queue_nr_requests
,
batch_requests
);
queue_nr_requests
=
(
total_ram
>>
9
)
&
~
7
;
if
(
queue_nr_requests
<
16
)
queue_nr_requests
=
16
;
if
(
queue_nr_requests
>
128
)
queue_nr_requests
=
128
;
batch_requests
=
queue_nr_requests
/
8
;
if
(
batch_requests
>
8
)
batch_requests
=
8
;
printk
(
"block request queues:
\n
"
);
printk
(
" %d requests per read queue
\n
"
,
queue_nr_requests
);
printk
(
" %d requests per write queue
\n
"
,
queue_nr_requests
);
printk
(
" %d requests per batch
\n
"
,
batch_requests
);
printk
(
" enter congestion at %d
\n
"
,
queue_congestion_on_threshold
());
printk
(
" exit congestion at %d
\n
"
,
queue_congestion_off_threshold
());
blk_max_low_pfn
=
max_low_pfn
;
blk_max_pfn
=
max_pfn
;
for
(
i
=
0
;
i
<
ARRAY_SIZE
(
congestion_states
);
i
++
)
{
init_waitqueue_head
(
&
congestion_states
[
i
].
wqh
);
atomic_set
(
&
congestion_states
[
i
].
nr_congested_queues
,
0
);
}
return
0
;
};
...
...
drivers/char/sysrq.c
View file @
e7144e64
...
...
@@ -28,7 +28,8 @@
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
/* for fsync_bdev()/wakeup_bdflush() */
#include <linux/writeback.h>
#include <linux/buffer_head.h>
/* for fsync_bdev() */
#include <linux/spinlock.h>
...
...
@@ -227,7 +228,7 @@ static void sysrq_handle_sync(int key, struct pt_regs *pt_regs,
struct
tty_struct
*
tty
)
{
emergency_sync_scheduled
=
EMERG_SYNC
;
wakeup_bdflush
();
wakeup_bdflush
(
0
);
}
static
struct
sysrq_key_op
sysrq_sync_op
=
{
handler:
sysrq_handle_sync
,
...
...
@@ -239,7 +240,7 @@ static void sysrq_handle_mountro(int key, struct pt_regs *pt_regs,
struct
tty_struct
*
tty
)
{
emergency_sync_scheduled
=
EMERG_REMOUNT
;
wakeup_bdflush
();
wakeup_bdflush
(
0
);
}
static
struct
sysrq_key_op
sysrq_mountro_op
=
{
handler:
sysrq_handle_mountro
,
...
...
fs/buffer.c
View file @
e7144e64
...
...
@@ -458,19 +458,17 @@ void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
}
/*
* FIXME: What is this function actually trying to do? Why "zones[0]"?
* Is it still correct/needed if/when blockdev mappings use GFP_HIGHUSER?
* Kick pdflush then try to free up some ZONE_NORMAL memory.
*/
static
void
free_more_memory
(
void
)
{
struct
zone
*
zone
;
zone
=
contig_page_data
.
node_zonelists
[
GFP_NOFS
&
GFP_ZONEMASK
].
zones
[
0
];
wakeup_bdflush
();
try_to_free_pages
(
zone
,
GFP_NOFS
,
0
);
zone
=
contig_page_data
.
node_zonelists
[
GFP_NOFS
&
GFP_ZONEMASK
].
zones
[
0
];
wakeup_bdflush
(
1024
);
blk_run_queues
();
yield
();
try_to_free_pages
(
zone
,
GFP_NOFS
,
0
);
}
/*
...
...
fs/ext2/ialloc.c
View file @
e7144e64
...
...
@@ -16,9 +16,9 @@
#include "ext2.h"
#include <linux/quotaops.h>
#include <linux/sched.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
/*
* ialloc.c contains the inodes allocation and deallocation routines
*/
...
...
@@ -169,6 +169,13 @@ static void ext2_preread_inode(struct inode *inode)
unsigned
long
block
;
struct
buffer_head
*
bh
;
struct
ext2_group_desc
*
gdp
;
struct
backing_dev_info
*
bdi
;
bdi
=
inode
->
i_mapping
->
backing_dev_info
;
if
(
bdi_read_congested
(
bdi
))
return
;
if
(
bdi_write_congested
(
bdi
))
return
;
block_group
=
(
inode
->
i_ino
-
1
)
/
EXT2_INODES_PER_GROUP
(
inode
->
i_sb
);
gdp
=
ext2_get_group_desc
(
inode
->
i_sb
,
block_group
,
&
bh
);
...
...
fs/ext3/inode.c
View file @
e7144e64
...
...
@@ -1473,7 +1473,7 @@ struct address_space_operations ext3_aops = {
};
/* For writeback mode, we can use mpage_writepages() */
#if 0 /* Doesn't work for shared mappings */
static int
ext3_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
...
...
@@ -1486,12 +1486,12 @@ ext3_writepages(struct address_space *mapping, struct writeback_control *wbc)
ret = err;
return ret;
}
#endif
struct
address_space_operations
ext3_writeback_aops
=
{
.
readpage
=
ext3_readpage
,
/* BKL not held. Don't need */
.
readpages
=
ext3_readpages
,
/* BKL not held. Don't need */
.
writepage
=
ext3_writepage
,
/* BKL not held. We take it */
.
writepages
=
ext3_writepages
,
/* BKL not held. Don't need */
.
sync_page
=
block_sync_page
,
.
prepare_write
=
ext3_prepare_write
,
/* BKL not held. We take it */
.
commit_write
=
ext3_commit_write
,
/* BKL not held. We take it */
...
...
fs/fs-writeback.c
View file @
e7144e64
...
...
@@ -220,44 +220,52 @@ __writeback_single_inode(struct inode *inode, int sync,
*
* FIXME: this linear search could get expensive with many filesystems. But
* how to fix? We need to go from an address_space to all inodes which share
* a queue with that address_space.
* a queue with that address_space. (Easy: have a global "dirty superblocks"
* list).
*
* The inodes to be written are parked on sb->s_io. They are moved back onto
* sb->s_dirty as they are selected for writing. This way, none can be missed
* on the writer throttling path, and we get decent balancing between many
* thrlttled threads: we don't want them all piling up on __wait_on_inode.
* throttled threads: we don't want them all piling up on __wait_on_inode.
*/
static
void
sync_sb_inodes
(
struct
super_block
*
sb
,
struct
writeback_control
*
wbc
)
{
struct
list_head
*
tmp
;
struct
list_head
*
head
;
const
unsigned
long
start
=
jiffies
;
/* livelock avoidance */
list_splice_init
(
&
sb
->
s_dirty
,
&
sb
->
s_io
);
head
=
&
sb
->
s_io
;
while
((
tmp
=
head
->
prev
)
!=
head
)
{
struct
inode
*
inode
=
list_entry
(
tmp
,
struct
inode
,
i_list
);
while
(
!
list_empty
(
&
sb
->
s_io
))
{
struct
inode
*
inode
=
list_entry
(
sb
->
s_io
.
prev
,
struct
inode
,
i_list
);
struct
address_space
*
mapping
=
inode
->
i_mapping
;
struct
backing_dev_info
*
bdi
;
struct
backing_dev_info
*
bdi
=
mapping
->
backing_dev_info
;
int
really_sync
;
if
(
wbc
->
bdi
&&
mapping
->
backing_dev_info
!=
wbc
->
bdi
)
{
if
(
wbc
->
nonblocking
&&
bdi_write_congested
(
bdi
))
{
wbc
->
encountered_congestion
=
1
;
if
(
sb
!=
blockdev_superblock
)
break
;
/*
inappropriate superblock
*/
break
;
/*
Skip a congested fs
*/
list_move
(
&
inode
->
i_list
,
&
sb
->
s_dirty
);
continue
;
/* not this blockdev */
continue
;
/* Skip a congested blockdev */
}
if
(
wbc
->
bdi
&&
bdi
!=
wbc
->
bdi
)
{
if
(
sb
!=
blockdev_superblock
)
break
;
/* fs has the wrong queue */
list_move
(
&
inode
->
i_list
,
&
sb
->
s_dirty
);
continue
;
/* blockdev has wrong queue */
}
/* Was this inode dirtied after sync_sb_inodes was called? */
if
(
time_after
(
mapping
->
dirtied_when
,
start
))
break
;
/* Was this inode dirtied too recently? */
if
(
wbc
->
older_than_this
&&
time_after
(
mapping
->
dirtied_when
,
*
wbc
->
older_than_this
))
goto
out
;
break
;
bdi
=
mapping
->
backing_dev_info
;
/* Is another pdflush already flushing this queue? */
if
(
current_is_pdflush
()
&&
!
writeback_acquire
(
bdi
))
break
;
...
...
@@ -278,11 +286,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
if
(
wbc
->
nr_to_write
<=
0
)
break
;
}
out:
/*
* Leave any unwritten inodes on s_io.
*/
return
;
return
;
/* Leave any unwritten inodes on s_io */
}
/*
...
...
fs/mpage.c
View file @
e7144e64
...
...
@@ -22,6 +22,7 @@
#include <linux/prefetch.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
/*
...
...
@@ -522,6 +523,7 @@ int
mpage_writepages
(
struct
address_space
*
mapping
,
struct
writeback_control
*
wbc
,
get_block_t
get_block
)
{
struct
backing_dev_info
*
bdi
=
mapping
->
backing_dev_info
;
struct
bio
*
bio
=
NULL
;
sector_t
last_block_in_bio
=
0
;
int
ret
=
0
;
...
...
@@ -530,6 +532,12 @@ mpage_writepages(struct address_space *mapping,
struct
pagevec
pvec
;
int
(
*
writepage
)(
struct
page
*
);
if
(
wbc
->
nonblocking
&&
bdi_write_congested
(
bdi
))
{
blk_run_queues
();
wbc
->
encountered_congestion
=
1
;
return
0
;
}
writepage
=
NULL
;
if
(
get_block
==
NULL
)
writepage
=
mapping
->
a_ops
->
writepage
;
...
...
@@ -585,6 +593,11 @@ mpage_writepages(struct address_space *mapping,
}
if
(
ret
||
(
--
(
wbc
->
nr_to_write
)
<=
0
))
done
=
1
;
if
(
wbc
->
nonblocking
&&
bdi_write_congested
(
bdi
))
{
blk_run_queues
();
wbc
->
encountered_congestion
=
1
;
done
=
1
;
}
}
else
{
unlock_page
(
page
);
}
...
...
include/linux/backing-dev.h
View file @
e7144e64
...
...
@@ -8,11 +8,15 @@
#ifndef _LINUX_BACKING_DEV_H
#define _LINUX_BACKING_DEV_H
#include <asm/atomic.h>
/*
 * Bit numbers used in backing_dev_info.state
 */
enum bdi_state {
	BDI_pdflush,		/* A pdflush thread is working this device */
	BDI_write_congested,	/* The write queue is getting full */
	BDI_read_congested,	/* The read queue is getting full */
	BDI_unused,		/* Available bits start here */
};
...
...
@@ -28,4 +32,14 @@ int writeback_acquire(struct backing_dev_info *bdi);
int
writeback_in_progress
(
struct
backing_dev_info
*
bdi
);
void
writeback_release
(
struct
backing_dev_info
*
bdi
);
/* Nonzero when the BDI_read_congested bit is set in @bdi->state. */
static inline int bdi_read_congested(struct backing_dev_info *bdi)
{
	int congested = test_bit(BDI_read_congested, &bdi->state);

	return congested;
}
/* Nonzero when the BDI_write_congested bit is set in @bdi->state. */
static inline int bdi_write_congested(struct backing_dev_info *bdi)
{
	int congested = test_bit(BDI_write_congested, &bdi->state);

	return congested;
}
#endif
/* _LINUX_BACKING_DEV_H */
include/linux/blkdev.h
View file @
e7144e64
...
...
@@ -345,6 +345,7 @@ extern void blk_queue_end_tag(request_queue_t *, struct request *);
extern
int
blk_queue_init_tags
(
request_queue_t
*
,
int
);
extern
void
blk_queue_free_tags
(
request_queue_t
*
);
extern
void
blk_queue_invalidate_tags
(
request_queue_t
*
);
extern
void
blk_congestion_wait
(
int
rw
,
long
timeout
);
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
...
...
include/linux/buffer_head.h
View file @
e7144e64
...
...
@@ -163,7 +163,6 @@ struct buffer_head * __getblk(struct block_device *, sector_t, int);
void
__brelse
(
struct
buffer_head
*
);
void
__bforget
(
struct
buffer_head
*
);
struct
buffer_head
*
__bread
(
struct
block_device
*
,
sector_t
block
,
int
size
);
void
wakeup_bdflush
(
void
);
struct
buffer_head
*
alloc_buffer_head
(
void
);
void
free_buffer_head
(
struct
buffer_head
*
bh
);
void
FASTCALL
(
unlock_buffer
(
struct
buffer_head
*
bh
));
...
...
include/linux/sched.h
View file @
e7144e64
...
...
@@ -273,6 +273,7 @@ extern struct user_struct root_user;
#define INIT_USER (&root_user)
typedef
struct
prio_array
prio_array_t
;
struct
backing_dev_info
;
struct
task_struct
{
volatile
long
state
;
/* -1 unrunnable, 0 runnable, >0 stopped */
...
...
@@ -398,6 +399,7 @@ struct task_struct {
/* journalling filesystem info */
void
*
journal_info
;
struct
dentry
*
proc_dentry
;
struct
backing_dev_info
*
backing_dev_info
;
};
extern
void
__put_task_struct
(
struct
task_struct
*
tsk
);
...
...
include/linux/writeback.h
View file @
e7144e64
...
...
@@ -43,6 +43,8 @@ struct writeback_control {
older than this */
long
nr_to_write
;
/* Write this many pages, and decrement
this for each page written */
int
nonblocking
;
/* Don't get stuck on request queues */
int
encountered_congestion
;
/* An output: a queue is full */
};
void
writeback_inodes
(
struct
writeback_control
*
wbc
);
...
...
@@ -61,6 +63,8 @@ static inline void wait_on_inode(struct inode *inode)
/*
* mm/page-writeback.c
*/
int
wakeup_bdflush
(
long
nr_pages
);
/* These 5 are exported to sysctl. */
extern
int
dirty_background_ratio
;
extern
int
dirty_async_ratio
;
...
...
kernel/suspend.c
View file @
e7144e64
...
...
@@ -81,7 +81,6 @@ unsigned char software_suspend_enabled = 0;
#define TIMEOUT (6 * HZ)
/* Timeout for stopping processes */
#define ADDRESS(x) ((unsigned long) phys_to_virt(((x) << PAGE_SHIFT)))
extern
void
wakeup_bdflush
(
void
);
extern
int
C_A_D
;
/* References to section boundaries */
...
...
mm/filemap.c
View file @
e7144e64
...
...
@@ -1755,6 +1755,9 @@ generic_file_write_nolock(struct file *file, const struct iovec *iov,
if
(
unlikely
(
pos
<
0
))
return
-
EINVAL
;
/* We can write back this queue in page reclaim */
current
->
backing_dev_info
=
mapping
->
backing_dev_info
;
pagevec_init
(
&
lru_pvec
);
if
(
unlikely
(
file
->
f_error
))
{
...
...
@@ -1959,6 +1962,7 @@ generic_file_write_nolock(struct file *file, const struct iovec *iov,
err
=
written
?
written
:
status
;
out:
pagevec_lru_add
(
&
lru_pvec
);
current
->
backing_dev_info
=
0
;
return
err
;
}
...
...
mm/mempool.c
View file @
e7144e64
...
...
@@ -12,7 +12,7 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mempool.h>
#include <linux/
buffer_head.h>
/* for wakeup_bdflush() */
#include <linux/
writeback.h>
static
void
add_element
(
mempool_t
*
pool
,
void
*
element
)
{
...
...
@@ -210,7 +210,7 @@ void * mempool_alloc(mempool_t *pool, int gfp_mask)
/*
* Kick the VM at this point.
*/
wakeup_bdflush
();
wakeup_bdflush
(
0
);
spin_lock_irqsave
(
&
pool
->
lock
,
flags
);
if
(
likely
(
pool
->
curr_nr
))
{
...
...
mm/page-writeback.c
View file @
e7144e64
...
...
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/sysrq.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/notifier.h>
#include <linux/smp.h>
...
...
@@ -172,33 +173,47 @@ static void background_writeout(unsigned long _min_pages)
.
sync_mode
=
WB_SYNC_NONE
,
.
older_than_this
=
NULL
,
.
nr_to_write
=
0
,
.
nonblocking
=
1
,
};
CHECK_EMERGENCY_SYNC
background_thresh
=
(
dirty_background_ratio
*
total_pages
)
/
100
;
do
{
for
(
;
;
)
{
struct
page_state
ps
;
get_page_state
(
&
ps
);
if
(
ps
.
nr_dirty
<
background_thresh
&&
min_pages
<=
0
)
break
;
wbc
.
encountered_congestion
=
0
;
wbc
.
nr_to_write
=
MAX_WRITEBACK_PAGES
;
writeback_inodes
(
&
wbc
);
min_pages
-=
MAX_WRITEBACK_PAGES
-
wbc
.
nr_to_write
;
}
while
(
wbc
.
nr_to_write
<=
0
);
if
(
wbc
.
nr_to_write
==
MAX_WRITEBACK_PAGES
)
{
/* Wrote nothing */
if
(
wbc
.
encountered_congestion
)
blk_congestion_wait
(
WRITE
,
HZ
/
10
);
else
break
;
}
}
blk_run_queues
();
}
/*
* Start heavy writeback of everything.
* Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
* the whole world. Returns 0 if a pdflush thread was dispatched. Returns
* -1 if all pdflush threads were busy.
*/
void
wakeup_bdflush
(
void
)
int
wakeup_bdflush
(
long
nr_pages
)
{
if
(
nr_pages
==
0
)
{
struct
page_state
ps
;
get_page_state
(
&
ps
);
pdflush_operation
(
background_writeout
,
ps
.
nr_dirty
);
nr_pages
=
ps
.
nr_dirty
;
}
return
pdflush_operation
(
background_writeout
,
nr_pages
);
}
static
struct
timer_list
wb_timer
;
...
...
@@ -223,25 +238,36 @@ static void wb_kupdate(unsigned long arg)
unsigned
long
oldest_jif
;
unsigned
long
start_jif
;
unsigned
long
next_jif
;
long
nr_to_write
;
struct
page_state
ps
;
struct
writeback_control
wbc
=
{
.
bdi
=
NULL
,
.
sync_mode
=
WB_SYNC_NONE
,
.
older_than_this
=
&
oldest_jif
,
.
nr_to_write
=
0
,
.
nonblocking
=
1
,
};
sync_supers
();
get_page_state
(
&
ps
);
get_page_state
(
&
ps
);
oldest_jif
=
jiffies
-
(
dirty_expire_centisecs
*
HZ
)
/
100
;
start_jif
=
jiffies
;
next_jif
=
start_jif
+
(
dirty_writeback_centisecs
*
HZ
)
/
100
;
wbc
.
nr_to_write
=
ps
.
nr_dirty
;
nr_to_write
=
ps
.
nr_dirty
;
while
(
nr_to_write
>
0
)
{
wbc
.
encountered_congestion
=
0
;
wbc
.
nr_to_write
=
MAX_WRITEBACK_PAGES
;
writeback_inodes
(
&
wbc
);
if
(
wbc
.
nr_to_write
==
MAX_WRITEBACK_PAGES
)
{
if
(
wbc
.
encountered_congestion
)
blk_congestion_wait
(
WRITE
,
HZ
);
else
break
;
/* All the old data is written */
}
nr_to_write
-=
MAX_WRITEBACK_PAGES
-
wbc
.
nr_to_write
;
}
blk_run_queues
();
yield
();
if
(
time_before
(
next_jif
,
jiffies
+
HZ
))
next_jif
=
jiffies
+
HZ
;
mod_timer
(
&
wb_timer
,
next_jif
);
...
...
@@ -493,7 +519,6 @@ int __set_page_dirty_buffers(struct page *page)
buffer_error
();
spin_lock
(
&
mapping
->
private_lock
);
if
(
page_has_buffers
(
page
))
{
struct
buffer_head
*
head
=
page_buffers
(
page
);
struct
buffer_head
*
bh
=
head
;
...
...
@@ -506,6 +531,7 @@ int __set_page_dirty_buffers(struct page *page)
bh
=
bh
->
b_this_page
;
}
while
(
bh
!=
head
);
}
spin_unlock
(
&
mapping
->
private_lock
);
if
(
!
TestSetPageDirty
(
page
))
{
write_lock
(
&
mapping
->
page_lock
);
...
...
@@ -519,7 +545,6 @@ int __set_page_dirty_buffers(struct page *page)
__mark_inode_dirty
(
mapping
->
host
,
I_DIRTY_PAGES
);
}
spin_unlock
(
&
mapping
->
private_lock
);
out:
return
ret
;
}
...
...
mm/swapfile.c
View file @
e7144e64
...
...
@@ -918,6 +918,26 @@ static int setup_swap_extents(struct swap_info_struct *sis)
return
ret
;
}
#if 0 /* We don't need this yet */
#include <linux/backing-dev.h>
/*
 * Report whether the write queue backing @page is congested.
 *
 * The page must be locked by the caller (enforced with BUG_ON).  For an
 * ordinary page the backing_dev_info comes from page->mapping; for a
 * swapcache page it is taken from the swap device's block-device inode.
 * Returns the result of bdi_write_congested() on that backing_dev_info.
 */
int page_queue_congested(struct page *page)
{
struct backing_dev_info *bdi;
BUG_ON(!PageLocked(page)); /* It pins the swap_info_struct */
/* Default: the queue behind the page's own mapping */
bdi = page->mapping->backing_dev_info;
if (PageSwapCache(page)) {
/* page->index is used as the swap entry value for swapcache pages */
swp_entry_t entry = { .val = page->index };
struct swap_info_struct *sis;
sis = get_swap_info_struct(swp_type(entry));
/* Use the swap device's queue in place of the mapping's */
bdi = sis->bdev->bd_inode->i_mapping->backing_dev_info;
}
return bdi_write_congested(bdi);
}
#endif
asmlinkage
long
sys_swapoff
(
const
char
*
specialfile
)
{
struct
swap_info_struct
*
p
=
NULL
;
...
...
mm/vmscan.c
View file @
e7144e64
...
...
@@ -21,9 +21,11 @@
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/suspend.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
/* for try_to_release_page() */
#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/rmap-locking.h>
#include <asm/pgalloc.h>
...
...
@@ -32,11 +34,11 @@
/*
* The "priority" of VM scanning is how much of the queues we
* will scan in one go. A value of
6
for DEF_PRIORITY implies
* that we'll scan 1/
64th of the queues ("queue_length >> 6
")
* will scan in one go. A value of
12
for DEF_PRIORITY implies
* that we'll scan 1/
4096th of the queues ("queue_length >> 12
")
* during a normal aging round.
*/
#define DEF_PRIORITY
(6)
#define DEF_PRIORITY
12
#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field) \
...
...
@@ -95,7 +97,7 @@ static inline int is_page_cache_freeable(struct page *page)
static
/* inline */
int
shrink_list
(
struct
list_head
*
page_list
,
int
nr_pages
,
unsigned
int
gfp_mask
,
int
priority
,
int
*
max_scan
)
unsigned
int
gfp_mask
,
int
*
max_scan
)
{
struct
address_space
*
mapping
;
LIST_HEAD
(
ret_pages
);
...
...
@@ -117,10 +119,21 @@ shrink_list(struct list_head *page_list, int nr_pages,
BUG_ON
(
PageActive
(
page
));
may_enter_fs
=
(
gfp_mask
&
__GFP_FS
)
||
(
PageSwapCache
(
page
)
&&
(
gfp_mask
&
__GFP_IO
));
/*
* If the page is mapped into pagetables then wait on it, to
* throttle this allocator to the rate at which we can clear
* MAP_SHARED data. This will also throttle against swapcache
* writes.
*/
if
(
PageWriteback
(
page
))
{
if
(
may_enter_fs
)
wait_on_page_writeback
(
page
);
/* throttling */
else
if
(
may_enter_fs
)
{
if
(
page
->
pte
.
direct
||
page
->
mapping
->
backing_dev_info
==
current
->
backing_dev_info
)
{
wait_on_page_writeback
(
page
);
}
}
goto
keep_locked
;
}
...
...
@@ -172,15 +185,43 @@ shrink_list(struct list_head *page_list, int nr_pages,
* will write it. So we're back to page-at-a-time writepage
* in LRU order.
*/
if
(
PageDirty
(
page
)
&&
is_page_cache_freeable
(
page
)
&&
mapping
&&
may_enter_fs
)
{
/*
* If the page is dirty, only perform writeback if that write
* will be non-blocking. To prevent this allocation from being
* stalled by pagecache activity. But note that there may be
* stalls if we need to run get_block(). We could test
* PagePrivate for that.
*
* If this process is currently in generic_file_write() against
* this page's queue, we can perform writeback even if that
* will block.
*
* If the page is swapcache, write it back even if that would
* block, for some throttling. This happens by accident, because
* swap_backing_dev_info is bust: it doesn't reflect the
* congestion state of the swapdevs. Easy to fix, if needed.
* See swapfile.c:page_queue_congested().
*/
if
(
PageDirty
(
page
))
{
int
(
*
writeback
)(
struct
page
*
,
struct
writeback_control
*
);
struct
backing_dev_info
*
bdi
;
const
int
cluster_size
=
SWAP_CLUSTER_MAX
;
struct
writeback_control
wbc
=
{
.
nr_to_write
=
cluster_size
,
};
if
(
!
is_page_cache_freeable
(
page
))
goto
keep_locked
;
if
(
!
mapping
)
goto
keep_locked
;
if
(
!
may_enter_fs
)
goto
keep_locked
;
bdi
=
mapping
->
backing_dev_info
;
if
(
bdi
!=
current
->
backing_dev_info
&&
bdi_write_congested
(
bdi
))
goto
keep_locked
;
writeback
=
mapping
->
a_ops
->
vm_writeback
;
if
(
writeback
==
NULL
)
writeback
=
generic_vm_writeback
;
...
...
@@ -279,7 +320,7 @@ shrink_list(struct list_head *page_list, int nr_pages,
*/
static
/* inline */
int
shrink_cache
(
int
nr_pages
,
struct
zone
*
zone
,
unsigned
int
gfp_mask
,
int
priority
,
int
max_scan
)
unsigned
int
gfp_mask
,
int
max_scan
)
{
LIST_HEAD
(
page_list
);
struct
pagevec
pvec
;
...
...
@@ -298,9 +339,11 @@ shrink_cache(int nr_pages, struct zone *zone,
spin_lock_irq
(
&
zone
->
lru_lock
);
while
(
max_scan
>
0
&&
nr_pages
>
0
)
{
struct
page
*
page
;
int
n
=
0
;
int
nr_taken
=
0
;
int
nr_scan
=
0
;
while
(
n
<
nr_to_process
&&
!
list_empty
(
&
zone
->
inactive_list
))
{
while
(
nr_scan
++
<
nr_to_process
&&
!
list_empty
(
&
zone
->
inactive_list
))
{
page
=
list_entry
(
zone
->
inactive_list
.
prev
,
struct
page
,
lru
);
...
...
@@ -318,18 +361,17 @@ shrink_cache(int nr_pages, struct zone *zone,
}
list_add
(
&
page
->
lru
,
&
page_list
);
page_cache_get
(
page
);
n
++
;
n
r_taken
++
;
}
zone
->
nr_inactive
-=
n
;
zone
->
nr_inactive
-=
n
r_taken
;
spin_unlock_irq
(
&
zone
->
lru_lock
);
if
(
list_empty
(
&
page_list
)
)
if
(
nr_taken
==
0
)
goto
done
;
max_scan
-=
n
;
KERNEL_STAT_ADD
(
pgscan
,
n
);
nr_pages
=
shrink_list
(
&
page_list
,
nr_pages
,
gfp_mask
,
priority
,
&
max_scan
);
max_scan
-=
nr_scan
;
KERNEL_STAT_ADD
(
pgscan
,
nr_scan
);
nr_pages
=
shrink_list
(
&
page_list
,
nr_pages
,
gfp_mask
,
&
max_scan
);
if
(
nr_pages
<=
0
&&
list_empty
(
&
page_list
))
goto
done
;
...
...
@@ -420,6 +462,15 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
}
pte_chain_unlock
(
page
);
}
/*
* FIXME: need to consider page_count(page) here if/when we
* reap orphaned pages via the LRU (Daniel's locking stuff)
*/
if
(
total_swap_pages
==
0
&&
!
page
->
mapping
&&
!
PagePrivate
(
page
))
{
list_add
(
&
page
->
lru
,
&
l_active
);
continue
;
}
list_add
(
&
page
->
lru
,
&
l_inactive
);
pgdeactivate
++
;
}
...
...
@@ -470,11 +521,10 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
}
static
/* inline */
int
shrink_zone
(
struct
zone
*
zone
,
int
priority
,
shrink_zone
(
struct
zone
*
zone
,
int
max_scan
,
unsigned
int
gfp_mask
,
int
nr_pages
)
{
unsigned
long
ratio
;
int
max_scan
;
/* This is bogus for ZONE_HIGHMEM? */
if
(
kmem_cache_reap
(
gfp_mask
)
>=
nr_pages
)
...
...
@@ -497,43 +547,50 @@ shrink_zone(struct zone *zone, int priority,
atomic_sub
(
SWAP_CLUSTER_MAX
,
&
zone
->
refill_counter
);
refill_inactive_zone
(
zone
,
SWAP_CLUSTER_MAX
);
}
max_scan
=
zone
->
nr_inactive
/
priority
;
nr_pages
=
shrink_cache
(
nr_pages
,
zone
,
gfp_mask
,
priority
,
max_scan
);
if
(
nr_pages
<=
0
)
return
0
;
wakeup_bdflush
();
shrink_dcache_memory
(
priority
,
gfp_mask
);
/* After shrinking the dcache, get rid of unused inodes too .. */
shrink_icache_memory
(
1
,
gfp_mask
);
#ifdef CONFIG_QUOTA
shrink_dqcache_memory
(
DEF_PRIORITY
,
gfp_mask
);
#endif
nr_pages
=
shrink_cache
(
nr_pages
,
zone
,
gfp_mask
,
max_scan
);
return
nr_pages
;
}
static
int
shrink_caches
(
struct
zone
*
classzone
,
int
priority
,
int
gfp_mask
,
int
nr_pages
)
int
*
total_scanned
,
int
gfp_mask
,
int
nr_pages
)
{
struct
zone
*
first_classzone
;
struct
zone
*
zone
;
first_classzone
=
classzone
->
zone_pgdat
->
node_zones
;
zone
=
classzone
;
while
(
zone
>=
first_classzone
&&
nr_pages
>
0
)
{
if
(
zone
->
free_pages
<=
zone
->
pages_high
)
{
nr_pages
=
shrink_zone
(
zone
,
priority
,
gfp_mask
,
nr_pages
);
}
zone
--
;
for
(
zone
=
classzone
;
zone
>=
first_classzone
;
zone
--
)
{
int
max_scan
;
int
to_reclaim
;
int
unreclaimed
;
to_reclaim
=
zone
->
pages_high
-
zone
->
free_pages
;
if
(
to_reclaim
<
0
)
continue
;
/* zone has enough memory */
if
(
to_reclaim
>
SWAP_CLUSTER_MAX
)
to_reclaim
=
SWAP_CLUSTER_MAX
;
if
(
to_reclaim
<
nr_pages
)
to_reclaim
=
nr_pages
;
/*
* If we cannot reclaim `nr_pages' pages by scanning twice
* that many pages then fall back to the next zone.
*/
max_scan
=
zone
->
nr_inactive
>>
priority
;
if
(
max_scan
<
to_reclaim
*
2
)
max_scan
=
to_reclaim
*
2
;
unreclaimed
=
shrink_zone
(
zone
,
max_scan
,
gfp_mask
,
to_reclaim
);
nr_pages
-=
to_reclaim
-
unreclaimed
;
*
total_scanned
+=
max_scan
;
}
shrink_dcache_memory
(
priority
,
gfp_mask
);
shrink_icache_memory
(
1
,
gfp_mask
);
#ifdef CONFIG_QUOTA
shrink_dqcache_memory
(
DEF_PRIORITY
,
gfp_mask
);
#endif
return
nr_pages
;
}
...
...
@@ -564,12 +621,25 @@ try_to_free_pages(struct zone *classzone,
KERNEL_STAT_INC
(
pageoutrun
);
for
(
priority
=
DEF_PRIORITY
;
priority
;
priority
--
)
{
nr_pages
=
shrink_caches
(
classzone
,
priority
,
int
total_scanned
=
0
;
nr_pages
=
shrink_caches
(
classzone
,
priority
,
&
total_scanned
,
gfp_mask
,
nr_pages
);
if
(
nr_pages
<=
0
)
return
1
;
if
(
total_scanned
==
0
)
return
1
;
/* All zones had enough free memory */
if
(
!
(
gfp_mask
&
__GFP_FS
))
break
;
break
;
/* Let the caller handle it */
/*
* Try to write back as many pages as we just scanned. Not
* sure if that makes sense, but it's an attempt to avoid
* creating IO storms unnecessarily
*/
wakeup_bdflush
(
total_scanned
);
/* Take a nap, wait for some writeback to complete */
blk_congestion_wait
(
WRITE
,
HZ
/
4
);
}
if
(
gfp_mask
&
__GFP_FS
)
out_of_memory
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment