Commit 2ce067b0 authored by Linus Torvalds

Merge bk://ldm.bkbits.net/linux-2.5

into home.transmeta.com:/home/torvalds/v2.5/linux
parents 56d8b39d 4ab1a3e6
......@@ -121,7 +121,7 @@ static int vidport;
static int lines, cols;
#ifdef CONFIG_MULTIQUAD
static void * const xquad_portio = NULL;
static void * xquad_portio = NULL;
#endif
#include "../../../../lib/inflate.c"
......
......@@ -1060,11 +1060,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
if (clustered_apic_mode && (numnodes > 1)) {
printk("Remapping cross-quad port I/O for %d quads\n",
numnodes);
xquad_portio = ioremap (XQUAD_PORTIO_BASE,
numnodes * XQUAD_PORTIO_QUAD);
printk("xquad_portio vaddr 0x%08lx, len %08lx\n",
(u_long) xquad_portio,
(u_long) numnodes * XQUAD_PORTIO_LEN);
xquad_portio = ioremap (XQUAD_PORTIO_BASE,
numnodes * XQUAD_PORTIO_LEN);
(u_long) numnodes * XQUAD_PORTIO_QUAD);
}
/*
......
......@@ -272,10 +272,9 @@ get_addr(unsigned long addr, unsigned long len)
return -ENOMEM;
if (!vma || ((addr + len) < vma->vm_start))
goto found_addr;
addr = vma->vm_end;
addr = HPAGE_ALIGN(vma->vm_end);
}
found_addr:
addr = HPAGE_ALIGN(addr);
return addr;
}
......
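For context on the get_addr() change above: advancing the search with HPAGE_ALIGN(vma->vm_end) keeps each candidate address huge-page aligned before the (addr + len) fit test, instead of aligning only once after the loop, so the check is performed against the address that will actually be returned. A minimal sketch of the alignment macro, written from the usual round-up idiom (the exact per-arch definition may differ):

	/* Round an address up to the next huge-page boundary (illustrative;
	 * the standard (x + size - 1) & ~(size - 1) rounding idiom). */
	#define HPAGE_MASK        (~(HPAGE_SIZE - 1))
	#define HPAGE_ALIGN(addr) (((unsigned long)(addr) + HPAGE_SIZE - 1) & HPAGE_MASK)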
......@@ -9,9 +9,9 @@
#
export-objs := elevator.o ll_rw_blk.o loop.o genhd.o acsi.o \
block_ioctl.o
block_ioctl.o deadline-iosched.o
obj-y := elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o
obj-y := elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o deadline-iosched.o
obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o
......
......@@ -157,114 +157,6 @@ inline int elv_try_last_merge(request_queue_t *q, struct request **req,
return ret;
}
static int bio_rq_before(struct bio *bio, struct request *rq)
{
if (!kdev_same(to_kdev_t(bio->bi_bdev->bd_dev), rq->rq_dev))
return 0;
return bio->bi_sector < rq->sector;
}
/*
* elevator_linux starts here
*/
int elevator_linus_merge(request_queue_t *q, struct request **req,
struct bio *bio)
{
struct list_head *entry, *good;
struct request *__rq;
int ret;
if ((ret = elv_try_last_merge(q, req, bio)))
return ret;
entry = &q->queue_head;
good = &q->queue_head;
ret = ELEVATOR_NO_MERGE;
while ((entry = entry->prev) != &q->queue_head) {
__rq = list_entry_rq(entry);
if (__rq->flags & (REQ_BARRIER | REQ_STARTED))
break;
if (!(__rq->flags & REQ_CMD))
break;
if (bio_data_dir(bio) != rq_data_dir(__rq)) {
if (bio_data_dir(bio) == WRITE)
break;
good = entry->prev;
continue;
}
ret = elv_try_merge(__rq, bio);
if (ret) {
*req = __rq;
q->last_merge = &__rq->queuelist;
return ret;
}
if (bio_rq_before(bio, __rq))
good = entry->prev;
}
if (good != &q->queue_head)
*req = list_entry_rq(good);
return ELEVATOR_NO_MERGE;
}
void elevator_linus_merge_req(request_queue_t *q, struct request *req,
struct request *next)
{
if (elv_linus_sequence(next) < elv_linus_sequence(req))
elv_linus_sequence(req) = elv_linus_sequence(next);
}
void elevator_linus_add_request(request_queue_t *q, struct request *rq,
struct list_head *insert_here)
{
elevator_t *e = &q->elevator;
int lat = 0, *latency = e->elevator_data;
if (!insert_here)
insert_here = q->queue_head.prev;
if (!(rq->flags & REQ_BARRIER))
lat = latency[rq_data_dir(rq)];
elv_linus_sequence(rq) = lat;
list_add(&rq->queuelist, insert_here);
/*
* new merges must not precede this barrier
*/
if (rq->flags & REQ_BARRIER)
q->last_merge = NULL;
else if (!q->last_merge)
q->last_merge = &rq->queuelist;
}
int elevator_linus_init(request_queue_t *q, elevator_t *e)
{
int *latency;
latency = kmalloc(2 * sizeof(int), GFP_KERNEL);
if (!latency)
return -ENOMEM;
latency[READ] = 1024;
latency[WRITE] = 2048;
e->elevator_data = latency;
return 0;
}
void elevator_linus_exit(request_queue_t *q, elevator_t *e)
{
kfree(e->elevator_data);
}
/*
* elevator noop
*
......@@ -442,15 +334,6 @@ inline struct list_head *elv_get_sort_head(request_queue_t *q,
return &q->queue_head;
}
elevator_t elevator_linus = {
elevator_merge_fn: elevator_linus_merge,
elevator_merge_req_fn: elevator_linus_merge_req,
elevator_next_req_fn: elevator_noop_next_request,
elevator_add_req_fn: elevator_linus_add_request,
elevator_init_fn: elevator_linus_init,
elevator_exit_fn: elevator_linus_exit,
};
elevator_t elevator_noop = {
elevator_merge_fn: elevator_noop_merge,
elevator_next_req_fn: elevator_noop_next_request,
......@@ -459,7 +342,6 @@ elevator_t elevator_noop = {
module_init(elevator_global_init);
EXPORT_SYMBOL(elevator_linus);
EXPORT_SYMBOL(elevator_noop);
EXPORT_SYMBOL(__elv_add_request);
......
......@@ -1175,7 +1175,7 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
if (blk_init_free_list(q))
return -ENOMEM;
if ((ret = elevator_init(q, &q->elevator, elevator_linus))) {
if ((ret = elevator_init(q, &q->elevator, iosched_deadline))) {
blk_cleanup_queue(q);
return ret;
}
......@@ -1233,24 +1233,23 @@ static struct request *get_request(request_queue_t *q, int rw)
*/
static struct request *get_request_wait(request_queue_t *q, int rw)
{
DECLARE_WAITQUEUE(wait, current);
DEFINE_WAIT(wait);
struct request_list *rl = &q->rq[rw];
struct request *rq;
spin_lock_prefetch(q->queue_lock);
generic_unplug_device(q);
add_wait_queue_exclusive(&rl->wait, &wait);
do {
set_current_state(TASK_UNINTERRUPTIBLE);
prepare_to_wait_exclusive(&rl->wait, &wait,
TASK_UNINTERRUPTIBLE);
if (!rl->count)
schedule();
finish_wait(&rl->wait, &wait);
spin_lock_irq(q->queue_lock);
rq = get_request(q, rw);
spin_unlock_irq(q->queue_lock);
} while (rq == NULL);
remove_wait_queue(&rl->wait, &wait);
current->state = TASK_RUNNING;
return rq;
}
......@@ -1460,18 +1459,16 @@ void blk_put_request(struct request *req)
*/
void blk_congestion_wait(int rw, long timeout)
{
DECLARE_WAITQUEUE(wait, current);
DEFINE_WAIT(wait);
struct congestion_state *cs = &congestion_states[rw];
if (atomic_read(&cs->nr_congested_queues) == 0)
return;
blk_run_queues();
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(&cs->wqh, &wait);
prepare_to_wait(&cs->wqh, &wait, TASK_UNINTERRUPTIBLE);
if (atomic_read(&cs->nr_congested_queues) != 0)
schedule_timeout(timeout);
set_current_state(TASK_RUNNING);
remove_wait_queue(&cs->wqh, &wait);
finish_wait(&cs->wqh, &wait);
}
/*
......
......@@ -157,18 +157,12 @@ struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
#define MAX_DISK_SIZE 1024*1024*1024
static unsigned long
compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry)
{
loff_t size = lo_dentry->d_inode->i_mapping->host->i_size;
return (size - lo->lo_offset) >> BLOCK_SIZE_BITS;
}
static void figure_loop_size(struct loop_device *lo)
{
set_capacity(disks + lo->lo_number, compute_loop_size(lo,
lo->lo_backing_file->f_dentry));
loff_t size = lo->lo_backing_file->f_dentry->d_inode->i_size;
set_capacity(disks + lo->lo_number,
(size - lo->lo_offset) >> 9);
}
static inline int lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf,
......
......@@ -338,6 +338,9 @@ static void cy82c693_tune_drive (ide_drive_t *drive, u8 pio)
*/
unsigned int __init init_chipset_cy82c693(struct pci_dev *dev, const char *name)
{
if (PCI_FUNC(dev->devfn) != 1)
return 0;
#ifdef CY82C693_SETDMA_CLOCK
u8 data = 0;
#endif /* CY82C693_SETDMA_CLOCK */
......@@ -411,20 +414,30 @@ void __init init_hwif_cy82c693(ide_hwif_t *hwif)
#endif /* CONFIG_BLK_DEV_IDEDMA */
}
void __init init_dma_cy82c693 (ide_hwif_t *hwif, unsigned long dmabase)
static __initdata ide_hwif_t *primary;
void __init init_iops_cy82c693(ide_hwif_t *hwif)
{
ide_setup_dma(hwif, dmabase, 8);
if (PCI_FUNC(hwif->pci_dev->devfn) == 1)
primary = hwif;
else {
hwif->mate = primary;
hwif->channel = 1;
}
}
extern void ide_setup_pci_device(struct pci_dev *, ide_pci_device_t *);
static int __devinit cy82c693_init_one(struct pci_dev *dev, const struct pci_device_id *id)
{
ide_pci_device_t *d = &cy82c693_chipsets[id->driver_data];
if ((!(PCI_FUNC(dev->devfn) & 1) ||
(!((dev->class >> 8) == PCI_CLASS_STORAGE_IDE))))
return 0; /* CY82C693 is more than only a IDE controller */
ide_setup_pci_device(dev, d);
struct pci_dev *dev2;
/* CY82C693 is more than only a IDE controller.
Function 1 is primary IDE channel, function 2 - secondary. */
if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE &&
PCI_FUNC(dev->devfn) == 1) {
dev2 = pci_find_slot(dev->bus->number, dev->devfn + 1);
ide_setup_pci_devices(dev, dev2, d);
}
return 0;
}
......
......@@ -66,7 +66,7 @@ typedef struct pio_clocks_s {
extern unsigned int init_chipset_cy82c693(struct pci_dev *, const char *);
extern void init_hwif_cy82c693(ide_hwif_t *);
extern void init_dma_cy82c693(ide_hwif_t *, unsigned long);
extern void init_iops_cy82c693(ide_hwif_t *);
static ide_pci_device_t cy82c693_chipsets[] __initdata = {
{ /* 0 */
......@@ -74,10 +74,10 @@ static ide_pci_device_t cy82c693_chipsets[] __initdata = {
device: PCI_DEVICE_ID_CONTAQ_82C693,
name: "CY82C693",
init_chipset: init_chipset_cy82c693,
init_iops: NULL,
init_iops: init_iops_cy82c693,
init_hwif: init_hwif_cy82c693,
init_dma: init_dma_cy82c693,
channels: 2,
init_dma: NULL,
channels: 1,
autodma: AUTODMA,
enablebits: {{0x00,0x00,0x00}, {0x00,0x00,0x00}},
bootable: ON_BOARD,
......
......@@ -250,6 +250,7 @@ static unsigned long __init ide_get_or_set_dma_base (ide_hwif_t *hwif)
switch(dev->device) {
case PCI_DEVICE_ID_AL_M5219:
case PCI_DEVICE_ID_AL_M5229:
case PCI_DEVICE_ID_AMD_VIPER_7409:
case PCI_DEVICE_ID_CMD_643:
case PCI_DEVICE_ID_SERVERWORKS_CSB5IDE:
......
......@@ -68,6 +68,7 @@ static int proc_read_escdinfo(char *buf, char **start, off_t pos,
);
}
#define MAX_SANE_ESCD_SIZE (32*1024)
static int proc_read_escd(char *buf, char **start, off_t pos,
int count, int *eof, void *data)
{
......@@ -79,8 +80,8 @@ static int proc_read_escd(char *buf, char **start, off_t pos,
return -EIO;
/* sanity check */
if (escd.escd_size > (32*1024)) {
printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size is too great\n");
if (escd.escd_size > MAX_SANE_ESCD_SIZE) {
printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size reported by BIOS escd_info call is too great\n");
return -EFBIG;
}
......@@ -90,7 +91,14 @@ static int proc_read_escd(char *buf, char **start, off_t pos,
if (pnp_bios_read_escd(tmpbuf, escd.nv_storage_base))
return -EIO;
escd_size = (unsigned char)(buf[0]) + (unsigned char)(buf[1])*256;
escd_size = (unsigned char)(tmpbuf[0]) + (unsigned char)(tmpbuf[1])*256;
/* sanity check */
if (escd_size > MAX_SANE_ESCD_SIZE) {
printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size reported by BIOS read_escd call is too great\n");
return -EFBIG;
}
escd_left_to_read = escd_size - pos;
if (escd_left_to_read < 0) escd_left_to_read = 0;
if (escd_left_to_read == 0) *eof = 1;
......
......@@ -148,6 +148,11 @@
Fix bug in raw command post with data ioctl method.
Fix bug where rollcall sometimes failed with cable errors.
Print unit # on all command timeouts.
1.02.00.026 - Fix possible infinite retry bug with power glitch induced
drive timeouts.
Cleanup some AEN severity levels.
1.02.00.027 - Add drive not supported AEN code for SATA controllers.
Remove spurious unknown ioctl error message.
*/
#include <linux/module.h>
......@@ -201,7 +206,7 @@ static struct notifier_block tw_notifier = {
};
/* Globals */
char *tw_driver_version="1.02.00.025";
char *tw_driver_version="1.02.00.027";
TW_Device_Extension *tw_device_extension_list[TW_MAX_SLOT];
int tw_device_extension_count = 0;
......@@ -212,7 +217,7 @@ int tw_aen_complete(TW_Device_Extension *tw_dev, int request_id)
{
TW_Param *param;
unsigned short aen;
int error = 0;
int error = 0, table_max = 0;
dprintk(KERN_WARNING "3w-xxxx: tw_aen_complete()\n");
if (tw_dev->alignment_virtual_address[request_id] == NULL) {
......@@ -227,7 +232,8 @@ int tw_aen_complete(TW_Device_Extension *tw_dev, int request_id)
if (aen == 0x0ff) {
printk(KERN_WARNING "3w-xxxx: scsi%d: AEN: INFO: AEN queue overflow.\n", tw_dev->host->host_no);
} else {
if ((aen & 0x0ff) < TW_AEN_STRING_MAX) {
table_max = sizeof(tw_aen_string)/sizeof(char *);
if ((aen & 0x0ff) < table_max) {
if ((tw_aen_string[aen & 0xff][strlen(tw_aen_string[aen & 0xff])-1]) == '#') {
printk(KERN_WARNING "3w-xxxx: scsi%d: AEN: %s%d.\n", tw_dev->host->host_no, tw_aen_string[aen & 0xff], aen >> 8);
} else {
......@@ -289,7 +295,7 @@ int tw_aen_drain_queue(TW_Device_Extension *tw_dev)
int first_reset = 0;
int queue = 0;
int imax, i;
int found = 0;
int found = 0, table_max = 0;
dprintk(KERN_NOTICE "3w-xxxx: tw_aen_drain_queue()\n");
......@@ -409,7 +415,8 @@ int tw_aen_drain_queue(TW_Device_Extension *tw_dev)
if (aen == 0x0ff) {
printk(KERN_WARNING "3w-xxxx: AEN: INFO: AEN queue overflow.\n");
} else {
if ((aen & 0x0ff) < TW_AEN_STRING_MAX) {
table_max = sizeof(tw_aen_string)/sizeof(char *);
if ((aen & 0x0ff) < table_max) {
if ((tw_aen_string[aen & 0xff][strlen(tw_aen_string[aen & 0xff])-1]) == '#') {
printk(KERN_WARNING "3w-xxxx: AEN: %s%d.\n", tw_aen_string[aen & 0xff], aen >> 8);
} else {
......@@ -1442,7 +1449,8 @@ static void tw_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
/* If error, command failed */
if (error == 1) {
tw_dev->srb[request_id]->result = (DID_RESET << 16);
/* Ask for a host reset */
tw_dev->srb[request_id]->result = (DID_OK << 16) | (CHECK_CONDITION << 1);
}
/* Now complete the io */
......@@ -1784,7 +1792,7 @@ int tw_ioctl(TW_Device_Extension *tw_dev, int request_id)
return 1;
}
default:
printk(KERN_WARNING "3w-xxxx: Unknown ioctl 0x%x.\n", opcode);
dprintk(KERN_WARNING "3w-xxxx: Unknown ioctl 0x%x.\n", opcode);
tw_dev->state[request_id] = TW_S_COMPLETED;
tw_state_request_finish(tw_dev, request_id);
tw_dev->srb[request_id]->result = (DID_OK << 16);
......
......@@ -90,14 +90,13 @@ static char *tw_aen_string[] = {
"INFO: Verify started: Unit #", // 0x029
"ERROR: Verify failed: Port #", // 0x02A
"INFO: Verify complete: Unit #", // 0x02B
"ERROR: Overwrote bad sector during rebuild: Port #", //0x02C
"WARNING: Overwrote bad sector during rebuild: Port #", //0x02C
"ERROR: Encountered bad sector during rebuild: Port #", //0x02D
"INFO: Replacement drive is too small: Port #", //0x02E
"WARNING: Verify error: Unit not previously initialized: Unit #" //0x02F
"ERROR: Replacement drive is too small: Port #", //0x02E
"WARNING: Verify error: Unit not previously initialized: Unit #", //0x02F
"ERROR: Drive not supported: Port #" // 0x030
};
#define TW_AEN_STRING_MAX 0x030
/*
Sense key lookup table
Format: ESDC/flags,SenseKey,AdditionalSenseCode,AdditionalSenseCodeQualifier
......
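The matching 3w-xxxx.c change above replaces the hard-coded TW_AEN_STRING_MAX bound with sizeof(tw_aen_string)/sizeof(char *), so the bounds check tracks the table automatically as entries such as the new 0x030 string are added. A minimal sketch of that idiom (the ARRAY_SIZE name and the simplified printk are illustrative only):

	/* Element count of a statically sized array (illustrative helper). */
	#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

	table_max = ARRAY_SIZE(tw_aen_string);
	if ((aen & 0x0ff) < table_max)
		printk(KERN_WARNING "3w-xxxx: AEN: %s\n", tw_aen_string[aen & 0xff]);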
......@@ -128,22 +128,18 @@ void unlock_buffer(struct buffer_head *bh)
*/
void __wait_on_buffer(struct buffer_head * bh)
{
wait_queue_head_t *wq = bh_waitq_head(bh);
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
wait_queue_head_t *wqh = bh_waitq_head(bh);
DEFINE_WAIT(wait);
get_bh(bh);
add_wait_queue(wq, &wait);
do {
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
blk_run_queues();
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (!buffer_locked(bh))
break;
if (buffer_locked(bh))
schedule();
} while (buffer_locked(bh));
tsk->state = TASK_RUNNING;
remove_wait_queue(wq, &wait);
put_bh(bh);
finish_wait(wqh, &wait);
}
static inline void
......@@ -246,10 +242,12 @@ int fsync_bdev(struct block_device *bdev)
}
/*
* sync everything.
* sync everything. Start out by waking pdflush, because that writes back
* all queues in parallel.
*/
asmlinkage long sys_sync(void)
{
wakeup_bdflush(0);
sync_inodes(0); /* All mappings and inodes, including block devices */
DQUOT_SYNC(NULL);
sync_supers(); /* Write the superblocks */
......
......@@ -329,12 +329,11 @@ static inline void prune_one_dentry(struct dentry * dentry)
void prune_dcache(int count)
{
spin_lock(&dcache_lock);
for (;;) {
for (; count ; count--) {
struct dentry *dentry;
struct list_head *tmp;
tmp = dentry_unused.prev;
if (tmp == &dentry_unused)
break;
list_del_init(tmp);
......@@ -349,12 +348,8 @@ void prune_dcache(int count)
dentry_stat.nr_unused--;
/* Unused dentry with a count? */
if (atomic_read(&dentry->d_count))
BUG();
BUG_ON(atomic_read(&dentry->d_count));
prune_one_dentry(dentry);
if (!--count)
break;
}
spin_unlock(&dcache_lock);
}
......@@ -573,19 +568,11 @@ void shrink_dcache_anon(struct list_head *head)
/*
* This is called from kswapd when we think we need some
* more memory, but aren't really sure how much. So we
* carefully try to free a _bit_ of our dcache, but not
* too much.
*
* Priority:
* 1 - very urgent: shrink everything
* ...
* 6 - base-level: try to shrink a bit.
* more memory.
*/
int shrink_dcache_memory(int priority, unsigned int gfp_mask)
int shrink_dcache_memory(int ratio, unsigned int gfp_mask)
{
int count = 0;
int entries = dentry_stat.nr_dentry / ratio + 1;
/*
* Nasty deadlock avoidance.
*
......@@ -600,11 +587,8 @@ int shrink_dcache_memory(int priority, unsigned int gfp_mask)
if (!(gfp_mask & __GFP_FS))
return 0;
count = dentry_stat.nr_unused / priority;
prune_dcache(count);
kmem_cache_shrink(dentry_cache);
return 0;
prune_dcache(entries);
return entries;
}
#define NAME_ALLOC_LEN(len) ((len+16) & ~15)
......
......@@ -480,26 +480,17 @@ static void prune_dqcache(int count)
/*
* This is called from kswapd when we think we need some
* more memory, but aren't really sure how much. So we
* carefully try to free a _bit_ of our dqcache, but not
* too much.
*
* Priority:
* 1 - very urgent: shrink everything
* ...
* 6 - base-level: try to shrink a bit.
* more memory
*/
int shrink_dqcache_memory(int priority, unsigned int gfp_mask)
int shrink_dqcache_memory(int ratio, unsigned int gfp_mask)
{
int count = 0;
int entries = dqstats.allocated_dquots / ratio + 1;
lock_kernel();
count = dqstats.free_dquots / priority;
prune_dqcache(count);
prune_dqcache(entries);
unlock_kernel();
kmem_cache_shrink(dquot_cachep);
return 0;
return entries;
}
/*
......
......@@ -386,10 +386,11 @@ void prune_icache(int goal)
count = 0;
entry = inode_unused.prev;
while (entry != &inode_unused)
{
for(; goal; goal--) {
struct list_head *tmp = entry;
if (entry == &inode_unused)
break;
entry = entry->prev;
inode = INODE(tmp);
if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
......@@ -403,8 +404,6 @@ void prune_icache(int goal)
list_add(tmp, freeable);
inode->i_state |= I_FREEING;
count++;
if (!--goal)
break;
}
inodes_stat.nr_unused -= count;
spin_unlock(&inode_lock);
......@@ -414,19 +413,11 @@ void prune_icache(int goal)
/*
* This is called from kswapd when we think we need some
* more memory, but aren't really sure how much. So we
* carefully try to free a _bit_ of our icache, but not
* too much.
*
* Priority:
* 1 - very urgent: shrink everything
* ...
* 6 - base-level: try to shrink a bit.
* more memory.
*/
int shrink_icache_memory(int priority, int gfp_mask)
int shrink_icache_memory(int ratio, unsigned int gfp_mask)
{
int count = 0;
int entries = inodes_stat.nr_inodes / ratio + 1;
/*
* Nasty deadlock avoidance..
*
......@@ -437,12 +428,10 @@ int shrink_icache_memory(int priority, int gfp_mask)
if (!(gfp_mask & __GFP_FS))
return 0;
count = inodes_stat.nr_unused / priority;
prune_icache(count);
kmem_cache_shrink(inode_cachep);
return 0;
prune_icache(entries);
return entries;
}
EXPORT_SYMBOL(shrink_icache_memory);
/*
* Called with the inode lock held.
......
......@@ -252,7 +252,7 @@ static int flock_make_lock(struct file *filp,
return -ENOMEM;
fl->fl_file = filp;
fl->fl_pid = current->pid;
fl->fl_pid = current->tgid;
fl->fl_flags = (cmd & LOCK_NB) ? FL_FLOCK : FL_FLOCK | FL_SLEEP;
fl->fl_type = type;
fl->fl_end = OFFSET_MAX;
......@@ -308,7 +308,7 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
fl->fl_end = OFFSET_MAX;
fl->fl_owner = current->files;
fl->fl_pid = current->pid;
fl->fl_pid = current->tgid;
fl->fl_file = filp;
fl->fl_flags = FL_POSIX;
fl->fl_notify = NULL;
......@@ -348,7 +348,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
fl->fl_end = OFFSET_MAX;
fl->fl_owner = current->files;
fl->fl_pid = current->pid;
fl->fl_pid = current->tgid;
fl->fl_file = filp;
fl->fl_flags = FL_POSIX;
fl->fl_notify = NULL;
......@@ -377,7 +377,7 @@ static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
return -ENOMEM;
fl->fl_owner = current->files;
fl->fl_pid = current->pid;
fl->fl_pid = current->tgid;
fl->fl_file = filp;
fl->fl_flags = FL_LEASE;
......@@ -669,7 +669,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
int error;
fl.fl_owner = current->files;
fl.fl_pid = current->pid;
fl.fl_pid = current->tgid;
fl.fl_file = filp;
fl.fl_flags = FL_POSIX | FL_ACCESS | FL_SLEEP;
fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
......@@ -1241,7 +1241,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
*before = fl;
list_add(&fl->fl_link, &file_lock_list);
error = f_setown(filp, current->pid, 1);
error = f_setown(filp, current->tgid, 1);
out_unlock:
unlock_kernel();
return error;
......@@ -1632,7 +1632,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
lock.fl_owner = owner;
lock.fl_pid = current->pid;
lock.fl_pid = current->tgid;
lock.fl_file = filp;
if (filp->f_op && filp->f_op->lock != NULL) {
......
......@@ -40,7 +40,6 @@
#define XQUAD_PORTIO_BASE 0xfe400000
#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
#define XQUAD_PORTIO_LEN 0x80000 /* Only remapping first 2 quads */
#ifdef __KERNEL__
......
......@@ -116,7 +116,7 @@ static inline void down(struct semaphore * sem)
#if WAITQUEUE_DEBUG
CHECK_MAGIC(sem->__magic);
#endif
might_sleep();
__asm__ __volatile__(
"# atomic down operation\n\t"
LOCK "decl %0\n\t" /* --sem->count */
......@@ -142,7 +142,7 @@ static inline int down_interruptible(struct semaphore * sem)
#if WAITQUEUE_DEBUG
CHECK_MAGIC(sem->__magic);
#endif
might_sleep();
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
LOCK "decl %1\n\t" /* --sem->count */
......
......@@ -186,7 +186,7 @@ extern int shrink_dcache_memory(int, unsigned int);
extern void prune_dcache(int);
/* icache memory management (defined in linux/fs/inode.c) */
extern int shrink_icache_memory(int, int);
extern int shrink_icache_memory(int, unsigned int);
extern void prune_icache(int);
/* quota cache memory management (defined in linux/fs/dquot.c) */
......
......@@ -52,12 +52,10 @@ extern inline struct list_head *elv_get_sort_head(request_queue_t *, struct requ
extern elevator_t elevator_noop;
/*
* elevator linus. based on linus ideas of starvation control, using
* sequencing to manage inserts and merges.
* deadline i/o scheduler. uses request time outs to prevent indefinite
* starvation
*/
extern elevator_t elevator_linus;
#define elv_linus_sequence(rq) ((long)(rq)->elevator_private)
#define ELV_LINUS_SEEK_COST 16
extern elevator_t iosched_deadline;
/*
* use the /proc/iosched interface, all the below is history ->
......
......@@ -40,6 +40,13 @@
struct completion;
#ifdef CONFIG_DEBUG_KERNEL
void __might_sleep(char *file, int line);
#define might_sleep() __might_sleep(__FILE__, __LINE__)
#else
#define might_sleep() do {} while(0)
#endif
extern struct notifier_block *panic_notifier_list;
NORET_TYPE void panic(const char * fmt, ...)
__attribute__ ((NORET_AND format (printf, 1, 2)));
......
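The might_sleep() hook defined above is wired into the blocking primitives touched elsewhere in this diff (down(), down_interruptible(), down_read(), down_write(), __alloc_pages(), __kmem_cache_alloc()); with CONFIG_DEBUG_KERNEL it flags callers that could sleep while in atomic context. Where blocking is only conditional, the check is guarded on the caller's flags, as in this minimal sketch of the allocator pattern used below:

	/* Only a __GFP_WAIT allocation may block, so only then is the
	 * sleep-in-atomic check meaningful. */
	if (gfp_mask & __GFP_WAIT)
		might_sleep();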
......@@ -524,6 +524,7 @@ extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned lon
extern struct page * vmalloc_to_page(void *addr);
extern unsigned long get_page_cache_size(void);
extern unsigned int nr_used_zone_pages(void);
#endif /* __KERNEL__ */
......
......@@ -74,9 +74,15 @@ static inline void ___add_to_page_cache(struct page *page,
inc_page_state(nr_pagecache);
}
extern void FASTCALL(lock_page(struct page *page));
extern void FASTCALL(__lock_page(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));
static inline void lock_page(struct page *page)
{
if (TestSetPageLocked(page))
__lock_page(page);
}
/*
* This is exported only for wait_on_page_locked/wait_on_page_writeback.
* Never use this directly!
......
......@@ -40,6 +40,7 @@ extern void FASTCALL(rwsemtrace(struct rw_semaphore *sem, const char *str));
*/
static inline void down_read(struct rw_semaphore *sem)
{
might_sleep();
rwsemtrace(sem,"Entering down_read");
__down_read(sem);
rwsemtrace(sem,"Leaving down_read");
......@@ -62,6 +63,7 @@ static inline int down_read_trylock(struct rw_semaphore *sem)
*/
static inline void down_write(struct rw_semaphore *sem)
{
might_sleep();
rwsemtrace(sem,"Entering down_write");
__down_write(sem);
rwsemtrace(sem,"Leaving down_write");
......
......@@ -100,8 +100,9 @@ extern unsigned long nr_uninterruptible(void);
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
#define TASK_ZOMBIE 4
#define TASK_STOPPED 8
#define TASK_STOPPED 4
#define TASK_ZOMBIE 8
#define TASK_DEAD 16
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
......
......@@ -119,6 +119,32 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
_raced; \
})
/*
* Waitqueue's which are removed from the waitqueue_head at wakeup time
*/
void FASTCALL(prepare_to_wait(wait_queue_head_t *q,
wait_queue_t *wait, int state));
void FASTCALL(prepare_to_wait_exclusive(wait_queue_head_t *q,
wait_queue_t *wait, int state));
void FASTCALL(finish_wait(wait_queue_head_t *q, wait_queue_t *wait));
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync);
#define DEFINE_WAIT(name) \
wait_queue_t name = { \
.task = current, \
.func = autoremove_wake_function, \
.task_list = { .next = &name.task_list, \
.prev = &name.task_list, \
}, \
}
#define init_wait(wait) \
do { \
wait->task = current; \
wait->func = autoremove_wake_function; \
INIT_LIST_HEAD(&wait->task_list); \
} while (0)
#endif /* __KERNEL__ */
#endif
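The prepare_to_wait()/finish_wait() interface declared above (implemented further down in this diff) replaces the open-coded add_wait_queue()/set_current_state()/remove_wait_queue() sequences that this patch converts in get_request_wait(), blk_congestion_wait(), __wait_on_buffer(), wait_on_page_bit() and __lock_page(). A minimal sketch of the resulting wait loop, with a hypothetical condition() standing in for the caller's predicate and wq for the caller's wait_queue_head_t:

	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&wq, &wait, TASK_UNINTERRUPTIBLE);
		if (condition())	/* caller-specific wakeup condition */
			break;
		schedule();
	}
	finish_wait(&wq, &wait);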
......@@ -32,6 +32,7 @@ int getrusage(struct task_struct *, int, struct rusage *);
static struct dentry * __unhash_process(struct task_struct *p)
{
struct dentry *proc_dentry;
nr_threads--;
detach_pid(p, PIDTYPE_PID);
detach_pid(p, PIDTYPE_TGID);
......@@ -57,31 +58,31 @@ static struct dentry * __unhash_process(struct task_struct *p)
void release_task(struct task_struct * p)
{
struct dentry *proc_dentry;
task_t *leader;
if (p->state != TASK_ZOMBIE)
if (p->state < TASK_ZOMBIE)
BUG();
if (p != current)
wait_task_inactive(p);
atomic_dec(&p->user->processes);
security_ops->task_free_security(p);
free_uid(p->user);
if (unlikely(p->ptrace)) {
write_lock_irq(&tasklist_lock);
if (unlikely(p->ptrace))
__ptrace_unlink(p);
write_unlock_irq(&tasklist_lock);
}
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
write_lock_irq(&tasklist_lock);
__exit_sighand(p);
proc_dentry = __unhash_process(p);
/*
* If we are the last non-leader member of the thread
* group, and the leader is zombie, then notify the
* group leader's parent process.
* group leader's parent process. (if it wants notification.)
*/
if (p->group_leader != p && thread_group_empty(p))
do_notify_parent(p->group_leader, p->group_leader->exit_signal);
leader = p->group_leader;
if (leader != p && thread_group_empty(leader) &&
leader->state == TASK_ZOMBIE && leader->exit_signal != -1)
do_notify_parent(leader, leader->exit_signal);
p->parent->cutime += p->utime + p->cutime;
p->parent->cstime += p->stime + p->cstime;
......@@ -159,7 +160,7 @@ static int __will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
if (p == ignored_task
|| p->state == TASK_ZOMBIE
|| p->state >= TASK_ZOMBIE
|| p->real_parent->pid == 1)
continue;
if (p->real_parent->pgrp != pgrp
......@@ -435,8 +436,11 @@ void exit_mm(struct task_struct *tsk)
static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
{
/* Make sure we're not reparenting to ourselves. */
if (p == reaper)
/*
* Make sure we're not reparenting to ourselves and that
* the parent is not a zombie.
*/
if (p == reaper || reaper->state >= TASK_ZOMBIE)
p->real_parent = child_reaper;
else
p->real_parent = reaper;
......@@ -774,9 +778,10 @@ static int eligible_child(pid_t pid, int options, task_t *p)
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
int flag, retval;
DECLARE_WAITQUEUE(wait, current);
struct task_struct *tsk;
unsigned long state;
int flag, retval;
if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
return -EINVAL;
......@@ -827,7 +832,15 @@ asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struc
*/
if (ret == 2)
continue;
/*
* Try to move the task's state to DEAD
* only one thread is allowed to do this:
*/
state = xchg(&p->state, TASK_DEAD);
if (state != TASK_ZOMBIE)
continue;
read_unlock(&tasklist_lock);
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
if (!retval && stat_addr) {
if (p->sig->group_exit)
......@@ -835,13 +848,16 @@ asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struc
else
retval = put_user(p->exit_code, stat_addr);
}
if (retval)
if (retval) {
p->state = TASK_ZOMBIE;
goto end_wait4;
}
retval = p->pid;
if (p->real_parent != p->parent) {
write_lock_irq(&tasklist_lock);
__ptrace_unlink(p);
do_notify_parent(p, SIGCHLD);
p->state = TASK_ZOMBIE;
write_unlock_irq(&tasklist_lock);
} else
release_task(p);
......
......@@ -103,6 +103,52 @@ void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
spin_unlock_irqrestore(&q->lock, flags);
}
void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
__set_current_state(state);
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
void
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
__set_current_state(state);
wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue_tail(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
__set_current_state(TASK_RUNNING);
if (!list_empty(&wait->task_list)) {
spin_lock_irqsave(&q->lock, flags);
list_del_init(&wait->task_list);
spin_unlock_irqrestore(&q->lock, flags);
}
}
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync)
{
int ret = default_wake_function(wait, mode, sync);
if (ret)
list_del_init(&wait->task_list);
return ret;
}
void __init fork_init(unsigned long mempages)
{
/* create a slab on which task_structs can be allocated */
......
......@@ -400,6 +400,10 @@ EXPORT_SYMBOL(irq_stat);
EXPORT_SYMBOL(add_wait_queue);
EXPORT_SYMBOL(add_wait_queue_exclusive);
EXPORT_SYMBOL(remove_wait_queue);
EXPORT_SYMBOL(prepare_to_wait);
EXPORT_SYMBOL(prepare_to_wait_exclusive);
EXPORT_SYMBOL(finish_wait);
EXPORT_SYMBOL(autoremove_wake_function);
/* completion handling */
EXPORT_SYMBOL(wait_for_completion);
......@@ -493,7 +497,9 @@ EXPORT_SYMBOL(jiffies_64);
EXPORT_SYMBOL(xtime);
EXPORT_SYMBOL(do_gettimeofday);
EXPORT_SYMBOL(do_settimeofday);
#ifdef CONFIG_DEBUG_KERNEL
EXPORT_SYMBOL(__might_sleep);
#endif
#if !defined(__ia64__)
EXPORT_SYMBOL(loops_per_jiffy);
#endif
......
......@@ -53,6 +53,8 @@ static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
static pidmap_t *map_limit = pidmap_array + PIDMAP_ENTRIES;
static spinlock_t pidmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
inline void free_pidmap(int pid)
{
pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
......@@ -77,8 +79,13 @@ static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps)
* Free the page if someone raced with us
* installing it:
*/
if (cmpxchg(&map->page, NULL, (void *) page))
spin_lock(&pidmap_lock);
if (map->page)
free_page(page);
else
map->page = (void *)page;
spin_unlock(&pidmap_lock);
if (!map->page)
break;
}
......
......@@ -2150,3 +2150,20 @@ void __init sched_init(void)
enter_lazy_tlb(&init_mm, current, smp_processor_id());
}
#ifdef CONFIG_DEBUG_KERNEL
void __might_sleep(char *file, int line)
{
#if defined(in_atomic)
static unsigned long prev_jiffy; /* ratelimiting */
if (in_atomic()) {
if (time_before(jiffies, prev_jiffy + HZ))
return;
prev_jiffy = jiffies;
printk("Sleeping function called from illegal"
" context at %s:%d\n", file, line);
dump_stack();
}
#endif
}
#endif
......@@ -888,20 +888,6 @@ asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
return -EINVAL;
if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
current->policy != SCHED_NORMAL)
{
/*
* Short delay requests up to 2 ms will be handled with
* high precision by a busy wait for all real-time processes.
*
* Its important on SMP not to do this holding locks.
*/
udelay((t.tv_nsec + 999) / 1000);
return 0;
}
expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
current->state = TASK_INTERRUPTIBLE;
......
......@@ -632,19 +632,15 @@ static inline wait_queue_head_t *page_waitqueue(struct page *page)
void wait_on_page_bit(struct page *page, int bit_nr)
{
wait_queue_head_t *waitqueue = page_waitqueue(page);
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
DEFINE_WAIT(wait);
add_wait_queue(waitqueue, &wait);
do {
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (!test_bit(bit_nr, &page->flags))
break;
prepare_to_wait(waitqueue, &wait, TASK_UNINTERRUPTIBLE);
sync_page(page);
if (test_bit(bit_nr, &page->flags))
schedule();
} while (test_bit(bit_nr, &page->flags));
__set_task_state(tsk, TASK_RUNNING);
remove_wait_queue(waitqueue, &wait);
finish_wait(waitqueue, &wait);
}
EXPORT_SYMBOL(wait_on_page_bit);
......@@ -690,38 +686,27 @@ void end_page_writeback(struct page *page)
EXPORT_SYMBOL(end_page_writeback);
/*
* Get a lock on the page, assuming we need to sleep
* to get it..
* Get a lock on the page, assuming we need to sleep to get it.
*
* Ugly: running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
* random driver's requestfn sets TASK_RUNNING, we could busywait. However
* chances are that on the second loop, the block layer's plug list is empty,
* so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
*/
static void __lock_page(struct page *page)
void __lock_page(struct page *page)
{
wait_queue_head_t *waitqueue = page_waitqueue(page);
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
wait_queue_head_t *wqh = page_waitqueue(page);
DEFINE_WAIT(wait);
add_wait_queue_exclusive(waitqueue, &wait);
for (;;) {
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (PageLocked(page)) {
while (TestSetPageLocked(page)) {
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
sync_page(page);
if (PageLocked(page))
schedule();
}
if (!TestSetPageLocked(page))
break;
}
__set_task_state(tsk, TASK_RUNNING);
remove_wait_queue(waitqueue, &wait);
}
/*
* Get an exclusive lock on the page, optimistically
* assuming it's not locked..
*/
void lock_page(struct page *page)
{
if (TestSetPageLocked(page))
__lock_page(page);
finish_wait(wqh, &wait);
}
EXPORT_SYMBOL(__lock_page);
/*
* a rather lightweight function, finding and getting a reference to a
......
......@@ -187,7 +187,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
* Try to merge with the previous vma.
*/
if (mprotect_attempt_merge(vma, *pprev, end, newflags))
return 0;
goto success;
} else {
error = split_vma(mm, vma, start, 1);
if (error)
......@@ -209,7 +209,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
vma->vm_flags = newflags;
vma->vm_page_prot = newprot;
spin_unlock(&mm->page_table_lock);
success:
change_protection(vma, start, end, newprot);
return 0;
......
......@@ -321,6 +321,9 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
struct page * page;
int freed, i;
if (gfp_mask & __GFP_WAIT)
might_sleep();
KERNEL_STAT_ADD(pgalloc, 1<<order);
zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
......@@ -479,6 +482,17 @@ unsigned int nr_free_pages(void)
return sum;
}
unsigned int nr_used_zone_pages(void)
{
unsigned int pages = 0;
struct zone *zone;
for_each_zone(zone)
pages += zone->nr_active + zone->nr_inactive;
return pages;
}
static unsigned int nr_free_zone_pages(int offset)
{
pg_data_t *pgdat;
......
......@@ -79,9 +79,9 @@ static unsigned long last_empty_jifs;
*/
struct pdflush_work {
struct task_struct *who; /* The thread */
void (*fn)(unsigned long); /* A callback function for pdflush to work on */
unsigned long arg0; /* An argument to the callback function */
struct list_head list; /* On pdflush_list, when the thread is idle */
void (*fn)(unsigned long); /* A callback function */
unsigned long arg0; /* An argument to the callback */
struct list_head list; /* On pdflush_list, when idle */
unsigned long when_i_went_to_sleep;
};
......@@ -99,23 +99,34 @@ static int __pdflush(struct pdflush_work *my_work)
current->flags |= PF_FLUSHER;
my_work->fn = NULL;
my_work->who = current;
INIT_LIST_HEAD(&my_work->list);
spin_lock_irq(&pdflush_lock);
nr_pdflush_threads++;
// printk("pdflush %d [%d] starts\n", nr_pdflush_threads, current->pid);
for ( ; ; ) {
struct pdflush_work *pdf;
list_add(&my_work->list, &pdflush_list);
my_work->when_i_went_to_sleep = jiffies;
set_current_state(TASK_INTERRUPTIBLE);
list_move(&my_work->list, &pdflush_list);
my_work->when_i_went_to_sleep = jiffies;
spin_unlock_irq(&pdflush_lock);
if (current->flags & PF_FREEZE)
refrigerator(PF_IOTHREAD);
schedule();
if (my_work->fn)
spin_lock_irq(&pdflush_lock);
if (!list_empty(&my_work->list)) {
printk("pdflush: bogus wakeup!\n");
my_work->fn = NULL;
continue;
}
if (my_work->fn == NULL) {
printk("pdflush: NULL work function\n");
continue;
}
spin_unlock_irq(&pdflush_lock);
(*my_work->fn)(my_work->arg0);
/*
......@@ -132,6 +143,7 @@ static int __pdflush(struct pdflush_work *my_work)
}
spin_lock_irq(&pdflush_lock);
my_work->fn = NULL;
/*
* Thread destruction: For how long has the sleepiest
......@@ -143,13 +155,12 @@ static int __pdflush(struct pdflush_work *my_work)
continue;
pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) {
pdf->when_i_went_to_sleep = jiffies; /* Limit exit rate */
/* Limit exit rate */
pdf->when_i_went_to_sleep = jiffies;
break; /* exeunt */
}
my_work->fn = NULL;
}
nr_pdflush_threads--;
// printk("pdflush %d [%d] ends\n", nr_pdflush_threads, current->pid);
spin_unlock_irq(&pdflush_lock);
return 0;
}
......@@ -191,11 +202,10 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
list_del_init(&pdf->list);
if (list_empty(&pdflush_list))
last_empty_jifs = jiffies;
spin_unlock_irqrestore(&pdflush_lock, flags);
pdf->fn = fn;
pdf->arg0 = arg0;
wmb(); /* ? */
wake_up_process(pdf->who);
spin_unlock_irqrestore(&pdflush_lock, flags);
}
return ret;
}
......
......@@ -1370,6 +1370,9 @@ static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
unsigned long save_flags;
void* objp;
if (flags & __GFP_WAIT)
might_sleep();
kmem_cache_alloc_head(cachep, flags);
try_again:
local_irq_save(save_flags);
......@@ -1496,7 +1499,11 @@ static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
if (unlikely(!--slabp->inuse)) {
/* Was partial or full, now empty. */
list_del(&slabp->list);
list_add(&slabp->list, &cachep->slabs_free);
/* list_add(&slabp->list, &cachep->slabs_free); */
if (unlikely(list_empty(&cachep->slabs_partial)))
list_add(&slabp->list, &cachep->slabs_partial);
else
kmem_slab_destroy(cachep, slabp);
} else if (unlikely(inuse == cachep->num)) {
/* Was full. */
list_del(&slabp->list);
......@@ -1970,7 +1977,7 @@ static int s_show(struct seq_file *m, void *p)
}
list_for_each(q,&cachep->slabs_partial) {
slabp = list_entry(q, slab_t, list);
if (slabp->inuse == cachep->num || !slabp->inuse)
if (slabp->inuse == cachep->num)
BUG();
active_objs += slabp->inuse;
active_slabs++;
......
......@@ -70,6 +70,10 @@
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
#ifndef CONFIG_QUOTA
#define shrink_dqcache_memory(ratio, gfp_mask) do { } while (0)
#endif
/* Must be called with page's pte_chain_lock held. */
static inline int page_mapping_inuse(struct page * page)
{
......@@ -97,7 +101,7 @@ static inline int is_page_cache_freeable(struct page *page)
static /* inline */ int
shrink_list(struct list_head *page_list, int nr_pages,
unsigned int gfp_mask, int *max_scan)
unsigned int gfp_mask, int *max_scan, int *nr_mapped)
{
struct address_space *mapping;
LIST_HEAD(ret_pages);
......@@ -116,6 +120,10 @@ shrink_list(struct list_head *page_list, int nr_pages,
if (TestSetPageLocked(page))
goto keep;
/* Double the slab pressure for mapped and swapcache pages */
if (page_mapped(page) || PageSwapCache(page))
(*nr_mapped)++;
BUG_ON(PageActive(page));
may_enter_fs = (gfp_mask & __GFP_FS) ||
(PageSwapCache(page) && (gfp_mask & __GFP_IO));
......@@ -320,7 +328,7 @@ shrink_list(struct list_head *page_list, int nr_pages,
*/
static /* inline */ int
shrink_cache(int nr_pages, struct zone *zone,
unsigned int gfp_mask, int max_scan)
unsigned int gfp_mask, int max_scan, int *nr_mapped)
{
LIST_HEAD(page_list);
struct pagevec pvec;
......@@ -371,7 +379,8 @@ shrink_cache(int nr_pages, struct zone *zone,
max_scan -= nr_scan;
KERNEL_STAT_ADD(pgscan, nr_scan);
nr_pages = shrink_list(&page_list,nr_pages,gfp_mask,&max_scan);
nr_pages = shrink_list(&page_list, nr_pages,
gfp_mask, &max_scan, nr_mapped);
if (nr_pages <= 0 && list_empty(&page_list))
goto done;
......@@ -522,14 +531,10 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
static /* inline */ int
shrink_zone(struct zone *zone, int max_scan,
unsigned int gfp_mask, int nr_pages)
unsigned int gfp_mask, int nr_pages, int *nr_mapped)
{
unsigned long ratio;
/* This is bogus for ZONE_HIGHMEM? */
if (kmem_cache_reap(gfp_mask) >= nr_pages)
return 0;
/*
* Try to keep the active list 2/3 of the size of the cache. And
* make sure that refill_inactive is given a decent number of pages.
......@@ -547,7 +552,8 @@ shrink_zone(struct zone *zone, int max_scan,
atomic_sub(SWAP_CLUSTER_MAX, &zone->refill_counter);
refill_inactive_zone(zone, SWAP_CLUSTER_MAX);
}
nr_pages = shrink_cache(nr_pages, zone, gfp_mask, max_scan);
nr_pages = shrink_cache(nr_pages, zone, gfp_mask,
max_scan, nr_mapped);
return nr_pages;
}
......@@ -557,6 +563,9 @@ shrink_caches(struct zone *classzone, int priority,
{
struct zone *first_classzone;
struct zone *zone;
int ratio;
int nr_mapped = 0;
int pages = nr_used_zone_pages();
first_classzone = classzone->zone_pgdat->node_zones;
for (zone = classzone; zone >= first_classzone; zone--) {
......@@ -581,16 +590,28 @@ shrink_caches(struct zone *classzone, int priority,
max_scan = zone->nr_inactive >> priority;
if (max_scan < to_reclaim * 2)
max_scan = to_reclaim * 2;
unreclaimed = shrink_zone(zone, max_scan, gfp_mask, to_reclaim);
unreclaimed = shrink_zone(zone, max_scan,
gfp_mask, to_reclaim, &nr_mapped);
nr_pages -= to_reclaim - unreclaimed;
*total_scanned += max_scan;
}
shrink_dcache_memory(priority, gfp_mask);
shrink_icache_memory(1, gfp_mask);
#ifdef CONFIG_QUOTA
shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
#endif
/*
* Here we assume it costs one seek to replace a lru page and that
* it also takes a seek to recreate a cache object. With this in
* mind we age equal percentages of the lru and ageable caches.
* This should balance the seeks generated by these structures.
*
* NOTE: for now I do this for all zones. If we find this is too
* aggressive on large boxes we may want to exclude ZONE_HIGHMEM
*
* If we're encountering mapped pages on the LRU then increase the
* pressure on slab to avoid swapping.
*/
ratio = (pages / (*total_scanned + nr_mapped + 1)) + 1;
shrink_dcache_memory(ratio, gfp_mask);
shrink_icache_memory(ratio, gfp_mask);
shrink_dqcache_memory(ratio, gfp_mask);
return nr_pages;
}
......
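A rough worked example of the slab-pressure ratio computed in shrink_caches() above, with made-up numbers; the point is that the dcache, icache and dquot caches are aged by roughly the same fraction as the page LRU:

	/* Illustrative numbers only. */
	int pages = 200000;			/* nr_used_zone_pages() */
	int scanned = 9999;			/* *total_scanned + nr_mapped */
	int ratio = pages / (scanned + 1) + 1;	/* = 21 */

	/* shrink_dcache_memory(21, ...) then prunes about
	 * dentry_stat.nr_dentry / 21 + 1 dentries, and likewise for
	 * the inode and dquot caches. */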