Commit 2ce067b0 authored by Linus Torvalds's avatar Linus Torvalds

Merge bk://ldm.bkbits.net/linux-2.5

into home.transmeta.com:/home/torvalds/v2.5/linux
parents 56d8b39d 4ab1a3e6
...@@ -121,7 +121,7 @@ static int vidport; ...@@ -121,7 +121,7 @@ static int vidport;
static int lines, cols; static int lines, cols;
#ifdef CONFIG_MULTIQUAD #ifdef CONFIG_MULTIQUAD
static void * const xquad_portio = NULL; static void * xquad_portio = NULL;
#endif #endif
#include "../../../../lib/inflate.c" #include "../../../../lib/inflate.c"
......
...@@ -1060,11 +1060,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus) ...@@ -1060,11 +1060,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
if (clustered_apic_mode && (numnodes > 1)) { if (clustered_apic_mode && (numnodes > 1)) {
printk("Remapping cross-quad port I/O for %d quads\n", printk("Remapping cross-quad port I/O for %d quads\n",
numnodes); numnodes);
xquad_portio = ioremap (XQUAD_PORTIO_BASE,
numnodes * XQUAD_PORTIO_QUAD);
printk("xquad_portio vaddr 0x%08lx, len %08lx\n", printk("xquad_portio vaddr 0x%08lx, len %08lx\n",
(u_long) xquad_portio, (u_long) xquad_portio,
(u_long) numnodes * XQUAD_PORTIO_LEN); (u_long) numnodes * XQUAD_PORTIO_QUAD);
xquad_portio = ioremap (XQUAD_PORTIO_BASE,
numnodes * XQUAD_PORTIO_LEN);
} }
/* /*
......
...@@ -272,10 +272,9 @@ get_addr(unsigned long addr, unsigned long len) ...@@ -272,10 +272,9 @@ get_addr(unsigned long addr, unsigned long len)
return -ENOMEM; return -ENOMEM;
if (!vma || ((addr + len) < vma->vm_start)) if (!vma || ((addr + len) < vma->vm_start))
goto found_addr; goto found_addr;
addr = vma->vm_end; addr = HPAGE_ALIGN(vma->vm_end);
} }
found_addr: found_addr:
addr = HPAGE_ALIGN(addr);
return addr; return addr;
} }
......
...@@ -9,9 +9,9 @@ ...@@ -9,9 +9,9 @@
# #
export-objs := elevator.o ll_rw_blk.o loop.o genhd.o acsi.o \ export-objs := elevator.o ll_rw_blk.o loop.o genhd.o acsi.o \
block_ioctl.o block_ioctl.o deadline-iosched.o
obj-y := elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o obj-y := elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o deadline-iosched.o
obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o
......
/*
* linux/drivers/block/deadline-iosched.c
*
* Deadline i/o scheduler.
*
* Copyright (C) 2002 Jens Axboe <axboe@suse.de>
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/blk.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
/*
* feel free to try other values :-). read_expire value is the timeout for
* reads, our goal is to start a request "around" the time when it expires.
* fifo_batch is how many steps along the sorted list we will take when the
* front fifo request expires.
*/
/* default tunables; copied into each queue's deadline_data at init time */
static int read_expire = HZ / 2;	/* 500ms start timeout */
static int fifo_batch = 64;		/* 4 seeks, or 64 contig */
static int seek_cost = 16;		/* seek is 16 times more expensive */
/*
 * how many times reads are allowed to starve writes
 */
static int writes_starved = 2;
/* back-merge hash: requests are hashed on their end sector */
static const int deadline_hash_shift = 8;
#define DL_HASH_BLOCK(sec)	((sec) >> 3)
#define DL_HASH_FN(sec)		(hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
#define DL_HASH_ENTRIES		(1 << deadline_hash_shift)
/*
 * bump the queue-wide hash generation counter, skipping 0 (a zero stamp
 * marks a request as off-hash); every already-hashed request becomes
 * stale and is lazily pruned on the next bucket walk
 */
#define DL_INVALIDATE_HASH(dd)				\
	do {						\
		if (!++(dd)->hash_valid_count)		\
			(dd)->hash_valid_count = 1;	\
	} while (0)
/*
 * per-queue scheduler state, hung off q->elevator.elevator_data
 */
struct deadline_data {
	/*
	 * run time data
	 */
	struct list_head sort_list[2];	/* sector-sorted lists, one per data direction */
	struct list_head read_fifo;	/* reads in submission (and thus expiry) order */
	struct list_head *dispatch;	/* driver dispatch queue */
	struct list_head *hash;		/* request hash table, for back merges */
	sector_t last_sector;		/* last sector sent to drive */
	unsigned long hash_valid_count;	/* hash generation; bumped to flush hash */
	unsigned int starved;		/* how often writes have been starved */
	/*
	 * settings that change how the i/o scheduler behaves
	 */
	unsigned int fifo_batch;	/* cost budget for one dispatch batch */
	unsigned long read_expire;	/* read deadline, in jiffies */
	unsigned int seek_cost;		/* batch cost of a non-contiguous request */
	unsigned int writes_starved;	/* read batches allowed before writes run */
};
/*
 * per-request data, allocated from drq_pool and reachable through
 * rq->elevator_private (see RQ_DATA below).
 */
struct deadline_rq {
	struct list_head fifo;		/* entry on dd->read_fifo (reads only) */
	struct list_head hash;		/* entry in a dd->hash[] bucket */
	unsigned long hash_valid_count;	/* generation stamp; 0 = not hashed */
	struct request *request;	/* the request this shadows */
	unsigned long expires;		/* jiffies deadline (reads only) */
};
/* slab cache for struct deadline_rq, created at module init */
static kmem_cache_t *drq_pool;
/* fetch the deadline_rq shadowing a request */
#define RQ_DATA(rq)	((struct deadline_rq *) (rq)->elevator_private)
/*
 * rq hash
 */
static inline void __deadline_del_rq_hash(struct deadline_rq *drq)
{
	/* clear the generation stamp so ON_HASH() sees the drq as unhashed */
	drq->hash_valid_count = 0;
	list_del_init(&drq->hash);
}
/* nonzero generation stamp <=> currently on the merge hash */
#define ON_HASH(drq)	(drq)->hash_valid_count
static inline void deadline_del_rq_hash(struct deadline_rq *drq)
{
	/* safe to call on an unhashed drq: only unlink when actually hashed */
	if (ON_HASH(drq))
		__deadline_del_rq_hash(drq);
}
/*
 * hash drq on the end sector of its request and stamp it with the
 * current hash generation; caller must ensure it is not already hashed
 */
static inline void
deadline_add_rq_hash(struct deadline_data *dd, struct deadline_rq *drq)
{
	struct request *rq = drq->request;
	BUG_ON(ON_HASH(drq));
	drq->hash_valid_count = dd->hash_valid_count;
	list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq->sector +rq->nr_sectors)]);
}
#define list_entry_hash(ptr)	list_entry((ptr), struct deadline_rq, hash)
/*
 * look up a request whose last sector is exactly @offset, so that a bio
 * starting at @offset can be back merged into it. entries that went
 * stale (old generation) or are no longer mergeable are pruned lazily
 * while walking the bucket. returns NULL when no candidate exists.
 */
static struct request *
deadline_find_hash(struct deadline_data *dd, sector_t offset)
{
	struct list_head *bucket = &dd->hash[DL_HASH_FN(offset)];
	struct list_head *pos = bucket->next;

	while (pos != bucket) {
		struct deadline_rq *cand = list_entry_hash(pos);
		struct request *crq = cand->request;

		/* grab the successor first, pruning may unlink @pos */
		pos = pos->next;

		BUG_ON(!cand->hash_valid_count);

		if (cand->hash_valid_count != dd->hash_valid_count
		    || !rq_mergeable(crq)) {
			__deadline_del_rq_hash(cand);
			continue;
		}

		if (crq->sector + crq->nr_sectors == offset)
			return crq;
	}

	return NULL;
}
/*
 * decide whether and where @bio can be merged with a pending request.
 * tries, in order: the cached last merge point, a back merge via the
 * hash (bio starts where a request ends), then a back-to-front scan of
 * the sorted list for a front merge, remembering a suitable insertion
 * point (*req) for the no-merge case along the way.
 * returns ELEVATOR_{NO,BACK,FRONT}_MERGE.
 */
static int
deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	struct deadline_data *dd = q->elevator.elevator_data;
	const int data_dir = bio_data_dir(bio);
	struct list_head *entry, *sort_list;
	struct request *__rq;
	int ret;

	/*
	 * try last_merge to avoid going to hash
	 */
	ret = elv_try_last_merge(q, req, bio);
	if (ret != ELEVATOR_NO_MERGE)
		goto out;

	/*
	 * see if the merge hash can satisfy a back merge
	 */
	if ((__rq = deadline_find_hash(dd, bio->bi_sector))) {
		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);

		if (elv_rq_merge_ok(__rq, bio)) {
			*req = __rq;
			q->last_merge = &__rq->queuelist;
			ret = ELEVATOR_BACK_MERGE;
			goto out_ret;
		}
	}

	entry = sort_list = &dd->sort_list[data_dir];
	while ((entry = entry->prev) != sort_list) {
		__rq = list_entry_rq(entry);

		BUG_ON(__rq->flags & REQ_STARTED);

		if (!(__rq->flags & REQ_CMD))
			continue;

		if (!*req && bio_rq_in_between(bio, __rq, sort_list))
			*req = __rq;

		/* never merge across a barrier */
		if (__rq->flags & REQ_BARRIER)
			break;

		/*
		 * checking for a front merge, hash will miss those
		 */
		if (__rq->sector - bio_sectors(bio) == bio->bi_sector) {
			ret = elv_try_merge(__rq, bio);
			if (ret != ELEVATOR_NO_MERGE) {
				*req = __rq;
				q->last_merge = &__rq->queuelist;
				break;
			}
		}
	}

out:
	/*
	 * the merged request's end sector may have moved, so re-hash it
	 */
	if (ret != ELEVATOR_NO_MERGE) {
		struct deadline_rq *drq = RQ_DATA(*req);

		deadline_del_rq_hash(drq);
		deadline_add_rq_hash(dd, drq);
	}
out_ret:
	return ret;
}
/*
 * two requests are about to be merged: @next will disappear, so @req
 * inherits whichever of the two deadlines is earlier (taking over
 * next's fifo slot), and req is re-hashed since its end sector grows
 */
static void
deadline_merge_request(request_queue_t *q, struct request *req, struct request *next)
{
	struct deadline_data *dd = q->elevator.elevator_data;
	struct deadline_rq *dreq = RQ_DATA(req);
	struct deadline_rq *dnxt = RQ_DATA(next);

	BUG_ON(dreq == NULL);
	BUG_ON(dnxt == NULL);

	deadline_del_rq_hash(dreq);
	deadline_add_rq_hash(dd, dreq);

	/*
	 * if next expires before req, assign its expire time to req
	 * and move into next's position (next will be deleted) in fifo
	 */
	if (!list_empty(&dreq->fifo) && !list_empty(&dnxt->fifo)
	    && time_before(dnxt->expires, dreq->expires)) {
		list_move(&dreq->fifo, &dnxt->fifo);
		dreq->expires = dnxt->expires;
	}
}
/*
 * move request from sort list to dispatch queue. maybe remove from rq hash
 * here too?
 */
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
{
	struct deadline_rq *shadow = RQ_DATA(rq);

	/* off the read fifo, onto the tail of the dispatch queue */
	list_del_init(&shadow->fifo);
	list_move_tail(&rq->queuelist, dd->dispatch);
}
/*
 * move along sort list and move entries to dispatch queue, starting from rq.
 * each request contiguous with its predecessor costs 1 from the batch
 * budget, each seek costs dd->seek_cost; stop once the budget runs out
 * or the sort list is exhausted.
 */
static void deadline_move_requests(struct deadline_data *dd, struct request *rq)
{
	struct list_head *sort_head = &dd->sort_list[rq_data_dir(rq)];
	sector_t last_sec = dd->last_sector;
	int batch_count = dd->fifo_batch;
	do {
		/* grab the successor before rq is unlinked from the sort list */
		struct list_head *nxt = rq->queuelist.next;
		/*
		 * take it off the sort and fifo list, move
		 * to dispatch queue
		 */
		deadline_move_to_dispatch(dd, rq);
		/* sequential with the previous request is cheap, a seek is not */
		if (rq->sector == last_sec)
			batch_count--;
		else
			batch_count -= dd->seek_cost;
		if (nxt == sort_head)
			break;
		last_sec = rq->sector + rq->nr_sectors;
		rq = list_entry_rq(nxt);
	} while (batch_count > 0);
}
/*
 * returns 0 if there are no expired reads on the fifo, 1 otherwise
 */
#define list_entry_fifo(ptr)	list_entry((ptr), struct deadline_rq, fifo)
static inline int deadline_check_fifo(struct deadline_data *dd)
{
	if (!list_empty(&dd->read_fifo)) {
		/* the fifo is kept in submission order, so the head is oldest */
		struct deadline_rq *oldest = list_entry_fifo(dd->read_fifo.next);

		if (!time_before(jiffies, oldest->expires))
			return 1;
	}

	return 0;
}
/*
 * hand the driver its next request. priority order: whatever is already
 * on the dispatch queue; a batch of reads when the oldest read has
 * expired; a batch of reads from the sort list; a batch of writes.
 * reads defer to writes once they have starved them
 * dd->writes_starved times in a row.
 */
static struct request *deadline_next_request(request_queue_t *q)
{
	struct deadline_data *dd = q->elevator.elevator_data;
	struct deadline_rq *drq;
	struct list_head *nxt;
	struct request *rq;
	int writes;
	/*
	 * if still requests on the dispatch queue, just grab the first one
	 */
	if (!list_empty(&q->queue_head)) {
dispatch:
		rq = list_entry_rq(q->queue_head.next);
		/* remember where the head leaves off, for contiguity costing */
		dd->last_sector = rq->sector + rq->nr_sectors;
		return rq;
	}
	writes = !list_empty(&dd->sort_list[WRITE]);
	/*
	 * if we have expired entries on the fifo list, move some to dispatch
	 */
	if (deadline_check_fifo(dd)) {
		/* but reads must yield once they starve writes long enough */
		if (writes && (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;
		nxt = dd->read_fifo.next;
		drq = list_entry_fifo(nxt);
		deadline_move_requests(dd, drq->request);
		goto dispatch;
	}
	if (!list_empty(&dd->sort_list[READ])) {
		if (writes && (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;
		nxt = dd->sort_list[READ].next;
		deadline_move_requests(dd, list_entry_rq(nxt));
		goto dispatch;
	}
	/*
	 * either there are no reads expired or on sort list, or the reads
	 * have starved writes for too long. dispatch some writes
	 */
	if (writes) {
dispatch_writes:
		nxt = dd->sort_list[WRITE].next;
		deadline_move_requests(dd, list_entry_rq(nxt));
		/* writes got through; reset the starvation counter */
		dd->starved = 0;
		goto dispatch;
	}
	BUG_ON(!list_empty(&dd->sort_list[READ]));
	BUG_ON(writes);
	return NULL;
}
/*
 * insert a new request into the scheduler: onto the sort list (at the
 * caller-chosen spot, or at the tail), onto the merge hash if it is
 * mergeable, and onto the read fifo with a deadline if it is a read
 */
static void
deadline_add_request(request_queue_t *q, struct request *rq, struct list_head *insert_here)
{
	struct deadline_data *dd = q->elevator.elevator_data;
	struct deadline_rq *drq = RQ_DATA(rq);

	/*
	 * flush hash on barrier insert, as not to allow merges before a
	 * barrier.
	 */
	if (unlikely(rq->flags & REQ_BARRIER)) {
		DL_INVALIDATE_HASH(dd);
		q->last_merge = NULL;
	}

	/*
	 * add to sort list
	 */
	if (insert_here == NULL)
		insert_here = dd->sort_list[rq_data_dir(rq)].prev;

	list_add(&rq->queuelist, insert_here);

	/* non-fs requests never merge and never expire */
	if (unlikely(!(rq->flags & REQ_CMD)))
		return;

	if (rq_mergeable(rq)) {
		deadline_add_rq_hash(dd, drq);
		if (!q->last_merge)
			q->last_merge = &rq->queuelist;
	}

	if (rq_data_dir(rq) == READ) {
		/*
		 * set expire time and add to fifo list
		 */
		drq->expires = jiffies + dd->read_expire;
		list_add_tail(&drq->fifo, &dd->read_fifo);
	}
}
/*
 * a request is leaving the elevator: drop it from the fifo and the hash
 */
static void deadline_remove_request(request_queue_t *q, struct request *rq)
{
	struct deadline_rq *drq = RQ_DATA(rq);

	if (drq == NULL)
		return;

	list_del_init(&drq->fifo);
	deadline_del_rq_hash(drq);
}
/*
 * returns nonzero iff the scheduler holds no work at all
 */
static int deadline_queue_empty(request_queue_t *q)
{
	struct deadline_data *dd = q->elevator.elevator_data;

	if (list_empty(&q->queue_head)
	    && list_empty(&dd->sort_list[READ])
	    && list_empty(&dd->sort_list[WRITE])) {
		/* no reads anywhere means the read fifo must be drained too */
		BUG_ON(!list_empty(&dd->read_fifo));
		return 1;
	}

	return 0;
}
/*
 * hand back the sort list that requests of rq's data direction live on
 */
static struct list_head *
deadline_get_sort_head(request_queue_t *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator.elevator_data;
	const int ddir = rq_data_dir(rq);

	return &dd->sort_list[ddir];
}
/*
 * tear down scheduler state. the scheduler lists must already be empty;
 * walk both request free lists releasing the per-request deadline_rq
 * shadows, then free the hash table and the deadline_data itself.
 * also used by deadline_init() to unwind a partial initialization.
 */
static void deadline_exit(request_queue_t *q, elevator_t *e)
{
	struct deadline_data *dd = e->elevator_data;
	struct deadline_rq *drq;
	struct request *rq;
	int i;
	BUG_ON(!list_empty(&dd->read_fifo));
	BUG_ON(!list_empty(&dd->sort_list[READ]));
	BUG_ON(!list_empty(&dd->sort_list[WRITE]));
	for (i = READ; i <= WRITE; i++) {
		struct request_list *rl = &q->rq[i];
		struct list_head *entry = &rl->free;
		if (list_empty(&rl->free))
			continue;
		while ((entry = entry->next) != &rl->free) {
			rq = list_entry_rq(entry);
			/* NULL when deadline_init() bailed out early — skip */
			if ((drq = RQ_DATA(rq)) == NULL)
				continue;
			rq->elevator_private = NULL;
			kmem_cache_free(drq_pool, drq);
		}
	}
	kfree(dd->hash);
	kfree(dd);
}
/*
 * initialize elevator private data (deadline_data), and alloc a drq for
 * each request on the free lists. on any allocation failure everything
 * done so far is unwound via deadline_exit() and -ENOMEM is returned.
 */
static int deadline_init(request_queue_t *q, elevator_t *e)
{
	struct deadline_data *dd;
	struct deadline_rq *drq;
	struct request *rq;
	int i, ret = 0;

	if (!drq_pool)
		return -ENOMEM;

	dd = kmalloc(sizeof(*dd), GFP_KERNEL);
	if (!dd)
		return -ENOMEM;
	memset(dd, 0, sizeof(*dd));

	dd->hash = kmalloc(sizeof(struct list_head)*DL_HASH_ENTRIES,GFP_KERNEL);
	if (!dd->hash) {
		kfree(dd);
		return -ENOMEM;
	}

	for (i = 0; i < DL_HASH_ENTRIES; i++)
		INIT_LIST_HEAD(&dd->hash[i]);

	INIT_LIST_HEAD(&dd->read_fifo);
	INIT_LIST_HEAD(&dd->sort_list[READ]);
	INIT_LIST_HEAD(&dd->sort_list[WRITE]);
	dd->dispatch = &q->queue_head;
	dd->fifo_batch = fifo_batch;
	dd->read_expire = read_expire;
	dd->seek_cost = seek_cost;
	dd->hash_valid_count = 1;	/* generation 0 means "not hashed" */
	dd->writes_starved = writes_starved;
	e->elevator_data = dd;		/* needed by deadline_exit() below */

	/*
	 * attach a deadline_rq shadow to every request on the free lists
	 */
	for (i = READ; i <= WRITE; i++) {
		struct request_list *rl = &q->rq[i];
		struct list_head *entry = &rl->free;

		while ((entry = entry->next) != &rl->free) {
			rq = list_entry_rq(entry);

			drq = kmem_cache_alloc(drq_pool, GFP_KERNEL);
			if (!drq) {
				/*
				 * no point allocating shadows for the
				 * remaining requests when they will all be
				 * freed again right away -- bail out of both
				 * loops and unwind
				 */
				ret = -ENOMEM;
				goto out;
			}

			memset(drq, 0, sizeof(*drq));
			INIT_LIST_HEAD(&drq->fifo);
			INIT_LIST_HEAD(&drq->hash);
			drq->request = rq;
			rq->elevator_private = drq;
		}
	}
out:
	if (ret)
		deadline_exit(q, e);

	return ret;
}
/*
 * one-time module init: create the slab cache that per-request
 * deadline_rq shadows are allocated from. the scheduler cannot operate
 * without it, so failure here is fatal.
 */
static int __init deadline_slab_setup(void)
{
	drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
				     0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!drq_pool)
		panic("deadline: can't init slab pool\n");
	return 0;
}
module_init(deadline_slab_setup);
/*
 * method table registered with the elevator core; wired up as the
 * queue's i/o scheduler via elevator_init()
 */
elevator_t iosched_deadline = {
	.elevator_merge_fn =		deadline_merge,
	.elevator_merge_req_fn =	deadline_merge_request,
	.elevator_next_req_fn =		deadline_next_request,
	.elevator_add_req_fn =		deadline_add_request,
	.elevator_remove_req_fn =	deadline_remove_request,
	.elevator_queue_empty_fn =	deadline_queue_empty,
	.elevator_get_sort_head_fn =	deadline_get_sort_head,
	.elevator_init_fn =		deadline_init,
	.elevator_exit_fn =		deadline_exit,
};
EXPORT_SYMBOL(iosched_deadline);
...@@ -157,114 +157,6 @@ inline int elv_try_last_merge(request_queue_t *q, struct request **req, ...@@ -157,114 +157,6 @@ inline int elv_try_last_merge(request_queue_t *q, struct request **req,
return ret; return ret;
} }
static int bio_rq_before(struct bio *bio, struct request *rq)
{
if (!kdev_same(to_kdev_t(bio->bi_bdev->bd_dev), rq->rq_dev))
return 0;
return bio->bi_sector < rq->sector;
}
/*
* elevator_linux starts here
*/
int elevator_linus_merge(request_queue_t *q, struct request **req,
struct bio *bio)
{
struct list_head *entry, *good;
struct request *__rq;
int ret;
if ((ret = elv_try_last_merge(q, req, bio)))
return ret;
entry = &q->queue_head;
good = &q->queue_head;
ret = ELEVATOR_NO_MERGE;
while ((entry = entry->prev) != &q->queue_head) {
__rq = list_entry_rq(entry);
if (__rq->flags & (REQ_BARRIER | REQ_STARTED))
break;
if (!(__rq->flags & REQ_CMD))
break;
if (bio_data_dir(bio) != rq_data_dir(__rq)) {
if (bio_data_dir(bio) == WRITE)
break;
good = entry->prev;
continue;
}
ret = elv_try_merge(__rq, bio);
if (ret) {
*req = __rq;
q->last_merge = &__rq->queuelist;
return ret;
}
if (bio_rq_before(bio, __rq))
good = entry->prev;
}
if (good != &q->queue_head)
*req = list_entry_rq(good);
return ELEVATOR_NO_MERGE;
}
void elevator_linus_merge_req(request_queue_t *q, struct request *req,
struct request *next)
{
if (elv_linus_sequence(next) < elv_linus_sequence(req))
elv_linus_sequence(req) = elv_linus_sequence(next);
}
void elevator_linus_add_request(request_queue_t *q, struct request *rq,
struct list_head *insert_here)
{
elevator_t *e = &q->elevator;
int lat = 0, *latency = e->elevator_data;
if (!insert_here)
insert_here = q->queue_head.prev;
if (!(rq->flags & REQ_BARRIER))
lat = latency[rq_data_dir(rq)];
elv_linus_sequence(rq) = lat;
list_add(&rq->queuelist, insert_here);
/*
* new merges must not precede this barrier
*/
if (rq->flags & REQ_BARRIER)
q->last_merge = NULL;
else if (!q->last_merge)
q->last_merge = &rq->queuelist;
}
int elevator_linus_init(request_queue_t *q, elevator_t *e)
{
int *latency;
latency = kmalloc(2 * sizeof(int), GFP_KERNEL);
if (!latency)
return -ENOMEM;
latency[READ] = 1024;
latency[WRITE] = 2048;
e->elevator_data = latency;
return 0;
}
void elevator_linus_exit(request_queue_t *q, elevator_t *e)
{
kfree(e->elevator_data);
}
/* /*
* elevator noop * elevator noop
* *
...@@ -442,15 +334,6 @@ inline struct list_head *elv_get_sort_head(request_queue_t *q, ...@@ -442,15 +334,6 @@ inline struct list_head *elv_get_sort_head(request_queue_t *q,
return &q->queue_head; return &q->queue_head;
} }
elevator_t elevator_linus = {
elevator_merge_fn: elevator_linus_merge,
elevator_merge_req_fn: elevator_linus_merge_req,
elevator_next_req_fn: elevator_noop_next_request,
elevator_add_req_fn: elevator_linus_add_request,
elevator_init_fn: elevator_linus_init,
elevator_exit_fn: elevator_linus_exit,
};
elevator_t elevator_noop = { elevator_t elevator_noop = {
elevator_merge_fn: elevator_noop_merge, elevator_merge_fn: elevator_noop_merge,
elevator_next_req_fn: elevator_noop_next_request, elevator_next_req_fn: elevator_noop_next_request,
...@@ -459,7 +342,6 @@ elevator_t elevator_noop = { ...@@ -459,7 +342,6 @@ elevator_t elevator_noop = {
module_init(elevator_global_init); module_init(elevator_global_init);
EXPORT_SYMBOL(elevator_linus);
EXPORT_SYMBOL(elevator_noop); EXPORT_SYMBOL(elevator_noop);
EXPORT_SYMBOL(__elv_add_request); EXPORT_SYMBOL(__elv_add_request);
......
...@@ -1175,7 +1175,7 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock) ...@@ -1175,7 +1175,7 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
if (blk_init_free_list(q)) if (blk_init_free_list(q))
return -ENOMEM; return -ENOMEM;
if ((ret = elevator_init(q, &q->elevator, elevator_linus))) { if ((ret = elevator_init(q, &q->elevator, iosched_deadline))) {
blk_cleanup_queue(q); blk_cleanup_queue(q);
return ret; return ret;
} }
...@@ -1233,24 +1233,23 @@ static struct request *get_request(request_queue_t *q, int rw) ...@@ -1233,24 +1233,23 @@ static struct request *get_request(request_queue_t *q, int rw)
*/ */
static struct request *get_request_wait(request_queue_t *q, int rw) static struct request *get_request_wait(request_queue_t *q, int rw)
{ {
DECLARE_WAITQUEUE(wait, current); DEFINE_WAIT(wait);
struct request_list *rl = &q->rq[rw]; struct request_list *rl = &q->rq[rw];
struct request *rq; struct request *rq;
spin_lock_prefetch(q->queue_lock); spin_lock_prefetch(q->queue_lock);
generic_unplug_device(q); generic_unplug_device(q);
add_wait_queue_exclusive(&rl->wait, &wait);
do { do {
set_current_state(TASK_UNINTERRUPTIBLE); prepare_to_wait_exclusive(&rl->wait, &wait,
TASK_UNINTERRUPTIBLE);
if (!rl->count) if (!rl->count)
schedule(); schedule();
finish_wait(&rl->wait, &wait);
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
rq = get_request(q, rw); rq = get_request(q, rw);
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
} while (rq == NULL); } while (rq == NULL);
remove_wait_queue(&rl->wait, &wait);
current->state = TASK_RUNNING;
return rq; return rq;
} }
...@@ -1460,18 +1459,16 @@ void blk_put_request(struct request *req) ...@@ -1460,18 +1459,16 @@ void blk_put_request(struct request *req)
*/ */
void blk_congestion_wait(int rw, long timeout) void blk_congestion_wait(int rw, long timeout)
{ {
DECLARE_WAITQUEUE(wait, current); DEFINE_WAIT(wait);
struct congestion_state *cs = &congestion_states[rw]; struct congestion_state *cs = &congestion_states[rw];
if (atomic_read(&cs->nr_congested_queues) == 0) if (atomic_read(&cs->nr_congested_queues) == 0)
return; return;
blk_run_queues(); blk_run_queues();
set_current_state(TASK_UNINTERRUPTIBLE); prepare_to_wait(&cs->wqh, &wait, TASK_UNINTERRUPTIBLE);
add_wait_queue(&cs->wqh, &wait);
if (atomic_read(&cs->nr_congested_queues) != 0) if (atomic_read(&cs->nr_congested_queues) != 0)
schedule_timeout(timeout); schedule_timeout(timeout);
set_current_state(TASK_RUNNING); finish_wait(&cs->wqh, &wait);
remove_wait_queue(&cs->wqh, &wait);
} }
/* /*
......
...@@ -157,18 +157,12 @@ struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { ...@@ -157,18 +157,12 @@ struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
#define MAX_DISK_SIZE 1024*1024*1024 #define MAX_DISK_SIZE 1024*1024*1024
static unsigned long
compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry)
{
loff_t size = lo_dentry->d_inode->i_mapping->host->i_size;
return (size - lo->lo_offset) >> BLOCK_SIZE_BITS;
}
static void figure_loop_size(struct loop_device *lo) static void figure_loop_size(struct loop_device *lo)
{ {
set_capacity(disks + lo->lo_number, compute_loop_size(lo, loff_t size = lo->lo_backing_file->f_dentry->d_inode->i_size;
lo->lo_backing_file->f_dentry));
set_capacity(disks + lo->lo_number,
(size - lo->lo_offset) >> 9);
} }
static inline int lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf, static inline int lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf,
......
...@@ -338,6 +338,9 @@ static void cy82c693_tune_drive (ide_drive_t *drive, u8 pio) ...@@ -338,6 +338,9 @@ static void cy82c693_tune_drive (ide_drive_t *drive, u8 pio)
*/ */
unsigned int __init init_chipset_cy82c693(struct pci_dev *dev, const char *name) unsigned int __init init_chipset_cy82c693(struct pci_dev *dev, const char *name)
{ {
if (PCI_FUNC(dev->devfn) != 1)
return 0;
#ifdef CY82C693_SETDMA_CLOCK #ifdef CY82C693_SETDMA_CLOCK
u8 data = 0; u8 data = 0;
#endif /* CY82C693_SETDMA_CLOCK */ #endif /* CY82C693_SETDMA_CLOCK */
...@@ -411,20 +414,30 @@ void __init init_hwif_cy82c693(ide_hwif_t *hwif) ...@@ -411,20 +414,30 @@ void __init init_hwif_cy82c693(ide_hwif_t *hwif)
#endif /* CONFIG_BLK_DEV_IDEDMA */ #endif /* CONFIG_BLK_DEV_IDEDMA */
} }
void __init init_dma_cy82c693 (ide_hwif_t *hwif, unsigned long dmabase) static __initdata ide_hwif_t *primary;
void __init init_iops_cy82c693(ide_hwif_t *hwif)
{ {
ide_setup_dma(hwif, dmabase, 8); if (PCI_FUNC(hwif->pci_dev->devfn) == 1)
primary = hwif;
else {
hwif->mate = primary;
hwif->channel = 1;
}
} }
extern void ide_setup_pci_device(struct pci_dev *, ide_pci_device_t *);
static int __devinit cy82c693_init_one(struct pci_dev *dev, const struct pci_device_id *id) static int __devinit cy82c693_init_one(struct pci_dev *dev, const struct pci_device_id *id)
{ {
ide_pci_device_t *d = &cy82c693_chipsets[id->driver_data]; ide_pci_device_t *d = &cy82c693_chipsets[id->driver_data];
if ((!(PCI_FUNC(dev->devfn) & 1) || struct pci_dev *dev2;
(!((dev->class >> 8) == PCI_CLASS_STORAGE_IDE))))
return 0; /* CY82C693 is more than only a IDE controller */ /* CY82C693 is more than only a IDE controller.
ide_setup_pci_device(dev, d); Function 1 is primary IDE channel, function 2 - secondary. */
if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE &&
PCI_FUNC(dev->devfn) == 1) {
dev2 = pci_find_slot(dev->bus->number, dev->devfn + 1);
ide_setup_pci_devices(dev, dev2, d);
}
return 0; return 0;
} }
......
...@@ -66,7 +66,7 @@ typedef struct pio_clocks_s { ...@@ -66,7 +66,7 @@ typedef struct pio_clocks_s {
extern unsigned int init_chipset_cy82c693(struct pci_dev *, const char *); extern unsigned int init_chipset_cy82c693(struct pci_dev *, const char *);
extern void init_hwif_cy82c693(ide_hwif_t *); extern void init_hwif_cy82c693(ide_hwif_t *);
extern void init_dma_cy82c693(ide_hwif_t *, unsigned long); extern void init_iops_cy82c693(ide_hwif_t *);
static ide_pci_device_t cy82c693_chipsets[] __initdata = { static ide_pci_device_t cy82c693_chipsets[] __initdata = {
{ /* 0 */ { /* 0 */
...@@ -74,10 +74,10 @@ static ide_pci_device_t cy82c693_chipsets[] __initdata = { ...@@ -74,10 +74,10 @@ static ide_pci_device_t cy82c693_chipsets[] __initdata = {
device: PCI_DEVICE_ID_CONTAQ_82C693, device: PCI_DEVICE_ID_CONTAQ_82C693,
name: "CY82C693", name: "CY82C693",
init_chipset: init_chipset_cy82c693, init_chipset: init_chipset_cy82c693,
init_iops: NULL, init_iops: init_iops_cy82c693,
init_hwif: init_hwif_cy82c693, init_hwif: init_hwif_cy82c693,
init_dma: init_dma_cy82c693, init_dma: NULL,
channels: 2, channels: 1,
autodma: AUTODMA, autodma: AUTODMA,
enablebits: {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, enablebits: {{0x00,0x00,0x00}, {0x00,0x00,0x00}},
bootable: ON_BOARD, bootable: ON_BOARD,
......
...@@ -250,6 +250,7 @@ static unsigned long __init ide_get_or_set_dma_base (ide_hwif_t *hwif) ...@@ -250,6 +250,7 @@ static unsigned long __init ide_get_or_set_dma_base (ide_hwif_t *hwif)
switch(dev->device) { switch(dev->device) {
case PCI_DEVICE_ID_AL_M5219: case PCI_DEVICE_ID_AL_M5219:
case PCI_DEVICE_ID_AL_M5229:
case PCI_DEVICE_ID_AMD_VIPER_7409: case PCI_DEVICE_ID_AMD_VIPER_7409:
case PCI_DEVICE_ID_CMD_643: case PCI_DEVICE_ID_CMD_643:
case PCI_DEVICE_ID_SERVERWORKS_CSB5IDE: case PCI_DEVICE_ID_SERVERWORKS_CSB5IDE:
......
...@@ -68,6 +68,7 @@ static int proc_read_escdinfo(char *buf, char **start, off_t pos, ...@@ -68,6 +68,7 @@ static int proc_read_escdinfo(char *buf, char **start, off_t pos,
); );
} }
#define MAX_SANE_ESCD_SIZE (32*1024)
static int proc_read_escd(char *buf, char **start, off_t pos, static int proc_read_escd(char *buf, char **start, off_t pos,
int count, int *eof, void *data) int count, int *eof, void *data)
{ {
...@@ -79,8 +80,8 @@ static int proc_read_escd(char *buf, char **start, off_t pos, ...@@ -79,8 +80,8 @@ static int proc_read_escd(char *buf, char **start, off_t pos,
return -EIO; return -EIO;
/* sanity check */ /* sanity check */
if (escd.escd_size > (32*1024)) { if (escd.escd_size > MAX_SANE_ESCD_SIZE) {
printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size is too great\n"); printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size reported by BIOS escd_info call is too great\n");
return -EFBIG; return -EFBIG;
} }
...@@ -90,7 +91,14 @@ static int proc_read_escd(char *buf, char **start, off_t pos, ...@@ -90,7 +91,14 @@ static int proc_read_escd(char *buf, char **start, off_t pos,
if (pnp_bios_read_escd(tmpbuf, escd.nv_storage_base)) if (pnp_bios_read_escd(tmpbuf, escd.nv_storage_base))
return -EIO; return -EIO;
escd_size = (unsigned char)(buf[0]) + (unsigned char)(buf[1])*256; escd_size = (unsigned char)(tmpbuf[0]) + (unsigned char)(tmpbuf[1])*256;
/* sanity check */
if (escd_size > MAX_SANE_ESCD_SIZE) {
printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size reported by BIOS read_escd call is too great\n");
return -EFBIG;
}
escd_left_to_read = escd_size - pos; escd_left_to_read = escd_size - pos;
if (escd_left_to_read < 0) escd_left_to_read = 0; if (escd_left_to_read < 0) escd_left_to_read = 0;
if (escd_left_to_read == 0) *eof = 1; if (escd_left_to_read == 0) *eof = 1;
......
...@@ -148,6 +148,11 @@ ...@@ -148,6 +148,11 @@
Fix bug in raw command post with data ioctl method. Fix bug in raw command post with data ioctl method.
Fix bug where rollcall sometimes failed with cable errors. Fix bug where rollcall sometimes failed with cable errors.
Print unit # on all command timeouts. Print unit # on all command timeouts.
1.02.00.026 - Fix possible infinite retry bug with power glitch induced
drive timeouts.
Cleanup some AEN severity levels.
1.02.00.027 - Add drive not supported AEN code for SATA controllers.
Remove spurious unknown ioctl error message.
*/ */
#include <linux/module.h> #include <linux/module.h>
...@@ -201,7 +206,7 @@ static struct notifier_block tw_notifier = { ...@@ -201,7 +206,7 @@ static struct notifier_block tw_notifier = {
}; };
/* Globals */ /* Globals */
char *tw_driver_version="1.02.00.025"; char *tw_driver_version="1.02.00.027";
TW_Device_Extension *tw_device_extension_list[TW_MAX_SLOT]; TW_Device_Extension *tw_device_extension_list[TW_MAX_SLOT];
int tw_device_extension_count = 0; int tw_device_extension_count = 0;
...@@ -212,7 +217,7 @@ int tw_aen_complete(TW_Device_Extension *tw_dev, int request_id) ...@@ -212,7 +217,7 @@ int tw_aen_complete(TW_Device_Extension *tw_dev, int request_id)
{ {
TW_Param *param; TW_Param *param;
unsigned short aen; unsigned short aen;
int error = 0; int error = 0, table_max = 0;
dprintk(KERN_WARNING "3w-xxxx: tw_aen_complete()\n"); dprintk(KERN_WARNING "3w-xxxx: tw_aen_complete()\n");
if (tw_dev->alignment_virtual_address[request_id] == NULL) { if (tw_dev->alignment_virtual_address[request_id] == NULL) {
...@@ -227,7 +232,8 @@ int tw_aen_complete(TW_Device_Extension *tw_dev, int request_id) ...@@ -227,7 +232,8 @@ int tw_aen_complete(TW_Device_Extension *tw_dev, int request_id)
if (aen == 0x0ff) { if (aen == 0x0ff) {
printk(KERN_WARNING "3w-xxxx: scsi%d: AEN: INFO: AEN queue overflow.\n", tw_dev->host->host_no); printk(KERN_WARNING "3w-xxxx: scsi%d: AEN: INFO: AEN queue overflow.\n", tw_dev->host->host_no);
} else { } else {
if ((aen & 0x0ff) < TW_AEN_STRING_MAX) { table_max = sizeof(tw_aen_string)/sizeof(char *);
if ((aen & 0x0ff) < table_max) {
if ((tw_aen_string[aen & 0xff][strlen(tw_aen_string[aen & 0xff])-1]) == '#') { if ((tw_aen_string[aen & 0xff][strlen(tw_aen_string[aen & 0xff])-1]) == '#') {
printk(KERN_WARNING "3w-xxxx: scsi%d: AEN: %s%d.\n", tw_dev->host->host_no, tw_aen_string[aen & 0xff], aen >> 8); printk(KERN_WARNING "3w-xxxx: scsi%d: AEN: %s%d.\n", tw_dev->host->host_no, tw_aen_string[aen & 0xff], aen >> 8);
} else { } else {
...@@ -289,7 +295,7 @@ int tw_aen_drain_queue(TW_Device_Extension *tw_dev) ...@@ -289,7 +295,7 @@ int tw_aen_drain_queue(TW_Device_Extension *tw_dev)
int first_reset = 0; int first_reset = 0;
int queue = 0; int queue = 0;
int imax, i; int imax, i;
int found = 0; int found = 0, table_max = 0;
dprintk(KERN_NOTICE "3w-xxxx: tw_aen_drain_queue()\n"); dprintk(KERN_NOTICE "3w-xxxx: tw_aen_drain_queue()\n");
...@@ -409,7 +415,8 @@ int tw_aen_drain_queue(TW_Device_Extension *tw_dev) ...@@ -409,7 +415,8 @@ int tw_aen_drain_queue(TW_Device_Extension *tw_dev)
if (aen == 0x0ff) { if (aen == 0x0ff) {
printk(KERN_WARNING "3w-xxxx: AEN: INFO: AEN queue overflow.\n"); printk(KERN_WARNING "3w-xxxx: AEN: INFO: AEN queue overflow.\n");
} else { } else {
if ((aen & 0x0ff) < TW_AEN_STRING_MAX) { table_max = sizeof(tw_aen_string)/sizeof(char *);
if ((aen & 0x0ff) < table_max) {
if ((tw_aen_string[aen & 0xff][strlen(tw_aen_string[aen & 0xff])-1]) == '#') { if ((tw_aen_string[aen & 0xff][strlen(tw_aen_string[aen & 0xff])-1]) == '#') {
printk(KERN_WARNING "3w-xxxx: AEN: %s%d.\n", tw_aen_string[aen & 0xff], aen >> 8); printk(KERN_WARNING "3w-xxxx: AEN: %s%d.\n", tw_aen_string[aen & 0xff], aen >> 8);
} else { } else {
...@@ -1442,7 +1449,8 @@ static void tw_interrupt(int irq, void *dev_instance, struct pt_regs *regs) ...@@ -1442,7 +1449,8 @@ static void tw_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
/* If error, command failed */ /* If error, command failed */
if (error == 1) { if (error == 1) {
tw_dev->srb[request_id]->result = (DID_RESET << 16); /* Ask for a host reset */
tw_dev->srb[request_id]->result = (DID_OK << 16) | (CHECK_CONDITION << 1);
} }
/* Now complete the io */ /* Now complete the io */
...@@ -1784,7 +1792,7 @@ int tw_ioctl(TW_Device_Extension *tw_dev, int request_id) ...@@ -1784,7 +1792,7 @@ int tw_ioctl(TW_Device_Extension *tw_dev, int request_id)
return 1; return 1;
} }
default: default:
printk(KERN_WARNING "3w-xxxx: Unknown ioctl 0x%x.\n", opcode); dprintk(KERN_WARNING "3w-xxxx: Unknown ioctl 0x%x.\n", opcode);
tw_dev->state[request_id] = TW_S_COMPLETED; tw_dev->state[request_id] = TW_S_COMPLETED;
tw_state_request_finish(tw_dev, request_id); tw_state_request_finish(tw_dev, request_id);
tw_dev->srb[request_id]->result = (DID_OK << 16); tw_dev->srb[request_id]->result = (DID_OK << 16);
......
...@@ -90,14 +90,13 @@ static char *tw_aen_string[] = { ...@@ -90,14 +90,13 @@ static char *tw_aen_string[] = {
"INFO: Verify started: Unit #", // 0x029 "INFO: Verify started: Unit #", // 0x029
"ERROR: Verify failed: Port #", // 0x02A "ERROR: Verify failed: Port #", // 0x02A
"INFO: Verify complete: Unit #", // 0x02B "INFO: Verify complete: Unit #", // 0x02B
"ERROR: Overwrote bad sector during rebuild: Port #", //0x02C "WARNING: Overwrote bad sector during rebuild: Port #", //0x02C
"ERROR: Encountered bad sector during rebuild: Port #", //0x02D "ERROR: Encountered bad sector during rebuild: Port #", //0x02D
"INFO: Replacement drive is too small: Port #", //0x02E "ERROR: Replacement drive is too small: Port #", //0x02E
"WARNING: Verify error: Unit not previously initialized: Unit #" //0x02F "WARNING: Verify error: Unit not previously initialized: Unit #", //0x02F
"ERROR: Drive not supported: Port #" // 0x030
}; };
#define TW_AEN_STRING_MAX 0x030
/* /*
Sense key lookup table Sense key lookup table
Format: ESDC/flags,SenseKey,AdditionalSenseCode,AdditionalSenseCodeQualifier Format: ESDC/flags,SenseKey,AdditionalSenseCode,AdditionalSenseCodeQualifier
......
...@@ -128,22 +128,18 @@ void unlock_buffer(struct buffer_head *bh) ...@@ -128,22 +128,18 @@ void unlock_buffer(struct buffer_head *bh)
*/ */
void __wait_on_buffer(struct buffer_head * bh) void __wait_on_buffer(struct buffer_head * bh)
{ {
wait_queue_head_t *wq = bh_waitq_head(bh); wait_queue_head_t *wqh = bh_waitq_head(bh);
struct task_struct *tsk = current; DEFINE_WAIT(wait);
DECLARE_WAITQUEUE(wait, tsk);
get_bh(bh); get_bh(bh);
add_wait_queue(wq, &wait);
do { do {
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
blk_run_queues(); blk_run_queues();
set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (buffer_locked(bh))
if (!buffer_locked(bh))
break;
schedule(); schedule();
} while (buffer_locked(bh)); } while (buffer_locked(bh));
tsk->state = TASK_RUNNING;
remove_wait_queue(wq, &wait);
put_bh(bh); put_bh(bh);
finish_wait(wqh, &wait);
} }
static inline void static inline void
...@@ -246,10 +242,12 @@ int fsync_bdev(struct block_device *bdev) ...@@ -246,10 +242,12 @@ int fsync_bdev(struct block_device *bdev)
} }
/* /*
* sync everything. * sync everything. Start out by waking pdflush, because that writes back
* all queues in parallel.
*/ */
asmlinkage long sys_sync(void) asmlinkage long sys_sync(void)
{ {
wakeup_bdflush(0);
sync_inodes(0); /* All mappings and inodes, including block devices */ sync_inodes(0); /* All mappings and inodes, including block devices */
DQUOT_SYNC(NULL); DQUOT_SYNC(NULL);
sync_supers(); /* Write the superblocks */ sync_supers(); /* Write the superblocks */
......
...@@ -329,12 +329,11 @@ static inline void prune_one_dentry(struct dentry * dentry) ...@@ -329,12 +329,11 @@ static inline void prune_one_dentry(struct dentry * dentry)
void prune_dcache(int count) void prune_dcache(int count)
{ {
spin_lock(&dcache_lock); spin_lock(&dcache_lock);
for (;;) { for (; count ; count--) {
struct dentry *dentry; struct dentry *dentry;
struct list_head *tmp; struct list_head *tmp;
tmp = dentry_unused.prev; tmp = dentry_unused.prev;
if (tmp == &dentry_unused) if (tmp == &dentry_unused)
break; break;
list_del_init(tmp); list_del_init(tmp);
...@@ -349,12 +348,8 @@ void prune_dcache(int count) ...@@ -349,12 +348,8 @@ void prune_dcache(int count)
dentry_stat.nr_unused--; dentry_stat.nr_unused--;
/* Unused dentry with a count? */ /* Unused dentry with a count? */
if (atomic_read(&dentry->d_count)) BUG_ON(atomic_read(&dentry->d_count));
BUG();
prune_one_dentry(dentry); prune_one_dentry(dentry);
if (!--count)
break;
} }
spin_unlock(&dcache_lock); spin_unlock(&dcache_lock);
} }
...@@ -573,19 +568,11 @@ void shrink_dcache_anon(struct list_head *head) ...@@ -573,19 +568,11 @@ void shrink_dcache_anon(struct list_head *head)
/* /*
* This is called from kswapd when we think we need some * This is called from kswapd when we think we need some
* more memory, but aren't really sure how much. So we * more memory.
* carefully try to free a _bit_ of our dcache, but not
* too much.
*
* Priority:
* 1 - very urgent: shrink everything
* ...
* 6 - base-level: try to shrink a bit.
*/ */
int shrink_dcache_memory(int priority, unsigned int gfp_mask) int shrink_dcache_memory(int ratio, unsigned int gfp_mask)
{ {
int count = 0; int entries = dentry_stat.nr_dentry / ratio + 1;
/* /*
* Nasty deadlock avoidance. * Nasty deadlock avoidance.
* *
...@@ -600,11 +587,8 @@ int shrink_dcache_memory(int priority, unsigned int gfp_mask) ...@@ -600,11 +587,8 @@ int shrink_dcache_memory(int priority, unsigned int gfp_mask)
if (!(gfp_mask & __GFP_FS)) if (!(gfp_mask & __GFP_FS))
return 0; return 0;
count = dentry_stat.nr_unused / priority; prune_dcache(entries);
return entries;
prune_dcache(count);
kmem_cache_shrink(dentry_cache);
return 0;
} }
#define NAME_ALLOC_LEN(len) ((len+16) & ~15) #define NAME_ALLOC_LEN(len) ((len+16) & ~15)
......
...@@ -480,26 +480,17 @@ static void prune_dqcache(int count) ...@@ -480,26 +480,17 @@ static void prune_dqcache(int count)
/* /*
* This is called from kswapd when we think we need some * This is called from kswapd when we think we need some
* more memory, but aren't really sure how much. So we * more memory
* carefully try to free a _bit_ of our dqcache, but not
* too much.
*
* Priority:
* 1 - very urgent: shrink everything
* ...
* 6 - base-level: try to shrink a bit.
*/ */
int shrink_dqcache_memory(int priority, unsigned int gfp_mask) int shrink_dqcache_memory(int ratio, unsigned int gfp_mask)
{ {
int count = 0; int entries = dqstats.allocated_dquots / ratio + 1;
lock_kernel(); lock_kernel();
count = dqstats.free_dquots / priority; prune_dqcache(entries);
prune_dqcache(count);
unlock_kernel(); unlock_kernel();
kmem_cache_shrink(dquot_cachep); return entries;
return 0;
} }
/* /*
......
...@@ -386,10 +386,11 @@ void prune_icache(int goal) ...@@ -386,10 +386,11 @@ void prune_icache(int goal)
count = 0; count = 0;
entry = inode_unused.prev; entry = inode_unused.prev;
while (entry != &inode_unused) for(; goal; goal--) {
{
struct list_head *tmp = entry; struct list_head *tmp = entry;
if (entry == &inode_unused)
break;
entry = entry->prev; entry = entry->prev;
inode = INODE(tmp); inode = INODE(tmp);
if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK)) if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
...@@ -403,8 +404,6 @@ void prune_icache(int goal) ...@@ -403,8 +404,6 @@ void prune_icache(int goal)
list_add(tmp, freeable); list_add(tmp, freeable);
inode->i_state |= I_FREEING; inode->i_state |= I_FREEING;
count++; count++;
if (!--goal)
break;
} }
inodes_stat.nr_unused -= count; inodes_stat.nr_unused -= count;
spin_unlock(&inode_lock); spin_unlock(&inode_lock);
...@@ -414,19 +413,11 @@ void prune_icache(int goal) ...@@ -414,19 +413,11 @@ void prune_icache(int goal)
/* /*
* This is called from kswapd when we think we need some * This is called from kswapd when we think we need some
* more memory, but aren't really sure how much. So we * more memory.
* carefully try to free a _bit_ of our icache, but not
* too much.
*
* Priority:
* 1 - very urgent: shrink everything
* ...
* 6 - base-level: try to shrink a bit.
*/ */
int shrink_icache_memory(int priority, int gfp_mask) int shrink_icache_memory(int ratio, unsigned int gfp_mask)
{ {
int count = 0; int entries = inodes_stat.nr_inodes / ratio + 1;
/* /*
* Nasty deadlock avoidance.. * Nasty deadlock avoidance..
* *
...@@ -437,12 +428,10 @@ int shrink_icache_memory(int priority, int gfp_mask) ...@@ -437,12 +428,10 @@ int shrink_icache_memory(int priority, int gfp_mask)
if (!(gfp_mask & __GFP_FS)) if (!(gfp_mask & __GFP_FS))
return 0; return 0;
count = inodes_stat.nr_unused / priority; prune_icache(entries);
return entries;
prune_icache(count);
kmem_cache_shrink(inode_cachep);
return 0;
} }
EXPORT_SYMBOL(shrink_icache_memory);
/* /*
* Called with the inode lock held. * Called with the inode lock held.
......
...@@ -252,7 +252,7 @@ static int flock_make_lock(struct file *filp, ...@@ -252,7 +252,7 @@ static int flock_make_lock(struct file *filp,
return -ENOMEM; return -ENOMEM;
fl->fl_file = filp; fl->fl_file = filp;
fl->fl_pid = current->pid; fl->fl_pid = current->tgid;
fl->fl_flags = (cmd & LOCK_NB) ? FL_FLOCK : FL_FLOCK | FL_SLEEP; fl->fl_flags = (cmd & LOCK_NB) ? FL_FLOCK : FL_FLOCK | FL_SLEEP;
fl->fl_type = type; fl->fl_type = type;
fl->fl_end = OFFSET_MAX; fl->fl_end = OFFSET_MAX;
...@@ -308,7 +308,7 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, ...@@ -308,7 +308,7 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
fl->fl_end = OFFSET_MAX; fl->fl_end = OFFSET_MAX;
fl->fl_owner = current->files; fl->fl_owner = current->files;
fl->fl_pid = current->pid; fl->fl_pid = current->tgid;
fl->fl_file = filp; fl->fl_file = filp;
fl->fl_flags = FL_POSIX; fl->fl_flags = FL_POSIX;
fl->fl_notify = NULL; fl->fl_notify = NULL;
...@@ -348,7 +348,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, ...@@ -348,7 +348,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
fl->fl_end = OFFSET_MAX; fl->fl_end = OFFSET_MAX;
fl->fl_owner = current->files; fl->fl_owner = current->files;
fl->fl_pid = current->pid; fl->fl_pid = current->tgid;
fl->fl_file = filp; fl->fl_file = filp;
fl->fl_flags = FL_POSIX; fl->fl_flags = FL_POSIX;
fl->fl_notify = NULL; fl->fl_notify = NULL;
...@@ -377,7 +377,7 @@ static int lease_alloc(struct file *filp, int type, struct file_lock **flp) ...@@ -377,7 +377,7 @@ static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
return -ENOMEM; return -ENOMEM;
fl->fl_owner = current->files; fl->fl_owner = current->files;
fl->fl_pid = current->pid; fl->fl_pid = current->tgid;
fl->fl_file = filp; fl->fl_file = filp;
fl->fl_flags = FL_LEASE; fl->fl_flags = FL_LEASE;
...@@ -669,7 +669,7 @@ int locks_mandatory_area(int read_write, struct inode *inode, ...@@ -669,7 +669,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
int error; int error;
fl.fl_owner = current->files; fl.fl_owner = current->files;
fl.fl_pid = current->pid; fl.fl_pid = current->tgid;
fl.fl_file = filp; fl.fl_file = filp;
fl.fl_flags = FL_POSIX | FL_ACCESS | FL_SLEEP; fl.fl_flags = FL_POSIX | FL_ACCESS | FL_SLEEP;
fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
...@@ -1241,7 +1241,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) ...@@ -1241,7 +1241,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
*before = fl; *before = fl;
list_add(&fl->fl_link, &file_lock_list); list_add(&fl->fl_link, &file_lock_list);
error = f_setown(filp, current->pid, 1); error = f_setown(filp, current->tgid, 1);
out_unlock: out_unlock:
unlock_kernel(); unlock_kernel();
return error; return error;
...@@ -1632,7 +1632,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) ...@@ -1632,7 +1632,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
lock.fl_start = 0; lock.fl_start = 0;
lock.fl_end = OFFSET_MAX; lock.fl_end = OFFSET_MAX;
lock.fl_owner = owner; lock.fl_owner = owner;
lock.fl_pid = current->pid; lock.fl_pid = current->tgid;
lock.fl_file = filp; lock.fl_file = filp;
if (filp->f_op && filp->f_op->lock != NULL) { if (filp->f_op && filp->f_op->lock != NULL) {
......
...@@ -40,7 +40,6 @@ ...@@ -40,7 +40,6 @@
#define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_BASE 0xfe400000
#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
#define XQUAD_PORTIO_LEN 0x80000 /* Only remapping first 2 quads */
#ifdef __KERNEL__ #ifdef __KERNEL__
......
...@@ -116,7 +116,7 @@ static inline void down(struct semaphore * sem) ...@@ -116,7 +116,7 @@ static inline void down(struct semaphore * sem)
#if WAITQUEUE_DEBUG #if WAITQUEUE_DEBUG
CHECK_MAGIC(sem->__magic); CHECK_MAGIC(sem->__magic);
#endif #endif
might_sleep();
__asm__ __volatile__( __asm__ __volatile__(
"# atomic down operation\n\t" "# atomic down operation\n\t"
LOCK "decl %0\n\t" /* --sem->count */ LOCK "decl %0\n\t" /* --sem->count */
...@@ -142,7 +142,7 @@ static inline int down_interruptible(struct semaphore * sem) ...@@ -142,7 +142,7 @@ static inline int down_interruptible(struct semaphore * sem)
#if WAITQUEUE_DEBUG #if WAITQUEUE_DEBUG
CHECK_MAGIC(sem->__magic); CHECK_MAGIC(sem->__magic);
#endif #endif
might_sleep();
__asm__ __volatile__( __asm__ __volatile__(
"# atomic interruptible down operation\n\t" "# atomic interruptible down operation\n\t"
LOCK "decl %1\n\t" /* --sem->count */ LOCK "decl %1\n\t" /* --sem->count */
......
...@@ -186,7 +186,7 @@ extern int shrink_dcache_memory(int, unsigned int); ...@@ -186,7 +186,7 @@ extern int shrink_dcache_memory(int, unsigned int);
extern void prune_dcache(int); extern void prune_dcache(int);
/* icache memory management (defined in linux/fs/inode.c) */ /* icache memory management (defined in linux/fs/inode.c) */
extern int shrink_icache_memory(int, int); extern int shrink_icache_memory(int, unsigned int);
extern void prune_icache(int); extern void prune_icache(int);
/* quota cache memory management (defined in linux/fs/dquot.c) */ /* quota cache memory management (defined in linux/fs/dquot.c) */
......
...@@ -52,12 +52,10 @@ extern inline struct list_head *elv_get_sort_head(request_queue_t *, struct requ ...@@ -52,12 +52,10 @@ extern inline struct list_head *elv_get_sort_head(request_queue_t *, struct requ
extern elevator_t elevator_noop; extern elevator_t elevator_noop;
/* /*
* elevator linus. based on linus ideas of starvation control, using * deadline i/o scheduler. uses request time outs to prevent indefinite
* sequencing to manage inserts and merges. * starvation
*/ */
extern elevator_t elevator_linus; extern elevator_t iosched_deadline;
#define elv_linus_sequence(rq) ((long)(rq)->elevator_private)
#define ELV_LINUS_SEEK_COST 16
/* /*
* use the /proc/iosched interface, all the below is history -> * use the /proc/iosched interface, all the below is history ->
......
...@@ -40,6 +40,13 @@ ...@@ -40,6 +40,13 @@
struct completion; struct completion;
#ifdef CONFIG_DEBUG_KERNEL
void __might_sleep(char *file, int line);
#define might_sleep() __might_sleep(__FILE__, __LINE__)
#else
#define might_sleep() do {} while(0)
#endif
extern struct notifier_block *panic_notifier_list; extern struct notifier_block *panic_notifier_list;
NORET_TYPE void panic(const char * fmt, ...) NORET_TYPE void panic(const char * fmt, ...)
__attribute__ ((NORET_AND format (printf, 1, 2))); __attribute__ ((NORET_AND format (printf, 1, 2)));
......
...@@ -524,6 +524,7 @@ extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned lon ...@@ -524,6 +524,7 @@ extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned lon
extern struct page * vmalloc_to_page(void *addr); extern struct page * vmalloc_to_page(void *addr);
extern unsigned long get_page_cache_size(void); extern unsigned long get_page_cache_size(void);
extern unsigned int nr_used_zone_pages(void);
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
......
...@@ -74,9 +74,15 @@ static inline void ___add_to_page_cache(struct page *page, ...@@ -74,9 +74,15 @@ static inline void ___add_to_page_cache(struct page *page,
inc_page_state(nr_pagecache); inc_page_state(nr_pagecache);
} }
extern void FASTCALL(lock_page(struct page *page)); extern void FASTCALL(__lock_page(struct page *page));
extern void FASTCALL(unlock_page(struct page *page)); extern void FASTCALL(unlock_page(struct page *page));
static inline void lock_page(struct page *page)
{
if (TestSetPageLocked(page))
__lock_page(page);
}
/* /*
* This is exported only for wait_on_page_locked/wait_on_page_writeback. * This is exported only for wait_on_page_locked/wait_on_page_writeback.
* Never use this directly! * Never use this directly!
......
...@@ -40,6 +40,7 @@ extern void FASTCALL(rwsemtrace(struct rw_semaphore *sem, const char *str)); ...@@ -40,6 +40,7 @@ extern void FASTCALL(rwsemtrace(struct rw_semaphore *sem, const char *str));
*/ */
static inline void down_read(struct rw_semaphore *sem) static inline void down_read(struct rw_semaphore *sem)
{ {
might_sleep();
rwsemtrace(sem,"Entering down_read"); rwsemtrace(sem,"Entering down_read");
__down_read(sem); __down_read(sem);
rwsemtrace(sem,"Leaving down_read"); rwsemtrace(sem,"Leaving down_read");
...@@ -62,6 +63,7 @@ static inline int down_read_trylock(struct rw_semaphore *sem) ...@@ -62,6 +63,7 @@ static inline int down_read_trylock(struct rw_semaphore *sem)
*/ */
static inline void down_write(struct rw_semaphore *sem) static inline void down_write(struct rw_semaphore *sem)
{ {
might_sleep();
rwsemtrace(sem,"Entering down_write"); rwsemtrace(sem,"Entering down_write");
__down_write(sem); __down_write(sem);
rwsemtrace(sem,"Leaving down_write"); rwsemtrace(sem,"Leaving down_write");
......
...@@ -100,8 +100,9 @@ extern unsigned long nr_uninterruptible(void); ...@@ -100,8 +100,9 @@ extern unsigned long nr_uninterruptible(void);
#define TASK_RUNNING 0 #define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1 #define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2 #define TASK_UNINTERRUPTIBLE 2
#define TASK_ZOMBIE 4 #define TASK_STOPPED 4
#define TASK_STOPPED 8 #define TASK_ZOMBIE 8
#define TASK_DEAD 16
#define __set_task_state(tsk, state_value) \ #define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0) do { (tsk)->state = (state_value); } while (0)
......
...@@ -119,6 +119,32 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, ...@@ -119,6 +119,32 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
_raced; \ _raced; \
}) })
/*
* Waitqueue's which are removed from the waitqueue_head at wakeup time
*/
void FASTCALL(prepare_to_wait(wait_queue_head_t *q,
wait_queue_t *wait, int state));
void FASTCALL(prepare_to_wait_exclusive(wait_queue_head_t *q,
wait_queue_t *wait, int state));
void FASTCALL(finish_wait(wait_queue_head_t *q, wait_queue_t *wait));
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync);
#define DEFINE_WAIT(name) \
wait_queue_t name = { \
.task = current, \
.func = autoremove_wake_function, \
.task_list = { .next = &name.task_list, \
.prev = &name.task_list, \
}, \
}
#define init_wait(wait) \
do { \
wait->task = current; \
wait->func = autoremove_wake_function; \
INIT_LIST_HEAD(&wait->task_list); \
} while (0)
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif #endif
...@@ -32,6 +32,7 @@ int getrusage(struct task_struct *, int, struct rusage *); ...@@ -32,6 +32,7 @@ int getrusage(struct task_struct *, int, struct rusage *);
static struct dentry * __unhash_process(struct task_struct *p) static struct dentry * __unhash_process(struct task_struct *p)
{ {
struct dentry *proc_dentry; struct dentry *proc_dentry;
nr_threads--; nr_threads--;
detach_pid(p, PIDTYPE_PID); detach_pid(p, PIDTYPE_PID);
detach_pid(p, PIDTYPE_TGID); detach_pid(p, PIDTYPE_TGID);
...@@ -57,31 +58,31 @@ static struct dentry * __unhash_process(struct task_struct *p) ...@@ -57,31 +58,31 @@ static struct dentry * __unhash_process(struct task_struct *p)
void release_task(struct task_struct * p) void release_task(struct task_struct * p)
{ {
struct dentry *proc_dentry; struct dentry *proc_dentry;
task_t *leader;
if (p->state != TASK_ZOMBIE) if (p->state < TASK_ZOMBIE)
BUG(); BUG();
if (p != current) if (p != current)
wait_task_inactive(p); wait_task_inactive(p);
atomic_dec(&p->user->processes); atomic_dec(&p->user->processes);
security_ops->task_free_security(p); security_ops->task_free_security(p);
free_uid(p->user); free_uid(p->user);
if (unlikely(p->ptrace)) {
write_lock_irq(&tasklist_lock); write_lock_irq(&tasklist_lock);
if (unlikely(p->ptrace))
__ptrace_unlink(p); __ptrace_unlink(p);
write_unlock_irq(&tasklist_lock);
}
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
write_lock_irq(&tasklist_lock);
__exit_sighand(p); __exit_sighand(p);
proc_dentry = __unhash_process(p); proc_dentry = __unhash_process(p);
/* /*
* If we are the last non-leader member of the thread * If we are the last non-leader member of the thread
* group, and the leader is zombie, then notify the * group, and the leader is zombie, then notify the
* group leader's parent process. * group leader's parent process. (if it wants notification.)
*/ */
if (p->group_leader != p && thread_group_empty(p)) leader = p->group_leader;
do_notify_parent(p->group_leader, p->group_leader->exit_signal); if (leader != p && thread_group_empty(leader) &&
leader->state == TASK_ZOMBIE && leader->exit_signal != -1)
do_notify_parent(leader, leader->exit_signal);
p->parent->cutime += p->utime + p->cutime; p->parent->cutime += p->utime + p->cutime;
p->parent->cstime += p->stime + p->cstime; p->parent->cstime += p->stime + p->cstime;
...@@ -159,7 +160,7 @@ static int __will_become_orphaned_pgrp(int pgrp, task_t *ignored_task) ...@@ -159,7 +160,7 @@ static int __will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) { for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
if (p == ignored_task if (p == ignored_task
|| p->state == TASK_ZOMBIE || p->state >= TASK_ZOMBIE
|| p->real_parent->pid == 1) || p->real_parent->pid == 1)
continue; continue;
if (p->real_parent->pgrp != pgrp if (p->real_parent->pgrp != pgrp
...@@ -435,8 +436,11 @@ void exit_mm(struct task_struct *tsk) ...@@ -435,8 +436,11 @@ void exit_mm(struct task_struct *tsk)
static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper) static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
{ {
/* Make sure we're not reparenting to ourselves. */ /*
if (p == reaper) * Make sure we're not reparenting to ourselves and that
* the parent is not a zombie.
*/
if (p == reaper || reaper->state >= TASK_ZOMBIE)
p->real_parent = child_reaper; p->real_parent = child_reaper;
else else
p->real_parent = reaper; p->real_parent = reaper;
...@@ -774,9 +778,10 @@ static int eligible_child(pid_t pid, int options, task_t *p) ...@@ -774,9 +778,10 @@ static int eligible_child(pid_t pid, int options, task_t *p)
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru) asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{ {
int flag, retval;
DECLARE_WAITQUEUE(wait, current); DECLARE_WAITQUEUE(wait, current);
struct task_struct *tsk; struct task_struct *tsk;
unsigned long state;
int flag, retval;
if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL)) if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
return -EINVAL; return -EINVAL;
...@@ -827,7 +832,15 @@ asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struc ...@@ -827,7 +832,15 @@ asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struc
*/ */
if (ret == 2) if (ret == 2)
continue; continue;
/*
* Try to move the task's state to DEAD
* only one thread is allowed to do this:
*/
state = xchg(&p->state, TASK_DEAD);
if (state != TASK_ZOMBIE)
continue;
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
if (!retval && stat_addr) { if (!retval && stat_addr) {
if (p->sig->group_exit) if (p->sig->group_exit)
...@@ -835,13 +848,16 @@ asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struc ...@@ -835,13 +848,16 @@ asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struc
else else
retval = put_user(p->exit_code, stat_addr); retval = put_user(p->exit_code, stat_addr);
} }
if (retval) if (retval) {
p->state = TASK_ZOMBIE;
goto end_wait4; goto end_wait4;
}
retval = p->pid; retval = p->pid;
if (p->real_parent != p->parent) { if (p->real_parent != p->parent) {
write_lock_irq(&tasklist_lock); write_lock_irq(&tasklist_lock);
__ptrace_unlink(p); __ptrace_unlink(p);
do_notify_parent(p, SIGCHLD); do_notify_parent(p, SIGCHLD);
p->state = TASK_ZOMBIE;
write_unlock_irq(&tasklist_lock); write_unlock_irq(&tasklist_lock);
} else } else
release_task(p); release_task(p);
......
...@@ -103,6 +103,52 @@ void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait) ...@@ -103,6 +103,52 @@ void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
spin_unlock_irqrestore(&q->lock, flags); spin_unlock_irqrestore(&q->lock, flags);
} }
void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
__set_current_state(state);
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
void
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
__set_current_state(state);
wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue_tail(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
__set_current_state(TASK_RUNNING);
if (!list_empty(&wait->task_list)) {
spin_lock_irqsave(&q->lock, flags);
list_del_init(&wait->task_list);
spin_unlock_irqrestore(&q->lock, flags);
}
}
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync)
{
int ret = default_wake_function(wait, mode, sync);
if (ret)
list_del_init(&wait->task_list);
return ret;
}
void __init fork_init(unsigned long mempages) void __init fork_init(unsigned long mempages)
{ {
/* create a slab on which task_structs can be allocated */ /* create a slab on which task_structs can be allocated */
......
...@@ -400,6 +400,10 @@ EXPORT_SYMBOL(irq_stat); ...@@ -400,6 +400,10 @@ EXPORT_SYMBOL(irq_stat);
EXPORT_SYMBOL(add_wait_queue); EXPORT_SYMBOL(add_wait_queue);
EXPORT_SYMBOL(add_wait_queue_exclusive); EXPORT_SYMBOL(add_wait_queue_exclusive);
EXPORT_SYMBOL(remove_wait_queue); EXPORT_SYMBOL(remove_wait_queue);
EXPORT_SYMBOL(prepare_to_wait);
EXPORT_SYMBOL(prepare_to_wait_exclusive);
EXPORT_SYMBOL(finish_wait);
EXPORT_SYMBOL(autoremove_wake_function);
/* completion handling */ /* completion handling */
EXPORT_SYMBOL(wait_for_completion); EXPORT_SYMBOL(wait_for_completion);
...@@ -493,7 +497,9 @@ EXPORT_SYMBOL(jiffies_64); ...@@ -493,7 +497,9 @@ EXPORT_SYMBOL(jiffies_64);
EXPORT_SYMBOL(xtime); EXPORT_SYMBOL(xtime);
EXPORT_SYMBOL(do_gettimeofday); EXPORT_SYMBOL(do_gettimeofday);
EXPORT_SYMBOL(do_settimeofday); EXPORT_SYMBOL(do_settimeofday);
#ifdef CONFIG_DEBUG_KERNEL
EXPORT_SYMBOL(__might_sleep);
#endif
#if !defined(__ia64__) #if !defined(__ia64__)
EXPORT_SYMBOL(loops_per_jiffy); EXPORT_SYMBOL(loops_per_jiffy);
#endif #endif
......
...@@ -53,6 +53,8 @@ static pidmap_t pidmap_array[PIDMAP_ENTRIES] = ...@@ -53,6 +53,8 @@ static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
static pidmap_t *map_limit = pidmap_array + PIDMAP_ENTRIES; static pidmap_t *map_limit = pidmap_array + PIDMAP_ENTRIES;
static spinlock_t pidmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
inline void free_pidmap(int pid) inline void free_pidmap(int pid)
{ {
pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE; pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
...@@ -77,8 +79,13 @@ static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps) ...@@ -77,8 +79,13 @@ static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps)
* Free the page if someone raced with us * Free the page if someone raced with us
* installing it: * installing it:
*/ */
if (cmpxchg(&map->page, NULL, (void *) page)) spin_lock(&pidmap_lock);
if (map->page)
free_page(page); free_page(page);
else
map->page = (void *)page;
spin_unlock(&pidmap_lock);
if (!map->page) if (!map->page)
break; break;
} }
......
...@@ -2150,3 +2150,20 @@ void __init sched_init(void) ...@@ -2150,3 +2150,20 @@ void __init sched_init(void)
enter_lazy_tlb(&init_mm, current, smp_processor_id()); enter_lazy_tlb(&init_mm, current, smp_processor_id());
} }
#ifdef CONFIG_DEBUG_KERNEL
void __might_sleep(char *file, int line)
{
#if defined(in_atomic)
static unsigned long prev_jiffy; /* ratelimiting */
if (in_atomic()) {
if (time_before(jiffies, prev_jiffy + HZ))
return;
prev_jiffy = jiffies;
printk("Sleeping function called from illegal"
" context at %s:%d\n", file, line);
dump_stack();
}
#endif
}
#endif
...@@ -888,20 +888,6 @@ asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp) ...@@ -888,20 +888,6 @@ asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0) if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
return -EINVAL; return -EINVAL;
if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
current->policy != SCHED_NORMAL)
{
/*
* Short delay requests up to 2 ms will be handled with
* high precision by a busy wait for all real-time processes.
*
* Its important on SMP not to do this holding locks.
*/
udelay((t.tv_nsec + 999) / 1000);
return 0;
}
expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
current->state = TASK_INTERRUPTIBLE; current->state = TASK_INTERRUPTIBLE;
......
...@@ -632,19 +632,15 @@ static inline wait_queue_head_t *page_waitqueue(struct page *page) ...@@ -632,19 +632,15 @@ static inline wait_queue_head_t *page_waitqueue(struct page *page)
void wait_on_page_bit(struct page *page, int bit_nr) void wait_on_page_bit(struct page *page, int bit_nr)
{ {
wait_queue_head_t *waitqueue = page_waitqueue(page); wait_queue_head_t *waitqueue = page_waitqueue(page);
struct task_struct *tsk = current; DEFINE_WAIT(wait);
DECLARE_WAITQUEUE(wait, tsk);
add_wait_queue(waitqueue, &wait);
do { do {
set_task_state(tsk, TASK_UNINTERRUPTIBLE); prepare_to_wait(waitqueue, &wait, TASK_UNINTERRUPTIBLE);
if (!test_bit(bit_nr, &page->flags))
break;
sync_page(page); sync_page(page);
if (test_bit(bit_nr, &page->flags))
schedule(); schedule();
} while (test_bit(bit_nr, &page->flags)); } while (test_bit(bit_nr, &page->flags));
__set_task_state(tsk, TASK_RUNNING); finish_wait(waitqueue, &wait);
remove_wait_queue(waitqueue, &wait);
} }
EXPORT_SYMBOL(wait_on_page_bit); EXPORT_SYMBOL(wait_on_page_bit);
...@@ -690,38 +686,27 @@ void end_page_writeback(struct page *page) ...@@ -690,38 +686,27 @@ void end_page_writeback(struct page *page)
EXPORT_SYMBOL(end_page_writeback); EXPORT_SYMBOL(end_page_writeback);
/* /*
* Get a lock on the page, assuming we need to sleep * Get a lock on the page, assuming we need to sleep to get it.
* to get it.. *
* Ugly: running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
* random driver's requestfn sets TASK_RUNNING, we could busywait. However
* chances are that on the second loop, the block layer's plug list is empty,
* so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
*/ */
static void __lock_page(struct page *page) void __lock_page(struct page *page)
{ {
wait_queue_head_t *waitqueue = page_waitqueue(page); wait_queue_head_t *wqh = page_waitqueue(page);
struct task_struct *tsk = current; DEFINE_WAIT(wait);
DECLARE_WAITQUEUE(wait, tsk);
add_wait_queue_exclusive(waitqueue, &wait); while (TestSetPageLocked(page)) {
for (;;) { prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (PageLocked(page)) {
sync_page(page); sync_page(page);
if (PageLocked(page))
schedule(); schedule();
} }
if (!TestSetPageLocked(page)) finish_wait(wqh, &wait);
break;
}
__set_task_state(tsk, TASK_RUNNING);
remove_wait_queue(waitqueue, &wait);
}
/*
* Get an exclusive lock on the page, optimistically
* assuming it's not locked..
*/
void lock_page(struct page *page)
{
if (TestSetPageLocked(page))
__lock_page(page);
} }
EXPORT_SYMBOL(__lock_page);
/* /*
* a rather lightweight function, finding and getting a reference to a * a rather lightweight function, finding and getting a reference to a
......
...@@ -187,7 +187,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, ...@@ -187,7 +187,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
* Try to merge with the previous vma. * Try to merge with the previous vma.
*/ */
if (mprotect_attempt_merge(vma, *pprev, end, newflags)) if (mprotect_attempt_merge(vma, *pprev, end, newflags))
return 0; goto success;
} else { } else {
error = split_vma(mm, vma, start, 1); error = split_vma(mm, vma, start, 1);
if (error) if (error)
...@@ -209,7 +209,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, ...@@ -209,7 +209,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
vma->vm_flags = newflags; vma->vm_flags = newflags;
vma->vm_page_prot = newprot; vma->vm_page_prot = newprot;
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
success:
change_protection(vma, start, end, newprot); change_protection(vma, start, end, newprot);
return 0; return 0;
......
...@@ -321,6 +321,9 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, ...@@ -321,6 +321,9 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
struct page * page; struct page * page;
int freed, i; int freed, i;
if (gfp_mask & __GFP_WAIT)
might_sleep();
KERNEL_STAT_ADD(pgalloc, 1<<order); KERNEL_STAT_ADD(pgalloc, 1<<order);
zones = zonelist->zones; /* the list of zones suitable for gfp_mask */ zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
...@@ -479,6 +482,17 @@ unsigned int nr_free_pages(void) ...@@ -479,6 +482,17 @@ unsigned int nr_free_pages(void)
return sum; return sum;
} }
unsigned int nr_used_zone_pages(void)
{
unsigned int pages = 0;
struct zone *zone;
for_each_zone(zone)
pages += zone->nr_active + zone->nr_inactive;
return pages;
}
static unsigned int nr_free_zone_pages(int offset) static unsigned int nr_free_zone_pages(int offset)
{ {
pg_data_t *pgdat; pg_data_t *pgdat;
......
...@@ -79,9 +79,9 @@ static unsigned long last_empty_jifs; ...@@ -79,9 +79,9 @@ static unsigned long last_empty_jifs;
*/ */
struct pdflush_work { struct pdflush_work {
struct task_struct *who; /* The thread */ struct task_struct *who; /* The thread */
void (*fn)(unsigned long); /* A callback function for pdflush to work on */ void (*fn)(unsigned long); /* A callback function */
unsigned long arg0; /* An argument to the callback function */ unsigned long arg0; /* An argument to the callback */
struct list_head list; /* On pdflush_list, when the thread is idle */ struct list_head list; /* On pdflush_list, when idle */
unsigned long when_i_went_to_sleep; unsigned long when_i_went_to_sleep;
}; };
...@@ -99,23 +99,34 @@ static int __pdflush(struct pdflush_work *my_work) ...@@ -99,23 +99,34 @@ static int __pdflush(struct pdflush_work *my_work)
current->flags |= PF_FLUSHER; current->flags |= PF_FLUSHER;
my_work->fn = NULL; my_work->fn = NULL;
my_work->who = current; my_work->who = current;
INIT_LIST_HEAD(&my_work->list);
spin_lock_irq(&pdflush_lock); spin_lock_irq(&pdflush_lock);
nr_pdflush_threads++; nr_pdflush_threads++;
// printk("pdflush %d [%d] starts\n", nr_pdflush_threads, current->pid);
for ( ; ; ) { for ( ; ; ) {
struct pdflush_work *pdf; struct pdflush_work *pdf;
list_add(&my_work->list, &pdflush_list);
my_work->when_i_went_to_sleep = jiffies;
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
list_move(&my_work->list, &pdflush_list);
my_work->when_i_went_to_sleep = jiffies;
spin_unlock_irq(&pdflush_lock); spin_unlock_irq(&pdflush_lock);
if (current->flags & PF_FREEZE) if (current->flags & PF_FREEZE)
refrigerator(PF_IOTHREAD); refrigerator(PF_IOTHREAD);
schedule(); schedule();
if (my_work->fn) spin_lock_irq(&pdflush_lock);
if (!list_empty(&my_work->list)) {
printk("pdflush: bogus wakeup!\n");
my_work->fn = NULL;
continue;
}
if (my_work->fn == NULL) {
printk("pdflush: NULL work function\n");
continue;
}
spin_unlock_irq(&pdflush_lock);
(*my_work->fn)(my_work->arg0); (*my_work->fn)(my_work->arg0);
/* /*
...@@ -132,6 +143,7 @@ static int __pdflush(struct pdflush_work *my_work) ...@@ -132,6 +143,7 @@ static int __pdflush(struct pdflush_work *my_work)
} }
spin_lock_irq(&pdflush_lock); spin_lock_irq(&pdflush_lock);
my_work->fn = NULL;
/* /*
* Thread destruction: For how long has the sleepiest * Thread destruction: For how long has the sleepiest
...@@ -143,13 +155,12 @@ static int __pdflush(struct pdflush_work *my_work) ...@@ -143,13 +155,12 @@ static int __pdflush(struct pdflush_work *my_work)
continue; continue;
pdf = list_entry(pdflush_list.prev, struct pdflush_work, list); pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) { if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) {
pdf->when_i_went_to_sleep = jiffies; /* Limit exit rate */ /* Limit exit rate */
pdf->when_i_went_to_sleep = jiffies;
break; /* exeunt */ break; /* exeunt */
} }
my_work->fn = NULL;
} }
nr_pdflush_threads--; nr_pdflush_threads--;
// printk("pdflush %d [%d] ends\n", nr_pdflush_threads, current->pid);
spin_unlock_irq(&pdflush_lock); spin_unlock_irq(&pdflush_lock);
return 0; return 0;
} }
...@@ -191,11 +202,10 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0) ...@@ -191,11 +202,10 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
list_del_init(&pdf->list); list_del_init(&pdf->list);
if (list_empty(&pdflush_list)) if (list_empty(&pdflush_list))
last_empty_jifs = jiffies; last_empty_jifs = jiffies;
spin_unlock_irqrestore(&pdflush_lock, flags);
pdf->fn = fn; pdf->fn = fn;
pdf->arg0 = arg0; pdf->arg0 = arg0;
wmb(); /* ? */
wake_up_process(pdf->who); wake_up_process(pdf->who);
spin_unlock_irqrestore(&pdflush_lock, flags);
} }
return ret; return ret;
} }
......
...@@ -1370,6 +1370,9 @@ static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags) ...@@ -1370,6 +1370,9 @@ static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
unsigned long save_flags; unsigned long save_flags;
void* objp; void* objp;
if (flags & __GFP_WAIT)
might_sleep();
kmem_cache_alloc_head(cachep, flags); kmem_cache_alloc_head(cachep, flags);
try_again: try_again:
local_irq_save(save_flags); local_irq_save(save_flags);
...@@ -1496,7 +1499,11 @@ static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp) ...@@ -1496,7 +1499,11 @@ static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
if (unlikely(!--slabp->inuse)) { if (unlikely(!--slabp->inuse)) {
/* Was partial or full, now empty. */ /* Was partial or full, now empty. */
list_del(&slabp->list); list_del(&slabp->list);
list_add(&slabp->list, &cachep->slabs_free); /* list_add(&slabp->list, &cachep->slabs_free); */
if (unlikely(list_empty(&cachep->slabs_partial)))
list_add(&slabp->list, &cachep->slabs_partial);
else
kmem_slab_destroy(cachep, slabp);
} else if (unlikely(inuse == cachep->num)) { } else if (unlikely(inuse == cachep->num)) {
/* Was full. */ /* Was full. */
list_del(&slabp->list); list_del(&slabp->list);
...@@ -1970,7 +1977,7 @@ static int s_show(struct seq_file *m, void *p) ...@@ -1970,7 +1977,7 @@ static int s_show(struct seq_file *m, void *p)
} }
list_for_each(q,&cachep->slabs_partial) { list_for_each(q,&cachep->slabs_partial) {
slabp = list_entry(q, slab_t, list); slabp = list_entry(q, slab_t, list);
if (slabp->inuse == cachep->num || !slabp->inuse) if (slabp->inuse == cachep->num)
BUG(); BUG();
active_objs += slabp->inuse; active_objs += slabp->inuse;
active_slabs++; active_slabs++;
......
...@@ -70,6 +70,10 @@ ...@@ -70,6 +70,10 @@
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0) #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif #endif
#ifndef CONFIG_QUOTA
#define shrink_dqcache_memory(ratio, gfp_mask) do { } while (0)
#endif
/* Must be called with page's pte_chain_lock held. */ /* Must be called with page's pte_chain_lock held. */
static inline int page_mapping_inuse(struct page * page) static inline int page_mapping_inuse(struct page * page)
{ {
...@@ -97,7 +101,7 @@ static inline int is_page_cache_freeable(struct page *page) ...@@ -97,7 +101,7 @@ static inline int is_page_cache_freeable(struct page *page)
static /* inline */ int static /* inline */ int
shrink_list(struct list_head *page_list, int nr_pages, shrink_list(struct list_head *page_list, int nr_pages,
unsigned int gfp_mask, int *max_scan) unsigned int gfp_mask, int *max_scan, int *nr_mapped)
{ {
struct address_space *mapping; struct address_space *mapping;
LIST_HEAD(ret_pages); LIST_HEAD(ret_pages);
...@@ -116,6 +120,10 @@ shrink_list(struct list_head *page_list, int nr_pages, ...@@ -116,6 +120,10 @@ shrink_list(struct list_head *page_list, int nr_pages,
if (TestSetPageLocked(page)) if (TestSetPageLocked(page))
goto keep; goto keep;
/* Double the slab pressure for mapped and swapcache pages */
if (page_mapped(page) || PageSwapCache(page))
(*nr_mapped)++;
BUG_ON(PageActive(page)); BUG_ON(PageActive(page));
may_enter_fs = (gfp_mask & __GFP_FS) || may_enter_fs = (gfp_mask & __GFP_FS) ||
(PageSwapCache(page) && (gfp_mask & __GFP_IO)); (PageSwapCache(page) && (gfp_mask & __GFP_IO));
...@@ -320,7 +328,7 @@ shrink_list(struct list_head *page_list, int nr_pages, ...@@ -320,7 +328,7 @@ shrink_list(struct list_head *page_list, int nr_pages,
*/ */
static /* inline */ int static /* inline */ int
shrink_cache(int nr_pages, struct zone *zone, shrink_cache(int nr_pages, struct zone *zone,
unsigned int gfp_mask, int max_scan) unsigned int gfp_mask, int max_scan, int *nr_mapped)
{ {
LIST_HEAD(page_list); LIST_HEAD(page_list);
struct pagevec pvec; struct pagevec pvec;
...@@ -371,7 +379,8 @@ shrink_cache(int nr_pages, struct zone *zone, ...@@ -371,7 +379,8 @@ shrink_cache(int nr_pages, struct zone *zone,
max_scan -= nr_scan; max_scan -= nr_scan;
KERNEL_STAT_ADD(pgscan, nr_scan); KERNEL_STAT_ADD(pgscan, nr_scan);
nr_pages = shrink_list(&page_list,nr_pages,gfp_mask,&max_scan); nr_pages = shrink_list(&page_list, nr_pages,
gfp_mask, &max_scan, nr_mapped);
if (nr_pages <= 0 && list_empty(&page_list)) if (nr_pages <= 0 && list_empty(&page_list))
goto done; goto done;
...@@ -522,14 +531,10 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in) ...@@ -522,14 +531,10 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
static /* inline */ int static /* inline */ int
shrink_zone(struct zone *zone, int max_scan, shrink_zone(struct zone *zone, int max_scan,
unsigned int gfp_mask, int nr_pages) unsigned int gfp_mask, int nr_pages, int *nr_mapped)
{ {
unsigned long ratio; unsigned long ratio;
/* This is bogus for ZONE_HIGHMEM? */
if (kmem_cache_reap(gfp_mask) >= nr_pages)
return 0;
/* /*
* Try to keep the active list 2/3 of the size of the cache. And * Try to keep the active list 2/3 of the size of the cache. And
* make sure that refill_inactive is given a decent number of pages. * make sure that refill_inactive is given a decent number of pages.
...@@ -547,7 +552,8 @@ shrink_zone(struct zone *zone, int max_scan, ...@@ -547,7 +552,8 @@ shrink_zone(struct zone *zone, int max_scan,
atomic_sub(SWAP_CLUSTER_MAX, &zone->refill_counter); atomic_sub(SWAP_CLUSTER_MAX, &zone->refill_counter);
refill_inactive_zone(zone, SWAP_CLUSTER_MAX); refill_inactive_zone(zone, SWAP_CLUSTER_MAX);
} }
nr_pages = shrink_cache(nr_pages, zone, gfp_mask, max_scan); nr_pages = shrink_cache(nr_pages, zone, gfp_mask,
max_scan, nr_mapped);
return nr_pages; return nr_pages;
} }
...@@ -557,6 +563,9 @@ shrink_caches(struct zone *classzone, int priority, ...@@ -557,6 +563,9 @@ shrink_caches(struct zone *classzone, int priority,
{ {
struct zone *first_classzone; struct zone *first_classzone;
struct zone *zone; struct zone *zone;
int ratio;
int nr_mapped = 0;
int pages = nr_used_zone_pages();
first_classzone = classzone->zone_pgdat->node_zones; first_classzone = classzone->zone_pgdat->node_zones;
for (zone = classzone; zone >= first_classzone; zone--) { for (zone = classzone; zone >= first_classzone; zone--) {
...@@ -581,16 +590,28 @@ shrink_caches(struct zone *classzone, int priority, ...@@ -581,16 +590,28 @@ shrink_caches(struct zone *classzone, int priority,
max_scan = zone->nr_inactive >> priority; max_scan = zone->nr_inactive >> priority;
if (max_scan < to_reclaim * 2) if (max_scan < to_reclaim * 2)
max_scan = to_reclaim * 2; max_scan = to_reclaim * 2;
unreclaimed = shrink_zone(zone, max_scan, gfp_mask, to_reclaim); unreclaimed = shrink_zone(zone, max_scan,
gfp_mask, to_reclaim, &nr_mapped);
nr_pages -= to_reclaim - unreclaimed; nr_pages -= to_reclaim - unreclaimed;
*total_scanned += max_scan; *total_scanned += max_scan;
} }
shrink_dcache_memory(priority, gfp_mask); /*
shrink_icache_memory(1, gfp_mask); * Here we assume it costs one seek to replace a lru page and that
#ifdef CONFIG_QUOTA * it also takes a seek to recreate a cache object. With this in
shrink_dqcache_memory(DEF_PRIORITY, gfp_mask); * mind we age equal percentages of the lru and ageable caches.
#endif * This should balance the seeks generated by these structures.
*
* NOTE: for now I do this for all zones. If we find this is too
* aggressive on large boxes we may want to exclude ZONE_HIGHMEM
*
* If we're encountering mapped pages on the LRU then increase the
* pressure on slab to avoid swapping.
*/
ratio = (pages / (*total_scanned + nr_mapped + 1)) + 1;
shrink_dcache_memory(ratio, gfp_mask);
shrink_icache_memory(ratio, gfp_mask);
shrink_dqcache_memory(ratio, gfp_mask);
return nr_pages; return nr_pages;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment