Commit 344971f8 authored by Linus Torvalds

Linux 2.3.7pre1

I'd like to point out that the current pre-2.3.7 series is fairly
experimental, as amply demonstrated by the filename (the "dangerous" part
hopefully made some people go "Hmm..").

We're working on re-architecting (or rather, cleaning up so that it works
like it really was supposed to) the page cache writing, and as a result a
number of filesystems are probably going to be broken for a while unless
we get people jumping in to help.
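
To make the rework concrete, here is a minimal sketch of what the new write
path asks of a filesystem, mirroring the ext2 hunks later in this commit:
generic_file_write() drives the page cache and calls back into the filesystem
once per page, and that callback hands block mapping to block_write_one_page()
through an fs_getblock_t helper. The "myfs_*" names are hypothetical stand-ins
(ext2 uses ext2_write_one_page() and ext2_getblk_block()), and the helper's
signature is assumed from how the generic code calls it; this is an
illustration of the shape of the interface, not code from the commit.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

/* Assumed fs_getblock_t shape, matching how block_write_one_page() calls it:
 * return the physical block for (inode, block), allocating if 'create'. */
static int myfs_getblk_block(struct inode *inode, unsigned long block,
			     int create, int *err, int *created);

/* hypothetical per-page write callback, modeled on ext2_write_one_page() */
static long myfs_write_one_page(struct file *file, struct page *page,
				unsigned long offset, unsigned long bytes,
				const char *buf)
{
	/* block_write_one_page() maps/allocates blocks through the
	 * filesystem's helper and copies the user data straight into
	 * the page cache page (the "direct write-through" part). */
	return block_write_one_page(file, page, offset, bytes, buf,
				    myfs_getblk_block);
}

/* the filesystem's write(2) entry point just delegates to the generic code */
static ssize_t myfs_file_write(struct file *file, const char *buf,
			       size_t count, loff_t *ppos)
{
	return generic_file_write(file, buf, count, ppos,
				  myfs_write_one_page);
}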

Right now 2.3.7-1 (aka "dangerous") is not stable even with ext2, in that
swapping doesn't work. Ingo just sent me patches to fix that, and I'm
hoping to remove the "dangerous" part from 2.3.7-2, but even then a number
of filesystems will be broken.

We _may_ end up just re-introducing the "update_vm_cache()" code for
filesystems that really don't need the added performance, but it would
actually be preferable if people made them perform well with the new
direct write-through cache code instead.
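
For reference, the update_vm_cache() style mentioned above is the old
buffer-cache write loop. What follows is a heavily trimmed sketch based on the
ext2_file_write() code being removed later in this commit: data is copied into
the buffer cache and update_vm_cache() then patches up any page-cache copy to
keep it coherent. The function name is made up for the example, and the
O_APPEND/O_SYNC handling, size/overflow checks and the read-modify-write of
partially written blocks present in the real function are all omitted; the
helpers (ext2_getblk, update_vm_cache, mark_buffer_*) are the ones used by the
removed code.

#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <asm/uaccess.h>

static ssize_t oldstyle_file_write(struct inode *inode, const char *buf,
				   size_t count, loff_t pos)
{
	struct super_block *sb = inode->i_sb;
	ssize_t written = 0;
	int err = 0;

	while (count) {
		unsigned long block = pos >> EXT2_BLOCK_SIZE_BITS(sb);
		int offset = pos & (sb->s_blocksize - 1);
		int c = sb->s_blocksize - offset;
		struct buffer_head *bh = ext2_getblk(inode, block, 1, &err);

		if (!bh)
			return written ? written : err;
		if (c > count)
			c = count;
		/* the write goes into the buffer cache first ... */
		c -= copy_from_user(bh->b_data + offset, buf, c);
		if (!c) {
			brelse(bh);
			return written ? written : -EFAULT;
		}
		/* ... and the page cache copy is patched up afterwards */
		update_vm_cache(inode, pos, bh->b_data + offset, c);
		mark_buffer_uptodate(bh, 1);
		mark_buffer_dirty(bh, 0);
		brelse(bh);
		pos += c;
		buf += c;
		written += c;
		count -= c;
	}
	return written;
}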

                Linus
parent 3820a431
......@@ -624,6 +624,13 @@ S: 1123 North Oak Park Avenue
S: Oak Park, Illinois 60302
S: USA
N: Daniel J. Frasnelli
E: dfrasnel@alphalinux.org
W: http://www.alphalinux.org/
P: 1024/3EF87611 B9 F1 44 50 D3 E8 C2 80 DA E5 55 AA 56 7C 42 DA
D: DEC Alpha hacker
D: Miscellaneous bug squisher
N: Jim Freeman
E: jfree@sovereign.org
W: http://www.sovereign.org/
......
VERSION = 2
PATCHLEVEL = 3
SUBLEVEL = 6
SUBLEVEL = 7
EXTRAVERSION =
ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
......
......@@ -159,10 +159,10 @@ void show_mem(void)
reserved++;
else if (PageSwapCache(mem_map+i))
cached++;
else if (!atomic_read(&mem_map[i].count))
else if (!page_count(mem_map+i))
free++;
else
shared += atomic_read(&mem_map[i].count) - 1;
shared += page_count(mem_map+i) - 1;
}
printk("%d pages of RAM\n",total);
printk("%d reserved pages\n",reserved);
......@@ -449,7 +449,7 @@ __initfunc(void mem_init(unsigned long start_mem, unsigned long end_mem))
reservedpages++;
continue;
}
atomic_set(&mem_map[MAP_NR(tmp)].count, 1);
set_page_count(mem_map+MAP_NR(tmp), 1);
#ifdef CONFIG_BLK_DEV_INITRD
if (!initrd_start || (tmp < initrd_start || tmp >=
initrd_end))
......@@ -475,7 +475,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
mem_map[MAP_NR(addr)].flags &= ~(1 << PG_reserved);
atomic_set(&mem_map[MAP_NR(addr)].count, 1);
set_page_count(mem_map+MAP_NR(addr), 1);
free_page(addr);
}
printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
......@@ -494,9 +494,9 @@ void si_meminfo(struct sysinfo *val)
if (PageReserved(mem_map+i))
continue;
val->totalram++;
if (!atomic_read(&mem_map[i].count))
if (!page_count(mem_map+i))
continue;
val->sharedram += atomic_read(&mem_map[i].count) - 1;
val->sharedram += page_count(mem_map+i) - 1;
}
val->totalram <<= PAGE_SHIFT;
val->sharedram <<= PAGE_SHIFT;
......
......@@ -603,7 +603,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bh[])
/* Verify requested block sizes. */
for (i = 0; i < nr; i++) {
if (bh[i] && bh[i]->b_size != correct_size) {
if (bh[i]->b_size != correct_size) {
printk(KERN_NOTICE "ll_rw_block: device %s: "
"only %d-char blocks implemented (%lu)\n",
kdevname(bh[0]->b_dev),
......
......@@ -9,6 +9,7 @@
* Al Longyear <longyear@netcom.com>, Paul Mackerras <Paul.Mackerras@cs.anu.edu.au>
*
* Original release 01/11/99
* ==FILEDATE 19990524==
*
* This code is released under the GNU General Public License (GPL)
*
......@@ -72,7 +73,7 @@
*/
#define HDLC_MAGIC 0x239e
#define HDLC_VERSION "1.0"
#define HDLC_VERSION "1.2"
#include <linux/version.h>
#include <linux/config.h>
......@@ -813,6 +814,8 @@ static int n_hdlc_tty_ioctl (struct tty_struct *tty, struct file * file,
{
struct n_hdlc *n_hdlc = tty2n_hdlc (tty);
int error = 0;
int count;
unsigned long flags;
if (debuglevel >= DEBUG_LEVEL_INFO)
printk("%s(%d)n_hdlc_tty_ioctl() called %d\n",
......@@ -824,21 +827,29 @@ static int n_hdlc_tty_ioctl (struct tty_struct *tty, struct file * file,
switch (cmd) {
case FIONREAD:
{
/* report count of read data available */
/* in next available frame (if any) */
int count;
unsigned long flags;
spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags);
if (n_hdlc->rx_buf_list.head)
count = n_hdlc->rx_buf_list.head->count;
else
count = 0;
spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags);
PUT_USER (error, count, (int *) arg);
}
/* report count of read data available */
/* in next available frame (if any) */
spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags);
if (n_hdlc->rx_buf_list.head)
count = n_hdlc->rx_buf_list.head->count;
else
count = 0;
spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags);
PUT_USER (error, count, (int *) arg);
break;
case TIOCOUTQ:
/* get the pending tx byte count in the driver */
count = tty->driver.chars_in_buffer ?
tty->driver.chars_in_buffer(tty) : 0;
/* add size of next output frame in queue */
spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock,flags);
if (n_hdlc->tx_buf_list.head)
count += n_hdlc->tx_buf_list.head->count;
spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock,flags);
PUT_USER (error, count, (int*)arg);
break;
default:
error = n_tty_ioctl (tty, file, cmd, arg);
break;
......
/*
* linux/drivers/char/synclink.c
*
* ==FILEDATE 19990610==
*
* Device driver for Microgate SyncLink ISA and PCI
* high speed multiprotocol serial adapters.
*
......@@ -43,14 +45,15 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define VERSION(ver,rel,seq) (((ver)<<16) | ((rel)<<8) | (seq))
#define BREAKPOINT() asm(" int $3");
#define MAX_ISA_DEVICES 10
#include <linux/config.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/sched.h>
......@@ -68,7 +71,7 @@
#include <linux/mm.h>
#include <linux/malloc.h>
#if LINUX_VERSION_CODE >= VERSION(2,1,0)
#if LINUX_VERSION_CODE >= VERSION(2,1,0)
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <asm/serial.h>
......@@ -209,8 +212,21 @@ typedef struct _BH_EVENT {
} BH_EVENT, *BH_QUEUE; /* Queue of BH actions to be done. */
#define MAX_BH_QUEUE_ENTRIES 200
#define IO_PIN_SHUTDOWN_LIMIT (MAX_BH_QUEUE_ENTRIES/4)
#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
struct _input_signal_events {
int ri_up;
int ri_down;
int dsr_up;
int dsr_down;
int dcd_up;
int dcd_down;
int cts_up;
int cts_down;
};
/*
* Device instance data structure
*/
......@@ -266,6 +282,11 @@ struct mgsl_struct {
int bh_running; /* Protection from multiple */
int isr_overflow;
int bh_requested;
int dcd_chkcount; /* check counts to prevent */
int cts_chkcount; /* too many IRQs if a signal */
int dsr_chkcount; /* is floating */
int ri_chkcount;
char *buffer_list; /* virtual address of Rx & Tx buffer lists */
unsigned long buffer_list_phys;
......@@ -327,6 +348,11 @@ struct mgsl_struct {
char flag_buf[HDLC_MAX_FRAME_SIZE];
char char_buf[HDLC_MAX_FRAME_SIZE];
BOOLEAN drop_rts_on_tx_done;
BOOLEAN loopmode_insert_requested;
BOOLEAN loopmode_send_done_requested;
struct _input_signal_events input_signal_events;
};
#define MGSL_MAGIC 0x5401
......@@ -712,6 +738,13 @@ void usc_loopback_frame( struct mgsl_struct *info );
void mgsl_tx_timeout(unsigned long context);
void usc_loopmode_cancel_transmit( struct mgsl_struct * info );
void usc_loopmode_insert_request( struct mgsl_struct * info );
int usc_loopmode_active( struct mgsl_struct * info);
void usc_loopmode_send_done( struct mgsl_struct * info );
int usc_loopmode_send_active( struct mgsl_struct * info );
/*
* Defines a BUS descriptor value for the PCI adapter
* local bus address ranges.
......@@ -820,7 +853,8 @@ static int mgsl_set_txidle(struct mgsl_struct * info, int idle_mode);
static int mgsl_txenable(struct mgsl_struct * info, int enable);
static int mgsl_txabort(struct mgsl_struct * info);
static int mgsl_rxenable(struct mgsl_struct * info, int enable);
static int mgsl_wait_event(struct mgsl_struct * info, int mask);
static int mgsl_wait_event(struct mgsl_struct * info, int * mask);
static int mgsl_loopmode_send_done( struct mgsl_struct * info );
#define jiffies_from_ms(a) ((((a) * HZ)/1000)+1)
......@@ -865,7 +899,7 @@ MODULE_PARM(debug_level,"i");
#endif
static char *driver_name = "SyncLink serial driver";
static char *driver_version = "1.00";
static char *driver_version = "1.7";
static struct tty_driver serial_driver, callout_driver;
static int serial_refcount;
......@@ -1001,6 +1035,7 @@ void mgsl_format_bh_queue( struct mgsl_struct *info )
/* As a safety measure, mark the end of the chain with a NULL */
info->free_bh_queue_tail->link = NULL;
info->isr_overflow=0;
} /* end of mgsl_format_bh_queue() */
......@@ -1092,6 +1127,14 @@ int mgsl_bh_queue_get( struct mgsl_struct *info )
spin_unlock_irqrestore(&info->irq_spinlock,flags);
return 1;
}
if ( info->isr_overflow ) {
if (debug_level >= DEBUG_LEVEL_BH)
printk("ISR overflow cleared.\n");
info->isr_overflow=0;
usc_EnableMasterIrqBit(info);
usc_EnableDmaInterrupts(info,DICR_MASTER);
}
/* Mark BH routine as complete */
info->bh_running = 0;
......@@ -1155,10 +1198,6 @@ void mgsl_bh_handler(void* Context)
}
}
if ( info->isr_overflow ) {
printk("ISR overflow detected.\n");
}
if ( debug_level >= DEBUG_LEVEL_BH )
printk( "%s(%d):mgsl_bh_handler(%s) exit\n",
__FILE__,__LINE__,info->device_name);
......@@ -1199,6 +1238,7 @@ void mgsl_bh_receive_dma( struct mgsl_struct *info, unsigned short status )
void mgsl_bh_transmit_data( struct mgsl_struct *info, unsigned short Datacount )
{
struct tty_struct *tty = info->tty;
unsigned long flags;
if ( debug_level >= DEBUG_LEVEL_BH )
printk( "%s(%d):mgsl_bh_transmit_data() entry on %s\n",
......@@ -1215,7 +1255,15 @@ void mgsl_bh_transmit_data( struct mgsl_struct *info, unsigned short Datacount )
}
wake_up_interruptible(&tty->write_wait);
}
/* if transmitter idle and loopmode_send_done_requested
* then start echoing RxD to TxD
*/
spin_lock_irqsave(&info->irq_spinlock,flags);
if ( !info->tx_active && info->loopmode_send_done_requested )
usc_loopmode_send_done( info );
spin_unlock_irqrestore(&info->irq_spinlock,flags);
} /* End Of mgsl_bh_transmit_data() */
/* mgsl_bh_status_handler()
......@@ -1240,6 +1288,23 @@ void mgsl_bh_status_handler( struct mgsl_struct *info, unsigned short status )
printk( "%s(%d):mgsl_bh_status_handler() entry on %s\n",
__FILE__,__LINE__,info->device_name);
if (status & MISCSTATUS_RI_LATCHED) {
if (info->ri_chkcount)
(info->ri_chkcount)--;
}
if (status & MISCSTATUS_DSR_LATCHED) {
if (info->dsr_chkcount)
(info->dsr_chkcount)--;
}
if (status & MISCSTATUS_DCD_LATCHED) {
if (info->dcd_chkcount)
(info->dcd_chkcount)--;
}
if (status & MISCSTATUS_CTS_LATCHED) {
if (info->cts_chkcount)
(info->cts_chkcount)--;
}
} /* End Of mgsl_bh_status_handler() */
/* mgsl_isr_receive_status()
......@@ -1259,8 +1324,21 @@ void mgsl_isr_receive_status( struct mgsl_struct *info )
printk("%s(%d):mgsl_isr_receive_status status=%04X\n",
__FILE__,__LINE__,status);
usc_ClearIrqPendingBits( info, RECEIVE_STATUS );
usc_UnlatchRxstatusBits( info, status );
if ( (status & RXSTATUS_ABORT_RECEIVED) &&
info->loopmode_insert_requested &&
usc_loopmode_active(info) )
{
++info->icount.rxabort;
info->loopmode_insert_requested = FALSE;
/* clear CMR:13 to start echoing RxD to TxD */
info->cmr_value &= ~BIT13;
usc_OutReg(info, CMR, info->cmr_value);
/* disable received abort irq (no longer required) */
usc_OutReg(info, RICR,
(usc_InReg(info, RICR) & ~RXSTATUS_ABORT_RECEIVED));
}
if (status & (RXSTATUS_EXITED_HUNT + RXSTATUS_IDLE_RECEIVED)) {
if (status & RXSTATUS_EXITED_HUNT)
......@@ -1278,6 +1356,9 @@ void mgsl_isr_receive_status( struct mgsl_struct *info )
usc_RTCmd( info, RTCmd_PurgeRxFifo );
}
usc_ClearIrqPendingBits( info, RECEIVE_STATUS );
usc_UnlatchRxstatusBits( info, status );
} /* end of mgsl_isr_receive_status() */
/* mgsl_isr_transmit_status()
......@@ -1300,7 +1381,7 @@ void mgsl_isr_transmit_status( struct mgsl_struct *info )
usc_ClearIrqPendingBits( info, TRANSMIT_STATUS );
usc_UnlatchTxstatusBits( info, status );
if ( status & TXSTATUS_EOF_SENT )
info->icount.txok++;
else if ( status & TXSTATUS_UNDERRUN )
......@@ -1356,12 +1437,32 @@ void mgsl_isr_io_pin( struct mgsl_struct *info )
MISCSTATUS_DSR_LATCHED | MISCSTATUS_RI_LATCHED) ) {
icount = &info->icount;
/* update input line counters */
if (status & MISCSTATUS_RI_LATCHED)
if (status & MISCSTATUS_RI_LATCHED) {
if ((info->ri_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
usc_DisablestatusIrqs(info,SICR_RI);
icount->rng++;
if (status & MISCSTATUS_DSR_LATCHED)
if ( status & MISCSTATUS_RI )
info->input_signal_events.ri_up++;
else
info->input_signal_events.ri_down++;
}
if (status & MISCSTATUS_DSR_LATCHED) {
if ((info->dsr_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
usc_DisablestatusIrqs(info,SICR_DSR);
icount->dsr++;
if ( status & MISCSTATUS_DSR )
info->input_signal_events.dsr_up++;
else
info->input_signal_events.dsr_down++;
}
if (status & MISCSTATUS_DCD_LATCHED) {
if ((info->dcd_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
usc_DisablestatusIrqs(info,SICR_DCD);
icount->dcd++;
if ( status & MISCSTATUS_DCD )
info->input_signal_events.dcd_up++;
else
info->input_signal_events.dcd_down++;
#ifdef CONFIG_HARD_PPS
if ((info->flags & ASYNC_HARDPPS_CD) &&
(status & MISCSTATUS_DCD_LATCHED))
......@@ -1369,7 +1470,15 @@ void mgsl_isr_io_pin( struct mgsl_struct *info )
#endif
}
if (status & MISCSTATUS_CTS_LATCHED)
{
if ((info->cts_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
usc_DisablestatusIrqs(info,SICR_CTS);
icount->cts++;
if ( status & MISCSTATUS_CTS )
info->input_signal_events.cts_up++;
else
info->input_signal_events.cts_down++;
}
wake_up_interruptible(&info->status_event_wait_q);
wake_up_interruptible(&info->event_wait_q);
......@@ -1411,6 +1520,8 @@ void mgsl_isr_io_pin( struct mgsl_struct *info )
}
}
mgsl_bh_queue_put(info, BH_TYPE_STATUS, status);
/* for diagnostics set IRQ flag */
if ( status & MISCSTATUS_TXC_LATCHED ){
usc_OutReg( info, SICR,
......@@ -1642,8 +1753,10 @@ void mgsl_isr_receive_dma( struct mgsl_struct *info )
/* Post a receive event for BH processing. */
mgsl_bh_queue_put( info, BH_TYPE_RECEIVE_DMA, status );
if ( status & BIT3 )
if ( status & BIT3 ) {
info->rx_overflow = 1;
info->icount.buf_overrun++;
}
} /* end of mgsl_isr_receive_dma() */
......@@ -1696,9 +1809,9 @@ static void mgsl_interrupt(int irq, void *dev_id, struct pt_regs * regs)
if ( info->isr_overflow ) {
printk(KERN_ERR"%s(%d):%s isr overflow irq=%d\n",
__FILE__,__LINE__,info->device_name, irq);
/* Interrupt overflow. Reset adapter and exit. */
// UscReset(info);
// break;
usc_DisableMasterIrqBit(info);
usc_DisableDmaInterrupts(info,DICR_MASTER);
break;
}
}
......@@ -1980,6 +2093,11 @@ static void mgsl_change_params(struct mgsl_struct *info)
usc_set_async_mode(info);
usc_set_serial_signals(info);
info->dcd_chkcount = 0;
info->cts_chkcount = 0;
info->ri_chkcount = 0;
info->dsr_chkcount = 0;
/* enable modem signal IRQs and read initial signal states */
usc_EnableStatusIrqs(info,SICR_CTS+SICR_DSR+SICR_DCD+SICR_RI);
......@@ -2112,16 +2230,27 @@ static int mgsl_write(struct tty_struct * tty, int from_user,
if ( info->params.mode == MGSL_MODE_HDLC ) {
/* operating in synchronous (frame oriented) mode */
if (info->tx_active) {
ret = 0; goto cleanup;
}
/* if operating in HDLC LoopMode and the adapter */
/* has yet to be inserted into the loop, we can't */
/* transmit */
if ( (info->params.flags & HDLC_FLAG_HDLC_LOOPMODE) &&
!usc_loopmode_active(info) )
{
ret = 0;
goto cleanup;
}
if ( info->xmit_cnt ) {
/* Send accumulated from send_char() calls */
/* as frame and wait before accepting more data. */
ret = 0;
/* copy data from circular xmit_buf to */
/* transmit DMA buffer. */
mgsl_load_tx_dma_buffer(info,
......@@ -2578,8 +2707,19 @@ static int mgsl_txenable(struct mgsl_struct * info, int enable)
spin_lock_irqsave(&info->irq_spinlock,flags);
if ( enable ) {
if ( !info->tx_enabled )
if ( !info->tx_enabled ) {
usc_start_transmitter(info);
/*--------------------------------------------------
* if HDLC/SDLC Loop mode, attempt to insert the
* station in the 'loop' by setting CMR:13. Upon
* receipt of the next GoAhead (RxAbort) sequence,
* the OnLoop indicator (CCSR:7) should go active
* to indicate that we are on the loop
*--------------------------------------------------*/
if ( info->params.flags & HDLC_FLAG_HDLC_LOOPMODE )
usc_loopmode_insert_request( info );
}
} else {
if ( info->tx_enabled )
usc_stop_transmitter(info);
......@@ -2604,7 +2744,12 @@ static int mgsl_txabort(struct mgsl_struct * info)
spin_lock_irqsave(&info->irq_spinlock,flags);
if ( info->tx_active && info->params.mode == MGSL_MODE_HDLC )
usc_TCmd(info,TCmd_SendAbort);
{
if ( info->params.flags & HDLC_FLAG_HDLC_LOOPMODE )
usc_loopmode_cancel_transmit( info );
else
usc_TCmd(info,TCmd_SendAbort);
}
spin_unlock_irqrestore(&info->irq_spinlock,flags);
return 0;
......@@ -2640,25 +2785,39 @@ static int mgsl_rxenable(struct mgsl_struct * info, int enable)
/* mgsl_wait_event() wait for specified event to occur
*
* Arguments: info pointer to device instance data
* mask bitmask of events to wait for
* Return Value: bit mask of triggering event, otherwise error code
* mask pointer to bitmask of events to wait for
* Return Value: 0 if successful and bit mask updated with
* of events triggerred,
* otherwise error code
*/
static int mgsl_wait_event(struct mgsl_struct * info, int mask)
static int mgsl_wait_event(struct mgsl_struct * info, int * mask_ptr)
{
unsigned long flags;
int s;
int rc=0;
u16 regval;
struct mgsl_icount cprev, cnow;
int events = 0;
int mask;
struct _input_signal_events signal_events_prev, signal_events_now;
COPY_FROM_USER(rc,&mask, mask_ptr, sizeof(int));
if (rc) {
return -EFAULT;
}
if (debug_level >= DEBUG_LEVEL_INFO)
printk("%s(%d):mgsl_wait_event(%s,%d)\n", __FILE__,__LINE__,
info->device_name, mask);
spin_lock_irqsave(&info->irq_spinlock,flags);
usc_get_serial_signals(info);
s = info->serial_signals;
/* note the counters on entry */
cprev = info->icount;
signal_events_prev = info->input_signal_events;
if (mask & MgslEvent_ExitHuntMode) {
/* enable exit hunt mode IRQ */
......@@ -2676,7 +2835,22 @@ static int mgsl_wait_event(struct mgsl_struct * info, int mask)
spin_unlock_irqrestore(&info->irq_spinlock,flags);
while(!rc) {
/* Determine if any user requested events for input signals is currently TRUE */
events |= (mask & ((s & SerialSignal_DSR) ?
MgslEvent_DsrActive:MgslEvent_DsrInactive));
events |= (mask & ((s & SerialSignal_DCD) ?
MgslEvent_DcdActive:MgslEvent_DcdInactive));
events |= (mask & ((s & SerialSignal_CTS) ?
MgslEvent_CtsActive:MgslEvent_CtsInactive));
events |= (mask & ((s & SerialSignal_RI) ?
MgslEvent_RiActive:MgslEvent_RiInactive));
while(!events) {
/* sleep until event occurs */
interruptible_sleep_on(&info->event_wait_q);
......@@ -2687,39 +2861,52 @@ static int mgsl_wait_event(struct mgsl_struct * info, int mask)
}
spin_lock_irqsave(&info->irq_spinlock,flags);
/* get icount and serial signal states */
cnow = info->icount;
s = info->serial_signals;
signal_events_now = info->input_signal_events;
spin_unlock_irqrestore(&info->irq_spinlock,flags);
if (signal_events_now.dsr_up != signal_events_prev.dsr_up &&
mask & MgslEvent_DsrActive )
events |= MgslEvent_DsrActive;
rc = 0;
if (signal_events_now.dsr_down != signal_events_prev.dsr_down &&
mask & MgslEvent_DsrInactive )
events |= MgslEvent_DsrInactive;
if (signal_events_now.dcd_up != signal_events_prev.dcd_up &&
mask & MgslEvent_DcdActive )
events |= MgslEvent_DcdActive;
if (cnow.dsr != cprev.dsr)
rc |= (mask & ((s & SerialSignal_DSR) ?
MgslEvent_DsrActive:MgslEvent_DsrInactive));
if (signal_events_now.dcd_down != signal_events_prev.dcd_down &&
mask & MgslEvent_DcdInactive )
events |= MgslEvent_DcdInactive;
if (signal_events_now.cts_up != signal_events_prev.cts_up &&
mask & MgslEvent_CtsActive )
events |= MgslEvent_CtsActive;
if (signal_events_now.cts_down != signal_events_prev.cts_down &&
mask & MgslEvent_CtsInactive )
events |= MgslEvent_CtsInactive;
if (signal_events_now.ri_up != signal_events_prev.ri_up &&
mask & MgslEvent_RiActive )
events |= MgslEvent_RiActive;
if (signal_events_now.ri_down != signal_events_prev.ri_down &&
mask & MgslEvent_RiInactive )
events |= MgslEvent_RiInactive;
if (cnow.dcd != cprev.dcd)
rc |= (mask & ((s & SerialSignal_DCD) ?
MgslEvent_DcdActive:MgslEvent_DcdInactive));
if (cnow.cts != cprev.cts)
rc |= (mask & ((s & SerialSignal_CTS) ?
MgslEvent_CtsActive:MgslEvent_CtsInactive));
if (cnow.rng != cprev.rng)
rc |= (mask & ((s & SerialSignal_RI) ?
MgslEvent_RiActive:MgslEvent_RiInactive));
if (cnow.exithunt != cprev.exithunt)
rc |= (mask & MgslEvent_ExitHuntMode);
events |= (mask & MgslEvent_ExitHuntMode);
if (cnow.rxidle != cprev.rxidle)
rc |= (mask & MgslEvent_ExitHuntMode);
if (!rc)
rc = -EIO; /* no change => error */
events |= (mask & MgslEvent_IdleReceived);
cprev = cnow;
signal_events_prev = signal_events_now;
}
if (mask & (MgslEvent_ExitHuntMode + MgslEvent_IdleReceived)) {
......@@ -2732,7 +2919,10 @@ static int mgsl_wait_event(struct mgsl_struct * info, int mask)
}
spin_unlock_irqrestore(&info->irq_spinlock,flags);
}
if ( rc == 0 )
PUT_USER(rc, events, mask_ptr);
return rc;
} /* end of mgsl_wait_event() */
......@@ -2772,7 +2962,7 @@ static int get_modem_info(struct mgsl_struct * info, unsigned int *value)
if (debug_level >= DEBUG_LEVEL_INFO)
printk("%s(%d):mgsl_get_modem_info %s value=%08X\n",
__FILE__,__LINE__, info->device_name, *value );
__FILE__,__LINE__, info->device_name, result );
PUT_USER(err,result,value);
return err;
......@@ -2928,7 +3118,9 @@ static int mgsl_ioctl(struct tty_struct *tty, struct file * file,
case MGSL_IOCGSTATS:
return mgsl_get_stats(info,(struct mgsl_icount*)arg);
case MGSL_IOCWAITEVENT:
return mgsl_wait_event(info,(int)arg);
return mgsl_wait_event(info,(int*)arg);
case MGSL_IOCLOOPTXDONE:
return mgsl_loopmode_send_done(info);
case MGSL_IOCCLRMODCOUNT:
while(MOD_IN_USE)
MOD_DEC_USE_COUNT;
......@@ -3626,11 +3818,6 @@ static inline int line_info(char *buf, struct mgsl_struct *info)
}
spin_unlock_irqrestore(&info->irq_spinlock,flags);
#if 0 && LINUX_VERSION_CODE >= VERSION(2,1,0)
ret += sprintf(buf+ret, "irq_spinlock=%08X\n",
info->irq_spinlock.lock );
#endif
return ret;
} /* end of line_info() */
......@@ -4227,6 +4414,18 @@ int mgsl_enumerate_devices()
if ( PCIBIOS_SUCCESSFUL == pcibios_find_device(
MICROGATE_VENDOR_ID, SYNCLINK_DEVICE_ID, i, &bus, &func) ) {
#if LINUX_VERSION_CODE >= VERSION(2,1,0)
struct pci_dev *pdev = pci_find_slot(bus,func);
irq_line = pdev->irq;
#else
if (pcibios_read_config_byte(bus,func,
PCI_INTERRUPT_LINE,&irq_line) ) {
printk( "%s(%d):USC I/O addr not set.\n",
__FILE__,__LINE__);
continue;
}
#endif
if (pcibios_read_config_dword(bus,func,
PCI_BASE_ADDRESS_3,&shared_mem_base) ) {
printk( "%s(%d):Shared mem addr not set.\n",
......@@ -4248,13 +4447,6 @@ int mgsl_enumerate_devices()
continue;
}
if (pcibios_read_config_byte(bus,func,
PCI_INTERRUPT_LINE,&irq_line) ) {
printk( "%s(%d):USC I/O addr not set.\n",
__FILE__,__LINE__);
continue;
}
info = mgsl_allocate_device();
if ( !info ) {
/* error allocating device instance data */
......@@ -4671,29 +4863,53 @@ void usc_set_sdlc_mode( struct mgsl_struct *info )
{
u16 RegValue;
/* Channel mode Register (CMR)
*
* <15..14> 00 Tx Sub modes, Underrun Action
* <13> 0 1 = Send Preamble before opening flag
* <12> 0 1 = Consecutive Idles share common 0
* <11..8> 0110 Transmitter mode = HDLC/SDLC
* <7..4> 0000 Rx Sub modes, addr/ctrl field handling
* <3..0> 0110 Receiver mode = HDLC/SDLC
*
* 0000 0110 0000 0110 = 0x0606
*/
if ( info->params.flags & HDLC_FLAG_HDLC_LOOPMODE )
{
/*
** Channel Mode Register (CMR)
**
** <15..14> 10 Tx Sub Modes, Send Flag on Underrun
** <13> 0 0 = Transmit Disabled (initially)
** <12> 0 1 = Consecutive Idles share common 0
** <11..8> 1110 Transmitter Mode = HDLC/SDLC Loop
** <7..4> 0000 Rx Sub Modes, addr/ctrl field handling
** <3..0> 0110 Receiver Mode = HDLC/SDLC
**
** 1000 1110 0000 0110 = 0x8e06
*/
RegValue = 0x8e06;
/*--------------------------------------------------
* ignore user options for UnderRun Actions and
* preambles
*--------------------------------------------------*/
}
else
{
/* Channel mode Register (CMR)
*
* <15..14> 00 Tx Sub modes, Underrun Action
* <13> 0 1 = Send Preamble before opening flag
* <12> 0 1 = Consecutive Idles share common 0
* <11..8> 0110 Transmitter mode = HDLC/SDLC
* <7..4> 0000 Rx Sub modes, addr/ctrl field handling
* <3..0> 0110 Receiver mode = HDLC/SDLC
*
* 0000 0110 0000 0110 = 0x0606
*/
RegValue = 0x0606;
RegValue = 0x0606;
if ( info->params.flags & HDLC_FLAG_UNDERRUN_ABORT15 )
RegValue |= BIT14;
else if ( info->params.flags & HDLC_FLAG_UNDERRUN_FLAG )
RegValue |= BIT15;
else if ( info->params.flags & HDLC_FLAG_UNDERRUN_CRC )
RegValue |= BIT15 + BIT14;
if ( info->params.flags & HDLC_FLAG_UNDERRUN_ABORT15 )
RegValue |= BIT14;
else if ( info->params.flags & HDLC_FLAG_UNDERRUN_FLAG )
RegValue |= BIT15;
else if ( info->params.flags & HDLC_FLAG_UNDERRUN_CRC )
RegValue |= BIT15 + BIT14;
if ( info->params.preamble != HDLC_PREAMBLE_PATTERN_NONE )
RegValue |= BIT13;
if ( info->params.preamble != HDLC_PREAMBLE_PATTERN_NONE )
RegValue |= BIT13;
}
if ( info->params.flags & HDLC_FLAG_SHARE_ZERO )
RegValue |= BIT12;
......@@ -4862,6 +5078,8 @@ void usc_set_sdlc_mode( struct mgsl_struct *info )
RegValue |= 0x0003; /* RxCLK from DPLL */
else if ( info->params.flags & HDLC_FLAG_RXC_BRG )
RegValue |= 0x0004; /* RxCLK from BRG0 */
else if ( info->params.flags & HDLC_FLAG_RXC_TXCPIN)
RegValue |= 0x0006; /* RxCLK from TXC Input */
else
RegValue |= 0x0007; /* RxCLK from Port1 */
......@@ -4869,6 +5087,8 @@ void usc_set_sdlc_mode( struct mgsl_struct *info )
RegValue |= 0x0018; /* TxCLK from DPLL */
else if ( info->params.flags & HDLC_FLAG_TXC_BRG )
RegValue |= 0x0020; /* TxCLK from BRG0 */
else if ( info->params.flags & HDLC_FLAG_TXC_RXCPIN)
RegValue |= 0x0038; /* RxCLK from TXC Input */
else
RegValue |= 0x0030; /* TxCLK from Port0 */
......@@ -4922,10 +5142,24 @@ void usc_set_sdlc_mode( struct mgsl_struct *info )
/* of rounding up and then subtracting 1 we just don't subtract */
/* the one in this case. */
Tc = (u16)((XtalSpeed/DpllDivisor)/info->params.clock_speed);
if ( !((((XtalSpeed/DpllDivisor) % info->params.clock_speed) * 2)
/ info->params.clock_speed) )
Tc--;
/*--------------------------------------------------
* ejz: for DPLL mode, application should use the
* same clock speed as the partner system, even
* though clocking is derived from the input RxData.
* In case the user uses a 0 for the clock speed,
* default to 0xffffffff and don't try to divide by
* zero
*--------------------------------------------------*/
if ( info->params.clock_speed )
{
Tc = (u16)((XtalSpeed/DpllDivisor)/info->params.clock_speed);
if ( !((((XtalSpeed/DpllDivisor) % info->params.clock_speed) * 2)
/ info->params.clock_speed) )
Tc--;
}
else
Tc = -1;
/* Write 16-bit Time Constant for BRG1 */
usc_OutReg( info, TC1R, Tc );
......@@ -6328,6 +6562,13 @@ void mgsl_load_tx_dma_buffer(struct mgsl_struct *info, const char *Buffer,
if ( debug_level >= DEBUG_LEVEL_DATA )
mgsl_trace_block(info,Buffer,BufferSize,1);
if (info->params.flags & HDLC_FLAG_HDLC_LOOPMODE) {
/* set CMR:13 to start transmit when
* next GoAhead (abort) is received
*/
info->cmr_value |= BIT13;
}
/* Setup the status and RCC (Frame Size) fields of the 1st */
/* buffer entry in the transmit DMA buffer list. */
......@@ -6381,7 +6622,7 @@ BOOLEAN mgsl_register_test( struct mgsl_struct *info )
unsigned int i;
BOOLEAN rc = TRUE;
unsigned long flags;
spin_lock_irqsave(&info->irq_spinlock,flags);
usc_reset(info);
spin_unlock_irqrestore(&info->irq_spinlock,flags);
......@@ -6471,7 +6712,7 @@ BOOLEAN mgsl_irq_test( struct mgsl_struct *info )
usc_reset(info);
spin_unlock_irqrestore(&info->irq_spinlock,flags);
if ( !info->irq_occurred )
if ( !info->irq_occurred )
return FALSE;
else
return TRUE;
......@@ -6499,7 +6740,7 @@ BOOLEAN mgsl_dma_test( struct mgsl_struct *info )
volatile unsigned long EndTime;
unsigned long flags;
MGSL_PARAMS tmp_params;
/* save current port options */
memcpy(&tmp_params,&info->params,sizeof(MGSL_PARAMS));
/* load default port options */
......@@ -6657,7 +6898,7 @@ BOOLEAN mgsl_dma_test( struct mgsl_struct *info )
/**********************************/
/* WAIT FOR TRANSMIT FIFO TO FILL */
/**********************************/
/* Wait 100ms */
EndTime = jiffies + jiffies_from_ms(100);
......@@ -6724,7 +6965,7 @@ BOOLEAN mgsl_dma_test( struct mgsl_struct *info )
if ( rc == TRUE ){
/* CHECK FOR TRANSMIT ERRORS */
if ( status & (BIT5 + BIT1) )
if ( status & (BIT5 + BIT1) )
rc = FALSE;
}
......@@ -6981,13 +7222,90 @@ void mgsl_tx_timeout(unsigned long context)
if(info->tx_active && info->params.mode == MGSL_MODE_HDLC) {
info->icount.txtimeout++;
}
spin_lock_irqsave(&info->irq_spinlock,flags);
info->tx_active = 0;
info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;
if ( info->params.flags & HDLC_FLAG_HDLC_LOOPMODE )
usc_loopmode_cancel_transmit( info );
spin_unlock_irqrestore(&info->irq_spinlock,flags);
mgsl_bh_transmit_data(info,0);
} /* end of mgsl_tx_timeout() */
/* signal that there are no more frames to send, so that
* line is 'released' by echoing RxD to TxD when current
* transmission is complete (or immediately if no tx in progress).
*/
static int mgsl_loopmode_send_done( struct mgsl_struct * info )
{
unsigned long flags;
spin_lock_irqsave(&info->irq_spinlock,flags);
if (info->params.flags & HDLC_FLAG_HDLC_LOOPMODE) {
if (info->tx_active)
info->loopmode_send_done_requested = TRUE;
else
usc_loopmode_send_done(info);
}
spin_unlock_irqrestore(&info->irq_spinlock,flags);
return 0;
}
/* release the line by echoing RxD to TxD
* upon completion of a transmit frame
*/
void usc_loopmode_send_done( struct mgsl_struct * info )
{
info->loopmode_send_done_requested = FALSE;
/* clear CMR:13 to 0 to start echoing RxData to TxData */
info->cmr_value &= ~BIT13;
usc_OutReg(info, CMR, info->cmr_value);
}
/* abort a transmit in progress while in HDLC LoopMode
*/
void usc_loopmode_cancel_transmit( struct mgsl_struct * info )
{
/* reset tx dma channel and purge TxFifo */
usc_RTCmd( info, RTCmd_PurgeTxFifo );
usc_DmaCmd( info, DmaCmd_ResetTxChannel );
usc_loopmode_send_done( info );
}
/* for HDLC/SDLC LoopMode, setting CMR:13 after the transmitter is enabled
* is an Insert Into Loop action. Upon receipt of a GoAhead sequence (RxAbort)
* we must clear CMR:13 to begin repeating TxData to RxData
*/
void usc_loopmode_insert_request( struct mgsl_struct * info )
{
info->loopmode_insert_requested = TRUE;
/* enable RxAbort irq. On next RxAbort, clear CMR:13 to
* begin repeating TxData on RxData (complete insertion)
*/
usc_OutReg( info, RICR,
(usc_InReg( info, RICR ) | RXSTATUS_ABORT_RECEIVED ) );
/* set CMR:13 to insert into loop on next GoAhead (RxAbort) */
info->cmr_value |= BIT13;
usc_OutReg(info, CMR, info->cmr_value);
}
/* return 1 if station is inserted into the loop, otherwise 0
*/
int usc_loopmode_active( struct mgsl_struct * info)
{
return usc_InReg( info, CCSR ) & BIT7 ? 1 : 0 ;
}
/* return 1 if USC is in loop send mode, otherwise 0
*/
int usc_loopmode_send_active( struct mgsl_struct * info )
{
return usc_InReg( info, CCSR ) & BIT6 ? 1 : 0 ;
}
......@@ -53,7 +53,7 @@ int usb_init(void)
usb_acm_init();
# endif
# ifdef CONFIG_USB_PRINTER
usb_print_init();
usb_printer_init();
# endif
# ifdef CONFIG_USB_CPIA
usb_cpia_init();
......
......@@ -135,9 +135,17 @@ void no_scroll(char *str, int *ints)
*/
static inline void write_vga(unsigned char reg, unsigned int val)
{
#ifndef SLOW_VGA
unsigned int v1, v2;
unsigned long flags;
/*
* ddprintk might set the console position from interrupt
* handlers, thus the write has to be IRQ-atomic.
*/
save_flags(flags);
cli();
#ifndef SLOW_VGA
v1 = reg + (val & 0xff00);
v2 = reg + 1 + ((val << 8) & 0xff00);
outw(v1, vga_video_port_reg);
......@@ -148,6 +156,7 @@ static inline void write_vga(unsigned char reg, unsigned int val)
outb_p(reg+1, vga_video_port_reg);
outb_p(val & 0xff, vga_video_port_val);
#endif
restore_flags(flags);
}
__initfunc(static const char *vgacon_startup(void))
......
......@@ -109,11 +109,11 @@ union bdflush_param {
int dummy3; /* unused */
} b_un;
unsigned int data[N_PARAM];
} bdf_prm = {{40, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
} bdf_prm = {{90, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
/* These are the min and max parameter values that we will allow to be assigned */
int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 1*HZ, 1*HZ, 1, 1};
int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 600*HZ, 600*HZ, 2047, 5};
int bdflush_max[N_PARAM] = {100,50000, 20000, 20000,1000, 6000*HZ, 6000*HZ, 2047, 5};
void wakeup_bdflush(int);
......@@ -439,10 +439,14 @@ static inline void remove_from_hash_queue(struct buffer_head * bh)
static inline void remove_from_lru_list(struct buffer_head * bh)
{
if (!(bh->b_prev_free) || !(bh->b_next_free))
panic("VFS: LRU block list corrupted");
if (bh->b_dev == B_FREE)
panic("LRU list corrupted");
if (!(bh->b_prev_free) || !(bh->b_next_free)) {
printk("VFS: LRU block list corrupted\n");
*(int*)0 = 0;
}
if (bh->b_dev == B_FREE) {
printk("LRU list corrupted");
*(int*)0 = 0;
}
bh->b_prev_free->b_next_free = bh->b_next_free;
bh->b_next_free->b_prev_free = bh->b_prev_free;
......@@ -478,11 +482,12 @@ static void remove_from_queues(struct buffer_head * bh)
if(bh->b_dev == B_FREE) {
remove_from_free_list(bh); /* Free list entries should not be
in the hash queue */
return;
goto out;
}
nr_buffers_type[bh->b_list]--;
remove_from_hash_queue(bh);
remove_from_lru_list(bh);
out:
}
static inline void put_last_free(struct buffer_head * bh)
......@@ -546,6 +551,28 @@ static void insert_into_queues(struct buffer_head * bh)
}
}
static void insert_into_dirty_queue(struct buffer_head * bh)
{
struct buffer_head **bhp;
bhp = &lru_list[BUF_DIRTY];
if(!*bhp) {
*bhp = bh;
bh->b_prev_free = bh;
}
if (bh->b_next_free)
BUG();
bh->b_next_free = *bhp;
bh->b_prev_free = (*bhp)->b_prev_free;
(*bhp)->b_prev_free->b_next_free = bh;
(*bhp)->b_prev_free = bh;
nr_buffers++;
nr_buffers_type[BUF_DIRTY]++;
}
struct buffer_head * find_buffer(kdev_t dev, int block, int size)
{
struct buffer_head * next;
......@@ -702,7 +729,7 @@ struct buffer_head * getblk(kdev_t dev, int block, int size)
if (!buffer_dirty(bh)) {
bh->b_flushtime = 0;
}
return bh;
goto out;
}
isize = BUFSIZE_INDEX(size);
......@@ -718,7 +745,7 @@ struct buffer_head * getblk(kdev_t dev, int block, int size)
init_buffer(bh, dev, block, end_buffer_io_sync, NULL);
bh->b_state=0;
insert_into_queues(bh);
return bh;
goto out;
/*
* If we block while refilling the free list, somebody may
......@@ -729,6 +756,8 @@ struct buffer_head * getblk(kdev_t dev, int block, int size)
if (!find_buffer(dev,block,size))
goto get_free;
goto repeat;
out:
return bh;
}
void set_writetime(struct buffer_head * buf, int flag)
......@@ -750,18 +779,41 @@ void set_writetime(struct buffer_head * buf, int flag)
/*
* Put a buffer into the appropriate list, without side-effects.
*/
static inline void file_buffer(struct buffer_head *bh, int list)
static void file_buffer(struct buffer_head *bh, int list)
{
remove_from_queues(bh);
bh->b_list = list;
insert_into_queues(bh);
}
/*
* if a new dirty buffer is created we need to balance bdflush.
*/
static inline void balance_dirty (kdev_t dev)
{
int too_many = (nr_buffers * bdf_prm.b_un.nfract/100);
/* This buffer is dirty, maybe we need to start flushing.
* If too high a percentage of the buffers are dirty...
*/
if (nr_buffers_type[BUF_DIRTY] > too_many) {
wakeup_bdflush(1);
}
/* If this is a loop device, and
* more than half of the buffers are dirty...
* (Prevents no-free-buffers deadlock with loop device.)
*/
if (MAJOR(dev) == LOOP_MAJOR &&
nr_buffers_type[BUF_DIRTY]*2>nr_buffers)
wakeup_bdflush(1);
}
/*
* A buffer may need to be moved from one buffer list to another
* (e.g. in case it is not shared any more). Handle this.
*/
void refile_buffer(struct buffer_head * buf)
void __refile_buffer(struct buffer_head * buf)
{
int dispose;
......@@ -777,23 +829,8 @@ void refile_buffer(struct buffer_head * buf)
dispose = BUF_CLEAN;
if(dispose != buf->b_list) {
file_buffer(buf, dispose);
if(dispose == BUF_DIRTY) {
int too_many = (nr_buffers * bdf_prm.b_un.nfract/100);
/* This buffer is dirty, maybe we need to start flushing.
* If too high a percentage of the buffers are dirty...
*/
if (nr_buffers_type[BUF_DIRTY] > too_many)
wakeup_bdflush(1);
/* If this is a loop device, and
* more than half of the buffers are dirty...
* (Prevents no-free-buffers deadlock with loop device.)
*/
if (MAJOR(buf->b_dev) == LOOP_MAJOR &&
nr_buffers_type[BUF_DIRTY]*2>nr_buffers)
wakeup_bdflush(1);
}
if(dispose == BUF_DIRTY)
balance_dirty(buf->b_dev);
}
}
......@@ -809,6 +846,7 @@ void __brelse(struct buffer_head * buf)
if (buf->b_count) {
buf->b_count--;
wake_up(&buffer_wait);
return;
}
printk("VFS: brelse: Trying to free free buffer\n");
......@@ -928,7 +966,7 @@ static void put_unused_buffer_head(struct buffer_head * bh)
return;
}
memset(bh,0,sizeof(*bh));
memset(bh, 0, sizeof(*bh));
init_waitqueue_head(&bh->b_wait);
nr_unused_buffer_heads++;
bh->b_next_free = unused_list;
......@@ -1153,17 +1191,12 @@ static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
struct page *page;
mark_buffer_uptodate(bh, uptodate);
unlock_buffer(bh);
/* This is a temporary buffer used for page I/O. */
page = mem_map + MAP_NR(bh->b_data);
if (!PageLocked(page))
goto not_locked;
if (bh->b_count != 1)
goto bad_count;
if (!test_bit(BH_Uptodate, &bh->b_state))
set_bit(PG_error, &page->flags);
if (!uptodate)
SetPageError(page);
/*
* Be _very_ careful from here on. Bad things can happen if
......@@ -1179,69 +1212,58 @@ static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
*/
save_flags(flags);
cli();
bh->b_count--;
tmp = bh;
do {
if (tmp->b_count)
unlock_buffer(bh);
tmp = bh->b_this_page;
while (tmp != bh) {
if (buffer_locked(tmp)) {
goto still_busy;
}
tmp = tmp->b_this_page;
} while (tmp != bh);
}
/* OK, the async IO on this page is complete. */
free_async_buffers(bh);
restore_flags(flags);
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
after_unlock_page(page);
/*
* if none of the buffers had errors then we can set the
* page uptodate:
*/
if (!PageError(page))
SetPageUptodate(page);
page->owner = (int)current; // HACK, FIXME, will go away.
UnlockPage(page);
return;
still_busy:
restore_flags(flags);
return;
not_locked:
printk ("Whoops: end_buffer_io_async: async io complete on unlocked page\n");
return;
bad_count:
printk ("Whoops: end_buffer_io_async: b_count != 1 on async io.\n");
return;
}
/*
* Start I/O on a page.
* This function expects the page to be locked and may return before I/O is complete.
* You then have to check page->locked, page->uptodate, and maybe wait on page->wait.
*/
int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
static int create_page_buffers (int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
{
struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
int block, nr;
struct buffer_head *head, *bh, *tail;
int block;
if (!PageLocked(page))
panic("brw_page: page not locked for I/O");
clear_bit(PG_uptodate, &page->flags);
clear_bit(PG_error, &page->flags);
BUG();
/*
* Allocate async buffer heads pointing to this page, just for I/O.
* They do _not_ show up in the buffer hash table!
* They are _not_ registered in page->buffers either!
* They show up in the buffer hash table and are registered in
* page->buffers.
*/
bh = create_buffers(page_address(page), size, 1);
if (!bh) {
/* WSH: exit here leaves page->count incremented */
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
return -ENOMEM;
}
nr = 0;
next = bh;
do {
struct buffer_head * tmp;
head = create_buffers(page_address(page), size, 1);
if (page->buffers)
BUG();
if (!head)
BUG();
tail = head;
for (bh = head; bh; bh = bh->b_this_page) {
block = *(b++);
init_buffer(next, dev, block, end_buffer_io_async, NULL);
set_bit(BH_Uptodate, &next->b_state);
tail = bh;
init_buffer(bh, dev, block, end_buffer_io_async, NULL);
/*
* When we use bmap, we define block zero to represent
......@@ -1250,51 +1272,334 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
* two cases.
*/
if (bmap && !block) {
memset(next->b_data, 0, size);
next->b_count--;
continue;
set_bit(BH_Uptodate, &bh->b_state);
unlock_kernel();
memset(bh->b_data, 0, size);
lock_kernel();
} else {
struct buffer_head *alias = find_buffer(dev, block, size);
/*
* Tricky issue. It is legal to have an alias here,
* because the buffer-cache layer can increase the
* b_counter even if the buffer goes inactive
* meanwhile.
*/
if (alias) {
printk(" buffer %p has nonzero alias %p which is locked!!! hoping that it will go away.\n", bh, alias);
}
}
tmp = get_hash_table(dev, block, size);
if (tmp) {
if (!buffer_uptodate(tmp)) {
if (rw == READ)
ll_rw_block(READ, 1, &tmp);
wait_on_buffer(tmp);
}
tail->b_this_page = head;
page->buffers = head;
return 0;
}
/*
* Can the buffer be thrown out?
*/
#define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected))
#define buffer_busy(bh) ((bh)->b_count || ((bh)->b_state & BUFFER_BUSY_BITS))
static int page_idle(struct page *page, int sync)
{
struct buffer_head *head, *bh, *next;
head = page->buffers;
bh = head;
do {
next = bh->b_this_page;
if (bh->b_blocknr) {
if (buffer_locked(bh)) {
wait_on_buffer(bh);
return 0;
}
if (rw == READ)
memcpy(next->b_data, tmp->b_data, size);
else {
memcpy(tmp->b_data, next->b_data, size);
mark_buffer_dirty(tmp, 0);
if (buffer_dirty(bh)) {
if (sync) {
wait_on_buffer(bh);
ll_rw_block(WRITE, 1, &bh);
return 0;
} else
clear_bit(BH_Dirty, &bh->b_state);
}
brelse(tmp);
next->b_count--;
continue;
}
if (rw == READ)
clear_bit(BH_Uptodate, &next->b_state);
else
set_bit(BH_Dirty, &next->b_state);
arr[nr++] = next;
} while (prev = next, (next = next->b_this_page) != NULL);
prev->b_this_page = bh;
if (nr) {
bh = next;
} while (bh != head);
return 1;
}
/*
* We dont have to release all buffers here, but
* we have to be sure that no dirty buffer is left
* and no IO is going on (no buffer is locked), because
* we are going to free the underlying page.
*/
int generic_block_flushpage(struct inode *inode, struct page *page, int sync)
{
struct buffer_head *head, *bh, *next;
if (!PageLocked(page))
BUG();
if (!page->buffers)
BUG();
while (!page_idle(page, sync));
head = page->buffers;
bh = head;
do {
next = bh->b_this_page;
if (bh->b_blocknr) {
if(bh->b_dev == B_FREE) {
remove_from_free_list(bh);
} else {
if (bh->b_list == BUF_DIRTY) {
nr_buffers--;
nr_buffers_type[BUF_DIRTY]--;
remove_from_lru_list(bh);
}
}
} else {
}
bh->b_state = 0;
bh->b_count = 0;
put_unused_buffer_head(bh);
bh = next;
} while (bh != head);
page->buffers = NULL;
return 0;
}
long block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block)
{
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
unsigned long block;
int err, created;
unsigned long blocksize, start_block, end_block;
unsigned long start_offset, start_bytes, end_bytes;
unsigned long bbits, phys, blocks, i, len;
struct buffer_head *bh;
char * target_buf;
target_buf = (char *)page_address(page) + offset;
lock_kernel();
if (!PageLocked(page))
BUG();
blocksize = inode->i_sb->s_blocksize;
if (!page->buffers) {
struct buffer_head *head, *tail;
head = create_buffers(page_address(page), blocksize, 1);
if (page->buffers)
BUG();
bh = head;
do {
bh->b_dev = inode->i_dev;
tail = bh;
bh = bh->b_this_page;
} while (bh);
tail->b_this_page = head;
page->buffers = head;
}
bbits = inode->i_sb->s_blocksize_bits;
block = page->offset >> bbits;
blocks = PAGE_SIZE >> bbits;
start_block = offset >> bbits;
end_block = (offset + bytes - 1) >> bbits;
start_offset = offset & (blocksize - 1);
start_bytes = blocksize - start_offset;
if (start_bytes > bytes)
start_bytes = bytes;
end_bytes = (offset+bytes) & (blocksize - 1);
if (end_bytes > bytes)
end_bytes = bytes;
if (offset < 0 || offset >= PAGE_SIZE)
BUG();
if (bytes+offset < 0 || bytes+offset > PAGE_SIZE)
BUG();
if (start_block < 0 || start_block >= blocks)
BUG();
if (end_block < 0 || end_block >= blocks)
BUG();
// FIXME: currently we assume page alignment.
if (page->offset & (PAGE_SIZE-1))
BUG();
bh = page->buffers;
i = 0;
do {
if (!bh)
BUG();
if ((i < start_block) || (i > end_block)) {
goto skip;
}
if (!bh->b_blocknr) {
phys = fs_get_block (inode, block, 1, &err, &created);
err = -EIO;
if (!phys)
goto out;
unlock_kernel();
/*
* if partially written block which has contents on
* disk, then we have to read it first.
*/
if (!created && (start_offset ||
(end_bytes && (i == end_block)))) {
init_buffer(bh, inode->i_dev, phys, end_buffer_io_sync, NULL);
bh->b_state = 0;
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
err = -EIO;
if (!buffer_uptodate(bh))
goto out_nolock;
}
lock_kernel();
init_buffer(bh, inode->i_dev, phys, end_buffer_io_sync, NULL);
bh->b_state = (1<<BH_Dirty) | (1<<BH_Uptodate);
bh->b_list = BUF_DIRTY;
insert_into_dirty_queue(bh);
} else {
/*
* block already exists, just mark it dirty:
*/
bh->b_end_io = end_buffer_io_sync;
set_bit(BH_Dirty, &bh->b_state);
set_bit(BH_Uptodate, &bh->b_state);
}
unlock_kernel();
err = -EFAULT;
if (start_offset) {
len = start_bytes;
start_offset = 0;
} else
if (end_bytes && (i == end_block)) {
len = end_bytes;
end_bytes = 0;
} else {
/*
* Overwritten block.
*/
len = blocksize;
}
if (copy_from_user(target_buf, buf, len))
goto out_nolock;
target_buf += len;
buf += len;
lock_kernel();
if (bh->b_list != BUF_DIRTY) {
bh->b_list = BUF_DIRTY;
insert_into_dirty_queue(bh);
}
skip:
i++;
block++;
bh = bh->b_this_page;
} while (i < blocks);
unlock_kernel();
SetPageUptodate(page);
return bytes;
out:
unlock_kernel();
out_nolock:
ClearPageUptodate(page);
return err;
}
/*
* Start I/O on a page.
* This function expects the page to be locked and may return
* before I/O is complete. You then have to check page->locked,
* page->uptodate, and maybe wait on page->wait.
*/
int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
{
struct buffer_head *head, *bh, *arr[MAX_BUF_PER_PAGE];
int nr, fresh, block;
if ((rw == READ) && !PageLocked(page))
panic("brw_page: page not locked for I/O");
// clear_bit(PG_error, &page->flags);
/*
* We pretty much rely on the page lock for this, because
* create_page_buffers() might sleep.
*/
fresh = 0;
if (!page->buffers) {
create_page_buffers(rw, page, dev, b, size, bmap);
fresh = 1;
}
if (!page->buffers)
BUG();
head = page->buffers;
bh = head;
nr = 0;
do {
block = *(b++);
if (fresh && (bh->b_count != 1))
BUG();
if (rw == READ) {
if (!fresh)
BUG();
if (bmap && !block) {
if (block)
BUG();
} else {
if (bmap && !block)
BUG();
if (!buffer_uptodate(bh)) {
arr[nr++] = bh;
}
}
} else { /* WRITE */
if (!bh->b_blocknr) {
if (!block)
BUG();
bh->b_blocknr = block;
} else {
if (!block)
BUG();
}
set_bit(BH_Dirty, &bh->b_state);
set_bit(BH_Uptodate, &bh->b_state);
if (bh->b_list != BUF_DIRTY) {
bh->b_list = BUF_DIRTY;
insert_into_dirty_queue(bh);
}
}
bh = bh->b_this_page;
} while (bh != head);
if (rw == READ)
++current->maj_flt;
if ((rw == READ) && nr) {
if (Page_Uptodate(page))
BUG();
unlock_kernel();
ll_rw_block(rw, nr, arr);
/* The rest of the work is done in mark_buffer_uptodate()
* and unlock_buffer(). */
lock_kernel();
} else {
unsigned long flags;
clear_bit(PG_locked, &page->flags);
set_bit(PG_uptodate, &page->flags);
wake_up(&page->wait);
save_flags(flags);
cli();
free_async_buffers(bh);
restore_flags(flags);
after_unlock_page(page);
if (!nr && rw == READ) {
SetPageUptodate(page);
UnlockPage(page);
}
}
++current->maj_flt;
return 0;
}
......@@ -1305,6 +1610,7 @@ void mark_buffer_uptodate(struct buffer_head * bh, int on)
{
if (on) {
struct buffer_head *tmp = bh;
struct page *page;
set_bit(BH_Uptodate, &bh->b_state);
/* If a page has buffers and all these buffers are uptodate,
* then the page is uptodate. */
......@@ -1313,7 +1619,8 @@ void mark_buffer_uptodate(struct buffer_head * bh, int on)
return;
tmp=tmp->b_this_page;
} while (tmp && tmp != bh);
set_bit(PG_uptodate, &mem_map[MAP_NR(bh->b_data)].flags);
page = mem_map + MAP_NR(bh->b_data);
SetPageUptodate(page);
return;
}
clear_bit(BH_Uptodate, &bh->b_state);
......@@ -1334,9 +1641,11 @@ int generic_readpage(struct file * file, struct page * page)
int *p, nr[PAGE_SIZE/512];
int i;
atomic_inc(&page->count);
set_bit(PG_locked, &page->flags);
set_bit(PG_free_after, &page->flags);
if (page->buffers) {
printk("hm, no brw_page(%p) because IO already started.\n",
page);
goto out;
}
i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
block = page->offset >> inode->i_sb->s_blocksize_bits;
......@@ -1350,6 +1659,7 @@ int generic_readpage(struct file * file, struct page * page)
/* IO start */
brw_page(READ, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
out:
return 0;
}
......@@ -1392,7 +1702,7 @@ static int grow_buffers(int size)
tmp->b_next_free = tmp;
}
insert_point = tmp;
++nr_buffers;
nr_buffers++;
if (tmp->b_this_page)
tmp = tmp->b_this_page;
else
......@@ -1405,12 +1715,6 @@ static int grow_buffers(int size)
return 1;
}
/*
* Can the buffer be thrown out?
*/
#define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected))
#define buffer_busy(bh) ((bh)->b_count || ((bh)->b_state & BUFFER_BUSY_BITS))
/*
* try_to_free_buffers() checks if all the buffers on this particular page
* are unused, and free's the page if so.
......@@ -1418,9 +1722,9 @@ static int grow_buffers(int size)
* Wake up bdflush() if this fails - if we're running low on memory due
* to dirty buffers, we need to flush them out as quickly as possible.
*/
int try_to_free_buffers(struct page * page_map)
int try_to_free_buffers(struct page * page)
{
struct buffer_head * tmp, * bh = page_map->buffers;
struct buffer_head * tmp, * bh = page->buffers;
tmp = bh;
do {
......@@ -1448,8 +1752,8 @@ int try_to_free_buffers(struct page * page_map)
/* And free the page */
buffermem -= PAGE_SIZE;
page_map->buffers = NULL;
__free_page(page_map);
page->buffers = NULL;
__free_page(page);
return 1;
}
......@@ -1509,11 +1813,11 @@ void __init buffer_init(unsigned long memory_size)
the heuristic from working with large databases and getting
fsync times (ext2) manageable, is the following */
memory_size >>= 20;
memory_size >>= 22;
for (order = 5; (1UL << order) < memory_size; order++);
/* try to allocate something until we get it or we're asking
for something that is really too small */
for something that is really too small */
do {
nr_hash = (1UL << order) * PAGE_SIZE /
......@@ -1521,6 +1825,7 @@ void __init buffer_init(unsigned long memory_size)
hash_table = (struct buffer_head **)
__get_free_pages(GFP_ATOMIC, order);
} while (hash_table == NULL && --order > 4);
printk("buffer-cache hash table entries: %d (order: %d, %ld bytes)\n", nr_hash, order, (1UL<<order) * PAGE_SIZE);
if (!hash_table)
panic("Failed to allocate buffer hash table\n");
......@@ -1565,11 +1870,11 @@ void wakeup_bdflush(int wait)
{
if (current == bdflush_tsk)
return;
wake_up(&bdflush_wait);
if (wait) {
if (wait)
run_task_queue(&tq_disk);
wake_up(&bdflush_wait);
if (wait)
sleep_on(&bdflush_done);
}
}
......@@ -1801,6 +2106,7 @@ int bdflush(void * unused)
#endif
bh->b_count--;
next->b_count--;
wake_up(&buffer_wait);
}
}
#ifdef DEBUG
......
......@@ -358,7 +358,7 @@ void ext2_free_blocks (const struct inode * inode, unsigned long block,
* bitmap, and then for any free bit if that fails.
*/
int ext2_new_block (const struct inode * inode, unsigned long goal,
u32 * prealloc_count, u32 * prealloc_block, int * err)
u32 * prealloc_count, u32 * prealloc_block, int * err)
{
struct buffer_head * bh;
struct buffer_head * bh2;
......@@ -594,20 +594,12 @@ int ext2_new_block (const struct inode * inode, unsigned long goal,
if (j >= le32_to_cpu(es->s_blocks_count)) {
ext2_error (sb, "ext2_new_block",
"block >= blocks count - "
"block_group = %d, block=%d", i, j);
"block(%d) >= blocks count(%d) - "
"block_group = %d, es == %p ",j,
le32_to_cpu(es->s_blocks_count), i, es);
unlock_super (sb);
return 0;
}
if (!(bh = getblk (sb->s_dev, j, sb->s_blocksize))) {
ext2_error (sb, "ext2_new_block", "cannot get block %d", j);
unlock_super (sb);
return 0;
}
memset(bh->b_data, 0, sb->s_blocksize);
mark_buffer_uptodate(bh, 1);
mark_buffer_dirty(bh, 1);
brelse (bh);
ext2_debug ("allocating block %d. "
"Goal hits %d of %d.\n", j, goal_hits, goal_attempts);
......
......@@ -30,15 +30,15 @@
#include <linux/locks.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#define NBUF 32
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
static int ext2_writepage (struct file * file, struct page * page);
static long long ext2_file_lseek(struct file *, long long, int);
static ssize_t ext2_file_write (struct file *, const char *, size_t, loff_t *);
static int ext2_release_file (struct inode *, struct file *);
#if BITS_PER_LONG < 64
static int ext2_open_file (struct inode *, struct file *);
......@@ -57,51 +57,6 @@ EXT2_MAX_SIZE(10), EXT2_MAX_SIZE(11), EXT2_MAX_SIZE(12), EXT2_MAX_SIZE(13)
#endif
/*
* We have mostly NULL's here: the current defaults are ok for
* the ext2 filesystem.
*/
static struct file_operations ext2_file_operations = {
ext2_file_lseek, /* lseek */
generic_file_read, /* read */
ext2_file_write, /* write */
NULL, /* readdir - bad */
NULL, /* poll - default */
ext2_ioctl, /* ioctl */
generic_file_mmap, /* mmap */
#if BITS_PER_LONG == 64
NULL, /* no special open is needed */
#else
ext2_open_file,
#endif
NULL, /* flush */
ext2_release_file, /* release */
ext2_sync_file, /* fsync */
NULL, /* fasync */
NULL, /* check_media_change */
NULL /* revalidate */
};
struct inode_operations ext2_file_inode_operations = {
&ext2_file_operations,/* default file operations */
NULL, /* create */
NULL, /* lookup */
NULL, /* link */
NULL, /* unlink */
NULL, /* symlink */
NULL, /* mkdir */
NULL, /* rmdir */
NULL, /* mknod */
NULL, /* rename */
NULL, /* readlink */
NULL, /* follow_link */
generic_readpage, /* readpage */
NULL, /* writepage */
ext2_bmap, /* bmap */
ext2_truncate, /* truncate */
ext2_permission, /* permission */
NULL /* smap */
};
/*
* Make sure the offset never goes beyond the 32-bit mark..
......@@ -151,164 +106,50 @@ static inline void remove_suid(struct inode *inode)
}
}
static ssize_t ext2_file_write (struct file * filp, const char * buf,
size_t count, loff_t *ppos)
static int ext2_writepage (struct file * file, struct page * page)
{
struct inode * inode = filp->f_dentry->d_inode;
off_t pos;
long block;
int offset;
int written, c;
struct buffer_head * bh, *bufferlist[NBUF];
struct super_block * sb;
int err;
int i,buffercount,write_error;
/* POSIX: mtime/ctime may not change for 0 count */
if (!count)
return 0;
write_error = buffercount = 0;
if (!inode) {
printk("ext2_file_write: inode = NULL\n");
return -EINVAL;
}
sb = inode->i_sb;
if (sb->s_flags & MS_RDONLY)
/*
* This fs has been automatically remounted ro because of errors
*/
return -ENOSPC;
if (!S_ISREG(inode->i_mode)) {
ext2_warning (sb, "ext2_file_write", "mode = %07o",
inode->i_mode);
return -EINVAL;
}
remove_suid(inode);
if (filp->f_flags & O_APPEND)
pos = inode->i_size;
else {
pos = *ppos;
if (pos != *ppos)
return -EINVAL;
#if BITS_PER_LONG >= 64
if (pos > ext2_max_sizes[EXT2_BLOCK_SIZE_BITS(sb)])
return -EINVAL;
#endif
}
/* Check for overflow.. */
#if BITS_PER_LONG < 64
if (pos > (__u32) (pos + count)) {
count = ~pos; /* == 0xFFFFFFFF - pos */
if (!count)
return -EFBIG;
}
#else
{
off_t max = ext2_max_sizes[EXT2_BLOCK_SIZE_BITS(sb)];
if (pos + count > max) {
count = max - pos;
if (!count)
return -EFBIG;
}
if (((pos + count) >> 32) &&
!(sb->u.ext2_sb.s_es->s_feature_ro_compat &
cpu_to_le32(EXT2_FEATURE_RO_COMPAT_LARGE_FILE))) {
/* If this is the first large file created, add a flag
to the superblock */
sb->u.ext2_sb.s_es->s_feature_ro_compat |=
cpu_to_le32(EXT2_FEATURE_RO_COMPAT_LARGE_FILE);
mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
}
}
#endif
/*
* If a file has been opened in synchronous mode, we have to ensure
* that meta-data will also be written synchronously. Thus, we
* set the i_osync field. This field is tested by the allocation
* routines.
*/
if (filp->f_flags & O_SYNC)
inode->u.ext2_i.i_osync++;
block = pos >> EXT2_BLOCK_SIZE_BITS(sb);
offset = pos & (sb->s_blocksize - 1);
c = sb->s_blocksize - offset;
written = 0;
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
unsigned long block;
int *p, nr[PAGE_SIZE/512];
int i, err, created;
struct buffer_head *bh;
i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
block = page->offset >> inode->i_sb->s_blocksize_bits;
p = nr;
bh = page->buffers;
do {
bh = ext2_getblk (inode, block, 1, &err);
if (!bh) {
if (!written)
written = err;
break;
}
if (c > count)
c = count;
if (c != sb->s_blocksize && !buffer_uptodate(bh)) {
ll_rw_block (READ, 1, &bh);
wait_on_buffer (bh);
if (!buffer_uptodate(bh)) {
brelse (bh);
if (!written)
written = -EIO;
break;
}
}
c -= copy_from_user (bh->b_data + offset, buf, c);
if (!c) {
brelse(bh);
if (!written)
written = -EFAULT;
break;
}
update_vm_cache(inode, pos, bh->b_data + offset, c);
pos += c;
written += c;
buf += c;
count -= c;
mark_buffer_uptodate(bh, 1);
mark_buffer_dirty(bh, 0);
if (filp->f_flags & O_SYNC)
bufferlist[buffercount++] = bh;
if (bh && bh->b_blocknr)
*p = bh->b_blocknr;
else
brelse(bh);
if (buffercount == NBUF){
ll_rw_block(WRITE, buffercount, bufferlist);
for(i=0; i<buffercount; i++){
wait_on_buffer(bufferlist[i]);
if (!buffer_uptodate(bufferlist[i]))
write_error=1;
brelse(bufferlist[i]);
}
buffercount=0;
}
if(write_error)
break;
*p = ext2_getblk_block (inode, block, 1, &err, &created);
if (!*p)
return -EIO;
i--;
block++;
offset = 0;
c = sb->s_blocksize;
} while (count);
if ( buffercount ){
ll_rw_block(WRITE, buffercount, bufferlist);
for(i=0; i<buffercount; i++){
wait_on_buffer(bufferlist[i]);
if (!buffer_uptodate(bufferlist[i]))
write_error=1;
brelse(bufferlist[i]);
}
}
if (pos > inode->i_size)
inode->i_size = pos;
if (filp->f_flags & O_SYNC)
inode->u.ext2_i.i_osync--;
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
*ppos = pos;
mark_inode_dirty(inode);
return written;
p++;
if (bh)
bh = bh->b_this_page;
} while (i > 0);
/* IO start */
brw_page(WRITE, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
return 0;
}
static long ext2_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
{
return block_write_one_page(file, page, offset, bytes, buf, ext2_getblk_block);
}
/*
* Write to a file (through the page cache).
*/
static ssize_t
ext2_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
{
return generic_file_write(file, buf, count, ppos, ext2_write_one_page);
}
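The function above is the heart of the new write path: generic_file_write() walks the page cache and calls the filesystem's write-one-page callback, which hands the actual block mapping to a fs_getblock_t routine via block_write_one_page(). As a hedged sketch (not part of this patch), another block-based filesystem would plug into the same interface as shown below; the myfs_* names are hypothetical and the real mapper body is left to the filesystem.

/* The filesystem supplies a logical-to-physical block mapper with the
 * fs_getblock_t signature declared in the fs.h hunk of this patch;
 * ext2's version is ext2_getblk_block() further down. */
extern int myfs_get_block(struct inode *, long, int, int *, int *);

static long myfs_write_one_page(struct file *file, struct page *page,
				unsigned long offset, unsigned long bytes,
				const char *buf)
{
	/* copies the user data into the page and maps/queues the blocks */
	return block_write_one_page(file, page, offset, bytes, buf,
				    myfs_get_block);
}

static ssize_t myfs_file_write(struct file *file, const char *buf,
			       size_t count, loff_t *ppos)
{
	return generic_file_write(file, buf, count, ppos,
				  myfs_write_one_page);
}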
/*
......@@ -335,3 +176,52 @@ static int ext2_open_file (struct inode * inode, struct file * filp)
return 0;
}
#endif
/*
* We have mostly NULL's here: the current defaults are ok for
* the ext2 filesystem.
*/
static struct file_operations ext2_file_operations = {
ext2_file_lseek, /* lseek */
generic_file_read, /* read */
ext2_file_write, /* write */
NULL, /* readdir - bad */
NULL, /* poll - default */
ext2_ioctl, /* ioctl */
generic_file_mmap, /* mmap */
#if BITS_PER_LONG == 64
NULL, /* no special open is needed */
#else
ext2_open_file,
#endif
NULL, /* flush */
ext2_release_file, /* release */
ext2_sync_file, /* fsync */
NULL, /* fasync */
NULL, /* check_media_change */
NULL /* revalidate */
};
struct inode_operations ext2_file_inode_operations = {
&ext2_file_operations,/* default file operations */
NULL, /* create */
NULL, /* lookup */
NULL, /* link */
NULL, /* unlink */
NULL, /* symlink */
NULL, /* mkdir */
NULL, /* rmdir */
NULL, /* mknod */
NULL, /* rename */
NULL, /* readlink */
NULL, /* follow_link */
generic_readpage, /* readpage */
ext2_writepage, /* writepage */
ext2_bmap, /* bmap */
ext2_truncate, /* truncate */
ext2_permission, /* permission */
NULL, /* smap */
NULL, /* updatepage */
NULL, /* revalidate */
generic_block_flushpage,/* flushpage */
};
......@@ -92,13 +92,12 @@ void ext2_discard_prealloc (struct inode * inode)
#endif
}
static int ext2_alloc_block (struct inode * inode, unsigned long goal, int * err)
static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err)
{
#ifdef EXT2FS_DEBUG
static unsigned long alloc_hits = 0, alloc_attempts = 0;
#endif
unsigned long result;
struct buffer_head * bh;
wait_on_super (inode->i_sb);
......@@ -112,19 +111,6 @@ static int ext2_alloc_block (struct inode * inode, unsigned long goal, int * err
ext2_debug ("preallocation hit (%lu/%lu).\n",
++alloc_hits, ++alloc_attempts);
/* It doesn't matter if we block in getblk() since
we have already atomically allocated the block, and
are only clearing it now. */
if (!(bh = getblk (inode->i_sb->s_dev, result,
inode->i_sb->s_blocksize))) {
ext2_error (inode->i_sb, "ext2_alloc_block",
"cannot get block %lu", result);
return 0;
}
memset(bh->b_data, 0, inode->i_sb->s_blocksize);
mark_buffer_uptodate(bh, 1);
mark_buffer_dirty(bh, 1);
brelse (bh);
} else {
ext2_discard_prealloc (inode);
ext2_debug ("preallocation miss (%lu/%lu).\n",
......@@ -139,7 +125,6 @@ static int ext2_alloc_block (struct inode * inode, unsigned long goal, int * err
#else
result = ext2_new_block (inode, goal, 0, 0, err);
#endif
return result;
}
......@@ -200,8 +185,65 @@ int ext2_bmap (struct inode * inode, int block)
block & (addr_per_block - 1));
}
int ext2_bmap_create (struct inode * inode, int block)
{
int i;
int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
int addr_per_block_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb);
if (block < 0) {
ext2_warning (inode->i_sb, "ext2_bmap", "block < 0");
return 0;
}
if (block >= EXT2_NDIR_BLOCKS + addr_per_block +
(1 << (addr_per_block_bits * 2)) +
((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) {
ext2_warning (inode->i_sb, "ext2_bmap", "block > big");
return 0;
}
if (block < EXT2_NDIR_BLOCKS)
return inode_bmap (inode, block);
block -= EXT2_NDIR_BLOCKS;
if (block < addr_per_block) {
i = inode_bmap (inode, EXT2_IND_BLOCK);
if (!i)
return 0;
return block_bmap (bread (inode->i_dev, i,
inode->i_sb->s_blocksize), block);
}
block -= addr_per_block;
if (block < (1 << (addr_per_block_bits * 2))) {
i = inode_bmap (inode, EXT2_DIND_BLOCK);
if (!i)
return 0;
i = block_bmap (bread (inode->i_dev, i,
inode->i_sb->s_blocksize),
block >> addr_per_block_bits);
if (!i)
return 0;
return block_bmap (bread (inode->i_dev, i,
inode->i_sb->s_blocksize),
block & (addr_per_block - 1));
}
block -= (1 << (addr_per_block_bits * 2));
i = inode_bmap (inode, EXT2_TIND_BLOCK);
if (!i)
return 0;
i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize),
block >> (addr_per_block_bits * 2));
if (!i)
return 0;
i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize),
(block >> addr_per_block_bits) & (addr_per_block - 1));
if (!i)
return 0;
return block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize),
block & (addr_per_block - 1));
}
static struct buffer_head * inode_getblk (struct inode * inode, int nr,
int create, int new_block, int * err)
int create, int new_block, int * err, int metadata,
int *phys_block, int *created)
{
u32 * p;
int tmp, goal = 0;
......@@ -212,11 +254,16 @@ static struct buffer_head * inode_getblk (struct inode * inode, int nr,
repeat:
tmp = *p;
if (tmp) {
struct buffer_head * result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize);
if (tmp == *p)
return result;
brelse (result);
goto repeat;
if (metadata) {
struct buffer_head * result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize);
if (tmp == *p)
return result;
brelse (result);
goto repeat;
} else {
*phys_block = tmp;
return NULL;
}
}
*err = -EFBIG;
if (!create)
......@@ -259,13 +306,28 @@ static struct buffer_head * inode_getblk (struct inode * inode, int nr,
tmp = ext2_alloc_block (inode, goal, err);
if (!tmp)
return NULL;
result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize);
if (*p) {
ext2_free_blocks (inode, tmp, 1);
brelse (result);
goto repeat;
if (metadata) {
result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize);
if (*p) {
ext2_free_blocks (inode, tmp, 1);
brelse (result);
goto repeat;
}
memset(result->b_data, 0, inode->i_sb->s_blocksize);
mark_buffer_uptodate(result, 1);
mark_buffer_dirty(result, 1);
} else {
if (*p) {
ext2_free_blocks (inode, tmp, 1);
goto repeat;
}
*phys_block = tmp;
result = NULL;
*err = 0;
*created = 1;
}
*p = tmp;
inode->u.ext2_i.i_next_alloc_block = new_block;
inode->u.ext2_i.i_next_alloc_goal = tmp;
inode->i_ctime = CURRENT_TIME;
......@@ -277,10 +339,17 @@ static struct buffer_head * inode_getblk (struct inode * inode, int nr,
return result;
}
/*
 * Handles both metadata and data blocks, for plain access or (when
 * 'create' is set) allocation.  It can fail because the block is not
 * present and we may not create it, or because we are out of space.
 *
 * In the data case (metadata == 0) a NULL buffer_head return is
 * mandatory; the physical block number comes back via *phys_block.
*/
static struct buffer_head * block_getblk (struct inode * inode,
struct buffer_head * bh, int nr,
int create, int blocksize,
int new_block, int * err)
struct buffer_head * bh, int nr, int create, int blocksize,
int new_block, int * err, int metadata, int *phys_block, int *created)
{
int tmp, goal = 0;
u32 * p;
......@@ -302,13 +371,19 @@ static struct buffer_head * block_getblk (struct inode * inode,
repeat:
tmp = le32_to_cpu(*p);
if (tmp) {
result = getblk (bh->b_dev, tmp, blocksize);
if (tmp == le32_to_cpu(*p)) {
if (metadata) {
result = getblk (bh->b_dev, tmp, blocksize);
if (tmp == le32_to_cpu(*p)) {
brelse (bh);
return result;
}
brelse (result);
goto repeat;
} else {
*phys_block = tmp;
brelse (bh);
return result;
return NULL;
}
brelse (result);
goto repeat;
}
*err = -EFBIG;
if (!create) {
......@@ -343,7 +418,22 @@ static struct buffer_head * block_getblk (struct inode * inode,
brelse (bh);
return NULL;
}
result = getblk (bh->b_dev, tmp, blocksize);
if (metadata) {
result = getblk (bh->b_dev, tmp, blocksize);
if (*p) {
ext2_free_blocks (inode, tmp, 1);
brelse (result);
goto repeat;
}
memset(result->b_data, 0, inode->i_sb->s_blocksize);
mark_buffer_uptodate(result, 1);
mark_buffer_dirty(result, 1);
} else {
*phys_block = tmp;
result = NULL;
*err = 0;
*created = 1;
}
if (le32_to_cpu(*p)) {
ext2_free_blocks (inode, tmp, 1);
brelse (result);
......@@ -364,24 +454,25 @@ static struct buffer_head * block_getblk (struct inode * inode,
return result;
}
struct buffer_head * ext2_getblk (struct inode * inode, long block,
int create, int * err)
int ext2_getblk_block (struct inode * inode, long block,
int create, int * err, int * created)
{
struct buffer_head * bh;
struct buffer_head * bh, *tmp;
unsigned long b;
unsigned long addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
int addr_per_block_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb);
int phys_block;
*err = -EIO;
if (block < 0) {
ext2_warning (inode->i_sb, "ext2_getblk", "block < 0");
return NULL;
return 0;
}
if (block > EXT2_NDIR_BLOCKS + addr_per_block +
(1 << (addr_per_block_bits * 2)) +
((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) {
ext2_warning (inode->i_sb, "ext2_getblk", "block > big");
return NULL;
return 0;
}
/*
* If this is a sequential block allocation, set the next_alloc_block
......@@ -398,32 +489,71 @@ struct buffer_head * ext2_getblk (struct inode * inode, long block,
inode->u.ext2_i.i_next_alloc_goal++;
}
*err = -ENOSPC;
	*err = 0;	/* was -ENOSPC */
b = block;
if (block < EXT2_NDIR_BLOCKS)
return inode_getblk (inode, block, create, b, err);
*created = 0;
if (block < EXT2_NDIR_BLOCKS) {
/*
* data page.
*/
tmp = inode_getblk (inode, block, create, b,
err, 0, &phys_block, created);
goto out;
}
block -= EXT2_NDIR_BLOCKS;
if (block < addr_per_block) {
bh = inode_getblk (inode, EXT2_IND_BLOCK, create, b, err);
return block_getblk (inode, bh, block, create,
inode->i_sb->s_blocksize, b, err);
bh = inode_getblk (inode, EXT2_IND_BLOCK, create, b, err, 1, NULL, NULL);
tmp = block_getblk (inode, bh, block, create,
inode->i_sb->s_blocksize, b, err, 0, &phys_block, created);
goto out;
}
block -= addr_per_block;
if (block < (1 << (addr_per_block_bits * 2))) {
bh = inode_getblk (inode, EXT2_DIND_BLOCK, create, b, err);
bh = inode_getblk (inode, EXT2_DIND_BLOCK, create, b, err, 1, NULL, NULL);
bh = block_getblk (inode, bh, block >> addr_per_block_bits,
create, inode->i_sb->s_blocksize, b, err);
return block_getblk (inode, bh, block & (addr_per_block - 1),
create, inode->i_sb->s_blocksize, b, err);
create, inode->i_sb->s_blocksize, b, err, 1, NULL, NULL);
tmp = block_getblk (inode, bh, block & (addr_per_block - 1),
create, inode->i_sb->s_blocksize, b, err, 0, &phys_block, created);
goto out;
}
block -= (1 << (addr_per_block_bits * 2));
bh = inode_getblk (inode, EXT2_TIND_BLOCK, create, b, err);
bh = inode_getblk (inode, EXT2_TIND_BLOCK, create, b, err, 1, NULL,NULL);
bh = block_getblk (inode, bh, block >> (addr_per_block_bits * 2),
create, inode->i_sb->s_blocksize, b, err);
bh = block_getblk (inode, bh, (block >> addr_per_block_bits) & (addr_per_block - 1),
create, inode->i_sb->s_blocksize, b, err);
return block_getblk (inode, bh, block & (addr_per_block - 1), create,
inode->i_sb->s_blocksize, b, err);
create, inode->i_sb->s_blocksize, b, err, 1, NULL,NULL);
bh = block_getblk (inode, bh, (block >> addr_per_block_bits) &
(addr_per_block - 1), create, inode->i_sb->s_blocksize,
b, err, 1, NULL,NULL);
tmp = block_getblk (inode, bh, block & (addr_per_block - 1), create,
inode->i_sb->s_blocksize, b, err, 0, &phys_block, created);
out:
if (!phys_block) {
return 0;
}
if (*err) {
return 0;
}
return phys_block;
}
struct buffer_head * ext2_getblk (struct inode * inode, long block,
int create, int * err)
{
struct buffer_head *tmp = NULL;
int phys_block;
int created;
phys_block = ext2_getblk_block (inode, block, create, err, &created);
if (phys_block) {
tmp = getblk (inode->i_dev, phys_block, inode->i_sb->s_blocksize);
if (created) {
memset(tmp->b_data, 0, inode->i_sb->s_blocksize);
mark_buffer_uptodate(tmp, 1);
mark_buffer_dirty(tmp, 1);
}
}
return tmp;
}
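In short, ext2_getblk_block() returns a physical block number plus a 'created' flag so the page-cache paths can fill the nr[] arrays fed to brw_page() without going through the buffer cache, while ext2_getblk() survives as a buffer-head wrapper for the remaining callers. A minimal usage sketch (hypothetical function name, error handling abbreviated):

static int example_map_data_block(struct inode *inode, long blk)
{
	int err = 0, created = 0, phys;
	struct buffer_head *bh;

	/* Page-cache path: only the physical block number is wanted. */
	phys = ext2_getblk_block(inode, blk, 1, &err, &created);
	if (!phys)
		return err;

	/* Legacy path: same mapping through the buffer cache; a freshly
	 * allocated block comes back zeroed, uptodate and dirty. */
	bh = ext2_getblk(inode, blk, 1, &err);
	if (bh)
		brelse(bh);

	return phys;
}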
struct buffer_head * ext2_bread (struct inode * inode, int block,
......
......@@ -160,6 +160,9 @@ static int check_block_empty(struct inode *inode, struct buffer_head *bh,
return retry;
}
#define DATA_BUFFER_USED(bh) \
((bh->b_count > 1) || buffer_locked(bh))
static int trunc_direct (struct inode * inode)
{
struct buffer_head * bh;
......@@ -178,7 +181,7 @@ static int trunc_direct (struct inode * inode)
bh = find_buffer(inode->i_dev, tmp, inode->i_sb->s_blocksize);
if (bh) {
bh->b_count++;
if(bh->b_count != 1 || buffer_locked(bh)) {
if (DATA_BUFFER_USED(bh)) {
brelse(bh);
retry = 1;
continue;
......@@ -255,8 +258,8 @@ static int trunc_indirect (struct inode * inode, int offset, u32 * p,
bh = find_buffer(inode->i_dev, tmp, inode->i_sb->s_blocksize);
if (bh) {
bh->b_count++;
if (bh->b_count != 1 || buffer_locked(bh)) {
brelse (bh);
if (DATA_BUFFER_USED(bh)) {
brelse(bh);
retry = 1;
continue;
}
......@@ -384,8 +387,6 @@ static int trunc_tindirect (struct inode * inode)
void ext2_truncate (struct inode * inode)
{
int err, offset;
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return;
......@@ -411,25 +412,6 @@ void ext2_truncate (struct inode * inode)
current->policy |= SCHED_YIELD;
schedule();
}
/*
* If the file is not being truncated to a block boundary, the
* contents of the partial block following the end of the file
* must be zeroed in case it ever becomes accessible again due
* to subsequent file growth.
*/
offset = inode->i_size & (inode->i_sb->s_blocksize - 1);
if (offset) {
struct buffer_head * bh;
bh = ext2_bread (inode,
inode->i_size >> EXT2_BLOCK_SIZE_BITS(inode->i_sb),
0, &err);
if (bh) {
memset (bh->b_data + offset, 0,
inode->i_sb->s_blocksize - offset);
mark_buffer_dirty (bh, 0);
brelse (bh);
}
}
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
}
......@@ -2,14 +2,45 @@
* linux/fs/fifo.c
*
* written by Paul H. Hargrove
*
* Fixes:
* 10-06-1999, AV: fixed OOM handling in fifo_open(), moved
* initialization there, switched to external
* allocation of pipe_inode_info.
*/
#include <linux/mm.h>
#include <linux/malloc.h>
static int fifo_open(struct inode * inode,struct file * filp)
{
int retval = 0;
unsigned long page;
unsigned long page = 0;
struct pipe_inode_info *info, *tmp = NULL;
if (inode->i_pipe)
goto got_it;
tmp = kmalloc(sizeof(struct pipe_inode_info),GFP_KERNEL);
if (inode->i_pipe)
goto got_it;
if (!tmp)
goto oom;
page = __get_free_page(GFP_KERNEL);
if (inode->i_pipe)
goto got_it;
if (!page)
goto oom;
inode->i_pipe = tmp;
PIPE_LOCK(*inode) = 0;
PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
PIPE_BASE(*inode) = (char *) page;
PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
init_waitqueue_head(&PIPE_WAIT(*inode));
tmp = NULL; /* no need to free it */
page = 0;
got_it:
switch( filp->f_mode ) {
......@@ -94,19 +125,26 @@ static int fifo_open(struct inode * inode,struct file * filp)
default:
retval = -EINVAL;
}
if (retval || PIPE_BASE(*inode))
return retval;
page = __get_free_page(GFP_KERNEL);
if (PIPE_BASE(*inode)) {
if (retval)
goto cleanup;
out:
if (tmp)
kfree(tmp);
if (page)
free_page(page);
return 0;
return retval;
cleanup:
if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
info = inode->i_pipe;
inode->i_pipe = NULL;
free_page((unsigned long)info->base);
kfree(info);
}
if (!page)
return -ENOMEM;
PIPE_LOCK(*inode) = 0;
PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
PIPE_BASE(*inode) = (char *) page;
return 0;
goto out;
oom:
retval = -ENOMEM;
goto out;
}
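The rewritten fifo_open() allocates the pipe_inode_info and the data page before it knows whether they are needed, re-checking inode->i_pipe after every call that may sleep; an opener that loses the race just frees its spare allocations at out:. Distilled, the pattern looks like the sketch below (field initialization omitted; the unlocked re-check is only safe because the caller is serialized, as fifo_open() is here):

static int example_attach_pipe_info(struct inode *inode)
{
	struct pipe_inode_info *tmp;

	tmp = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); /* may sleep */
	if (!inode->i_pipe) {		/* nobody installed one while we slept */
		if (!tmp)
			return -ENOMEM;
		inode->i_pipe = tmp;	/* publish ours */
		tmp = NULL;
	}
	if (tmp)
		kfree(tmp);		/* lost the race: drop the spare copy */
	return 0;
}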
/*
......@@ -148,13 +186,10 @@ struct inode_operations fifo_inode_operations = {
NULL /* permission */
};
/* Goner. Filesystems do not use it anymore. */
void init_fifo(struct inode * inode)
{
inode->i_op = &fifo_inode_operations;
PIPE_LOCK(*inode) = 0;
PIPE_BASE(*inode) = NULL;
PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
init_waitqueue_head(&PIPE_WAIT(*inode));
PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
}
......@@ -527,6 +527,7 @@ void clean_inode(struct inode *inode)
inode->i_generation = 0;
memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
sema_init(&inode->i_sem, 1);
inode->i_pipe = NULL;
}
/*
......
......@@ -221,35 +221,36 @@ static struct page *try_to_get_dirent_page(struct file *, unsigned long, int);
*/
static int refetch_to_readdir_off(struct file *file, struct inode *inode, u32 off)
{
struct page *page;
u32 cur_off, goal_off = off & PAGE_MASK;
again:
cur_off = 0;
while (cur_off < goal_off) {
struct page *page;
page = find_page(inode, cur_off);
page = find_get_page(inode, cur_off);
if (page) {
if (PageLocked(page))
__wait_on_page(page);
if (!PageUptodate(page))
return -1;
if (!Page_Uptodate(page))
goto out_error;
} else {
page = try_to_get_dirent_page(file, cur_off, 0);
if (!page) {
if (!cur_off)
return -1;
goto out_error;
/* Someone touched the dir on us. */
goto again;
}
page_cache_release(page);
}
page_cache_release(page);
cur_off += PAGE_SIZE;
}
return 0;
out_error:
if (page)
page_cache_release(page);
return -1;
}
static struct page *try_to_get_dirent_page(struct file *file, unsigned long offset, int refetch_ok)
......@@ -274,20 +275,18 @@ static struct page *try_to_get_dirent_page(struct file *file, unsigned long offs
}
hash = page_hash(inode, offset);
page = __find_page(inode, offset, *hash);
repeat:
page = __find_lock_page(inode, offset, *hash);
if (page) {
page_cache_free(page_cache);
goto out;
goto unlock_out;
}
page = page_cache_entry(page_cache);
atomic_inc(&page->count);
page->flags = ((page->flags &
~((1 << PG_uptodate) | (1 << PG_error))) |
((1 << PG_referenced) | (1 << PG_locked)));
page->offset = offset;
add_page_to_inode_queue(inode, page);
__add_page_to_hash_queue(page, hash);
if (add_to_page_cache_unique(page, inode, offset, hash)) {
page_cache_release(page);
goto repeat;
}
rd_args.fh = NFS_FH(dentry);
rd_res.buffer = (char *)page_cache;
......@@ -308,15 +307,14 @@ static struct page *try_to_get_dirent_page(struct file *file, unsigned long offs
else if (create_cookie(rd_res.cookie, offset, inode))
goto error;
set_bit(PG_uptodate, &page->flags);
SetPageUptodate(page);
unlock_out:
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
UnlockPage(page);
out:
return page;
error:
set_bit(PG_error, &page->flags);
SetPageError(page);
goto unlock_out;
}
......@@ -371,12 +369,10 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
offset = filp->f_pos >> PAGE_CACHE_SHIFT;
hash = page_hash(inode, offset);
page = __find_page(inode, offset, *hash);
page = __find_get_page(inode, offset, *hash);
if (!page)
goto no_dirent_page;
if (PageLocked(page))
goto dirent_locked_wait;
if (!PageUptodate(page))
if (!Page_Uptodate(page))
goto dirent_read_error;
success:
filp->f_pos = nfs_do_filldir((__u32 *) page_address(page),
......@@ -389,9 +385,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
if (!page)
goto no_page;
dirent_locked_wait:
wait_on_page(page);
if (PageUptodate(page))
if (Page_Uptodate(page))
goto success;
dirent_read_error:
page_cache_release(page);
......
......@@ -26,6 +26,7 @@
#include <linux/malloc.h>
#include <linux/pagemap.h>
#include <linux/lockd/bind.h>
#include <linux/smp_lock.h>
#include <asm/uaccess.h>
#include <asm/segment.h>
......@@ -78,6 +79,7 @@ struct inode_operations nfs_file_inode_operations = {
NULL, /* smap */
NULL, /* updatepage */
nfs_revalidate, /* revalidate */
NULL, /* flushpage */
};
/* Hack for future NFS swap support */
......@@ -172,8 +174,11 @@ static long nfs_write_one_page(struct file *file, struct page *page, unsigned lo
bytes -= copy_from_user((u8*)page_address(page) + offset, buf, bytes);
status = -EFAULT;
if (bytes)
if (bytes) {
lock_kernel();
status = nfs_updatepage(file, page, offset, bytes);
unlock_kernel();
}
return status;
}
......
......@@ -77,7 +77,6 @@ nfs_readpage_sync(struct dentry *dentry, struct inode *inode, struct page *page)
int flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0;
dprintk("NFS: nfs_readpage_sync(%p)\n", page);
clear_bit(PG_error, &page->flags);
do {
if (count < rsize)
......@@ -111,16 +110,14 @@ nfs_readpage_sync(struct dentry *dentry, struct inode *inode, struct page *page)
} while (count);
memset(buffer, 0, count);
set_bit(PG_uptodate, &page->flags);
SetPageUptodate(page);
result = 0;
io_error:
UnlockPage(page);
/* Note: we don't refresh if the call returned error */
if (refresh && result >= 0)
nfs_refresh_inode(inode, &rqst.ra_fattr);
/* N.B. Use nfs_unlock_page here? */
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
return result;
}
......@@ -146,17 +143,15 @@ nfs_readpage_result(struct rpc_task *task)
memset((char *) address + result, 0, PAGE_SIZE - result);
}
nfs_refresh_inode(req->ra_inode, &req->ra_fattr);
set_bit(PG_uptodate, &page->flags);
SetPageUptodate(page);
succ++;
} else {
set_bit(PG_error, &page->flags);
SetPageError(page);
fail++;
dprintk("NFS: %d successful reads, %d failures\n", succ, fail);
}
/* N.B. Use nfs_unlock_page here? */
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
page->owner = (int)current; // HACK, FIXME, will go away.
UnlockPage(page);
free_page(address);
rpc_release_task(task);
......@@ -229,8 +224,7 @@ nfs_readpage(struct file *file, struct page *page)
dprintk("NFS: nfs_readpage (%p %ld@%ld)\n",
page, PAGE_SIZE, page->offset);
atomic_inc(&page->count);
set_bit(PG_locked, &page->flags);
get_page(page);
/*
* Try to flush any pending writes to the file..
......@@ -256,8 +250,7 @@ nfs_readpage(struct file *file, struct page *page)
goto out_free;
out_error:
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
UnlockPage(page);
out_free:
free_page(page_address(page));
out:
......
......@@ -65,20 +65,18 @@ static struct page *try_to_get_symlink_page(struct dentry *dentry, struct inode
goto out;
hash = page_hash(inode, 0);
page = __find_page(inode, 0, *hash);
repeat:
page = __find_lock_page(inode, 0, *hash);
if (page) {
page_cache_free(page_cache);
goto out;
goto unlock_out;
}
page = page_cache_entry(page_cache);
atomic_inc(&page->count);
page->flags = ((page->flags &
~((1 << PG_uptodate) | (1 << PG_error))) |
((1 << PG_referenced) | (1 << PG_locked)));
page->offset = 0;
add_page_to_inode_queue(inode, page);
__add_page_to_hash_queue(page, hash);
if (add_to_page_cache_unique(page, inode, 0, hash)) {
page_cache_release(page);
goto repeat;
}
/* We place the length at the beginning of the page,
* in host byte order, followed by the string. The
......@@ -89,32 +87,28 @@ static struct page *try_to_get_symlink_page(struct dentry *dentry, struct inode
if (rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK,
&rl_args, NULL, 0) < 0)
goto error;
set_bit(PG_uptodate, &page->flags);
SetPageUptodate(page);
unlock_out:
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
UnlockPage(page);
out:
return page;
error:
set_bit(PG_error, &page->flags);
SetPageError(page);
goto unlock_out;
}
static int nfs_readlink(struct dentry *dentry, char *buffer, int buflen)
{
struct inode *inode = dentry->d_inode;
struct page *page, **hash;
struct page *page;
u32 *p, len;
/* Caller revalidated the directory inode already. */
hash = page_hash(inode, 0);
page = __find_page(inode, 0, *hash);
page = find_get_page(inode, 0);
if (!page)
goto no_readlink_page;
if (PageLocked(page))
goto readlink_locked_wait;
if (!PageUptodate(page))
if (!Page_Uptodate(page))
goto readlink_read_error;
success:
p = (u32 *) page_address(page);
......@@ -129,9 +123,7 @@ static int nfs_readlink(struct dentry *dentry, char *buffer, int buflen)
page = try_to_get_symlink_page(dentry, inode);
if (!page)
goto no_page;
readlink_locked_wait:
wait_on_page(page);
if (PageUptodate(page))
if (Page_Uptodate(page))
goto success;
readlink_read_error:
page_cache_release(page);
......@@ -144,17 +136,14 @@ nfs_follow_link(struct dentry *dentry, struct dentry *base, unsigned int follow)
{
struct dentry *result;
struct inode *inode = dentry->d_inode;
struct page *page, **hash;
struct page *page;
u32 *p;
/* Caller revalidated the directory inode already. */
hash = page_hash(inode, 0);
page = __find_page(inode, 0, *hash);
page = find_get_page(inode, 0);
if (!page)
goto no_followlink_page;
if (PageLocked(page))
goto followlink_locked_wait;
if (!PageUptodate(page))
if (!Page_Uptodate(page))
goto followlink_read_error;
success:
p = (u32 *) page_address(page);
......@@ -166,9 +155,7 @@ nfs_follow_link(struct dentry *dentry, struct dentry *base, unsigned int follow)
page = try_to_get_symlink_page(dentry, inode);
if (!page)
goto no_page;
followlink_locked_wait:
wait_on_page(page);
if (PageUptodate(page))
if (Page_Uptodate(page))
goto success;
followlink_read_error:
page_cache_release(page);
......
......@@ -110,7 +110,7 @@ nfs_writepage_sync(struct dentry *dentry, struct inode *inode,
if (result < 0) {
/* Must mark the page invalid after I/O error */
clear_bit(PG_uptodate, &page->flags);
ClearPageUptodate(page);
goto io_error;
}
if (result != wsize)
......@@ -463,7 +463,7 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig
* Ok, there's another user of this page with the new request..
* The IO completion will then free the page and the dentry.
*/
atomic_inc(&page->count);
get_page(page);
file->f_count++;
/* Schedule request */
......@@ -471,7 +471,7 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig
updated:
if (req->wb_bytes == PAGE_SIZE)
set_bit(PG_uptodate, &page->flags);
SetPageUptodate(page);
retval = count;
if (synchronous) {
......@@ -486,7 +486,7 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig
}
if (retval < 0)
clear_bit(PG_uptodate, &page->flags);
ClearPageUptodate(page);
}
free_write_request(req);
......@@ -682,7 +682,7 @@ nfs_wback_result(struct rpc_task *task)
rpc_release_task(task);
if (WB_INVALIDATE(req))
clear_bit(PG_uptodate, &page->flags);
ClearPageUptodate(page);
__free_page(page);
remove_write_request(&NFS_WRITEBACK(inode), req);
......
......@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/malloc.h>
#include <asm/uaccess.h>
......@@ -249,8 +250,10 @@ static unsigned int connect_poll(struct file * filp, poll_table * wait)
static int pipe_release(struct inode * inode)
{
if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
free_page((unsigned long) PIPE_BASE(*inode));
PIPE_BASE(*inode) = NULL;
struct pipe_inode_info *info = inode->i_pipe;
inode->i_pipe = NULL;
free_page((unsigned long) info->base);
kfree(info);
}
wake_up_interruptible(&PIPE_WAIT(*inode));
return 0;
......@@ -404,36 +407,48 @@ static struct inode * get_pipe_inode(void)
{
extern struct inode_operations pipe_inode_operations;
struct inode *inode = get_empty_inode();
unsigned long page;
if (inode) {
unsigned long page = __get_free_page(GFP_USER);
if (!page) {
iput(inode);
inode = NULL;
} else {
PIPE_BASE(*inode) = (char *) page;
inode->i_op = &pipe_inode_operations;
init_waitqueue_head(&PIPE_WAIT(*inode));
PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
PIPE_LOCK(*inode) = 0;
/*
* Mark the inode dirty from the very beginning,
* that way it will never be moved to the dirty
* list because "mark_inode_dirty()" will think
* that it already _is_ on the dirty list.
*/
inode->i_state = I_DIRTY;
inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_blksize = PAGE_SIZE;
}
}
if (!inode)
goto fail_inode;
page = __get_free_page(GFP_USER);
if (!page)
goto fail_iput;
/* XXX */
inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
if (!inode->i_pipe)
goto fail_page;
PIPE_BASE(*inode) = (char *) page;
inode->i_op = &pipe_inode_operations;
init_waitqueue_head(&PIPE_WAIT(*inode));
PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
PIPE_LOCK(*inode) = 0;
/*
* Mark the inode dirty from the very beginning,
* that way it will never be moved to the dirty
* list because "mark_inode_dirty()" will think
* that it already _is_ on the dirty list.
*/
inode->i_state = I_DIRTY;
inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_blksize = PAGE_SIZE;
return inode;
fail_page:
free_page(page);
fail_iput:
iput(inode);
fail_inode:
return NULL;
}
struct inode_operations pipe_inode_operations = {
......@@ -513,6 +528,8 @@ int do_pipe(int *fd)
put_unused_fd(i);
close_f12_inode:
free_page((unsigned long) PIPE_BASE(*inode));
kfree(inode->i_pipe);
inode->i_pipe = NULL;
iput(inode);
close_f12:
put_filp(f2);
......
......@@ -348,7 +348,7 @@ static int get_meminfo(char * buffer)
len = sprintf(buffer, " total: used: free: shared: buffers: cached:\n"
"Mem: %8lu %8lu %8lu %8lu %8lu %8lu\n"
"Swap: %8lu %8lu %8lu\n",
i.totalram, i.totalram-i.freeram, i.freeram, i.sharedram, i.bufferram, page_cache_size*PAGE_SIZE,
i.totalram, i.totalram-i.freeram, i.freeram, i.sharedram, i.bufferram, atomic_read(&page_cache_size)*PAGE_SIZE,
i.totalswap, i.totalswap-i.freeswap, i.freeswap);
/*
* Tagged format, for easy grepping and expansion. The above will go away
......@@ -359,14 +359,14 @@ static int get_meminfo(char * buffer)
"MemFree: %8lu kB\n"
"MemShared: %8lu kB\n"
"Buffers: %8lu kB\n"
"Cached: %8lu kB\n"
"Cached: %8u kB\n"
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n",
i.totalram >> 10,
i.freeram >> 10,
i.sharedram >> 10,
i.bufferram >> 10,
page_cache_size << (PAGE_SHIFT - 10),
atomic_read(&page_cache_size) << (PAGE_SHIFT - 10),
i.totalswap >> 10,
i.freeswap >> 10);
}
......@@ -975,7 +975,7 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned
++*dirty;
if (MAP_NR(pte_page(page)) >= max_mapnr)
continue;
if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) > 1)
if (page_count(mem_map + MAP_NR(pte_page(page))) > 1)
++*shared;
} while (address < end);
}
......
......@@ -298,7 +298,7 @@ int mem_mmap(struct file * file, struct vm_area_struct * vma)
set_pte(dest_table, *src_table);
mapnr = MAP_NR(pte_page(*src_table));
if (mapnr < max_mapnr)
atomic_inc(&mem_map[MAP_NR(pte_page(*src_table))].count);
get_page(mem_map + MAP_NR(pte_page(*src_table)));
stmp += PAGE_SIZE;
dtmp += PAGE_SIZE;
......
......@@ -14,6 +14,7 @@
#include <linux/mm.h>
#include <linux/malloc.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <asm/uaccess.h>
#include <asm/system.h>
......@@ -271,8 +272,11 @@ static long smb_write_one_page(struct file *file, struct page *page, unsigned lo
bytes -= copy_from_user((u8*)page_address(page) + offset, buf, bytes);
status = -EFAULT;
if (bytes)
if (bytes) {
lock_kernel();
status = smb_updatepage(file, page, offset, bytes);
unlock_kernel();
}
return status;
}
......
......@@ -84,6 +84,14 @@ typedef unsigned long pgprot_t;
#define __PAGE_OFFSET (PAGE_OFFSET_RAW)
#ifndef __ASSEMBLY__
#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); *(int *)0=0; } while (0)
#define PAGE_BUG(page) do { \
BUG(); } while (0)
#endif /* __ASSEMBLY__ */
#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
......
......@@ -556,6 +556,7 @@ extern void ext2_check_inodes_bitmap (struct super_block *);
extern int ext2_bmap (struct inode *, int);
extern struct buffer_head * ext2_getblk (struct inode *, long, int, int *);
extern int ext2_getblk_block (struct inode *, long, int, int *, int *);
extern struct buffer_head * ext2_bread (struct inode *, int, int, int *);
extern int ext2_getcluster (struct inode * inode, long block);
......
......@@ -74,11 +74,11 @@ extern int max_super_blocks, nr_super_blocks;
/* public flags for file_system_type */
#define FS_REQUIRES_DEV 1
#define FS_NO_DCACHE 2 /* Only dcache the necessary things. */
#define FS_NO_PRELIM 4 /* prevent preloading of dentries, even if
#define FS_NO_DCACHE 2 /* Only dcache the necessary things. */
#define FS_NO_PRELIM 4 /* prevent preloading of dentries, even if
* FS_NO_DCACHE is not set.
*/
#define FS_IBASKET 8 /* FS does callback to free_ibasket() if space gets low. */
#define FS_IBASKET 8 /* FS does callback to free_ibasket() if space gets low. */
/*
* These are the fs-independent mount-flags: up to 16 flags are supported
......@@ -94,9 +94,9 @@ extern int max_super_blocks, nr_super_blocks;
#define S_APPEND 256 /* Append-only file */
#define S_IMMUTABLE 512 /* Immutable file */
#define MS_NOATIME 1024 /* Do not update access times. */
#define MS_NODIRATIME 2048 /* Do not update directory access times */
#define MS_NODIRATIME 2048 /* Do not update directory access times */
#define MS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon
#define MS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon
* as nfs_rename() will be cleaned up
*/
......@@ -189,7 +189,6 @@ typedef char buffer_block[BLOCK_SIZE];
#define BH_Lock 2 /* 1 if the buffer is locked */
#define BH_Req 3 /* 0 if the buffer has been invalidated */
#define BH_Protected 6 /* 1 if the buffer is protected */
/*
* Try to keep the most commonly used fields in single cache lines (16
* bytes) to improve performance. This ordering should be
......@@ -218,7 +217,7 @@ struct buffer_head {
/* Non-performance-critical data follows. */
char * b_data; /* pointer to data block (1024 bytes) */
unsigned int b_list; /* List that this buffer appears */
unsigned long b_flushtime; /* Time when this (dirty) buffer
unsigned long b_flushtime; /* Time when this (dirty) buffer
* should be written */
wait_queue_head_t b_wait;
struct buffer_head ** b_pprev; /* doubly linked list of hash-queue */
......@@ -365,22 +364,21 @@ struct inode {
struct vm_area_struct *i_mmap;
struct page *i_pages;
struct dquot *i_dquot[MAXQUOTAS];
struct pipe_inode_info *i_pipe;
unsigned long i_state;
unsigned int i_flags;
unsigned char i_pipe;
unsigned char i_sock;
int i_writecount;
unsigned int i_attr_flags;
__u32 i_generation;
union {
struct pipe_inode_info pipe_i;
struct minix_inode_info minix_i;
struct ext2_inode_info ext2_i;
struct hpfs_inode_info hpfs_i;
struct ntfs_inode_info ntfs_i;
struct ntfs_inode_info ntfs_i;
struct msdos_inode_info msdos_i;
struct umsdos_inode_info umsdos_i;
struct iso_inode_info isofs_i;
......@@ -388,13 +386,13 @@ struct inode {
struct sysv_inode_info sysv_i;
struct affs_inode_info affs_i;
struct ufs_inode_info ufs_i;
struct efs_inode_info efs_i;
struct efs_inode_info efs_i;
struct romfs_inode_info romfs_i;
struct coda_inode_info coda_i;
struct smb_inode_info smbfs_i;
struct hfs_inode_info hfs_i;
struct adfs_inode_info adfs_i;
struct qnx4_inode_info qnx4_i;
struct qnx4_inode_info qnx4_i;
struct socket socket_i;
void *generic_ip;
} u;
......@@ -491,10 +489,10 @@ extern void posix_block_lock(struct file_lock *, struct file_lock *);
extern void posix_unblock_lock(struct file_lock *);
struct fasync_struct {
int magic;
int fa_fd;
struct fasync_struct *fa_next; /* singly linked list */
struct file *fa_file;
int magic;
int fa_fd;
struct fasync_struct *fa_next; /* singly linked list */
struct file *fa_file;
};
#define FASYNC_MAGIC 0x4601
......@@ -547,19 +545,19 @@ struct super_block {
struct minix_sb_info minix_sb;
struct ext2_sb_info ext2_sb;
struct hpfs_sb_info hpfs_sb;
struct ntfs_sb_info ntfs_sb;
struct ntfs_sb_info ntfs_sb;
struct msdos_sb_info msdos_sb;
struct isofs_sb_info isofs_sb;
struct nfs_sb_info nfs_sb;
struct sysv_sb_info sysv_sb;
struct affs_sb_info affs_sb;
struct ufs_sb_info ufs_sb;
struct efs_sb_info efs_sb;
struct efs_sb_info efs_sb;
struct romfs_sb_info romfs_sb;
struct smb_sb_info smbfs_sb;
struct hfs_sb_info hfs_sb;
struct adfs_sb_info adfs_sb;
struct qnx4_sb_info qnx4_sb;
struct qnx4_sb_info qnx4_sb;
void *generic_sbp;
} u;
/*
......@@ -624,6 +622,7 @@ struct inode_operations {
int (*smap) (struct inode *,int);
int (*updatepage) (struct file *, struct page *, unsigned long, unsigned int);
int (*revalidate) (struct dentry *);
int (*flushpage) (struct inode *, struct page *, int);
};
struct super_operations {
......@@ -749,13 +748,19 @@ extern int fs_may_mount(kdev_t);
extern struct file *inuse_filps;
extern void refile_buffer(struct buffer_head *);
extern void set_writetime(struct buffer_head *, int);
extern int try_to_free_buffers(struct page *);
extern void __refile_buffer(struct buffer_head * buf);
extern inline void refile_buffer(struct buffer_head * buf)
{
/*
	 * Subtle: we do not want to refile buffers that are not hashed ...
*/
if (buf->b_pprev)
__refile_buffer(buf);
}
extern int nr_buffers;
extern int buffermem;
extern int nr_buffer_heads;
#define BUF_CLEAN 0
#define BUF_LOCKED 1 /* Buffers scheduled for write */
......@@ -869,11 +874,15 @@ extern struct buffer_head * breada(kdev_t, int, int, unsigned int, unsigned int)
extern int brw_page(int, struct page *, kdev_t, int [], int, int);
typedef long (*writepage_t)(struct file *, struct page *, unsigned long, unsigned long, const char *);
typedef int (*fs_getblock_t)(struct inode *, long, int, int *, int *);
extern int generic_readpage(struct file *, struct page *);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *);
extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *, writepage_t);
extern int generic_block_flushpage(struct inode *, struct page *, int);
extern long block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block);
extern struct super_block *get_super(kdev_t);
extern void put_super(kdev_t);
......
#ifndef _HPFS_FS_I
#define _HPFS_FS_I
#if ANALWARNINGS
#warning Fix the FIFO stuff!
#warning Fix the FIFO stuff!
#warning Fix the FIFO stuff!
#endif
struct hpfs_inode_info {
union { /* Linux sometimes destroys this structure */
struct pipe_inode_info bla; /* due to a bug. Linus doesn't want to fix */
struct socket ble; /* it so I had to write this workaround :-) */
} dummy;
ino_t i_parent_dir; /* (directories) gives fnode of parent dir */
unsigned i_dno; /* (directories) root dnode */
unsigned i_dpos; /* (directories) temp for readdir */
......
......@@ -129,29 +129,56 @@ typedef struct page {
wait_queue_head_t wait;
struct page **pprev_hash;
struct buffer_head * buffers;
int owner; /* temporary debugging check */
} mem_map_t;
#define get_page(p) do { atomic_inc(&(p)->count); \
} while (0)
#define put_page(p) __free_page(p)
#define put_page_testzero(p) ({ int __ret = atomic_dec_and_test(&(p)->count);\
__ret; })
#define page_count(p) atomic_read(&(p)->count)
#define set_page_count(p,v) do { atomic_set(&(p)->count, v); \
} while (0)
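These wrappers replace the direct atomic_inc()/atomic_read() accesses to page->count that appear on the removed lines throughout this patch. A small conversion sketch (hypothetical helper names):

static inline void example_grab_page(struct page *page)
{
	/* was: atomic_inc(&page->count); */
	get_page(page);
}

static inline int example_page_is_shared(struct page *page)
{
	/* was: return atomic_read(&page->count) > 1; */
	return page_count(page) > 1;
}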
/* Page flag bit values */
#define PG_locked 0
#define PG_error 1
#define PG_referenced 2
#define PG_dirty 3
#define PG_uptodate 4
#define PG_free_after 5
#define PG_decr_after 6
#define PG_swap_unlock_after 7
#define PG_DMA 8
#define PG_Slab 9
#define PG_swap_cache 10
#define PG_skip 11
#define PG_uptodate 3
#define PG_free_after 4
#define PG_decr_after 5
#define PG_swap_unlock_after 6
#define PG_DMA 7
#define PG_Slab 8
#define PG_swap_cache 9
#define PG_skip 10
/* bits 21-30 unused */
#define PG_reserved 31
/* Make it prettier to test the above... */
#define Page_Uptodate(page) (test_bit(PG_uptodate, &(page)->flags))
#define SetPageUptodate(page) do { set_bit(PG_uptodate, &(page)->flags); \
} while (0)
#define ClearPageUptodate(page) do { clear_bit(PG_uptodate, &(page)->flags); \
} while (0)
#define PageLocked(page) (test_bit(PG_locked, &(page)->flags))
#define LockPage(page) \
do { int _ret = test_and_set_bit(PG_locked, &(page)->flags); \
if (_ret) PAGE_BUG(page); \
page->owner = (int)current; } while (0)
#define TryLockPage(page) ({ int _ret = test_and_set_bit(PG_locked, &(page)->flags); \
if (!_ret) page->owner = (int)current; _ret; })
#define UnlockPage(page) do { \
if (page->owner != (int)current) { \
BUG(); } page->owner = 0; \
if (!test_and_clear_bit(PG_locked, &(page)->flags)) { \
BUG(); } wake_up(&page->wait); } while (0)
#define PageError(page) (test_bit(PG_error, &(page)->flags))
#define SetPageError(page) ({ int _ret = test_and_set_bit(PG_error, &(page)->flags); _ret; })
#define ClearPageError(page) do { if (!test_and_clear_bit(PG_error, &(page)->flags)) BUG(); } while (0)
#define PageReferenced(page) (test_bit(PG_referenced, &(page)->flags))
#define PageDirty(page) (test_bit(PG_dirty, &(page)->flags))
#define PageUptodate(page) (test_bit(PG_uptodate, &(page)->flags))
#define PageFreeAfter(page) (test_bit(PG_free_after, &(page)->flags))
#define PageDecrAfter(page) (test_bit(PG_decr_after, &(page)->flags))
#define PageSwapUnlockAfter(page) (test_bit(PG_swap_unlock_after, &(page)->flags))
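The locking macros now record an owner so that UnlockPage() can BUG() when a page is unlocked by a task that never locked it. A minimal sketch of the intended discipline (hypothetical function, assuming wait_on_page() from the pagemap.h hunk is available):

static void example_with_page_locked(struct page *page)
{
	if (TryLockPage(page)) {	/* already locked by somebody else */
		wait_on_page(page);	/* sleep until it is unlocked */
		return;
	}
	/* the page is locked and page->owner records us */
	UnlockPage(page);	/* checks the owner, clears PG_locked, wakes waiters */
}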
......@@ -163,16 +190,12 @@ typedef struct page {
#define PageSetSlab(page) (set_bit(PG_Slab, &(page)->flags))
#define PageSetSwapCache(page) (set_bit(PG_swap_cache, &(page)->flags))
#define PageTestandSetDirty(page) \
(test_and_set_bit(PG_dirty, &(page)->flags))
#define PageTestandSetSwapCache(page) \
(test_and_set_bit(PG_swap_cache, &(page)->flags))
#define PageClearSlab(page) (clear_bit(PG_Slab, &(page)->flags))
#define PageClearSwapCache(page)(clear_bit(PG_swap_cache, &(page)->flags))
#define PageTestandClearDirty(page) \
(test_and_clear_bit(PG_dirty, &(page)->flags))
#define PageTestandClearSwapCache(page) \
(test_and_clear_bit(PG_swap_cache, &(page)->flags))
......@@ -387,7 +410,7 @@ extern struct vm_area_struct *find_extend_vma(struct task_struct *tsk, unsigned
#define buffer_under_min() ((buffermem >> PAGE_SHIFT) * 100 < \
buffer_mem.min_percent * num_physpages)
#define pgcache_under_min() (page_cache_size * 100 < \
#define pgcache_under_min() (atomic_read(&page_cache_size) * 100 < \
page_cache.min_percent * num_physpages)
#endif /* __KERNEL__ */
......
#ifndef _MSDOS_FS_I
#define _MSDOS_FS_I
#ifndef _LINUX_PIPE_FS_I_H
#include <linux/pipe_fs_i.h>
#endif
/*
* MS-DOS file system inode data in memory
*/
struct msdos_inode_info {
/*
	UMSDOS manages special files and fifos as normal empty
	msdos files. Fifo inode processing conflicts with msdos
	processing, so I insert the pipe_inode_info so the
	information does not overlap. This increases the size of
	msdos_inode_info, but the clear winner here is
	ext2_inode_info, so it does not change the total size
	of a struct inode.
	I have not made it conditional. With the advent of loadable
	file system drivers, it would be very easy to compile
	an MS-DOS FS driver unaware of UMSDOS and then later to
	load a (then incompatible) UMSDOS FS driver.
*/
struct pipe_inode_info reserved;
int i_start; /* first cluster or 0 */
int i_logstart; /* logical first cluster */
int i_attrs; /* unused attribute bits */
......
......@@ -8,13 +8,6 @@
* nfs fs inode data in memory
*/
struct nfs_inode_info {
/*
* This is a place holder so named pipes on NFS filesystems
* work (more or less correctly). This must be first in the
* struct because the data is really accessed via inode->u.pipe_i.
*/
struct pipe_inode_info pipeinfo;
/*
* Various flags
*/
......
......@@ -39,10 +39,10 @@ static inline unsigned long page_address(struct page * page)
*/
#define page_cache_entry(x) (mem_map + MAP_NR(x))
#define PAGE_HASH_BITS 12
#define PAGE_HASH_BITS 16
#define PAGE_HASH_SIZE (1 << PAGE_HASH_BITS)
extern unsigned long page_cache_size; /* # of pages currently in the hash table */
extern atomic_t page_cache_size; /* # of pages currently in the hash table */
extern struct page * page_hash_table[PAGE_HASH_SIZE];
/*
......@@ -64,50 +64,18 @@ static inline unsigned long _page_hashfn(struct inode * inode, unsigned long off
#define page_hash(inode,offset) (page_hash_table+_page_hashfn(inode,offset))
static inline struct page * __find_page(struct inode * inode, unsigned long offset, struct page *page)
{
goto inside;
for (;;) {
page = page->next_hash;
inside:
if (!page)
goto not_found;
if (page->inode != inode)
continue;
if (page->offset == offset)
break;
}
/* Found the page. */
atomic_inc(&page->count);
set_bit(PG_referenced, &page->flags);
not_found:
return page;
}
static inline struct page *find_page(struct inode * inode, unsigned long offset)
{
return __find_page(inode, offset, *page_hash(inode, offset));
}
extern struct page * __find_get_page (struct inode * inode,
unsigned long offset, struct page *page);
#define find_get_page(inode, offset) \
__find_get_page(inode, offset, *page_hash(inode, offset))
extern struct page * __find_lock_page (struct inode * inode,
unsigned long offset, struct page *page);
#define find_lock_page(inode, offset) \
__find_lock_page(inode, offset, *page_hash(inode, offset))
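The old inline __find_page() loop becomes out-of-line helpers: find_get_page() returns the page with an extra reference, and find_lock_page() additionally returns it locked. A lookup sketch (hypothetical function; on success the caller keeps the reference):

static struct page *example_lookup_uptodate(struct inode *inode,
					    unsigned long offset)
{
	struct page *page = find_get_page(inode, offset); /* takes a reference */

	if (!page)
		return NULL;
	if (!Page_Uptodate(page)) {
		page_cache_release(page);	/* drop our reference again */
		return NULL;
	}
	return page;	/* caller page_cache_release()s it when done */
}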
static inline void remove_page_from_hash_queue(struct page * page)
{
if(page->pprev_hash) {
if(page->next_hash)
page->next_hash->pprev_hash = page->pprev_hash;
*page->pprev_hash = page->next_hash;
page->pprev_hash = NULL;
}
page_cache_size--;
}
extern void __add_page_to_hash_queue(struct page * page, struct page **p);
static inline void __add_page_to_hash_queue(struct page * page, struct page **p)
{
page_cache_size++;
if((page->next_hash = *p) != NULL)
(*p)->pprev_hash = &page->next_hash;
*p = page;
page->pprev_hash = p;
}
extern int add_to_page_cache_unique(struct page * page, struct inode * inode, unsigned long offset, struct page **hash);
static inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long offset)
{
......@@ -118,7 +86,6 @@ static inline void remove_page_from_inode_queue(struct page * page)
{
struct inode * inode = page->inode;
page->inode = NULL;
inode->i_nrpages--;
if (inode->i_pages == page)
inode->i_pages = page->next;
......@@ -142,11 +109,13 @@ static inline void add_page_to_inode_queue(struct inode * inode, struct page * p
*p = page;
}
extern void __wait_on_page(struct page *);
extern void ___wait_on_page(struct page *);
static inline void wait_on_page(struct page * page)
{
if (PageLocked(page))
__wait_on_page(page);
___wait_on_page(page);
}
extern void update_vm_cache(struct inode *, unsigned long, const char *, int);
......
......@@ -12,15 +12,15 @@ struct pipe_inode_info {
unsigned int writers;
};
#define PIPE_WAIT(inode) ((inode).u.pipe_i.wait)
#define PIPE_BASE(inode) ((inode).u.pipe_i.base)
#define PIPE_START(inode) ((inode).u.pipe_i.start)
#define PIPE_WAIT(inode) ((inode).i_pipe->wait)
#define PIPE_BASE(inode) ((inode).i_pipe->base)
#define PIPE_START(inode) ((inode).i_pipe->start)
#define PIPE_LEN(inode) ((inode).i_size)
#define PIPE_RD_OPENERS(inode) ((inode).u.pipe_i.rd_openers)
#define PIPE_WR_OPENERS(inode) ((inode).u.pipe_i.wr_openers)
#define PIPE_READERS(inode) ((inode).u.pipe_i.readers)
#define PIPE_WRITERS(inode) ((inode).u.pipe_i.writers)
#define PIPE_LOCK(inode) ((inode).u.pipe_i.lock)
#define PIPE_RD_OPENERS(inode) ((inode).i_pipe->rd_openers)
#define PIPE_WR_OPENERS(inode) ((inode).i_pipe->wr_openers)
#define PIPE_READERS(inode) ((inode).i_pipe->readers)
#define PIPE_WRITERS(inode) ((inode).i_pipe->writers)
#define PIPE_LOCK(inode) ((inode).i_pipe->lock)
#define PIPE_SIZE(inode) PIPE_LEN(inode)
#define PIPE_EMPTY(inode) (PIPE_SIZE(inode)==0)
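The accessor macros keep every call site unchanged but now dereference the separately allocated inode->i_pipe, so an inode must have a pipe_inode_info attached before any of them is used. A small sketch (hypothetical helper):

static int example_pipe_has_data(struct inode *inode)
{
	if (!inode->i_pipe)	/* not set up as a pipe/FIFO yet */
		return 0;
	return PIPE_READERS(*inode) && !PIPE_EMPTY(*inode);
}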
......
......@@ -286,7 +286,7 @@ struct task_struct {
gid_t gid,egid,sgid,fsgid;
int ngroups;
gid_t groups[NGROUPS];
kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
struct user_struct *user;
/* limits */
struct rlimit rlim[RLIM_NLIMITS];
......@@ -601,7 +601,7 @@ extern inline int capable(int cap)
#else
if (cap_is_fs_cap(cap) ? current->fsuid == 0 : current->euid == 0)
#endif
{
{
current->flags |= PF_SUPERPRIV;
return 1;
}
......
......@@ -67,7 +67,7 @@ extern int nr_swap_pages;
extern int nr_free_pages;
extern atomic_t nr_async_pages;
extern struct inode swapper_inode;
extern unsigned long page_cache_size;
extern atomic_t page_cache_size;
extern int buffermem;
/* Incomplete types for prototype declarations: */
......@@ -163,7 +163,7 @@ static inline int is_page_shared(struct page *page)
unsigned int count;
if (PageReserved(page))
return 1;
count = atomic_read(&page->count);
count = page_count(page);
if (PageSwapCache(page))
count += swap_count(page->offset) - 2;
if (PageFreeAfter(page))
......
/*
* SyncLink Multiprotocol Serial Adapter Driver
*
* ==FILEDATE 19990523==
*
* Copyright (C) 1998 by Microgate Corporation
*
* Redistribution of this file is permitted under
......@@ -66,11 +68,16 @@
#define HDLC_FLAG_AUTO_RTS 0x0080
#define HDLC_FLAG_RXC_DPLL 0x0100
#define HDLC_FLAG_RXC_BRG 0x0200
#define HDLC_FLAG_RXC_TXCPIN 0x8000
#define HDLC_FLAG_RXC_RXCPIN 0x0000
#define HDLC_FLAG_TXC_DPLL 0x0400
#define HDLC_FLAG_TXC_BRG 0x0800
#define HDLC_FLAG_TXC_TXCPIN 0x0000
#define HDLC_FLAG_TXC_RXCPIN 0x0008
#define HDLC_FLAG_DPLL_DIV8 0x1000
#define HDLC_FLAG_DPLL_DIV16 0x2000
#define HDLC_FLAG_DPLL_DIV32 0x0000
#define HDLC_FLAG_HDLC_LOOPMODE 0x4000
#define HDLC_CRC_NONE 0
#define HDLC_CRC_16_CCITT 1
......@@ -87,6 +94,7 @@
#define HDLC_ENCODING_NRZB 1
#define HDLC_ENCODING_NRZI_MARK 2
#define HDLC_ENCODING_NRZI_SPACE 3
#define HDLC_ENCODING_NRZI HDLC_ENCODING_NRZI_SPACE
#define HDLC_ENCODING_BIPHASE_MARK 4
#define HDLC_ENCODING_BIPHASE_SPACE 5
#define HDLC_ENCODING_BIPHASE_LEVEL 6
......@@ -227,17 +235,19 @@ struct mgsl_icount {
* MGSL_IOCTXABORT abort transmitting frame (HDLC)
* MGSL_IOCGSTATS return current statistics
* MGSL_IOCWAITEVENT wait for specified event to occur
* MGSL_LOOPTXDONE transmit in HDLC LoopMode done
*/
#define MGSL_MAGIC_IOC 'm'
#define MGSL_IOCSPARAMS _IOW(MGSL_MAGIC_IOC,0,sizeof(MGSL_PARAMS))
#define MGSL_IOCGPARAMS _IOR(MGSL_MAGIC_IOC,1,sizeof(MGSL_PARAMS))
#define MGSL_IOCSPARAMS _IOW(MGSL_MAGIC_IOC,0,struct _MGSL_PARAMS)
#define MGSL_IOCGPARAMS _IOR(MGSL_MAGIC_IOC,1,struct _MGSL_PARAMS)
#define MGSL_IOCSTXIDLE _IO(MGSL_MAGIC_IOC,2)
#define MGSL_IOCGTXIDLE _IO(MGSL_MAGIC_IOC,3)
#define MGSL_IOCTXENABLE _IO(MGSL_MAGIC_IOC,4)
#define MGSL_IOCRXENABLE _IO(MGSL_MAGIC_IOC,5)
#define MGSL_IOCTXABORT _IO(MGSL_MAGIC_IOC,6)
#define MGSL_IOCGSTATS _IO(MGSL_MAGIC_IOC,7)
#define MGSL_IOCWAITEVENT _IO(MGSL_MAGIC_IOC,8)
#define MGSL_IOCWAITEVENT _IOWR(MGSL_MAGIC_IOC,8,int)
#define MGSL_IOCCLRMODCOUNT _IO(MGSL_MAGIC_IOC,15)
#define MGSL_IOCLOOPTXDONE _IO(MGSL_MAGIC_IOC,9)
#endif /* _SYNCLINK_H_ */
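The parameter ioctls now encode the real argument type (struct _MGSL_PARAMS, int) in the request number instead of a sizeof() value, and MGSL_IOCWAITEVENT becomes a read/write request. A hedged user-space sketch of fetching the adapter parameters; the header install path and the device name passed by the caller are assumptions, not something this patch defines:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/synclink.h>	/* assumed install path for this header */

int example_get_params(const char *dev, MGSL_PARAMS *params)
{
	int fd = open(dev, O_RDWR);	/* e.g. the adapter's tty node */

	if (fd < 0)
		return -1;
	if (ioctl(fd, MGSL_IOCGPARAMS, params) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}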
......@@ -28,9 +28,8 @@
*
* For directory, we also have a reference to the inode of its
* own EMD file. Also, we have dir_locking_info to help synchronise
* file creation and file lookup. This data is sharing space with
* the pipe_inode_info not used by directory. See also msdos_fs_i.h
* for more information about pipe_inode_info and msdos_inode_info.
* file creation and file lookup. See also msdos_fs_i.h for more
* information about msdos_inode_info.
*
 * Special files and fifos do have an inode which corresponds to an
 * empty MSDOS file.
 *
 * symlinks are processed mostly like regular files. The content is the
 * link.
 *
 * fifos add their own extension to the inode. I have reserved some
 * space for fifos side by side with msdos_inode_info. This is just
 * for show, because msdos_inode_info already includes the
 * pipe_inode_info.
*
* The UMSDOS specific extension is placed after the union.
*/
......@@ -60,7 +54,6 @@ struct dir_locking_info {
struct umsdos_inode_info {
union {
struct msdos_inode_info msdos_info;
struct pipe_inode_info pipe_info;
struct dir_locking_info dir_info;
} u;
int i_patched; /* Inode has been patched */
......
......@@ -675,7 +675,7 @@ static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long addr
done: /* pte_val(pte) == shp->shm_pages[idx] */
current->min_flt++;
atomic_inc(&mem_map[MAP_NR(pte_page(pte))].count);
get_page(mem_map + MAP_NR(pte_page(pte)));
return pte_page(pte);
}
......@@ -730,7 +730,7 @@ int shm_swap (int prio, int gfp_mask)
swap_free (swap_nr);
return 0;
}
if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) != 1)
if (page_count(mem_map + MAP_NR(pte_page(page))) != 1)
goto check_table;
shp->shm_pages[idx] = swap_nr;
rw_swap_page_nocache (WRITE, swap_nr, (char *) pte_page(page));
......@@ -751,7 +751,7 @@ static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
shp->shm_pages[idx] = pte_val(pte);
atomic_inc(&mem_map[MAP_NR(page)].count);
get_page(mem_map + MAP_NR(page));
shm_rss++;
swap_free(entry);
......
......@@ -106,7 +106,6 @@ EXPORT_SYMBOL(mem_map);
EXPORT_SYMBOL(remap_page_range);
EXPORT_SYMBOL(max_mapnr);
EXPORT_SYMBOL(high_memory);
EXPORT_SYMBOL(update_vm_cache);
EXPORT_SYMBOL(vmtruncate);
EXPORT_SYMBOL(find_vma);
EXPORT_SYMBOL(get_unmapped_area);
......@@ -175,7 +174,6 @@ EXPORT_SYMBOL(posix_test_lock);
EXPORT_SYMBOL(posix_block_lock);
EXPORT_SYMBOL(posix_unblock_lock);
EXPORT_SYMBOL(dput);
EXPORT_SYMBOL(get_cached_page);
EXPORT_SYMBOL(put_cached_page);
EXPORT_SYMBOL(is_root_busy);
EXPORT_SYMBOL(prune_dcache);
......@@ -361,7 +359,6 @@ EXPORT_SYMBOL(sys_tz);
EXPORT_SYMBOL(__wait_on_super);
EXPORT_SYMBOL(file_fsync);
EXPORT_SYMBOL(clear_inode);
EXPORT_SYMBOL(refile_buffer);
EXPORT_SYMBOL(nr_async_pages);
EXPORT_SYMBOL(___strtok);
EXPORT_SYMBOL(init_special_inode);
......
/*
* linux/mm/filemap.c
*
* Copyright (C) 1994, 1995 Linus Torvalds
* Copyright (C) 1994-1999 Linus Torvalds
*/
/*
......@@ -29,9 +29,12 @@
* though.
*
* Shared mappings now work. 15.8.1995 Bruno.
*
* finished 'unifying' the page and buffer cache and SMP-threaded the
* page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
*/
unsigned long page_cache_size = 0;
atomic_t page_cache_size = ATOMIC_INIT(0);
struct page * page_hash_table[PAGE_HASH_SIZE];
/*
......@@ -50,26 +53,52 @@ static struct pio_request *pio_first = NULL, **pio_last = &pio_first;
static kmem_cache_t *pio_request_cache;
static DECLARE_WAIT_QUEUE_HEAD(pio_wait);
spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
static inline void
make_pio_request(struct file *, unsigned long, unsigned long);
void __add_page_to_hash_queue(struct page * page, struct page **p)
{
atomic_inc(&page_cache_size);
if((page->next_hash = *p) != NULL)
(*p)->pprev_hash = &page->next_hash;
*p = page;
page->pprev_hash = p;
if (page->buffers)
PAGE_BUG(page);
}
static void remove_page_from_hash_queue(struct page * page)
{
if(page->pprev_hash) {
if(page->next_hash)
page->next_hash->pprev_hash = page->pprev_hash;
*page->pprev_hash = page->next_hash;
page->pprev_hash = NULL;
}
atomic_dec(&page_cache_size);
}
/*
* Invalidate the pages of an inode, removing all pages that aren't
* locked down (those are sure to be up-to-date anyway, so we shouldn't
* invalidate them).
*/
void invalidate_inode_pages(struct inode * inode)
{
struct page ** p;
struct page * page;
repeat:
spin_lock(&pagecache_lock);
p = &inode->i_pages;
while ((page = *p) != NULL) {
if (PageLocked(page)) {
p = &page->next;
continue;
get_page(page);
if (TryLockPage(page)) {
spin_unlock(&pagecache_lock);
wait_on_page(page);
page_cache_release(page);
goto repeat;
}
if (page_count(page) != 2)
printk("hm, busy page invalidated? (not necesserily a bug)\n");
inode->i_nrpages--;
if ((*p = page->next) != NULL)
(*p)->prev = page->prev;
......@@ -77,11 +106,13 @@ void invalidate_inode_pages(struct inode * inode)
page->prev = NULL;
remove_page_from_hash_queue(page);
page->inode = NULL;
UnlockPage(page);
page_cache_release(page);
page_cache_release(page);
continue;
}
spin_unlock(&pagecache_lock);
}
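Both invalidate_inode_pages() above and truncate_inode_pages() below follow the same locking recipe: pin the page, try-lock it, and if that fails drop pagecache_lock, sleep on the page and rescan the list. The sketch below distils that step; it must be entered with pagecache_lock held and returns NULL when the caller has to restart (hypothetical helper):

static struct page *example_grab_locked(struct page *page)
{
	get_page(page);				/* pin it across a possible sleep */
	if (TryLockPage(page)) {
		spin_unlock(&pagecache_lock);	/* never sleep holding the spinlock */
		wait_on_page(page);
		page_cache_release(page);
		return NULL;			/* the list may have changed: rescan */
	}
	return page;				/* locked and still referenced */
}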
/*
* Truncate the page cache at a set offset, removing the pages
* that are beyond that offset (and zeroing out partial pages).
......@@ -90,18 +121,39 @@ void truncate_inode_pages(struct inode * inode, unsigned long start)
{
struct page ** p;
struct page * page;
int partial = 0;
repeat:
spin_lock(&pagecache_lock);
p = &inode->i_pages;
while ((page = *p) != NULL) {
unsigned long offset = page->offset;
/* page wholly truncated - free it */
if (offset >= start) {
if (PageLocked(page)) {
get_page(page);
if (TryLockPage(page)) {
spin_unlock(&pagecache_lock);
wait_on_page(page);
page_cache_release(page);
goto repeat;
}
if (page_count(page) != 2)
printk("hm, busy page truncated? (not necesserily a bug)\n");
spin_unlock(&pagecache_lock);
if (inode->i_op->flushpage)
inode->i_op->flushpage(inode, page, 0);
/*
* We remove the page from the page cache
* _after_ we have destroyed all buffer-cache
* references to it. Otherwise some other process
* might think this inode page is not in the
* page cache and creates a buffer-cache alias
* to it causing all sorts of fun problems ...
*/
spin_lock(&pagecache_lock);
inode->i_nrpages--;
if ((*p = page->next) != NULL)
(*p)->prev = page->prev;
......@@ -109,28 +161,87 @@ void truncate_inode_pages(struct inode * inode, unsigned long start)
page->prev = NULL;
remove_page_from_hash_queue(page);
page->inode = NULL;
if (page_count(page) != 2)
printk("hm, busy page truncated? (not necesserily a bug)\n");
spin_unlock(&pagecache_lock);
UnlockPage(page);
page_cache_release(page);
continue;
page_cache_release(page);
/*
* We have done things without the pagecache lock,
* so we'll have to repeat the scan.
* It's not possible to deadlock here because
* we are guaranteed to make progress. (ie. we have
* just removed a page)
*/
goto repeat;
}
p = &page->next;
/*
* there is only one partial page possible.
*/
if (partial)
continue;
offset = start - offset;
/* partial truncate, clear end of page */
if (offset < PAGE_CACHE_SIZE) {
unsigned long address = page_address(page);
unsigned long address;
/*
* It's worth dropping the write lock only at
* this point. We are holding the page lock
* so nobody can do anything bad to us.
*/
spin_unlock(&pagecache_lock);
partial = 1;
address = page_address(page);
memset((void *) (offset + address), 0, PAGE_CACHE_SIZE - offset);
flush_page_to_ram(address);
/*
* we have dropped the lock so we have to
* restart.
*/
goto repeat;
}
}
spin_unlock(&pagecache_lock);
}
/*
* Remove a page from the page cache and free it.
* Remove a page from the page cache and free it. Caller has to make
* sure the page is locked and that nobody else uses it - or that usage
* is safe.
*/
void remove_inode_page(struct page *page)
{
remove_page_from_hash_queue(page);
struct inode *inode = page->inode;
if (!PageLocked(page))
PAGE_BUG(page);
/*
* We might sleep here. Other processes might arrive and sleep on
* the lock, but nobody is allowed to 'cross' the lock and get a
* reference to the page. We then remove the page from the hash
 * before unlocking it. This mechanism ensures that 1) nobody gets
* a half-freed page 2) nobody creates the same pagecache content
* before we finish destroying this page. This is not a
* performance problem as pages here are candidates for getting
* freed, ie. it's supposed to be unlikely that the above situation
* happens.
*/
if (inode->i_op->flushpage)
inode->i_op->flushpage(inode, page, 1);
spin_lock(&pagecache_lock);
remove_page_from_inode_queue(page);
page_cache_release(page);
remove_page_from_hash_queue(page);
page->inode = NULL;
spin_unlock(&pagecache_lock);
}
int shrink_mmap(int priority, int gfp_mask)
......@@ -138,7 +249,7 @@ int shrink_mmap(int priority, int gfp_mask)
static unsigned long clock = 0;
unsigned long limit = num_physpages;
struct page * page;
int count;
int count, err;
count = limit >> priority;
......@@ -171,7 +282,7 @@ int shrink_mmap(int priority, int gfp_mask)
continue;
/* We can't free pages unless there's just one user */
if (atomic_read(&page->count) != 1)
if (page_count(page) != 1)
continue;
count--;
......@@ -185,74 +296,110 @@ int shrink_mmap(int priority, int gfp_mask)
if (referenced && swap_count(page->offset) != 1)
continue;
delete_from_swap_cache(page);
return 1;
err = 1;
goto out;
}
if (referenced)
continue;
/* is it a page-cache page? */
spin_lock(&pagecache_lock);
if (page->inode) {
if (pgcache_under_min())
goto unlock_continue;
if (TryLockPage(page))
goto unlock_continue;
if (page_count(page) != 1) {
UnlockPage(page);
goto unlock_continue;
}
spin_unlock(&pagecache_lock);
remove_inode_page(page);
UnlockPage(page);
page_cache_release(page);
err = 1;
goto out;
unlock_continue:
spin_unlock(&pagecache_lock);
continue;
}
spin_unlock(&pagecache_lock);
/* Is it a buffer page? */
if (page->buffers) {
if (buffer_under_min())
continue;
if (!try_to_free_buffers(page))
continue;
return 1;
}
/* is it a page-cache page? */
if (page->inode) {
if (pgcache_under_min())
continue;
remove_inode_page(page);
return 1;
err = 1;
goto out;
}
} while (count > 0);
return 0;
err = 0;
out:
return err;
}
/*
* Update a page cache copy, when we're doing a "write()" system call
* See also "update_vm_cache()".
*/
void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
static inline struct page * __find_page_nolock(struct inode * inode, unsigned long offset, struct page *page)
{
unsigned long offset, len;
goto inside;
offset = (pos & ~PAGE_CACHE_MASK);
pos = pos & PAGE_CACHE_MASK;
len = PAGE_CACHE_SIZE - offset;
do {
struct page * page;
if (len > count)
len = count;
page = find_page(inode, pos);
if (page) {
wait_on_page(page);
memcpy((void *) (offset + page_address(page)), buf, len);
page_cache_release(page);
}
count -= len;
buf += len;
len = PAGE_CACHE_SIZE;
offset = 0;
pos += PAGE_CACHE_SIZE;
} while (count);
for (;;) {
page = page->next_hash;
inside:
if (!page)
goto not_found;
if (page->inode != inode)
continue;
if (page->offset == offset)
break;
}
not_found:
return page;
}
static inline void add_to_page_cache(struct page * page,
/*
* This adds a page to the page cache, starting out as locked,
* owned by us, referenced, but not uptodate and with no errors.
*/
static inline void __add_to_page_cache(struct page * page,
struct inode * inode, unsigned long offset,
struct page **hash)
{
atomic_inc(&page->count);
page->flags = (page->flags & ~((1 << PG_uptodate) | (1 << PG_error))) | (1 << PG_referenced);
unsigned long flags;
flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error));
page->flags = flags | ((1 << PG_locked) | (1 << PG_referenced));
page->owner = (int)current; /* REMOVEME */
get_page(page);
page->offset = offset;
add_page_to_inode_queue(inode, page);
__add_page_to_hash_queue(page, hash);
}
int add_to_page_cache_unique(struct page * page,
struct inode * inode, unsigned long offset,
struct page **hash)
{
int err;
struct page *alias;
spin_lock(&pagecache_lock);
alias = __find_page_nolock(inode, offset, *hash);
err = 1;
if (!alias) {
__add_to_page_cache(page,inode,offset,hash);
err = 0;
}
spin_unlock(&pagecache_lock);
return err;
}
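add_to_page_cache_unique() returns 0 when the new page was inserted and nonzero when some other context already hashed a page at the same (inode, offset), so callers allocate a fresh page, try to insert it, and on failure simply look the page up again. The schematic below only condenses the repeat_find loop that appears in generic_file_write further down in this file (error handling omitted); it is not additional code.

repeat:
	page = __find_lock_page(inode, pgpos, *hash);
	if (!page) {
		if (!page_cache)
			page_cache = page_cache_alloc();	/* may fail: -ENOMEM */
		page = page_cache_entry(page_cache);
		if (add_to_page_cache_unique(page, inode, pgpos, hash))
			goto repeat;	/* lost the race: somebody else added it */
		page_cache = 0;		/* the allocation now belongs to the cache */
	}
	/* here the page is locked, referenced and hashed */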
/*
* Try to read ahead in the file. "page_cache" is a potentially free page
* that we could use for the cache (if it is 0 we can try to create one,
......@@ -275,29 +422,30 @@ static unsigned long try_to_read_ahead(struct file * file,
if (offset >= inode->i_size)
break;
hash = page_hash(inode, offset);
page = __find_page(inode, offset, *hash);
if (!page) {
page = page_cache_entry(page_cache);
if (!add_to_page_cache_unique(page, inode, offset, hash)) {
/*
* Ok, add the new page to the hash-queues...
* We do not have to check the return value here
* because it's a readahead.
*/
page = page_cache_entry(page_cache);
add_to_page_cache(page, inode, offset, hash);
lock_kernel();
inode->i_op->readpage(file, page);
unlock_kernel();
page_cache = 0;
page_cache_release(page);
}
page_cache_release(page);
}
return page_cache;
}
/*
* Wait for IO to complete on a locked page.
* Wait for a page to get unlocked.
*
* This must be called with the caller "holding" the page,
* ie with increased "page->count" so that the page won't
* go away during the wait..
*/
void __wait_on_page(struct page *page)
void ___wait_on_page(struct page *page)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
......@@ -307,13 +455,143 @@ void __wait_on_page(struct page *page)
tsk->state = TASK_UNINTERRUPTIBLE;
run_task_queue(&tq_disk);
if (PageLocked(page)) {
schedule();
int left;
left = schedule_timeout(HZ*20);
if (!left)
PAGE_BUG(page);
goto repeat;
}
tsk->state = TASK_RUNNING;
remove_wait_queue(&page->wait, &wait);
}
/*
* Get an exclusive lock on the page..
*/
static void lock_page(struct page *page)
{
if (TryLockPage(page)) {
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, current);
run_task_queue(&tq_disk);
add_wait_queue(&page->wait, &wait);
tsk->state = TASK_UNINTERRUPTIBLE;
while (TryLockPage(page)) {
schedule();
tsk->state = TASK_UNINTERRUPTIBLE;
}
remove_wait_queue(&page->wait, &wait);
tsk->state = TASK_RUNNING;
}
}
/*
* a rather lightweight function, finding and getting a reference to a
* hashed page atomically, waiting for it if it's locked.
*/
struct page * __find_get_page (struct inode * inode,
unsigned long offset, struct page *page)
{
/*
* We scan the hash list read-only. Addition to and removal from
* the hash-list needs a held write-lock.
*/
repeat:
spin_lock(&pagecache_lock);
page = __find_page_nolock(inode, offset, page);
if (page)
get_page(page);
spin_unlock(&pagecache_lock);
/* Found the page, sleep if locked. */
if (page && PageLocked(page)) {
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
add_wait_queue(&page->wait, &wait);
tsk->state = TASK_UNINTERRUPTIBLE;
run_task_queue(&tq_disk);
if (PageLocked(page))
schedule();
tsk->state = TASK_RUNNING;
remove_wait_queue(&page->wait, &wait);
/*
* The page might have been unhashed meanwhile. It's
* not freed though because we hold a reference to it.
* If this is the case then it will be freed _here_,
* and we recheck the hash anyway.
*/
page_cache_release(page);
goto repeat;
}
/*
* It's not locked so we can return the page and we hold
* a reference to it.
*/
return page;
}
/*
* Get the lock to a page atomically.
*/
struct page * __find_lock_page (struct inode * inode,
unsigned long offset, struct page *page)
{
int locked;
/*
* We scan the hash list read-only. Addition to and removal from
* the hash-list needs a held write-lock.
*/
repeat:
spin_lock(&pagecache_lock);
page = __find_page_nolock(inode, offset, page);
locked = 0;
if (page) {
get_page(page);
if (TryLockPage(page))
locked = 1;
}
spin_unlock(&pagecache_lock);
/* Found the page, sleep if locked. */
if (page && locked) {
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
add_wait_queue(&page->wait, &wait);
tsk->state = TASK_UNINTERRUPTIBLE;
run_task_queue(&tq_disk);
if (PageLocked(page))
schedule();
tsk->state = TASK_RUNNING;
remove_wait_queue(&page->wait, &wait);
/*
* The page might have been unhashed meanwhile. It's
* not freed though because we hold a reference to it.
* If this is the case then it will be freed _here_,
* and we recheck the hash anyway.
*/
page_cache_release(page);
goto repeat;
}
/*
* It's not locked so we can return the page and we hold
* a reference to it.
*/
return page;
}
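The two lookup helpers above differ only in the state of the page they hand back; roughly (a summary of the code above, not a new interface):

	page = __find_get_page(inode, offset, *hash);	/* referenced, not locked */
	page = __find_lock_page(inode, offset, *hash);	/* referenced and locked  */

filemap_nopage below uses the first form, since it only needs a stable reference while it checks Page_Uptodate(), while generic_file_write uses the second, because the writer must own the page lock across write_one_page().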
#if 0
#define PROFILE_READAHEAD
#define DEBUG_READAHEAD
......@@ -386,14 +664,14 @@ static void profile_readahead(int async, struct file *filp)
* -------------------
* The read ahead context fields of the "struct file" are the following:
* - f_raend : position of the first byte after the last page we tried to
* read ahead.
* read ahead.
* - f_ramax : current read-ahead maximum size.
* - f_ralen : length of the current IO read block we tried to read-ahead.
* - f_rawin : length of the current read-ahead window.
* if last read-ahead was synchronous then
* f_rawin = f_ralen
* otherwise (was asynchronous)
* f_rawin = previous value of f_ralen + f_ralen
* if last read-ahead was synchronous then
* f_rawin = f_ralen
* otherwise (was asynchronous)
* f_rawin = previous value of f_ralen + f_ralen
*
* Read-ahead limits:
* ------------------
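A worked example of the bookkeeping above (the numbers are invented): if the last read-ahead was synchronous and fetched 4 pages, then f_ralen = 4 pages and f_rawin = 4 pages; if the next read-ahead is asynchronous and fetches 8 pages, f_ralen becomes 8 pages and f_rawin becomes 4 + 8 = 12 pages, i.e. the window spans the last two read-ahead batches.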
......@@ -485,7 +763,7 @@ static inline unsigned long generic_file_readahead(int reada_ok,
* We will later force unplug device in order to force asynchronous read IO.
*/
else if (reada_ok && filp->f_ramax && raend >= PAGE_CACHE_SIZE &&
ppos <= raend && ppos + filp->f_ralen >= raend) {
ppos <= raend && ppos + filp->f_ralen >= raend) {
/*
* Add ONE page to max_ahead in order to try to have about the same IO max size
* as synchronous read-ahead (MAX_READAHEAD + 1)*PAGE_CACHE_SIZE.
......@@ -578,6 +856,7 @@ static void do_generic_file_read(struct file * filp, loff_t *ppos, read_descript
struct inode *inode = dentry->d_inode;
size_t pos, pgpos, page_cache;
int reada_ok;
int error;
int max_readahead = get_max_readahead(inode);
page_cache = 0;
......@@ -633,33 +912,22 @@ static void do_generic_file_read(struct file * filp, loff_t *ppos, read_descript
* Try to find the data in the page cache..
*/
hash = page_hash(inode, pos & PAGE_CACHE_MASK);
page = __find_page(inode, pos & PAGE_CACHE_MASK, *hash);
spin_lock(&pagecache_lock);
page = __find_page_nolock(inode, pos & PAGE_CACHE_MASK, *hash);
if (!page)
goto no_cached_page;
found_page:
/*
* Try to read ahead only if the current page is filled or being filled.
* Otherwise, if we were reading ahead, decrease max read ahead size to
* the minimum value.
* In this context, that seems to may happen only on some read error or if
* the page has been rewritten.
*/
if (PageUptodate(page) || PageLocked(page))
page_cache = generic_file_readahead(reada_ok, filp, inode, pos & PAGE_CACHE_MASK, page, page_cache);
else if (reada_ok && filp->f_ramax > MIN_READAHEAD)
filp->f_ramax = MIN_READAHEAD;
wait_on_page(page);
if (!PageUptodate(page))
goto page_read_error;
get_page(page);
spin_unlock(&pagecache_lock);
success:
/*
* Ok, we have the page, it's up-to-date and ok,
* so now we can finally copy it to user space...
*/
if (!Page_Uptodate(page))
goto page_not_up_to_date;
page_ok:
/*
* Ok, we have the page, and it's up-to-date, so
* now we can copy it to user space...
*/
{
unsigned long offset, nr;
......@@ -683,75 +951,79 @@ static void do_generic_file_read(struct file * filp, loff_t *ppos, read_descript
break;
}
/*
* Ok, the page was not immediately readable, so let's try to read ahead while we're at it..
*/
page_not_up_to_date:
page_cache = generic_file_readahead(reada_ok, filp, inode, pos & PAGE_CACHE_MASK, page, page_cache);
if (Page_Uptodate(page))
goto page_ok;
/* Get exclusive access to the page ... */
lock_page(page);
if (Page_Uptodate(page)) {
UnlockPage(page);
goto page_ok;
}
read_page:
/* ... and start the actual read. The read will unlock the page. */
lock_kernel();
error = inode->i_op->readpage(filp, page);
unlock_kernel();
if (!error) {
if (Page_Uptodate(page))
goto page_ok;
/* Again, try some read-ahead while waiting for the page to finish.. */
page_cache = generic_file_readahead(reada_ok, filp, inode, pos & PAGE_CACHE_MASK, page, page_cache);
wait_on_page(page);
if (Page_Uptodate(page))
goto page_ok;
error = -EIO;
}
/* UHHUH! A synchronous read error occurred. Report it */
desc->error = error;
page_cache_release(page);
break;
no_cached_page:
/*
* Ok, it wasn't cached, so we need to create a new
* page..
*
* We get here with the page cache lock held.
*/
if (!page_cache) {
spin_unlock(&pagecache_lock);
page_cache = page_cache_alloc();
if (!page_cache) {
desc->error = -ENOMEM;
break;
}
/*
* That could have slept, so go around to the
* very beginning..
* Somebody may have added the page while we
* dropped the page cache lock. Check for that.
*/
if (page_cache)
continue;
desc->error = -ENOMEM;
break;
spin_lock(&pagecache_lock);
page = __find_page_nolock(inode, pos & PAGE_CACHE_MASK, *hash);
if (page)
goto found_page;
}
/*
* Ok, add the new page to the hash-queues...
*/
page = page_cache_entry(page_cache);
page_cache = 0;
add_to_page_cache(page, inode, pos & PAGE_CACHE_MASK, hash);
/*
* Error handling is tricky. If we get a read error,
* the cached page stays in the cache (but uptodate=0),
* and the next process that accesses it will try to
* re-read it. This is needed for NFS etc, where the
* identity of the reader can decide if we can read the
* page or not..
*/
/*
* We have to read the page.
* If we were reading ahead, we had previously tried to read this page,
* That means that the page has probably been removed from the cache before
* the application process needs it, or has been rewritten.
* Decrease max readahead size to the minimum value in that situation.
*/
if (reada_ok && filp->f_ramax > MIN_READAHEAD)
filp->f_ramax = MIN_READAHEAD;
{
int error = inode->i_op->readpage(filp, page);
if (!error)
goto found_page;
desc->error = error;
page_cache_release(page);
break;
}
__add_to_page_cache(page, inode, pos & PAGE_CACHE_MASK, hash);
spin_unlock(&pagecache_lock);
page_read_error:
/*
* We found the page, but it wasn't up-to-date.
* Try to re-read it _once_. We do this synchronously,
* because this happens only if there were errors.
*/
{
int error = inode->i_op->readpage(filp, page);
if (!error) {
wait_on_page(page);
if (PageUptodate(page) && !PageError(page))
goto success;
error = -EIO; /* Some unspecified error occurred.. */
}
desc->error = error;
page_cache_release(page);
break;
}
page_cache = 0;
goto read_page;
}
*ppos = pos;
......@@ -787,6 +1059,7 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *
{
ssize_t retval;
unlock_kernel();
retval = -EFAULT;
if (access_ok(VERIFY_WRITE, buf, count)) {
retval = 0;
......@@ -804,6 +1077,7 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *
retval = desc.error;
}
}
lock_kernel();
return retval;
}
......@@ -934,17 +1208,20 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long
unsigned long offset, reada, i;
struct page * page, **hash;
unsigned long old_page, new_page;
int error;
new_page = 0;
offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
goto no_page;
goto no_page_nolock;
unlock_kernel();
/*
* Do we have something in the page cache already?
*/
hash = page_hash(inode, offset);
page = __find_page(inode, offset, *hash);
retry_find:
page = __find_get_page(inode, offset, *hash);
if (!page)
goto no_cached_page;
......@@ -960,15 +1237,15 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long
goto failure;
}
if (PageLocked(page))
goto page_locked_wait;
if (!PageUptodate(page))
goto page_read_error;
wait_on_page(page);
if (!Page_Uptodate(page))
PAGE_BUG(page);
success:
/*
* Found the page, need to check sharing and possibly
* copy it over to another page..
* Found the page and have a reference on it, need to check sharing
* and possibly copy it over to another page..
*/
old_page = page_address(page);
if (!no_share) {
......@@ -980,6 +1257,7 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long
page_cache_free(new_page);
flush_page_to_ram(old_page);
lock_kernel();
return old_page;
}
......@@ -989,6 +1267,7 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long
copy_page(new_page, old_page);
flush_page_to_ram(new_page);
page_cache_release(page);
lock_kernel();
return new_page;
no_cached_page:
......@@ -1013,7 +1292,7 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long
* cache.. The page we just got may be useful if we
* can't share, so don't get rid of it here.
*/
page = find_page(inode, offset);
page = __find_get_page(inode, offset, *hash);
if (page)
goto found_page;
......@@ -1021,19 +1300,25 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long
* Now, create a new page-cache page from the page we got
*/
page = page_cache_entry(new_page);
new_page = 0;
add_to_page_cache(page, inode, offset, hash);
if (add_to_page_cache_unique(page, inode, offset, hash))
goto retry_find;
if (inode->i_op->readpage(file, page) != 0)
goto failure;
/*
* Now it's ours and locked, we can do initial IO to it:
*/
new_page = 0;
goto found_page;
lock_kernel();
error = inode->i_op->readpage(file, page);
unlock_kernel();
page_locked_wait:
__wait_on_page(page);
if (PageUptodate(page))
if (!error) {
wait_on_page(page);
if (PageError(page))
goto page_read_error;
goto success;
}
page_read_error:
/*
* Umm, take care of errors if the page isn't up-to-date.
......@@ -1041,12 +1326,16 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long
* because there really aren't any performance issues here
* and we need to check for errors.
*/
if (inode->i_op->readpage(file, page) != 0)
if (!PageLocked(page))
PAGE_BUG(page);
ClearPageError(page);
lock_kernel();
error = inode->i_op->readpage(file, page);
unlock_kernel();
if (error)
goto failure;
wait_on_page(page);
if (PageError(page))
goto failure;
if (PageUptodate(page))
if (Page_Uptodate(page))
goto success;
/*
......@@ -1058,6 +1347,8 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long
if (new_page)
page_cache_free(new_page);
no_page:
lock_kernel();
no_page_nolock:
return 0;
}
......@@ -1066,12 +1357,14 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long
* if the disk is full.
*/
static inline int do_write_page(struct inode * inode, struct file * file,
const char * page, unsigned long offset)
const char * page_addr, unsigned long offset)
{
int retval;
unsigned long size;
loff_t loff = offset;
mm_segment_t old_fs;
int (*writepage) (struct file *, struct page *);
struct page * page;
size = offset + PAGE_SIZE;
/* refuse to extend file size.. */
......@@ -1086,8 +1379,19 @@ static inline int do_write_page(struct inode * inode, struct file * file,
old_fs = get_fs();
set_fs(KERNEL_DS);
retval = -EIO;
if (size == file->f_op->write(file, (const char *) page, size, &loff))
writepage = inode->i_op->writepage;
page = mem_map + MAP_NR(page_addr);
repeat:
wait_on_page(page);
if (TryLockPage(page))
goto repeat;
if (writepage) {
retval = writepage(file, page);
} else {
if (size == file->f_op->write(file, page_addr, size, &loff))
retval = 0;
}
UnlockPage(page);
set_fs(old_fs);
return retval;
}
......@@ -1146,7 +1450,8 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
pte_t pte = *ptep;
unsigned long page;
unsigned long pageaddr;
struct page *page;
int error;
if (!(flags & MS_INVALIDATE)) {
......@@ -1158,8 +1463,9 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
flush_cache_page(vma, address);
set_pte(ptep, pte_mkclean(pte));
flush_tlb_page(vma, address);
page = pte_page(pte);
atomic_inc(&page_cache_entry(page)->count);
pageaddr = pte_page(pte);
page = page_cache_entry(pageaddr);
get_page(page);
} else {
if (pte_none(pte))
return 0;
......@@ -1170,14 +1476,14 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
swap_free(pte_val(pte));
return 0;
}
page = pte_page(pte);
pageaddr = pte_page(pte);
if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
page_cache_free(page);
page_cache_free(pageaddr);
return 0;
}
}
error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page, 1);
page_cache_free(page);
error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, pageaddr, 1);
page_cache_free(pageaddr);
return error;
}
......@@ -1436,11 +1742,12 @@ generic_file_write(struct file *file, const char *buf,
unsigned long page_cache = 0;
unsigned long written;
long status;
int err;
if (file->f_error) {
int error = file->f_error;
err = file->f_error;
if (err) {
file->f_error = 0;
return error;
goto out;
}
written = 0;
......@@ -1451,7 +1758,7 @@ generic_file_write(struct file *file, const char *buf,
/*
* Check whether we've reached the file size limit.
*/
status = -EFBIG;
err = -EFBIG;
if (pos >= limit) {
send_sig(SIGXFSZ, current, 0);
goto out;
......@@ -1467,6 +1774,8 @@ generic_file_write(struct file *file, const char *buf,
count = limit - pos;
}
unlock_kernel();
while (count) {
unsigned long bytes, pgpos, offset;
/*
......@@ -1480,29 +1789,36 @@ generic_file_write(struct file *file, const char *buf,
bytes = count;
hash = page_hash(inode, pgpos);
page = __find_page(inode, pgpos, *hash);
repeat_find:
page = __find_lock_page(inode, pgpos, *hash);
if (!page) {
if (!page_cache) {
page_cache = page_cache_alloc();
if (page_cache)
continue;
goto repeat_find;
status = -ENOMEM;
break;
}
page = page_cache_entry(page_cache);
add_to_page_cache(page, inode, pgpos, hash);
if (add_to_page_cache_unique(page,inode,pgpos,hash))
goto repeat_find;
page_cache = 0;
}
/* Get exclusive IO access to the page.. */
wait_on_page(page);
set_bit(PG_locked, &page->flags);
/* We have exclusive IO access to the page.. */
if (!PageLocked(page)) {
PAGE_BUG(page);
} else {
if (page->owner != (int)current) {
PAGE_BUG(page);
}
}
status = write_one_page(file, page, offset, bytes, buf);
/* Mark it unlocked again and drop the page.. */
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
UnlockPage(page);
page_cache_release(page);
if (status < 0)
......@@ -1519,51 +1835,16 @@ generic_file_write(struct file *file, const char *buf,
if (page_cache)
page_cache_free(page_cache);
err = written ? written : status;
lock_kernel();
out:
return written ? written : status;
return err;
}
/*
* Support routines for directory cacheing using the page cache.
*/
/*
* Finds the page at the specified offset, installing a new page
* if requested. The count is incremented and the page is locked.
*
* Note: we don't have to worry about races here, as the caller
* is holding the inode semaphore.
* Support routines for directory caching using the page cache.
*/
unsigned long get_cached_page(struct inode * inode, unsigned long offset,
int new)
{
struct page * page;
struct page ** hash;
unsigned long page_cache = 0;
hash = page_hash(inode, offset);
page = __find_page(inode, offset, *hash);
if (!page) {
if (!new)
goto out;
page_cache = page_cache_alloc();
if (!page_cache)
goto out;
clear_page(page_cache);
page = page_cache_entry(page_cache);
add_to_page_cache(page, inode, offset, hash);
}
if (atomic_read(&page->count) != 2)
printk(KERN_ERR "get_cached_page: page count=%d\n",
atomic_read(&page->count));
if (test_bit(PG_locked, &page->flags))
printk(KERN_ERR "get_cached_page: page already locked!\n");
set_bit(PG_locked, &page->flags);
page_cache = page_address(page);
out:
return page_cache;
}
/*
* Unlock and free a page.
......@@ -1572,13 +1853,10 @@ void put_cached_page(unsigned long addr)
{
struct page * page = page_cache_entry(addr);
if (!test_bit(PG_locked, &page->flags))
printk("put_cached_page: page not locked!\n");
if (atomic_read(&page->count) != 2)
printk("put_cached_page: page count=%d\n",
atomic_read(&page->count));
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
UnlockPage(page);
if (page_count(page) != 2)
panic("put_cached_page: page count=%d\n",
page_count(page));
page_cache_release(page);
}
......@@ -1607,11 +1885,13 @@ static inline struct pio_request * get_pio_request(void)
static inline void make_pio_request(struct file *file,
unsigned long offset,
unsigned long page)
unsigned long pageaddr)
{
struct pio_request *p;
struct page *page;
atomic_inc(&page_cache_entry(page)->count);
page = page_cache_entry(pageaddr);
get_page(page);
/*
* We need to allocate without causing any recursive IO in the
......@@ -1634,7 +1914,7 @@ static inline void make_pio_request(struct file *file,
p->file = file;
p->offset = offset;
p->page = page;
p->page = pageaddr;
put_pio_request(p);
wake_up(&pio_wait);
......
......@@ -272,7 +272,7 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK;
if (vma->vm_flags & VM_SHARED)
pte = pte_mkclean(pte);
set_pte(dst_pte, pte_mkold(pte));
atomic_inc(&mem_map[page_nr].count);
get_page(mem_map + page_nr);
cont_copy_pte_range: address += PAGE_SIZE;
if (address >= end)
......@@ -554,7 +554,7 @@ unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsig
if (MAP_NR(page) >= max_mapnr)
printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
if (atomic_read(&mem_map[MAP_NR(page)].count) != 1)
if (page_count(mem_map + MAP_NR(page)) != 1)
printk("mem_map disagrees with %08lx at %08lx\n",page,address);
pgd = pgd_offset(tsk->mm,address);
pmd = pmd_alloc(pgd, address);
......@@ -602,17 +602,17 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
unsigned long address, pte_t *page_table, pte_t pte)
{
unsigned long old_page, new_page;
struct page * page_map;
struct page * page;
new_page = __get_free_page(GFP_USER);
/* Did swap_out() unmapped the protected page while we slept? */
/* Did swap_out() unmap the protected page while we slept? */
if (pte_val(*page_table) != pte_val(pte))
goto end_wp_page;
old_page = pte_page(pte);
if (MAP_NR(old_page) >= max_mapnr)
goto bad_wp_page;
tsk->min_flt++;
page_map = mem_map + MAP_NR(old_page);
page = mem_map + MAP_NR(old_page);
/*
* We can avoid the copy if:
......@@ -622,13 +622,13 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
* in which case we can remove the page
* from the swap cache.
*/
switch (atomic_read(&page_map->count)) {
switch (page_count(page)) {
case 2:
if (!PageSwapCache(page_map))
if (!PageSwapCache(page))
break;
if (swap_count(page_map->offset) != 1)
if (swap_count(page->offset) != 1)
break;
delete_from_swap_cache(page_map);
delete_from_swap_cache(page);
/* FallThrough */
case 1:
flush_cache_page(vma, address);
......@@ -650,7 +650,7 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
if (!new_page)
goto no_new_page;
if (PageReserved(page_map))
if (PageReserved(page))
++vma->vm_mm->rss;
copy_cow_page(old_page,new_page);
flush_page_to_ram(old_page);
......@@ -659,7 +659,7 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
flush_tlb_page(vma, address);
unlock_kernel();
__free_page(page_map);
__free_page(page);
return 1;
bad_wp_page:
......@@ -774,7 +774,7 @@ static int do_swap_page(struct task_struct * tsk,
if (pte_val(*page_table) != pte_val(entry)) {
free_page(pte_page(page));
} else {
if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) > 1 &&
if (page_count(mem_map + MAP_NR(pte_page(page))) > 1 &&
!(vma->vm_flags & VM_SHARED))
page = pte_wrprotect(page);
++vma->vm_mm->rss;
......@@ -858,7 +858,7 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
entry = mk_pte(page, vma->vm_page_prot);
if (write_access) {
entry = pte_mkwrite(pte_mkdirty(entry));
} else if (atomic_read(&mem_map[MAP_NR(page)].count) > 1 &&
} else if (page_count(mem_map+MAP_NR(page)) > 1 &&
!(vma->vm_flags & VM_SHARED))
entry = pte_wrprotect(entry);
set_pte(page_table, entry);
......
......@@ -63,7 +63,7 @@ int vm_enough_memory(long pages)
return 1;
free = buffermem >> PAGE_SHIFT;
free += page_cache_size;
free += atomic_read(&page_cache_size);
free += nr_free_pages;
free += nr_swap_pages;
free -= (page_cache.min_percent + buffer_mem.min_percent + 2)*num_physpages/100;
......@@ -728,6 +728,10 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
struct vm_area_struct * vma;
unsigned long flags, retval;
len = PAGE_ALIGN(len);
if (!len)
return addr;
/*
* mlock MCL_FUTURE?
*/
......
......@@ -121,9 +121,9 @@ static inline void free_pages_ok(unsigned long map_nr, unsigned long order)
void __free_page(struct page *page)
{
if (!PageReserved(page) && atomic_dec_and_test(&page->count)) {
if (!PageReserved(page) && put_page_testzero(page)) {
if (PageSwapCache(page))
panic ("Freeing swap cache page");
PAGE_BUG(page);
page->flags &= ~(1 << PG_referenced);
free_pages_ok(page - mem_map, 0);
return;
......@@ -138,9 +138,9 @@ void free_pages(unsigned long addr, unsigned long order)
mem_map_t * map = mem_map + map_nr;
if (PageReserved(map))
return;
if (atomic_dec_and_test(&map->count)) {
if (put_page_testzero(map)) {
if (PageSwapCache(map))
panic ("Freeing swap cache pages");
PAGE_BUG(map);
map->flags &= ~(1 << PG_referenced);
free_pages_ok(map_nr, order);
return;
......@@ -167,7 +167,7 @@ do { struct free_area_struct * area = free_area+order; \
MARK_USED(map_nr, new_order, area); \
nr_free_pages -= 1 << order; \
EXPAND(ret, map_nr, order, new_order, area); \
spin_unlock_irqrestore(&page_alloc_lock, flags); \
spin_unlock_irqrestore(&page_alloc_lock,flags);\
return ADDRESS(map_nr); \
} \
prev = ret; \
......@@ -186,7 +186,7 @@ do { unsigned long size = 1 << high; \
index += size; \
map += size; \
} \
atomic_set(&map->count, 1); \
set_page_count(map, 1); \
} while (0)
int low_on_memory = 0;
......@@ -321,7 +321,7 @@ unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_m
memset(mem_map, 0, start_mem - (unsigned long) mem_map);
do {
--p;
atomic_set(&p->count, 0);
set_page_count(p, 0);
p->flags = (1 << PG_DMA) | (1 << PG_reserved);
init_waitqueue_head(&p->wait);
} while (p > mem_map);
......
......@@ -47,7 +47,7 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
#ifdef DEBUG_SWAP
printk ("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n",
(rw == READ) ? "read" : "write",
entry, (char *) page_address(page), atomic_read(&page->count),
entry, (char *) page_address(page), page_count(page),
wait ? "wait" : "nowait");
#endif
......@@ -105,12 +105,12 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
}
}
if (rw == READ) {
clear_bit(PG_uptodate, &page->flags);
ClearPageUptodate(page);
kstat.pswpin++;
} else
kstat.pswpout++;
atomic_inc(&page->count);
get_page(page);
if (p->swap_device) {
zones[0] = offset;
zones_used = 1;
......@@ -167,7 +167,7 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
printk("swap_after_unlock_page: lock already cleared\n");
wake_up(&lock_queue);
}
atomic_dec(&page->count);
put_page(page);
return;
}
if (!wait) {
......@@ -182,23 +182,27 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
/* block_size == PAGE_SIZE/zones_used */
brw_page(rw, page, dev, zones, block_size, 0);
if (rw == WRITE) // HACK, FIXME
UnlockPage(page);
/* Note! For consistency we do all of the logic,
* decrementing the page count, and unlocking the page in the
* swap lock map - in the IO completion handler.
*/
if (!wait)
if (!wait) {
return;
}
wait_on_page(page);
/* This shouldn't happen, but check to be sure. */
if (atomic_read(&page->count) == 0)
if (page_count(page) == 0)
printk(KERN_ERR "rw_swap_page: page unused while waiting!\n");
#ifdef DEBUG_SWAP
printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n",
(rw == READ) ? "read" : "write",
(char *) page_adddress(page),
atomic_read(&page->count));
(char *) page_address(page),
page_count(page));
#endif
}
......@@ -238,7 +242,7 @@ void rw_swap_page(int rw, unsigned long entry, char *buf, int wait)
struct page *page = mem_map + MAP_NR(buf);
if (page->inode && page->inode != &swapper_inode)
panic ("Tried to swap a non-swapper page");
PAGE_BUG(page);
/*
* Make sure that we have a swap cache association for this
......@@ -268,23 +272,27 @@ void rw_swap_page_nocache(int rw, unsigned long entry, char *buffer)
struct page *page;
page = mem_map + MAP_NR((unsigned long) buffer);
wait_on_page(page);
set_bit(PG_locked, &page->flags);
if (test_and_set_bit(PG_swap_cache, &page->flags)) {
printk ("VM: read_swap_page: page already in swap cache!\n");
return;
}
if (page->inode) {
printk ("VM: read_swap_page: page already in page cache!\n");
return;
}
if (TryLockPage(page))
PAGE_BUG(page);
if (test_and_set_bit(PG_swap_cache, &page->flags))
PAGE_BUG(page);
if (page->inode)
PAGE_BUG(page);
get_page(page); /* Protect from shrink_mmap() */
page->inode = &swapper_inode;
page->offset = entry;
atomic_inc(&page->count); /* Protect from shrink_mmap() */
rw_swap_page(rw, entry, buffer, 1);
atomic_dec(&page->count);
page->inode = 0;
clear_bit(PG_swap_cache, &page->flags);
/*
* and now remove it from the pagecache ...
*/
if (TryLockPage(page))
PAGE_BUG(page);
PageClearSwapCache(page);
remove_inode_page(page);
page_cache_release(page);
UnlockPage(page);
}
/*
......
......@@ -25,7 +25,32 @@
* ensure that any mistaken dereferences of this structure cause a
* kernel oops.
*/
struct inode swapper_inode;
static struct inode_operations swapper_inode_operations = {
NULL, /* default file operations */
NULL, /* create */
NULL, /* lookup */
NULL, /* link */
NULL, /* unlink */
NULL, /* symlink */
NULL, /* mkdir */
NULL, /* rmdir */
NULL, /* mknod */
NULL, /* rename */
NULL, /* readlink */
NULL, /* follow_link */
NULL, /* readpage */
NULL, /* writepage */
NULL, /* bmap */
NULL, /* truncate */
NULL, /* permission */
NULL, /* smap */
NULL, /* updatepage */
NULL, /* revalidate */
generic_block_flushpage, /* flushpage */
};
struct inode swapper_inode = { i_op: &swapper_inode_operations };
#ifdef SWAP_CACHE_INFO
unsigned long swap_cache_add_total = 0;
......@@ -49,20 +74,20 @@ int add_to_swap_cache(struct page *page, unsigned long entry)
#endif
#ifdef DEBUG_SWAP
printk("DebugVM: add_to_swap_cache(%08lx count %d, entry %08lx)\n",
page_address(page), atomic_read(&page->count), entry);
page_address(page), page_count(page), entry);
#endif
if (PageTestandSetSwapCache(page)) {
printk(KERN_ERR "swap_cache: replacing non-empty entry %08lx "
"on page %08lx\n",
page->offset, page_address(page));
"on page %08lx\n",
page->offset, page_address(page));
return 0;
}
if (page->inode) {
printk(KERN_ERR "swap_cache: replacing page-cached entry "
"on page %08lx\n", page_address(page));
"on page %08lx\n", page_address(page));
return 0;
}
atomic_inc(&page->count);
get_page(page);
page->inode = &swapper_inode;
page->offset = entry;
add_page_to_hash_queue(page, &swapper_inode, entry);
......@@ -111,7 +136,7 @@ int swap_duplicate(unsigned long entry)
result = 1;
#ifdef DEBUG_SWAP
printk("DebugVM: swap_duplicate(entry %08lx, count now %d)\n",
entry, p->swap_map[offset]);
entry, p->swap_map[offset]);
#endif
out:
return result;
......@@ -127,7 +152,7 @@ int swap_duplicate(unsigned long entry)
bad_unused:
printk(KERN_ERR
"swap_duplicate at %8p: entry %08lx, unused page\n",
__builtin_return_address(0), entry);
__builtin_return_address(0), entry);
goto out;
}
......@@ -153,7 +178,7 @@ int swap_count(unsigned long entry)
retval = p->swap_map[offset];
#ifdef DEBUG_SWAP
printk("DebugVM: swap_count(entry %08lx, count %d)\n",
entry, retval);
entry, retval);
#endif
out:
return retval;
......@@ -163,16 +188,16 @@ int swap_count(unsigned long entry)
goto out;
bad_file:
printk(KERN_ERR
"swap_count: entry %08lx, nonexistent swap file!\n", entry);
"swap_count: entry %08lx, nonexistent swap file!\n", entry);
goto out;
bad_offset:
printk(KERN_ERR
"swap_count: entry %08lx, offset exceeds max!\n", entry);
"swap_count: entry %08lx, offset exceeds max!\n", entry);
goto out;
bad_unused:
printk(KERN_ERR
"swap_count at %8p: entry %08lx, unused page!\n",
__builtin_return_address(0), entry);
"swap_count at %8p: entry %08lx, unused page!\n",
__builtin_return_address(0), entry);
goto out;
}
......@@ -190,10 +215,11 @@ static inline void remove_from_swap_cache(struct page *page)
#ifdef DEBUG_SWAP
printk("DebugVM: remove_from_swap_cache(%08lx count %d)\n",
page_address(page), atomic_read(&page->count));
page_address(page), page_count(page));
#endif
PageClearSwapCache (page);
PageClearSwapCache(page);
remove_inode_page(page);
page_cache_release(page);
}
......@@ -205,16 +231,19 @@ void delete_from_swap_cache(struct page *page)
{
long entry = page->offset;
LockPage(page);
#ifdef SWAP_CACHE_INFO
swap_cache_del_total++;
#endif
#ifdef DEBUG_SWAP
printk("DebugVM: delete_from_swap_cache(%08lx count %d, "
"entry %08lx)\n",
page_address(page), atomic_read(&page->count), entry);
"entry %08lx)\n",
page_address(page), page_count(page), entry);
#endif
remove_from_swap_cache (page);
swap_free (entry);
UnlockPage(page);
}
/*
......@@ -238,9 +267,10 @@ void free_page_and_swap_cache(unsigned long addr)
/*
* Lookup a swap entry in the swap cache. We need to be careful about
* locked pages. A found page will be returned with its refcount
* incremented.
* Lookup a swap entry in the swap cache. A found page will be returned
* unlocked and with its refcount incremented - we rely on the kernel
* lock getting page table operations atomic even if we drop the page
* lock before returning.
*/
struct page * lookup_swap_cache(unsigned long entry)
......@@ -251,23 +281,21 @@ struct page * lookup_swap_cache(unsigned long entry)
swap_cache_find_total++;
#endif
while (1) {
found = find_page(&swapper_inode, entry);
found = find_lock_page(&swapper_inode, entry);
if (!found)
return 0;
if (found->inode != &swapper_inode || !PageSwapCache(found))
goto out_bad;
if (!PageLocked(found)) {
#ifdef SWAP_CACHE_INFO
swap_cache_find_success++;
swap_cache_find_success++;
#endif
return found;
}
__free_page(found);
__wait_on_page(found);
UnlockPage(found);
return found;
}
out_bad:
printk (KERN_ERR "VM: Found a non-swapper swap page!\n");
UnlockPage(found);
__free_page(found);
return 0;
}
......@@ -288,7 +316,7 @@ struct page * read_swap_cache_async(unsigned long entry, int wait)
#ifdef DEBUG_SWAP
printk("DebugVM: read_swap_cache_async entry %08lx%s\n",
entry, wait ? ", wait" : "");
entry, wait ? ", wait" : "");
#endif
/*
* Make sure the swap entry is still in use.
......@@ -319,12 +347,12 @@ struct page * read_swap_cache_async(unsigned long entry, int wait)
if (!add_to_swap_cache(new_page, entry))
goto out_free_page;
set_bit(PG_locked, &new_page->flags);
LockPage(new_page);
rw_swap_page(READ, entry, (char *) new_page_addr, wait);
#ifdef DEBUG_SWAP
printk("DebugVM: read_swap_cache_async created "
"entry %08lx at %p\n",
entry, (char *) page_address(new_page));
"entry %08lx at %p\n",
entry, (char *) page_address(new_page));
#endif
return new_page;
......@@ -335,3 +363,4 @@ struct page * read_swap_cache_async(unsigned long entry, int wait)
out:
return found_page;
}
......@@ -192,7 +192,7 @@ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
return;
set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
swap_free(entry);
atomic_inc(&mem_map[MAP_NR(page)].count);
get_page(mem_map + MAP_NR(page));
++vma->vm_mm->rss;
}
......
......@@ -157,7 +157,7 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
add_to_swap_cache(page_map, entry);
/* We checked we were unlocked way up above, and we
have been careful not to stall until here */
set_bit(PG_locked, &page_map->flags);
LockPage(page_map);
/* OK, do a physical asynchronous write to swap. */
rw_swap_page(WRITE, entry, (char *) page, 0);
......
......@@ -377,7 +377,6 @@ EXPORT_SYMBOL(rtattr_parse);
EXPORT_SYMBOL(rtnetlink_links);
EXPORT_SYMBOL(__rta_fill);
EXPORT_SYMBOL(rtnetlink_dump_ifinfo);
EXPORT_SYMBOL(rtnl_wlockct);
EXPORT_SYMBOL(rtnl);
EXPORT_SYMBOL(neigh_delete);
EXPORT_SYMBOL(neigh_add);
......