Commit 991b3ae8 authored by Linus Torvalds

v2.4.9.3 -> v2.4.9.4

  - Hugh Dickins: swapoff cleanups and speedups
  - Matthew Dharm: USB storage update
  - Keith Owens: Makefile fixes
  - Tom Rini: MPC8xx build fix
  - Nikita Danilov: reiserfs update
  - Jakub Jelinek: ELF loader fix for ET_DYN
  - Andrew Morton: reparent_to_init() for kernel threads
  - Christoph Hellwig: VxFS and SysV updates, vfs_permission fix
parent df038637
...@@ -11601,6 +11601,20 @@ CONFIG_USB_STORAGE_DEBUG ...@@ -11601,6 +11601,20 @@ CONFIG_USB_STORAGE_DEBUG
Say Y here in order to have the USB Mass Storage code generate Say Y here in order to have the USB Mass Storage code generate
verbose debugging messages. verbose debugging messages.
ISD-200 USB/ATA driver
CONFIG_USB_STORAGE_ISD200
Say Y here if you want to use USB Mass Store devices based
on the In-Systems Design ISD-200 USB/ATA bridge.
Some of the products that use this chip are:
- Archos Jukebox 6000
- ISD SmartCable for Storage
- Taiwan Skymaster CD530U/DEL-0241 IDE bridge
- Sony CRX10U CD-R/RW drive
- CyQ've CQ8060A CDRW drive
- Planex eXtreme Drive RX-25HU USB-IDE cable (not model RX-25U)
USS720 parport driver USS720 parport driver
CONFIG_USB_USS720 CONFIG_USB_USS720
This driver is for USB parallel port adapters that use the Lucent This driver is for USB parallel port adapters that use the Lucent
......
VERSION = 2 VERSION = 2
PATCHLEVEL = 4 PATCHLEVEL = 4
SUBLEVEL = 10 SUBLEVEL = 10
EXTRAVERSION =-pre3 EXTRAVERSION =-pre4
KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
......
...@@ -705,6 +705,7 @@ CONFIG_USB_UHCI_ALT=y ...@@ -705,6 +705,7 @@ CONFIG_USB_UHCI_ALT=y
CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_DEBUG is not set # CONFIG_USB_STORAGE_DEBUG is not set
# CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_FREECOM is not set
# CONFIG_USB_STORAGE_ISD200 is not set
# CONFIG_USB_STORAGE_DPCM is not set # CONFIG_USB_STORAGE_DPCM is not set
# CONFIG_USB_ACM is not set # CONFIG_USB_ACM is not set
# CONFIG_USB_PRINTER is not set # CONFIG_USB_PRINTER is not set
...@@ -738,7 +739,6 @@ CONFIG_USB_STORAGE=y ...@@ -738,7 +739,6 @@ CONFIG_USB_STORAGE=y
# #
# USB Network adaptors # USB Network adaptors
# #
# CONFIG_USB_PLUSB is not set
# CONFIG_USB_PEGASUS is not set # CONFIG_USB_PEGASUS is not set
# CONFIG_USB_CATC is not set # CONFIG_USB_CATC is not set
# CONFIG_USB_KAWETH is not set # CONFIG_USB_KAWETH is not set
......
...@@ -651,11 +651,10 @@ int __init scc_enet_init(void) ...@@ -651,11 +651,10 @@ int __init scc_enet_init(void)
volatile scc_t *sccp; volatile scc_t *sccp;
volatile scc_enet_t *ep; volatile scc_enet_t *ep;
volatile immap_t *immap; volatile immap_t *immap;
extern unsigned long _get_IMMR(void);
cp = cpmp; /* Get pointer to Communication Processor */ cp = cpmp; /* Get pointer to Communication Processor */
immap = (immap_t *)(_get_IMMR() & 0xFFFF0000); /* and to internal registers */ immap = (immap_t *)(mfspr(IMMR) & 0xFFFF0000); /* and to internal registers */
bd = (bd_t *)__res; bd = (bd_t *)__res;
......
...@@ -1509,7 +1509,6 @@ int __init fec_enet_init(void) ...@@ -1509,7 +1509,6 @@ int __init fec_enet_init(void)
volatile immap_t *immap; volatile immap_t *immap;
volatile fec_t *fecp; volatile fec_t *fecp;
bd_t *bd; bd_t *bd;
extern uint _get_IMMR(void);
#ifdef CONFIG_SCC_ENET #ifdef CONFIG_SCC_ENET
unsigned char tmpaddr[6]; unsigned char tmpaddr[6];
#endif #endif
...@@ -1680,7 +1679,7 @@ int __init fec_enet_init(void) ...@@ -1680,7 +1679,7 @@ int __init fec_enet_init(void)
/* Bits moved from Rev. D onward. /* Bits moved from Rev. D onward.
*/ */
if ((_get_IMMR() & 0xffff) < 0x0501) if ((mfspr(IMMR) & 0xffff) < 0x0501)
immap->im_ioport.iop_pddir = 0x1c58; /* Pre rev. D */ immap->im_ioport.iop_pddir = 0x1c58; /* Pre rev. D */
else else
immap->im_ioport.iop_pddir = 0x1fff; /* Rev. D and later */ immap->im_ioport.iop_pddir = 0x1fff; /* Rev. D and later */
......
export-objs := bm_osl.o
O_TARGET := ospm_$(notdir $(CURDIR)).o O_TARGET := ospm_$(notdir $(CURDIR)).o
obj-m := $(O_TARGET) obj-m := $(O_TARGET)
EXTRA_CFLAGS += $(ACPI_CFLAGS) EXTRA_CFLAGS += $(ACPI_CFLAGS)
......
...@@ -48,20 +48,6 @@ ifeq ($(ARCH),s390x) ...@@ -48,20 +48,6 @@ ifeq ($(ARCH),s390x)
SERIAL = SERIAL =
endif endif
ifeq ($(ARCH),s390)
KEYMAP =
KEYBD =
CONSOLE =
SERIAL =
endif
ifeq ($(ARCH),s390x)
KEYMAP =
KEYBD =
CONSOLE =
SERIAL =
endif
ifeq ($(ARCH),m68k) ifeq ($(ARCH),m68k)
ifdef CONFIG_AMIGA ifdef CONFIG_AMIGA
KEYBD = amikeyb.o KEYBD = amikeyb.o
...@@ -241,5 +227,7 @@ consolemap_deftbl.c: $(FONTMAPFILE) conmakehash ...@@ -241,5 +227,7 @@ consolemap_deftbl.c: $(FONTMAPFILE) conmakehash
consolemap_deftbl.o: consolemap_deftbl.c $(TOPDIR)/include/linux/types.h consolemap_deftbl.o: consolemap_deftbl.c $(TOPDIR)/include/linux/types.h
.DELETE_ON_ERROR:
defkeymap.c: defkeymap.map defkeymap.c: defkeymap.map
loadkeys --mktable defkeymap.map > defkeymap.c set -e ; loadkeys --mktable $< | sed -e 's/^static *//' > $@
...@@ -1610,6 +1610,7 @@ static int rtl8139_thread (void *data) ...@@ -1610,6 +1610,7 @@ static int rtl8139_thread (void *data)
unsigned long timeout; unsigned long timeout;
daemonize (); daemonize ();
reparent_to_init();
spin_lock_irq(&current->sigmask_lock); spin_lock_irq(&current->sigmask_lock);
sigemptyset(&current->blocked); sigemptyset(&current->blocked);
recalc_sigpending(current); recalc_sigpending(current);
......
...@@ -137,12 +137,8 @@ obj-$(CONFIG_PPP_BSDCOMP) += bsd_comp.o ...@@ -137,12 +137,8 @@ obj-$(CONFIG_PPP_BSDCOMP) += bsd_comp.o
obj-$(CONFIG_PPPOE) += pppox.o pppoe.o obj-$(CONFIG_PPPOE) += pppox.o pppoe.o
obj-$(CONFIG_SLIP) += slip.o obj-$(CONFIG_SLIP) += slip.o
ifeq ($(CONFIG_SLIP),y) ifeq ($(CONFIG_SLIP_COMPRESSED),y)
obj-$(CONFIG_SLIP_COMPRESSED) += slhc.o obj-$(CONFIG_SLIP) += slhc.o
else
ifeq ($(CONFIG_SLIP),m)
obj-$(CONFIG_SLIP_COMPRESSED) += slhc.o
endif
endif endif
obj-$(CONFIG_STRIP) += strip.o obj-$(CONFIG_STRIP) += strip.o
......
...@@ -32,7 +32,12 @@ comment 'USB Controllers' ...@@ -32,7 +32,12 @@ comment 'USB Controllers'
if [ "$CONFIG_USB_STORAGE" != "n" ]; then if [ "$CONFIG_USB_STORAGE" != "n" ]; then
bool ' USB Mass Storage verbose debug' CONFIG_USB_STORAGE_DEBUG bool ' USB Mass Storage verbose debug' CONFIG_USB_STORAGE_DEBUG
bool ' Freecom USB/ATAPI Bridge support' CONFIG_USB_STORAGE_FREECOM bool ' Freecom USB/ATAPI Bridge support' CONFIG_USB_STORAGE_FREECOM
bool ' Microtech CompactFlash/SmartMedia reader' CONFIG_USB_STORAGE_DPCM bool ' ISD-200 USB/ATA Bridge support' CONFIG_USB_STORAGE_ISD200
bool ' Microtech CompactFlash/SmartMedia support' CONFIG_USB_STORAGE_DPCM
if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
bool ' HP CD-Writer 82xx support' CONFIG_USB_STORAGE_HP8200e
bool ' SanDisk SDDR-09 (and other SmartMedia) support' CONFIG_USB_STORAGE_SDDR09
fi
fi fi
dep_tristate ' USB Modem (CDC ACM) support' CONFIG_USB_ACM $CONFIG_USB dep_tristate ' USB Modem (CDC ACM) support' CONFIG_USB_ACM $CONFIG_USB
dep_tristate ' USB Printer support' CONFIG_USB_PRINTER $CONFIG_USB dep_tristate ' USB Printer support' CONFIG_USB_PRINTER $CONFIG_USB
......
/* Driver for USB Mass Storage compliant devices /* Driver for USB Mass Storage compliant devices
* SCSI layer glue code * SCSI layer glue code
* *
* $Id: scsiglue.c,v 1.21 2001/07/29 23:41:52 mdharm Exp $ * $Id: scsiglue.c,v 1.22 2001/09/02 04:29:27 mdharm Exp $
* *
* Current development and maintenance by: * Current development and maintenance by:
* (c) 1999, 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net) * (c) 1999, 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net)
......
/* Driver for SanDisk SDDR-09 SmartMedia reader /* Driver for SanDisk SDDR-09 SmartMedia reader
* *
* $Id: sddr09.c,v 1.18 2001/06/11 02:54:25 mdharm Exp $ * $Id: sddr09.c,v 1.19 2001/09/02 06:07:20 mdharm Exp $
* *
* SDDR09 driver v0.1: * SDDR09 driver v0.1:
* *
...@@ -693,7 +693,7 @@ int sddr09_read_map(struct us_data *us) { ...@@ -693,7 +693,7 @@ int sddr09_read_map(struct us_data *us) {
// scatterlist block i*64/128k = i*(2^6)*(2^-17) = i*(2^-11) // scatterlist block i*64/128k = i*(2^6)*(2^-17) = i*(2^-11)
for (i=0; i<numblocks; i++) { for (i=0; i<numblocks; i++) {
ptr = sg[i>>11].address+(i<<6); ptr = sg[i>>11].address+((i&0x7ff)<<6);
if (ptr[0]!=0xFF || ptr[1]!=0xFF || ptr[2]!=0xFF || if (ptr[0]!=0xFF || ptr[1]!=0xFF || ptr[2]!=0xFF ||
ptr[3]!=0xFF || ptr[4]!=0xFF || ptr[5]!=0xFF) { ptr[3]!=0xFF || ptr[4]!=0xFF || ptr[5]!=0xFF) {
US_DEBUGP("PBA %04X has no logical mapping: reserved area = " US_DEBUGP("PBA %04X has no logical mapping: reserved area = "
......
/* Driver for USB Mass Storage compliant devices /* Driver for USB Mass Storage compliant devices
* *
* $Id: transport.c,v 1.39 2001/03/10 16:46:28 zagor Exp $ * $Id: transport.c,v 1.40 2001/08/18 08:37:46 mdharm Exp $
* *
* Current development and maintenance by: * Current development and maintenance by:
* (c) 1999, 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net) * (c) 1999, 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net)
...@@ -371,10 +371,9 @@ int usb_stor_clear_halt(struct usb_device *dev, int pipe) ...@@ -371,10 +371,9 @@ int usb_stor_clear_halt(struct usb_device *dev, int pipe)
*/ */
static void usb_stor_blocking_completion(urb_t *urb) static void usb_stor_blocking_completion(urb_t *urb)
{ {
wait_queue_head_t *wqh_ptr = (wait_queue_head_t *)urb->context; struct completion *urb_done_ptr = (struct completion *)urb->context;
if (waitqueue_active(wqh_ptr)) complete(urb_done_ptr);
wake_up(wqh_ptr);
} }
/* This is our function to emulate usb_control_msg() but give us enough /* This is our function to emulate usb_control_msg() but give us enough
...@@ -384,8 +383,7 @@ int usb_stor_control_msg(struct us_data *us, unsigned int pipe, ...@@ -384,8 +383,7 @@ int usb_stor_control_msg(struct us_data *us, unsigned int pipe,
u8 request, u8 requesttype, u16 value, u16 index, u8 request, u8 requesttype, u16 value, u16 index,
void *data, u16 size) void *data, u16 size)
{ {
wait_queue_head_t wqh; struct completion urb_done;
wait_queue_t wait;
int status; int status;
devrequest *dr; devrequest *dr;
...@@ -402,9 +400,7 @@ int usb_stor_control_msg(struct us_data *us, unsigned int pipe, ...@@ -402,9 +400,7 @@ int usb_stor_control_msg(struct us_data *us, unsigned int pipe,
dr->length = cpu_to_le16(size); dr->length = cpu_to_le16(size);
/* set up data structures for the wakeup system */ /* set up data structures for the wakeup system */
init_waitqueue_head(&wqh); init_completion(&urb_done);
init_waitqueue_entry(&wait, current);
add_wait_queue(&wqh, &wait);
/* lock the URB */ /* lock the URB */
down(&(us->current_urb_sem)); down(&(us->current_urb_sem));
...@@ -412,33 +408,25 @@ int usb_stor_control_msg(struct us_data *us, unsigned int pipe, ...@@ -412,33 +408,25 @@ int usb_stor_control_msg(struct us_data *us, unsigned int pipe,
/* fill the URB */ /* fill the URB */
FILL_CONTROL_URB(us->current_urb, us->pusb_dev, pipe, FILL_CONTROL_URB(us->current_urb, us->pusb_dev, pipe,
(unsigned char*) dr, data, size, (unsigned char*) dr, data, size,
usb_stor_blocking_completion, &wqh); usb_stor_blocking_completion, &urb_done);
us->current_urb->actual_length = 0; us->current_urb->actual_length = 0;
us->current_urb->error_count = 0; us->current_urb->error_count = 0;
us->current_urb->transfer_flags = USB_ASYNC_UNLINK; us->current_urb->transfer_flags = USB_ASYNC_UNLINK;
/* submit the URB */ /* submit the URB */
set_current_state(TASK_UNINTERRUPTIBLE);
status = usb_submit_urb(us->current_urb); status = usb_submit_urb(us->current_urb);
if (status) { if (status) {
/* something went wrong */ /* something went wrong */
up(&(us->current_urb_sem)); up(&(us->current_urb_sem));
set_current_state(TASK_RUNNING);
remove_wait_queue(&wqh, &wait);
kfree(dr); kfree(dr);
return status; return status;
} }
/* wait for the completion of the URB */ /* wait for the completion of the URB */
up(&(us->current_urb_sem)); up(&(us->current_urb_sem));
while (us->current_urb->status == -EINPROGRESS) wait_for_completion(&urb_done);
schedule();
down(&(us->current_urb_sem)); down(&(us->current_urb_sem));
/* we either timed out or got woken up -- clean up either way */
set_current_state(TASK_RUNNING);
remove_wait_queue(&wqh, &wait);
/* return the actual length of the data transferred if no error*/ /* return the actual length of the data transferred if no error*/
status = us->current_urb->status; status = us->current_urb->status;
if (status >= 0) if (status >= 0)
...@@ -456,46 +444,35 @@ int usb_stor_control_msg(struct us_data *us, unsigned int pipe, ...@@ -456,46 +444,35 @@ int usb_stor_control_msg(struct us_data *us, unsigned int pipe,
int usb_stor_bulk_msg(struct us_data *us, void *data, int pipe, int usb_stor_bulk_msg(struct us_data *us, void *data, int pipe,
unsigned int len, unsigned int *act_len) unsigned int len, unsigned int *act_len)
{ {
wait_queue_head_t wqh; struct completion urb_done;
wait_queue_t wait;
int status; int status;
/* set up data structures for the wakeup system */ /* set up data structures for the wakeup system */
init_waitqueue_head(&wqh); init_completion(&urb_done);
init_waitqueue_entry(&wait, current);
add_wait_queue(&wqh, &wait);
/* lock the URB */ /* lock the URB */
down(&(us->current_urb_sem)); down(&(us->current_urb_sem));
/* fill the URB */ /* fill the URB */
FILL_BULK_URB(us->current_urb, us->pusb_dev, pipe, data, len, FILL_BULK_URB(us->current_urb, us->pusb_dev, pipe, data, len,
usb_stor_blocking_completion, &wqh); usb_stor_blocking_completion, &urb_done);
us->current_urb->actual_length = 0; us->current_urb->actual_length = 0;
us->current_urb->error_count = 0; us->current_urb->error_count = 0;
us->current_urb->transfer_flags = USB_ASYNC_UNLINK; us->current_urb->transfer_flags = USB_ASYNC_UNLINK;
/* submit the URB */ /* submit the URB */
set_current_state(TASK_UNINTERRUPTIBLE);
status = usb_submit_urb(us->current_urb); status = usb_submit_urb(us->current_urb);
if (status) { if (status) {
/* something went wrong */ /* something went wrong */
up(&(us->current_urb_sem)); up(&(us->current_urb_sem));
set_current_state(TASK_RUNNING);
remove_wait_queue(&wqh, &wait);
return status; return status;
} }
/* wait for the completion of the URB */ /* wait for the completion of the URB */
up(&(us->current_urb_sem)); up(&(us->current_urb_sem));
while (us->current_urb->status == -EINPROGRESS) wait_for_completion(&urb_done);
schedule();
down(&(us->current_urb_sem)); down(&(us->current_urb_sem));
/* we either timed out or got woken up -- clean up either way */
set_current_state(TASK_RUNNING);
remove_wait_queue(&wqh, &wait);
/* return the actual length of the data transferred */ /* return the actual length of the data transferred */
*act_len = us->current_urb->actual_length; *act_len = us->current_urb->actual_length;
......
/* Driver for USB Mass Storage compliant devices /* Driver for USB Mass Storage compliant devices
* Ununsual Devices File * Ununsual Devices File
* *
* $Id: unusual_devs.h,v 1.16 2001/07/30 00:27:59 mdharm Exp $ * $Id: unusual_devs.h,v 1.20 2001/09/02 05:12:57 mdharm Exp $
* *
* Current development and maintenance by: * Current development and maintenance by:
* (c) 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net) * (c) 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net)
...@@ -68,6 +68,19 @@ UNUSUAL_DEV( 0x0436, 0x0005, 0x0100, 0x0100, ...@@ -68,6 +68,19 @@ UNUSUAL_DEV( 0x0436, 0x0005, 0x0100, 0x0100,
US_FL_START_STOP ), US_FL_START_STOP ),
#endif #endif
/* Made with the help of Edd Dumbill <edd@usefulinc.com> */
UNUSUAL_DEV( 0x0451, 0x5409, 0x0001, 0x0001,
"Frontier Labs",
"Nex II Digital",
US_SC_SCSI, US_PR_BULK, NULL, US_FL_START_STOP),
/* Reported by Paul Stewart <stewart@wetlogic.net>
* This entry is needed because the device reports Sub=ff */
UNUSUAL_DEV( 0x04a4, 0x0004, 0x0001, 0x0001,
"Hitachi",
"DVD-CAM DZ-MV100A Camcorder",
US_SC_SCSI, US_PR_CB, NULL, US_FL_SINGLE_LUN),
UNUSUAL_DEV( 0x04cb, 0x0100, 0x0000, 0x2210, UNUSUAL_DEV( 0x04cb, 0x0100, 0x0000, 0x2210,
"Fujifilm", "Fujifilm",
"FinePix 1400Zoom", "FinePix 1400Zoom",
...@@ -155,13 +168,20 @@ UNUSUAL_DEV( 0x054c, 0x0010, 0x0106, 0x0322, ...@@ -155,13 +168,20 @@ UNUSUAL_DEV( 0x054c, 0x0010, 0x0106, 0x0322,
US_SC_SCSI, US_PR_CB, NULL, US_SC_SCSI, US_PR_CB, NULL,
US_FL_SINGLE_LUN | US_FL_START_STOP | US_FL_MODE_XLATE ), US_FL_SINGLE_LUN | US_FL_START_STOP | US_FL_MODE_XLATE ),
/* Reported by win@geeks.nl */
UNUSUAL_DEV( 0x054c, 0x0025, 0x0100, 0x0100,
"Sony",
"Memorystick NW-MS7",
US_SC_UFI, US_PR_CB, NULL,
US_FL_SINGLE_LUN | US_FL_START_STOP ),
UNUSUAL_DEV( 0x054c, 0x002d, 0x0100, 0x0100, UNUSUAL_DEV( 0x054c, 0x002d, 0x0100, 0x0100,
"Sony", "Sony",
"Memorystick MSAC-US1", "Memorystick MSAC-US1",
US_SC_UFI, US_PR_CB, NULL, US_SC_UFI, US_PR_CB, NULL,
US_FL_SINGLE_LUN | US_FL_START_STOP ), US_FL_SINGLE_LUN | US_FL_START_STOP ),
/* Submitted by Klaus Mueller <k.mueller@intership.de> */ /* Submitted by Klaus Mueller <k.mueller@intershop.de> */
UNUSUAL_DEV( 0x054c, 0x002e, 0x0106, 0x0310, UNUSUAL_DEV( 0x054c, 0x002e, 0x0106, 0x0310,
"Sony", "Sony",
"Handycam", "Handycam",
...@@ -198,12 +218,6 @@ UNUSUAL_DEV( 0x05ab, 0x0031, 0x0100, 0x0110, ...@@ -198,12 +218,6 @@ UNUSUAL_DEV( 0x05ab, 0x0031, 0x0100, 0x0110,
US_SC_ISD200, US_PR_BULK, isd200_Initialization, US_SC_ISD200, US_PR_BULK, isd200_Initialization,
0 ), 0 ),
UNUSUAL_DEV( 0x05ab, 0x0060, 0x0100, 0x0110,
"In-System",
"USB 2.0/IDE Bridge (ATA/ATAPI)",
US_SC_ISD200, US_PR_BULK, isd200_Initialization,
0 ),
UNUSUAL_DEV( 0x05ab, 0x0301, 0x0100, 0x0110, UNUSUAL_DEV( 0x05ab, 0x0301, 0x0100, 0x0110,
"In-System", "In-System",
"Portable USB Harddrive V2", "Portable USB Harddrive V2",
......
...@@ -633,7 +633,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) ...@@ -633,7 +633,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if (elf_ex.e_type == ET_DYN) { if (elf_ex.e_type == ET_DYN) {
load_bias += error - load_bias += error -
ELF_PAGESTART(load_bias + vaddr); ELF_PAGESTART(load_bias + vaddr);
load_addr += error; load_addr += load_bias;
} }
} }
k = elf_ppnt->p_vaddr; k = elf_ppnt->p_vaddr;
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
* SUCH DAMAGE. * SUCH DAMAGE.
*/ */
#ident "$Id: vxfs_bmap.c,v 1.22 2001/05/26 22:41:23 hch Exp hch $" #ident "$Id: vxfs_bmap.c,v 1.23 2001/07/05 19:48:03 hch Exp hch $"
/* /*
* Veritas filesystem driver - filesystem to disk block mapping. * Veritas filesystem driver - filesystem to disk block mapping.
......
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
#ifndef _VXFS_EXTERN_H_ #ifndef _VXFS_EXTERN_H_
#define _VXFS_EXTERN_H_ #define _VXFS_EXTERN_H_
#ident "$Id: vxfs_extern.h,v 1.20 2001/04/26 22:48:44 hch Exp hch $" #ident "$Id: vxfs_extern.h,v 1.21 2001/08/07 16:13:30 hch Exp hch $"
/* /*
* Veritas filesystem driver - external prototypes. * Veritas filesystem driver - external prototypes.
...@@ -55,8 +55,9 @@ extern int vxfs_read_fshead(struct super_block *); ...@@ -55,8 +55,9 @@ extern int vxfs_read_fshead(struct super_block *);
/* vxfs_inode.c */ /* vxfs_inode.c */
extern struct kmem_cache_s *vxfs_inode_cachep; extern struct kmem_cache_s *vxfs_inode_cachep;
extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t); extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t);
extern struct inode * vxfs_fake_inode(struct super_block *, extern struct inode * vxfs_get_fake_inode(struct super_block *,
struct vxfs_inode_info *); struct vxfs_inode_info *);
extern void vxfs_put_fake_inode(struct inode *);
extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t); extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t);
extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t); extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t);
extern void vxfs_read_inode(struct inode *); extern void vxfs_read_inode(struct inode *);
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
* SUCH DAMAGE. * SUCH DAMAGE.
*/ */
#ident "$Id: vxfs_fshead.c,v 1.18 2001/04/25 18:11:23 hch Exp $" #ident "$Id: vxfs_fshead.c,v 1.19 2001/08/07 16:14:10 hch Exp hch $"
/* /*
* Veritas filesystem driver - fileset header routines. * Veritas filesystem driver - fileset header routines.
...@@ -124,7 +124,7 @@ vxfs_read_fshead(struct super_block *sbp) ...@@ -124,7 +124,7 @@ vxfs_read_fshead(struct super_block *sbp)
vxfs_dumpi(vip, infp->vsi_fshino); vxfs_dumpi(vip, infp->vsi_fshino);
#endif #endif
if (!(infp->vsi_fship = vxfs_fake_inode(sbp, vip))) { if (!(infp->vsi_fship = vxfs_get_fake_inode(sbp, vip))) {
printk(KERN_ERR "vxfs: unabled to get fsh inode\n"); printk(KERN_ERR "vxfs: unabled to get fsh inode\n");
return -EINVAL; return -EINVAL;
} }
...@@ -148,7 +148,7 @@ vxfs_read_fshead(struct super_block *sbp) ...@@ -148,7 +148,7 @@ vxfs_read_fshead(struct super_block *sbp)
#endif #endif
tip = vxfs_blkiget(sbp, infp->vsi_iext, sfp->fsh_ilistino[0]); tip = vxfs_blkiget(sbp, infp->vsi_iext, sfp->fsh_ilistino[0]);
if (!tip || ((infp->vsi_stilist = vxfs_fake_inode(sbp, tip)) == NULL)) { if (!tip || ((infp->vsi_stilist = vxfs_get_fake_inode(sbp, tip)) == NULL)) {
printk(KERN_ERR "vxfs: unabled to get structual list inode\n"); printk(KERN_ERR "vxfs: unabled to get structual list inode\n");
return -EINVAL; return -EINVAL;
} else if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) { } else if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) {
...@@ -158,7 +158,7 @@ vxfs_read_fshead(struct super_block *sbp) ...@@ -158,7 +158,7 @@ vxfs_read_fshead(struct super_block *sbp)
} }
tip = vxfs_stiget(sbp, pfp->fsh_ilistino[0]); tip = vxfs_stiget(sbp, pfp->fsh_ilistino[0]);
if (!tip || ((infp->vsi_ilist = vxfs_fake_inode(sbp, tip)) == NULL)) { if (!tip || ((infp->vsi_ilist = vxfs_get_fake_inode(sbp, tip)) == NULL)) {
printk(KERN_ERR "vxfs: unabled to get inode list inode\n"); printk(KERN_ERR "vxfs: unabled to get inode list inode\n");
return -EINVAL; return -EINVAL;
} else if (!VXFS_ISILT(VXFS_INO(infp->vsi_ilist))) { } else if (!VXFS_ISILT(VXFS_INO(infp->vsi_ilist))) {
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
* SUCH DAMAGE. * SUCH DAMAGE.
*/ */
#ident "$Id: vxfs_inode.c,v 1.36 2001/05/26 22:28:02 hch Exp hch $" #ident "$Id: vxfs_inode.c,v 1.37 2001/08/07 16:13:30 hch Exp hch $"
/* /*
* Veritas filesystem driver - inode routines. * Veritas filesystem driver - inode routines.
...@@ -47,6 +47,7 @@ extern struct address_space_operations vxfs_immed_aops; ...@@ -47,6 +47,7 @@ extern struct address_space_operations vxfs_immed_aops;
extern struct inode_operations vxfs_immed_symlink_iops; extern struct inode_operations vxfs_immed_symlink_iops;
static struct file_operations vxfs_file_operations = { static struct file_operations vxfs_file_operations = {
.llseek = generic_file_llseek,
.read = generic_file_read, .read = generic_file_read,
.mmap = generic_file_mmap, .mmap = generic_file_mmap,
}; };
...@@ -93,7 +94,7 @@ vxfs_dumpi(struct vxfs_inode_info *vip, ino_t ino) ...@@ -93,7 +94,7 @@ vxfs_dumpi(struct vxfs_inode_info *vip, ino_t ino)
* NOTE: * NOTE:
* While __vxfs_iget uses the pagecache vxfs_blkiget uses the * While __vxfs_iget uses the pagecache vxfs_blkiget uses the
* buffercache. This function should not be used outside the * buffercache. This function should not be used outside the
* read_super() method, othwerwise the data may be incoherent. * read_super() method, otherwise the data may be incoherent.
*/ */
struct vxfs_inode_info * struct vxfs_inode_info *
vxfs_blkiget(struct super_block *sbp, u_long extent, ino_t ino) vxfs_blkiget(struct super_block *sbp, u_long extent, ino_t ino)
...@@ -251,7 +252,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip) ...@@ -251,7 +252,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
} }
/** /**
* vxfs_fake_inode - get fake inode structure * vxfs_get_fake_inode - get fake inode structure
* @sbp: filesystem superblock * @sbp: filesystem superblock
* @vip: fspriv inode * @vip: fspriv inode
* *
...@@ -261,7 +262,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip) ...@@ -261,7 +262,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
* Returns the filled VFS inode. * Returns the filled VFS inode.
*/ */
struct inode * struct inode *
vxfs_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip) vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
{ {
struct inode *ip = NULL; struct inode *ip = NULL;
...@@ -272,6 +273,19 @@ vxfs_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip) ...@@ -272,6 +273,19 @@ vxfs_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
return (ip); return (ip);
} }
/**
* vxfs_put_fake_inode - free faked inode
* *ip: VFS inode
*
* Description:
* vxfs_put_fake_inode frees all data asssociated with @ip.
*/
void
vxfs_put_fake_inode(struct inode *ip)
{
iput(ip);
}
/** /**
* vxfs_read_inode - fill in inode information * vxfs_read_inode - fill in inode information
* @ip: inode pointer to fill * @ip: inode pointer to fill
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
* SUCH DAMAGE. * SUCH DAMAGE.
*/ */
#ident "$Id: vxfs_olt.c,v 1.8 2001/04/25 18:11:23 hch Exp hch $" #ident "$Id: vxfs_olt.c,v 1.9 2001/08/07 16:14:45 hch Exp hch $"
/* /*
* Veritas filesystem driver - object location table support. * Veritas filesystem driver - object location table support.
...@@ -56,11 +56,11 @@ vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp) ...@@ -56,11 +56,11 @@ vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp)
} }
static __inline__ u_long static __inline__ u_long
vxfs_oblock(daddr_t oblock, u_long bsize) vxfs_oblock(struct super_block *sbp, daddr_t block, u_long bsize)
{ {
if ((oblock * BLOCK_SIZE) % bsize) if (sbp->s_blocksize % bsize)
BUG(); BUG();
return ((oblock * BLOCK_SIZE) / bsize); return (block * (sbp->s_blocksize / bsize));
} }
...@@ -85,7 +85,8 @@ vxfs_read_olt(struct super_block *sbp, u_long bsize) ...@@ -85,7 +85,8 @@ vxfs_read_olt(struct super_block *sbp, u_long bsize)
char *oaddr, *eaddr; char *oaddr, *eaddr;
bp = bread(sbp->s_dev, vxfs_oblock(infp->vsi_oltext, bsize), bsize); bp = bread(sbp->s_dev,
vxfs_oblock(sbp, infp->vsi_oltext, bsize), bsize);
if (!bp || !bp->b_data) if (!bp || !bp->b_data)
goto fail; goto fail;
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
* SUCH DAMAGE. * SUCH DAMAGE.
*/ */
#ident "$Id: vxfs_super.c,v 1.25 2001/05/25 18:25:55 hch Exp hch $" #ident "$Id: vxfs_super.c,v 1.26 2001/08/07 16:13:30 hch Exp hch $"
/* /*
* Veritas filesystem driver - superblock related routines. * Veritas filesystem driver - superblock related routines.
...@@ -54,7 +54,6 @@ MODULE_DESCRIPTION("Veritas Filesystem (VxFS) driver"); ...@@ -54,7 +54,6 @@ MODULE_DESCRIPTION("Veritas Filesystem (VxFS) driver");
static void vxfs_put_super(struct super_block *); static void vxfs_put_super(struct super_block *);
static int vxfs_statfs(struct super_block *, struct statfs *); static int vxfs_statfs(struct super_block *, struct statfs *);
static struct super_operations vxfs_super_ops = { static struct super_operations vxfs_super_ops = {
.read_inode = vxfs_read_inode, .read_inode = vxfs_read_inode,
.put_inode = vxfs_put_inode, .put_inode = vxfs_put_inode,
...@@ -83,14 +82,15 @@ vxfs_validate_bsize(kdev_t dev) ...@@ -83,14 +82,15 @@ vxfs_validate_bsize(kdev_t dev)
* vxfs_put_super frees all resources allocated for @sbp * vxfs_put_super frees all resources allocated for @sbp
* after the last instance of the filesystem is unmounted. * after the last instance of the filesystem is unmounted.
*/ */
static void static void
vxfs_put_super(struct super_block *sbp) vxfs_put_super(struct super_block *sbp)
{ {
struct vxfs_sb_info *infp = VXFS_SBI(sbp); struct vxfs_sb_info *infp = VXFS_SBI(sbp);
vxfs_put_inode(infp->vsi_fship); vxfs_put_fake_inode(infp->vsi_fship);
vxfs_put_inode(infp->vsi_ilist); vxfs_put_fake_inode(infp->vsi_ilist);
vxfs_put_inode(infp->vsi_stilist); vxfs_put_fake_inode(infp->vsi_stilist);
brelse(infp->vsi_bp); brelse(infp->vsi_bp);
kfree(infp); kfree(infp);
...@@ -135,7 +135,7 @@ vxfs_statfs(struct super_block *sbp, struct statfs *bufp) ...@@ -135,7 +135,7 @@ vxfs_statfs(struct super_block *sbp, struct statfs *bufp)
* vxfs_read_super - read superblock into memory and initalize filesystem * vxfs_read_super - read superblock into memory and initalize filesystem
* @sbp: VFS superblock (to fill) * @sbp: VFS superblock (to fill)
* @dp: fs private mount data * @dp: fs private mount data
* @silent: ??? * @silent: do not complain loudly when sth is wrong
* *
* Description: * Description:
* We are called on the first mount of a filesystem to read the * We are called on the first mount of a filesystem to read the
...@@ -167,18 +167,23 @@ vxfs_read_super(struct super_block *sbp, void *dp, int silent) ...@@ -167,18 +167,23 @@ vxfs_read_super(struct super_block *sbp, void *dp, int silent)
bp = bread(dev, 1, bsize); bp = bread(dev, 1, bsize);
if (!bp) { if (!bp) {
printk(KERN_WARNING "vxfs: unable to read disk superblock\n"); if (!silent) {
printk(KERN_WARNING
"vxfs: unable to read disk superblock\n");
}
goto out; goto out;
} }
rsbp = (struct vxfs_sb *)bp->b_data; rsbp = (struct vxfs_sb *)bp->b_data;
if (rsbp->vs_magic != VXFS_SUPER_MAGIC) { if (rsbp->vs_magic != VXFS_SUPER_MAGIC) {
printk(KERN_NOTICE "vxfs: WRONG superblock magic\n"); if (!silent)
printk(KERN_NOTICE "vxfs: WRONG superblock magic\n");
goto out; goto out;
} }
if (rsbp->vs_version < 2 || rsbp->vs_version > 4) { if ((rsbp->vs_version < 2 || rsbp->vs_version > 4) && !silent) {
printk(KERN_NOTICE "vxfs: unsupported VxFS version (%d)\n", rsbp->vs_version); printk(KERN_NOTICE "vxfs: unsupported VxFS version (%d)\n",
rsbp->vs_version);
goto out; goto out;
} }
...@@ -188,6 +193,7 @@ vxfs_read_super(struct super_block *sbp, void *dp, int silent) ...@@ -188,6 +193,7 @@ vxfs_read_super(struct super_block *sbp, void *dp, int silent)
#endif #endif
sbp->s_magic = rsbp->vs_magic; sbp->s_magic = rsbp->vs_magic;
sbp->s_blocksize = rsbp->vs_bsize;
sbp->u.generic_sbp = (void *)infp; sbp->u.generic_sbp = (void *)infp;
infp->vsi_raw = rsbp; infp->vsi_raw = rsbp;
...@@ -195,7 +201,6 @@ vxfs_read_super(struct super_block *sbp, void *dp, int silent) ...@@ -195,7 +201,6 @@ vxfs_read_super(struct super_block *sbp, void *dp, int silent)
infp->vsi_oltext = rsbp->vs_oltext[0]; infp->vsi_oltext = rsbp->vs_oltext[0];
infp->vsi_oltsize = rsbp->vs_oltsize; infp->vsi_oltsize = rsbp->vs_oltsize;
sbp->s_blocksize = rsbp->vs_bsize;
switch (rsbp->vs_bsize) { switch (rsbp->vs_bsize) {
case 1024: case 1024:
...@@ -208,8 +213,11 @@ vxfs_read_super(struct super_block *sbp, void *dp, int silent) ...@@ -208,8 +213,11 @@ vxfs_read_super(struct super_block *sbp, void *dp, int silent)
sbp->s_blocksize_bits = 12; sbp->s_blocksize_bits = 12;
break; break;
default: default:
printk(KERN_WARNING "vxfs: unsupported blocksise: %d\n", if (!silent) {
printk(KERN_WARNING
"vxfs: unsupported blocksise: %d\n",
rsbp->vs_bsize); rsbp->vs_bsize);
}
goto out; goto out;
} }
...@@ -220,20 +228,28 @@ vxfs_read_super(struct super_block *sbp, void *dp, int silent) ...@@ -220,20 +228,28 @@ vxfs_read_super(struct super_block *sbp, void *dp, int silent)
if (vxfs_read_fshead(sbp)) { if (vxfs_read_fshead(sbp)) {
printk(KERN_WARNING "vxfs: unable to read fshead\n"); printk(KERN_WARNING "vxfs: unable to read fshead\n");
return NULL; goto out;
} }
sbp->s_op = &vxfs_super_ops; sbp->s_op = &vxfs_super_ops;
if ((sbp->s_root = d_alloc_root(iget(sbp, VXFS_ROOT_INO)))) sbp->s_root = d_alloc_root(iget(sbp, VXFS_ROOT_INO));
return (sbp); if (!sbp->s_root) {
printk(KERN_WARNING "vxfs: unable to get root dentry.\n");
goto out_free_ilist;
}
return (sbp);
printk(KERN_WARNING "vxfs: unable to get root dentry.\n"); out_free_ilist:
vxfs_put_fake_inode(infp->vsi_fship);
vxfs_put_fake_inode(infp->vsi_ilist);
vxfs_put_fake_inode(infp->vsi_stilist);
out: out:
brelse(bp);
kfree(infp); kfree(infp);
return NULL; return NULL;
} }
/* /*
* The usual module blurb. * The usual module blurb.
*/ */
...@@ -246,7 +262,7 @@ vxfs_init(void) ...@@ -246,7 +262,7 @@ vxfs_init(void)
sizeof(struct vxfs_inode_info), 0, 0, NULL, NULL); sizeof(struct vxfs_inode_info), 0, 0, NULL, NULL);
if (vxfs_inode_cachep) if (vxfs_inode_cachep)
return (register_filesystem(&vxfs_fs_type)); return (register_filesystem(&vxfs_fs_type));
return 0; return -ENOMEM;
} }
static void __exit static void __exit
......
...@@ -139,35 +139,55 @@ char * getname(const char * filename) ...@@ -139,35 +139,55 @@ char * getname(const char * filename)
} }
/* /*
* permission() * vfs_permission()
* *
* is used to check for read/write/execute permissions on a file. * is used to check for read/write/execute permissions on a file.
* We use "fsuid" for this, letting us set arbitrary permissions * We use "fsuid" for this, letting us set arbitrary permissions
* for filesystem access without changing the "normal" uids which * for filesystem access without changing the "normal" uids which
* are used for other things.. * are used for other things..
*/ */
int vfs_permission(struct inode * inode,int mask) int vfs_permission(struct inode * inode, int mask)
{ {
int mode = inode->i_mode; umode_t mode = inode->i_mode;
if ((mask & S_IWOTH) && IS_RDONLY(inode) && if (mask & MAY_WRITE) {
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) /*
return -EROFS; /* Nobody gets write access to a read-only fs */ * Nobody gets write access to a read-only fs.
*/
if (IS_RDONLY(inode) &&
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
return -EROFS;
if ((mask & S_IWOTH) && IS_IMMUTABLE(inode)) /*
return -EACCES; /* Nobody gets write access to an immutable file */ * Nobody gets write access to an immutable file.
*/
if (IS_IMMUTABLE(inode))
return -EACCES;
}
if (current->fsuid == inode->i_uid) if (current->fsuid == inode->i_uid)
mode >>= 6; mode >>= 6;
else if (in_group_p(inode->i_gid)) else if (in_group_p(inode->i_gid))
mode >>= 3; mode >>= 3;
if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE)) /*
* If the DACs are ok we don't need any capability check.
*/
if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
return 0; return 0;
/* read and search access */ /*
if ((mask == S_IROTH) || * Read/write DACs are always overridable.
(S_ISDIR(inode->i_mode) && !(mask & ~(S_IROTH | S_IXOTH)))) * Executable DACs are overridable if at least one exec bit is set.
*/
if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO))
if (capable(CAP_DAC_OVERRIDE))
return 0;
/*
* Searching includes executable on directories, else just read.
*/
if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
if (capable(CAP_DAC_READ_SEARCH)) if (capable(CAP_DAC_READ_SEARCH))
return 0; return 0;
......
...@@ -499,6 +499,7 @@ int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th, ...@@ -499,6 +499,7 @@ int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th,
unsigned long border = 0; unsigned long border = 0;
unsigned long bstart = 0; unsigned long bstart = 0;
unsigned long hash_in, hash_out; unsigned long hash_in, hash_out;
unsigned long saved_search_start=search_start;
int allocated[PREALLOCATION_SIZE]; int allocated[PREALLOCATION_SIZE];
int blks; int blks;
...@@ -604,7 +605,15 @@ int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th, ...@@ -604,7 +605,15 @@ int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th,
** and should probably be removed ** and should probably be removed
*/ */
if ( search_start < border ) search_start=border; if ( search_start < border ) search_start=border;
/* If the disk free space is already below 10% we should
** start looking for the free blocks from the beginning
** of the partition, before the border line.
*/
if ( SB_FREE_BLOCKS(th->t_super) <= (SB_BLOCK_COUNT(th->t_super) / 10) ) {
search_start=saved_search_start;
}
*free_blocknrs = 0; *free_blocknrs = 0;
blks = PREALLOCATION_SIZE-1; blks = PREALLOCATION_SIZE-1;
for (blks_gotten=0; blks_gotten<PREALLOCATION_SIZE; blks_gotten++) { for (blks_gotten=0; blks_gotten<PREALLOCATION_SIZE; blks_gotten++) {
......
...@@ -55,6 +55,7 @@ void reiserfs_delete_inode (struct inode * inode) ...@@ -55,6 +55,7 @@ void reiserfs_delete_inode (struct inode * inode)
; ;
} }
clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */ clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */
inode->i_blocks = 0;
unlock_kernel() ; unlock_kernel() ;
} }
...@@ -525,16 +526,26 @@ int reiserfs_get_block (struct inode * inode, long block, ...@@ -525,16 +526,26 @@ int reiserfs_get_block (struct inode * inode, long block,
int fs_gen; int fs_gen;
int windex ; int windex ;
struct reiserfs_transaction_handle th ; struct reiserfs_transaction_handle th ;
int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 ; /* space reserved in transaction batch:
. 3 balancings in direct->indirect conversion
. 1 block involved into reiserfs_update_sd()
XXX in practically impossible worst case direct2indirect()
can incur (much) more that 3 balancings. */
int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1;
int version; int version;
int transaction_started = 0 ; int transaction_started = 0 ;
loff_t new_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ; loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
/* bad.... */ /* bad.... */
lock_kernel() ; lock_kernel() ;
th.t_trans_id = 0 ; th.t_trans_id = 0 ;
version = inode_items_version (inode); version = inode_items_version (inode);
if (block < 0) {
unlock_kernel();
return -EIO;
}
if (!file_capable (inode, block)) { if (!file_capable (inode, block)) {
unlock_kernel() ; unlock_kernel() ;
return -EFBIG; return -EFBIG;
...@@ -552,20 +563,14 @@ int reiserfs_get_block (struct inode * inode, long block, ...@@ -552,20 +563,14 @@ int reiserfs_get_block (struct inode * inode, long block,
return ret; return ret;
} }
if (block < 0) {
unlock_kernel();
return -EIO;
}
inode->u.reiserfs_i.i_pack_on_close = 1 ; inode->u.reiserfs_i.i_pack_on_close = 1 ;
windex = push_journal_writer("reiserfs_get_block") ; windex = push_journal_writer("reiserfs_get_block") ;
/* set the key of the first byte in the 'block'-th block of file */ /* set the key of the first byte in the 'block'-th block of file */
make_cpu_key (&key, inode, make_cpu_key (&key, inode, new_offset,
(loff_t)block * inode->i_sb->s_blocksize + 1, // k_offset
TYPE_ANY, 3/*key length*/); TYPE_ANY, 3/*key length*/);
if ((new_offset + inode->i_sb->s_blocksize) >= inode->i_size) { if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
journal_begin(&th, inode->i_sb, jbegin_count) ; journal_begin(&th, inode->i_sb, jbegin_count) ;
transaction_started = 1 ; transaction_started = 1 ;
} }
...@@ -618,10 +623,13 @@ int reiserfs_get_block (struct inode * inode, long block, ...@@ -618,10 +623,13 @@ int reiserfs_get_block (struct inode * inode, long block,
} }
if (indirect_item_found (retval, ih)) { if (indirect_item_found (retval, ih)) {
b_blocknr_t unfm_ptr;
/* 'block'-th block is in the file already (there is /* 'block'-th block is in the file already (there is
corresponding cell in some indirect item). But it may be corresponding cell in some indirect item). But it may be
zero unformatted node pointer (hole) */ zero unformatted node pointer (hole) */
if (!item[pos_in_item]) { unfm_ptr = le32_to_cpu (item[pos_in_item]);
if (unfm_ptr == 0) {
/* use allocated block to plug the hole */ /* use allocated block to plug the hole */
reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
...@@ -630,15 +638,14 @@ int reiserfs_get_block (struct inode * inode, long block, ...@@ -630,15 +638,14 @@ int reiserfs_get_block (struct inode * inode, long block,
} }
bh_result->b_state |= (1UL << BH_New); bh_result->b_state |= (1UL << BH_New);
item[pos_in_item] = cpu_to_le32 (allocated_block_nr); item[pos_in_item] = cpu_to_le32 (allocated_block_nr);
unfm_ptr = allocated_block_nr;
journal_mark_dirty (&th, inode->i_sb, bh); journal_mark_dirty (&th, inode->i_sb, bh);
inode->i_blocks += (inode->i_sb->s_blocksize / 512) ; inode->i_blocks += (inode->i_sb->s_blocksize / 512) ;
reiserfs_update_sd(&th, inode) ; reiserfs_update_sd(&th, inode) ;
} }
set_block_dev_mapped(bh_result, le32_to_cpu (item[pos_in_item]), inode); set_block_dev_mapped(bh_result, unfm_ptr, inode);
pathrelse (&path); pathrelse (&path);
#ifdef REISERFS_CHECK
pop_journal_writer(windex) ; pop_journal_writer(windex) ;
#endif /* REISERFS_CHECK */
if (transaction_started) if (transaction_started)
journal_end(&th, inode->i_sb, jbegin_count) ; journal_end(&th, inode->i_sb, jbegin_count) ;
...@@ -815,8 +822,8 @@ int reiserfs_get_block (struct inode * inode, long block, ...@@ -815,8 +822,8 @@ int reiserfs_get_block (struct inode * inode, long block,
goto failure; goto failure;
} }
if (retval == POSITION_FOUND) { if (retval == POSITION_FOUND) {
reiserfs_warning ("vs-: reiserfs_get_block: " reiserfs_warning ("vs-825: reiserfs_get_block: "
"%k should not be found", &key); "%k should not be found\n", &key);
retval = -EEXIST; retval = -EEXIST;
if (allocated_block_nr) if (allocated_block_nr)
reiserfs_free_block (&th, allocated_block_nr); reiserfs_free_block (&th, allocated_block_nr);
......
...@@ -815,7 +815,7 @@ static void remove_all_from_journal_list(struct super_block *p_s_sb, struct reis ...@@ -815,7 +815,7 @@ static void remove_all_from_journal_list(struct super_block *p_s_sb, struct reis
** called by flush_journal_list, before it calls remove_all_from_journal_list ** called by flush_journal_list, before it calls remove_all_from_journal_list
** **
*/ */
static int update_journal_header_block(struct super_block *p_s_sb, unsigned long offset, unsigned long trans_id) { static int _update_journal_header_block(struct super_block *p_s_sb, unsigned long offset, unsigned long trans_id) {
struct reiserfs_journal_header *jh ; struct reiserfs_journal_header *jh ;
if (trans_id >= SB_JOURNAL(p_s_sb)->j_last_flush_trans_id) { if (trans_id >= SB_JOURNAL(p_s_sb)->j_last_flush_trans_id) {
if (buffer_locked((SB_JOURNAL(p_s_sb)->j_header_bh))) { if (buffer_locked((SB_JOURNAL(p_s_sb)->j_header_bh))) {
...@@ -834,12 +834,21 @@ static int update_journal_header_block(struct super_block *p_s_sb, unsigned long ...@@ -834,12 +834,21 @@ static int update_journal_header_block(struct super_block *p_s_sb, unsigned long
ll_rw_block(WRITE, 1, &(SB_JOURNAL(p_s_sb)->j_header_bh)) ; ll_rw_block(WRITE, 1, &(SB_JOURNAL(p_s_sb)->j_header_bh)) ;
wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ; wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ;
if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) { if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) {
reiserfs_panic(p_s_sb, "journal-712: buffer write failed\n") ; printk( "reiserfs: journal-837: IO error during journal replay\n" );
return -EIO ;
} }
} }
return 0 ; return 0 ;
} }
static int update_journal_header_block(struct super_block *p_s_sb,
unsigned long offset,
unsigned long trans_id) {
if (_update_journal_header_block(p_s_sb, offset, trans_id)) {
reiserfs_panic(p_s_sb, "journal-712: buffer write failed\n") ;
}
return 0 ;
}
/* /*
** flush any and all journal lists older than you are ** flush any and all journal lists older than you are
** can only be called from flush_journal_list ** can only be called from flush_journal_list
...@@ -1374,6 +1383,9 @@ static int journal_transaction_is_valid(struct super_block *p_s_sb, struct buffe ...@@ -1374,6 +1383,9 @@ static int journal_transaction_is_valid(struct super_block *p_s_sb, struct buffe
struct buffer_head *c_bh ; struct buffer_head *c_bh ;
unsigned long offset ; unsigned long offset ;
if (!d_bh)
return 0 ;
desc = (struct reiserfs_journal_desc *)d_bh->b_data ; desc = (struct reiserfs_journal_desc *)d_bh->b_data ;
if (le32_to_cpu(desc->j_len) > 0 && !memcmp(desc->j_magic, JOURNAL_DESC_MAGIC, 8)) { if (le32_to_cpu(desc->j_len) > 0 && !memcmp(desc->j_magic, JOURNAL_DESC_MAGIC, 8)) {
if (oldest_invalid_trans_id && *oldest_invalid_trans_id && le32_to_cpu(desc->j_trans_id) > *oldest_invalid_trans_id) { if (oldest_invalid_trans_id && *oldest_invalid_trans_id && le32_to_cpu(desc->j_trans_id) > *oldest_invalid_trans_id) {
...@@ -1641,8 +1653,6 @@ static int journal_read(struct super_block *p_s_sb) { ...@@ -1641,8 +1653,6 @@ static int journal_read(struct super_block *p_s_sb) {
if (continue_replay && is_read_only(p_s_sb->s_dev)) { if (continue_replay && is_read_only(p_s_sb->s_dev)) {
printk("clm-2076: device is readonly, unable to replay log\n") ; printk("clm-2076: device is readonly, unable to replay log\n") ;
brelse(SB_JOURNAL(p_s_sb)->j_header_bh) ;
SB_JOURNAL(p_s_sb)->j_header_bh = NULL ;
return -1 ; return -1 ;
} }
if (continue_replay && (p_s_sb->s_flags & MS_RDONLY)) { if (continue_replay && (p_s_sb->s_flags & MS_RDONLY)) {
...@@ -1734,9 +1744,14 @@ static int journal_read(struct super_block *p_s_sb) { ...@@ -1734,9 +1744,14 @@ static int journal_read(struct super_block *p_s_sb) {
printk("reiserfs: replayed %d transactions in %lu seconds\n", replay_count, printk("reiserfs: replayed %d transactions in %lu seconds\n", replay_count,
CURRENT_TIME - start) ; CURRENT_TIME - start) ;
} }
if (!is_read_only(p_s_sb->s_dev)) { if (!is_read_only(p_s_sb->s_dev) &&
update_journal_header_block(p_s_sb, SB_JOURNAL(p_s_sb)->j_start, _update_journal_header_block(p_s_sb, SB_JOURNAL(p_s_sb)->j_start,
SB_JOURNAL(p_s_sb)->j_last_flush_trans_id) ; SB_JOURNAL(p_s_sb)->j_last_flush_trans_id))
{
/* replay failed, caller must call free_journal_ram and abort
** the mount
*/
return -1 ;
} }
return 0 ; return 0 ;
} }
......
...@@ -1857,6 +1857,7 @@ void reiserfs_do_truncate (struct reiserfs_transaction_handle *th, ...@@ -1857,6 +1857,7 @@ void reiserfs_do_truncate (struct reiserfs_transaction_handle *th,
return; return;
} }
if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
pathrelse (&s_search_path);
reiserfs_warning ("PAP-5660: reiserfs_do_truncate: " reiserfs_warning ("PAP-5660: reiserfs_do_truncate: "
"wrong result %d of search for %K\n", retval, &s_item_key); "wrong result %d of search for %K\n", retval, &s_item_key);
return; return;
......
...@@ -29,19 +29,11 @@ ...@@ -29,19 +29,11 @@
#endif #endif
#define SUPPORT_OLD_FORMAT
#define REISERFS_OLD_BLOCKSIZE 4096 #define REISERFS_OLD_BLOCKSIZE 4096
#define REISERFS_SUPER_MAGIC_STRING_OFFSET_NJ 20 #define REISERFS_SUPER_MAGIC_STRING_OFFSET_NJ 20
#if 0
// this one is not used currently
inline void reiserfs_mark_buffer_dirty (struct buffer_head * bh, int flag)
{
mark_buffer_dirty (bh, flag);
}
#endif
// //
// a portion of this function, particularly the VFS interface portion, // a portion of this function, particularly the VFS interface portion,
...@@ -367,98 +359,34 @@ void check_bitmap (struct super_block * s) ...@@ -367,98 +359,34 @@ void check_bitmap (struct super_block * s)
free, SB_FREE_BLOCKS (s)); free, SB_FREE_BLOCKS (s));
} }
#ifdef SUPPORT_OLD_FORMAT
/* support old disk layout */
static int read_old_super_block (struct super_block * s, int size)
{
struct buffer_head * bh;
struct reiserfs_super_block * rs;
printk("read_old_super_block: try to find super block in old location\n");
/* there are only 4k-sized blocks in v3.5.10 */
if (size != REISERFS_OLD_BLOCKSIZE)
set_blocksize(s->s_dev, REISERFS_OLD_BLOCKSIZE);
bh = bread (s->s_dev,
REISERFS_OLD_DISK_OFFSET_IN_BYTES / REISERFS_OLD_BLOCKSIZE,
REISERFS_OLD_BLOCKSIZE);
if (!bh) {
printk("read_old_super_block: unable to read superblock on dev %s\n", kdevname(s->s_dev));
return 1;
}
rs = (struct reiserfs_super_block *)bh->b_data;
if (strncmp (rs->s_magic, REISERFS_SUPER_MAGIC_STRING, strlen ( REISERFS_SUPER_MAGIC_STRING))) {
/* pre-journaling version check */
if(!strncmp((char*)rs + REISERFS_SUPER_MAGIC_STRING_OFFSET_NJ,
REISERFS_SUPER_MAGIC_STRING, strlen(REISERFS_SUPER_MAGIC_STRING))) {
printk("read_old_super_blockr: a pre-journaling reiserfs filesystem isn't suitable there.\n");
brelse(bh);
return 1;
}
brelse (bh);
printk ("read_old_super_block: can't find a reiserfs filesystem on dev %s.\n", kdevname(s->s_dev));
return 1;
}
if(REISERFS_OLD_BLOCKSIZE != le16_to_cpu (rs->s_blocksize)) {
printk("read_old_super_block: blocksize mismatch, super block corrupted\n");
brelse(bh);
return 1;
}
s->s_blocksize = REISERFS_OLD_BLOCKSIZE;
s->s_blocksize_bits = 0;
while ((1 << s->s_blocksize_bits) != s->s_blocksize)
s->s_blocksize_bits ++;
SB_BUFFER_WITH_SB (s) = bh; static int read_super_block (struct super_block * s, int size, int offset)
SB_DISK_SUPER_BLOCK (s) = rs;
s->s_op = &reiserfs_sops;
return 0;
}
#endif
//
// FIXME: mounting old filesystems we _must_ change magic string to
// make then unmountable by reiserfs of 3.5.x
//
static int read_super_block (struct super_block * s, int size)
{ {
struct buffer_head * bh; struct buffer_head * bh;
struct reiserfs_super_block * rs; struct reiserfs_super_block * rs;
bh = bread (s->s_dev, REISERFS_DISK_OFFSET_IN_BYTES / size, size);
bh = bread (s->s_dev, offset / size, size);
if (!bh) { if (!bh) {
printk("read_super_block: unable to read superblock on dev %s\n", kdevname(s->s_dev)); printk ("read_super_block: "
"bread failed (dev %s, block %d, size %d)\n",
kdevname (s->s_dev), offset / size, size);
return 1; return 1;
} }
rs = (struct reiserfs_super_block *)bh->b_data; rs = (struct reiserfs_super_block *)bh->b_data;
if (!is_reiserfs_magic_string (rs)) { if (!is_reiserfs_magic_string (rs)) {
printk ("read_super_block: can't find a reiserfs filesystem on dev %s\n", printk ("read_super_block: "
kdevname(s->s_dev)); "can't find a reiserfs filesystem on (dev %s, block %lu, size %d)\n",
kdevname(s->s_dev), bh->b_blocknr, size);
brelse (bh); brelse (bh);
return 1; return 1;
} }
// //
// ok, reiserfs signature (old or new) found in 64-th 1k block of // ok, reiserfs signature (old or new) found in at the given offset
// the device
// //
#ifndef SUPPORT_OLD_FORMAT
// with SUPPORT_OLD_FORMAT undefined - detect old format by
// checking super block version
if (le16_to_cpu (rs->s_version) != REISERFS_VERSION_2) {
brelse (bh);
printk ("read_super_block: unsupported version (%d) of reiserfs found on dev %s\n",
le16_to_cpu (rs->s_version), kdevname(s->s_dev));
return 1;
}
#endif
s->s_blocksize = le16_to_cpu (rs->s_blocksize); s->s_blocksize = le16_to_cpu (rs->s_blocksize);
s->s_blocksize_bits = 0; s->s_blocksize_bits = 0;
while ((1 << s->s_blocksize_bits) != s->s_blocksize) while ((1 << s->s_blocksize_bits) != s->s_blocksize)
...@@ -468,17 +396,22 @@ static int read_super_block (struct super_block * s, int size) ...@@ -468,17 +396,22 @@ static int read_super_block (struct super_block * s, int size)
if (s->s_blocksize != size) if (s->s_blocksize != size)
set_blocksize (s->s_dev, s->s_blocksize); set_blocksize (s->s_dev, s->s_blocksize);
bh = reiserfs_bread (s->s_dev, REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize, s->s_blocksize);
bh = bread (s->s_dev, offset / s->s_blocksize, s->s_blocksize);
if (!bh) { if (!bh) {
printk("read_super_block: unable to read superblock on dev %s\n", kdevname(s->s_dev)); printk ("read_super_block: "
"bread failed (dev %s, block %d, size %d)\n",
kdevname (s->s_dev), offset / size, size);
return 1; return 1;
} }
rs = (struct reiserfs_super_block *)bh->b_data; rs = (struct reiserfs_super_block *)bh->b_data;
if (!is_reiserfs_magic_string (rs) || if (!is_reiserfs_magic_string (rs) ||
le16_to_cpu (rs->s_blocksize) != s->s_blocksize) { le16_to_cpu (rs->s_blocksize) != s->s_blocksize) {
printk ("read_super_block: "
"can't find a reiserfs filesystem on (dev %s, block %lu, size %d)\n",
kdevname(s->s_dev), bh->b_blocknr, size);
brelse (bh); brelse (bh);
printk ("read_super_block: can't find a reiserfs filesystem on dev %s.\n", kdevname(s->s_dev));
return 1; return 1;
} }
/* must check to be sure we haven't pulled an old format super out /* must check to be sure we haven't pulled an old format super out
...@@ -489,7 +422,8 @@ static int read_super_block (struct super_block * s, int size) ...@@ -489,7 +422,8 @@ static int read_super_block (struct super_block * s, int size)
if (bh->b_blocknr >= le32_to_cpu(rs->s_journal_block) && if (bh->b_blocknr >= le32_to_cpu(rs->s_journal_block) &&
bh->b_blocknr < (le32_to_cpu(rs->s_journal_block) + JOURNAL_BLOCK_COUNT)) { bh->b_blocknr < (le32_to_cpu(rs->s_journal_block) + JOURNAL_BLOCK_COUNT)) {
brelse(bh) ; brelse(bh) ;
printk("super-459: read_super_block: super found at block %lu is within its own log. " printk("super-459: read_super_block: "
"super found at block %lu is within its own log. "
"It must not be of this format type.\n", bh->b_blocknr) ; "It must not be of this format type.\n", bh->b_blocknr) ;
return 1 ; return 1 ;
} }
...@@ -504,6 +438,8 @@ static int read_super_block (struct super_block * s, int size) ...@@ -504,6 +438,8 @@ static int read_super_block (struct super_block * s, int size)
return 0; return 0;
} }
/* after journal replay, reread all bitmap and super blocks */ /* after journal replay, reread all bitmap and super blocks */
static int reread_meta_blocks(struct super_block *s) { static int reread_meta_blocks(struct super_block *s) {
int i ; int i ;
...@@ -712,15 +648,12 @@ struct super_block * reiserfs_read_super (struct super_block * s, void * data, i ...@@ -712,15 +648,12 @@ struct super_block * reiserfs_read_super (struct super_block * s, void * data, i
} }
/* read block (64-th 1k block), which can contain reiserfs super block */ /* read block (64-th 1k block), which can contain reiserfs super block */
if (read_super_block (s, size)) { if (read_super_block (s, size, REISERFS_DISK_OFFSET_IN_BYTES)) {
#ifdef SUPPORT_OLD_FORMAT
// try old format (undistributed bitmap, super block in 8-th 1k block of a device) // try old format (undistributed bitmap, super block in 8-th 1k block of a device)
if(read_old_super_block(s,size)) if (read_super_block (s, size, REISERFS_OLD_DISK_OFFSET_IN_BYTES))
goto error; goto error;
else else
old_format = 1; old_format = 1;
#endif
goto error ;
} }
s->u.reiserfs_sb.s_mount_state = le16_to_cpu (SB_DISK_SUPER_BLOCK (s)->s_state); /* journal victim */ s->u.reiserfs_sb.s_mount_state = le16_to_cpu (SB_DISK_SUPER_BLOCK (s)->s_state); /* journal victim */
...@@ -779,16 +712,23 @@ struct super_block * reiserfs_read_super (struct super_block * s, void * data, i ...@@ -779,16 +712,23 @@ struct super_block * reiserfs_read_super (struct super_block * s, void * data, i
if (!(s->s_flags & MS_RDONLY)) { if (!(s->s_flags & MS_RDONLY)) {
struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s);
int old_magic;
old_magic = strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING,
strlen ( REISER2FS_SUPER_MAGIC_STRING));
if( old_magic && le16_to_cpu(rs->s_version) != 0 ) {
dput(s->s_root) ;
s->s_root = NULL ;
reiserfs_warning("reiserfs: wrong version/magic combination in the super-block\n") ;
goto error ;
}
journal_begin(&th, s, 1) ; journal_begin(&th, s, 1) ;
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ;
rs->s_state = cpu_to_le16 (REISERFS_ERROR_FS); rs->s_state = cpu_to_le16 (REISERFS_ERROR_FS);
if (strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, if ( old_magic ) {
strlen ( REISER2FS_SUPER_MAGIC_STRING))) {
if (le16_to_cpu(rs->s_version) != 0)
BUG ();
// filesystem created under 3.5.x found // filesystem created under 3.5.x found
if (!old_format_only (s)) { if (!old_format_only (s)) {
reiserfs_warning("reiserfs: converting 3.5.x filesystem to the new format\n") ; reiserfs_warning("reiserfs: converting 3.5.x filesystem to the new format\n") ;
......
...@@ -9,7 +9,8 @@ ...@@ -9,7 +9,8 @@
O_TARGET := sysv.o O_TARGET := sysv.o
obj-y := ialloc.o balloc.o inode.o itree.o file.o dir.o namei.o super.o obj-y := ialloc.o balloc.o inode.o itree.o file.o dir.o \
namei.o super.o symlink.o
obj-m := $(O_TARGET) obj-m := $(O_TARGET)
include $(TOPDIR)/Rules.make include $(TOPDIR)/Rules.make
...@@ -46,6 +46,14 @@ void sysv_free_block(struct super_block * sb, u32 nr) ...@@ -46,6 +46,14 @@ void sysv_free_block(struct super_block * sb, u32 nr)
unsigned count; unsigned count;
unsigned block = fs32_to_cpu(sb, nr); unsigned block = fs32_to_cpu(sb, nr);
/*
* This code does not work at all for AFS (it has a bitmap
* free list). As AFS is supposed to be read-only no one
* should call this for an AFS filesystem anyway...
*/
if (sb->sv_type == FSTYPE_AFS)
return;
if (block < sb->sv_firstdatazone || block >= sb->sv_nzones) { if (block < sb->sv_firstdatazone || block >= sb->sv_nzones) {
printk("sysv_free_block: trying to free block not in datazone\n"); printk("sysv_free_block: trying to free block not in datazone\n");
return; return;
...@@ -154,6 +162,14 @@ unsigned long sysv_count_free_blocks(struct super_block * sb) ...@@ -154,6 +162,14 @@ unsigned long sysv_count_free_blocks(struct super_block * sb)
unsigned block; unsigned block;
int n; int n;
/*
* This code does not work at all for AFS (it has a bitmap
* free list). As AFS is supposed to be read-only we just
* lie and say it has no free block at all.
*/
if (sb->sv_type == FSTYPE_AFS)
return 0;
lock_super(sb); lock_super(sb);
sb_count = fs32_to_cpu(sb, *sb->sv_free_blocks); sb_count = fs32_to_cpu(sb, *sb->sv_free_blocks);
......
...@@ -131,8 +131,11 @@ void sysv_set_inode(struct inode *inode, dev_t rdev) ...@@ -131,8 +131,11 @@ void sysv_set_inode(struct inode *inode, dev_t rdev)
inode->i_fop = &sysv_dir_operations; inode->i_fop = &sysv_dir_operations;
inode->i_mapping->a_ops = &sysv_aops; inode->i_mapping->a_ops = &sysv_aops;
} else if (S_ISLNK(inode->i_mode)) { } else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &sysv_symlink_inode_operations; if (inode->i_blocks) {
inode->i_mapping->a_ops = &sysv_aops; inode->i_op = &sysv_symlink_inode_operations;
inode->i_mapping->a_ops = &sysv_aops;
} else
inode->i_op = &sysv_fast_symlink_inode_operations;
} else } else
init_special_inode(inode, inode->i_mode, rdev); init_special_inode(inode, inode->i_mode, rdev);
} }
...@@ -196,7 +199,6 @@ int sysv_notify_change(struct dentry *dentry, struct iattr *attr) ...@@ -196,7 +199,6 @@ int sysv_notify_change(struct dentry *dentry, struct iattr *attr)
attr->ia_mode = COH_KLUDGE_NOT_SYMLINK; attr->ia_mode = COH_KLUDGE_NOT_SYMLINK;
inode_setattr(inode, attr); inode_setattr(inode, attr);
return 0; return 0;
} }
......
...@@ -26,11 +26,16 @@ ...@@ -26,11 +26,16 @@
#include <linux/sysv_fs.h> #include <linux/sysv_fs.h>
#include <linux/init.h> #include <linux/init.h>
/* The following functions try to recognize specific filesystems. /*
* The following functions try to recognize specific filesystems.
*
* We recognize: * We recognize:
* - Xenix FS by its magic number. * - Xenix FS by its magic number.
* - SystemV FS by its magic number. * - SystemV FS by its magic number.
* - Coherent FS by its funny fname/fpack field. * - Coherent FS by its funny fname/fpack field.
* - SCO AFS by s_nfree == 0xffff
* - V7 FS has no distinguishing features.
*
* We discriminate among SystemV4 and SystemV2 FS by the assumption that * We discriminate among SystemV4 and SystemV2 FS by the assumption that
* the time stamp is not < 01-01-1980. * the time stamp is not < 01-01-1980.
*/ */
...@@ -197,7 +202,19 @@ static int detect_sysv (struct super_block *sb, struct buffer_head *bh) ...@@ -197,7 +202,19 @@ static int detect_sysv (struct super_block *sb, struct buffer_head *bh)
sb->sv_bytesex = BYTESEX_BE; sb->sv_bytesex = BYTESEX_BE;
else else
return 0; return 0;
if (sbd->s_time < JAN_1_1980) {
if (fs16_to_cpu(sb, sbd->s_nfree) == 0xffff) {
sb->sv_type = FSTYPE_AFS;
if (!(sb->s_flags & MS_RDONLY)) {
printk("SysV FS: SCO EAFS on %s detected, "
"forcing read-only mode.\n",
bdevname(sb->s_dev));
sb->s_flags |= MS_RDONLY;
}
return sbd->s_type;
}
if (fs32_to_cpu(sb, sbd->s_time) < JAN_1_1980) {
/* this is likely to happen on SystemV2 FS */ /* this is likely to happen on SystemV2 FS */
if (sbd->s_type > 3 || sbd->s_type < 1) if (sbd->s_type > 3 || sbd->s_type < 1)
return 0; return 0;
...@@ -261,6 +278,7 @@ static char *flavour_names[] = { ...@@ -261,6 +278,7 @@ static char *flavour_names[] = {
[FSTYPE_SYSV2] "SystemV Release 2", [FSTYPE_SYSV2] "SystemV Release 2",
[FSTYPE_COH] "Coherent", [FSTYPE_COH] "Coherent",
[FSTYPE_V7] "V7", [FSTYPE_V7] "V7",
[FSTYPE_AFS] "AFS",
}; };
static void (*flavour_setup[])(struct super_block *) = { static void (*flavour_setup[])(struct super_block *) = {
...@@ -269,6 +287,7 @@ static void (*flavour_setup[])(struct super_block *) = { ...@@ -269,6 +287,7 @@ static void (*flavour_setup[])(struct super_block *) = {
[FSTYPE_SYSV2] detected_sysv2, [FSTYPE_SYSV2] detected_sysv2,
[FSTYPE_COH] detected_coherent, [FSTYPE_COH] detected_coherent,
[FSTYPE_V7] detected_v7, [FSTYPE_V7] detected_v7,
[FSTYPE_AFS] detected_sysv4,
}; };
static int complete_read_super(struct super_block *sb, int silent, int size) static int complete_read_super(struct super_block *sb, int silent, int size)
...@@ -294,7 +313,8 @@ static int complete_read_super(struct super_block *sb, int silent, int size) ...@@ -294,7 +313,8 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
sb->sv_toobig_block = 10 + bsize_4 * (1 + bsize_4 * (1 + bsize_4)); sb->sv_toobig_block = 10 + bsize_4 * (1 + bsize_4 * (1 + bsize_4));
sb->sv_ind_per_block_bits = n_bits-2; sb->sv_ind_per_block_bits = n_bits-2;
sb->sv_ninodes = (sb->sv_firstdatazone - sb->sv_firstinodezone) << sb->sv_inodes_per_block_bits; sb->sv_ninodes = (sb->sv_firstdatazone - sb->sv_firstinodezone)
<< sb->sv_inodes_per_block_bits;
sb->s_blocksize = bsize; sb->s_blocksize = bsize;
sb->s_blocksize_bits = n_bits; sb->s_blocksize_bits = n_bits;
...@@ -346,13 +366,10 @@ static struct super_block *sysv_read_super(struct super_block *sb, ...@@ -346,13 +366,10 @@ static struct super_block *sysv_read_super(struct super_block *sb,
sb->sv_block_base = 0; sb->sv_block_base = 0;
for (i = 0; i < sizeof(flavours)/sizeof(flavours[0]) && !size; i++) { for (i = 0; i < sizeof(flavours)/sizeof(flavours[0]) && !size; i++) {
struct buffer_head *next_bh;
next_bh = bread(dev, flavours[i].block, BLOCK_SIZE);
if (!next_bh)
continue;
brelse(bh); brelse(bh);
bh = next_bh; bh = bread(dev, flavours[i].block, BLOCK_SIZE);
if (!bh)
continue;
size = flavours[i].test(sb, bh); size = flavours[i].test(sb, bh);
} }
...@@ -411,8 +428,10 @@ static struct super_block *sysv_read_super(struct super_block *sb, ...@@ -411,8 +428,10 @@ static struct super_block *sysv_read_super(struct super_block *sb,
static struct super_block *v7_read_super(struct super_block *sb,void *data, static struct super_block *v7_read_super(struct super_block *sb,void *data,
int silent) int silent)
{ {
struct buffer_head *bh; struct buffer_head *bh, *bh2 = NULL;
kdev_t dev = sb->s_dev; kdev_t dev = sb->s_dev;
struct v7_super_block *v7sb;
struct sysv_inode *v7i;
if (440 != sizeof (struct v7_super_block)) if (440 != sizeof (struct v7_super_block))
panic("V7 FS: bad super-block size"); panic("V7 FS: bad super-block size");
...@@ -422,23 +441,41 @@ static struct super_block *v7_read_super(struct super_block *sb,void *data, ...@@ -422,23 +441,41 @@ static struct super_block *v7_read_super(struct super_block *sb,void *data,
sb->sv_type = FSTYPE_V7; sb->sv_type = FSTYPE_V7;
sb->sv_bytesex = BYTESEX_PDP; sb->sv_bytesex = BYTESEX_PDP;
set_blocksize(dev,512); set_blocksize(dev, 512);
if ((bh = bread(dev, 1, 512)) == NULL) { if ((bh = bread(dev, 1, 512)) == NULL) {
if (!silent) if (!silent)
printk("VFS: unable to read V7 FS superblock on device " printk("VFS: unable to read V7 FS superblock on "
"%s.\n", bdevname(dev)); "device %s.\n", bdevname(dev));
goto failed; goto failed;
} }
/* plausibility check on superblock */
v7sb = (struct v7_super_block *) bh->b_data;
if (fs16_to_cpu(sb,v7sb->s_nfree) > V7_NICFREE ||
fs16_to_cpu(sb,v7sb->s_ninode) > V7_NICINOD ||
fs32_to_cpu(sb,v7sb->s_time) == 0)
goto failed;
/* plausibility check on root inode: it is a directory,
with a nonzero size that is a multiple of 16 */
if ((bh2 = bread(dev, 2, 512)) == NULL)
goto failed;
v7i = (struct sysv_inode *)(bh2->b_data + 64);
if ((fs16_to_cpu(sb,v7i->i_mode) & ~0777) != S_IFDIR ||
(fs32_to_cpu(sb,v7i->i_size) == 0) ||
(fs32_to_cpu(sb,v7i->i_size) & 017) != 0)
goto failed;
brelse(bh2);
sb->sv_bh1 = bh; sb->sv_bh1 = bh;
sb->sv_bh2 = bh; sb->sv_bh2 = bh;
if (complete_read_super(sb, silent, 1)) if (complete_read_super(sb, silent, 1))
return sb; return sb;
brelse(bh);
failed: failed:
brelse(bh2);
brelse(bh);
return NULL; return NULL;
} }
......
/*
* linux/fs/sysv/symlink.c
*
* Handling of System V filesystem fast symlinks extensions.
* Aug 2001, Christoph Hellwig (hch@caldera.de)
*/
#include <linux/fs.h>
static int sysv_readlink(struct dentry *dentry, char *buffer, int buflen)
{
char *s = (char *)dentry->d_inode->u.sysv_i.i_data;
return vfs_readlink(dentry, buffer, buflen, s);
}
static int sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
{
char *s = (char *)dentry->d_inode->u.sysv_i.i_data;
return vfs_follow_link(nd, s);
}
struct inode_operations sysv_fast_symlink_inode_operations = {
readlink: sysv_readlink,
follow_link: sysv_follow_link,
};
...@@ -81,11 +81,6 @@ extern void lock_page(struct page *page); ...@@ -81,11 +81,6 @@ extern void lock_page(struct page *page);
#define find_lock_page(mapping, index) \ #define find_lock_page(mapping, index) \
__find_lock_page(mapping, index, page_hash(mapping, index)) __find_lock_page(mapping, index, page_hash(mapping, index))
extern struct page * __find_get_swapcache_page (struct address_space * mapping,
unsigned long index, struct page **hash);
#define find_get_swapcache_page(mapping, index) \
__find_get_swapcache_page(mapping, index, page_hash(mapping, index))
extern void __add_page_to_hash_queue(struct page * page, struct page **p); extern void __add_page_to_hash_queue(struct page * page, struct page **p);
extern void add_to_page_cache(struct page * page, struct address_space *mapping, unsigned long index); extern void add_to_page_cache(struct page * page, struct address_space *mapping, unsigned long index);
......
...@@ -749,6 +749,7 @@ extern void exit_mm(struct task_struct *); ...@@ -749,6 +749,7 @@ extern void exit_mm(struct task_struct *);
extern void exit_files(struct task_struct *); extern void exit_files(struct task_struct *);
extern void exit_sighand(struct task_struct *); extern void exit_sighand(struct task_struct *);
extern void reparent_to_init(void);
extern void daemonize(void); extern void daemonize(void);
extern int do_execve(char *, char **, char **, struct pt_regs *); extern int do_execve(char *, char **, char **, struct pt_regs *);
......
...@@ -325,6 +325,7 @@ enum { ...@@ -325,6 +325,7 @@ enum {
FSTYPE_SYSV2, FSTYPE_SYSV2,
FSTYPE_COH, FSTYPE_COH,
FSTYPE_V7, FSTYPE_V7,
FSTYPE_AFS,
FSTYPE_END, FSTYPE_END,
}; };
...@@ -373,6 +374,7 @@ extern ino_t sysv_inode_by_name(struct dentry*); ...@@ -373,6 +374,7 @@ extern ino_t sysv_inode_by_name(struct dentry*);
extern struct inode_operations sysv_file_inode_operations; extern struct inode_operations sysv_file_inode_operations;
extern struct inode_operations sysv_dir_inode_operations; extern struct inode_operations sysv_dir_inode_operations;
extern struct inode_operations sysv_fast_symlink_inode_operations;
extern struct file_operations sysv_file_operations; extern struct file_operations sysv_file_operations;
extern struct file_operations sysv_dir_operations; extern struct file_operations sysv_dir_operations;
extern struct address_space_operations sysv_aops; extern struct address_space_operations sysv_aops;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* 'fork.c' contains the help-routines for the 'fork' system call * 'fork.c' contains the help-routines for the 'fork' system call
* (see also entry.S and others). * (see also entry.S and others).
* Fork is rather simple, once you get the hang of it, but the memory * Fork is rather simple, once you get the hang of it, but the memory
* management can be a bitch. See 'mm/memory.c': 'copy_page_tables()' * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
*/ */
#include <linux/config.h> #include <linux/config.h>
...@@ -134,9 +134,22 @@ static inline int dup_mmap(struct mm_struct * mm) ...@@ -134,9 +134,22 @@ static inline int dup_mmap(struct mm_struct * mm)
mm->mmap_avl = NULL; mm->mmap_avl = NULL;
mm->mmap_cache = NULL; mm->mmap_cache = NULL;
mm->map_count = 0; mm->map_count = 0;
mm->rss = 0;
mm->cpu_vm_mask = 0; mm->cpu_vm_mask = 0;
mm->swap_address = 0; mm->swap_address = 0;
pprev = &mm->mmap; pprev = &mm->mmap;
/*
* Add it to the mmlist after the parent.
* Doing it this way means that we can order the list,
* and fork() won't mess up the ordering significantly.
* Add it first so that swapoff can see any swap entries.
*/
spin_lock(&mmlist_lock);
list_add(&mm->mmlist, &current->mm->mmlist);
mmlist_nr++;
spin_unlock(&mmlist_lock);
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) { for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
struct file *file; struct file *file;
...@@ -149,7 +162,6 @@ static inline int dup_mmap(struct mm_struct * mm) ...@@ -149,7 +162,6 @@ static inline int dup_mmap(struct mm_struct * mm)
*tmp = *mpnt; *tmp = *mpnt;
tmp->vm_flags &= ~VM_LOCKED; tmp->vm_flags &= ~VM_LOCKED;
tmp->vm_mm = mm; tmp->vm_mm = mm;
mm->map_count++;
tmp->vm_next = NULL; tmp->vm_next = NULL;
file = tmp->vm_file; file = tmp->vm_file;
if (file) { if (file) {
...@@ -168,17 +180,19 @@ static inline int dup_mmap(struct mm_struct * mm) ...@@ -168,17 +180,19 @@ static inline int dup_mmap(struct mm_struct * mm)
spin_unlock(&inode->i_mapping->i_shared_lock); spin_unlock(&inode->i_mapping->i_shared_lock);
} }
/* Copy the pages, but defer checking for errors */
retval = copy_page_range(mm, current->mm, tmp);
if (!retval && tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
/* /*
* Link in the new vma even if an error occurred, * Link in the new vma and copy the page table entries:
* so that exit_mmap() can clean up the mess. * link in first so that swapoff can see swap entries.
*/ */
spin_lock(&mm->page_table_lock);
*pprev = tmp; *pprev = tmp;
pprev = &tmp->vm_next; pprev = &tmp->vm_next;
mm->map_count++;
retval = copy_page_range(mm, current->mm, tmp);
spin_unlock(&mm->page_table_lock);
if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
if (retval) if (retval)
goto fail_nomem; goto fail_nomem;
...@@ -246,6 +260,9 @@ inline void __mmdrop(struct mm_struct *mm) ...@@ -246,6 +260,9 @@ inline void __mmdrop(struct mm_struct *mm)
void mmput(struct mm_struct *mm) void mmput(struct mm_struct *mm)
{ {
if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) { if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
extern struct mm_struct *swap_mm;
if (swap_mm == mm)
swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
list_del(&mm->mmlist); list_del(&mm->mmlist);
mmlist_nr--; mmlist_nr--;
spin_unlock(&mmlist_lock); spin_unlock(&mmlist_lock);
...@@ -320,18 +337,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) ...@@ -320,18 +337,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
retval = dup_mmap(mm); retval = dup_mmap(mm);
up_write(&oldmm->mmap_sem); up_write(&oldmm->mmap_sem);
/*
* Add it to the mmlist after the parent.
*
* Doing it this way means that we can order
* the list, and fork() won't mess up the
* ordering significantly.
*/
spin_lock(&mmlist_lock);
list_add(&mm->mmlist, &oldmm->mmlist);
mmlist_nr++;
spin_unlock(&mmlist_lock);
if (retval) if (retval)
goto free_pt; goto free_pt;
......
...@@ -478,6 +478,7 @@ EXPORT_SYMBOL(secure_tcp_sequence_number); ...@@ -478,6 +478,7 @@ EXPORT_SYMBOL(secure_tcp_sequence_number);
EXPORT_SYMBOL(get_random_bytes); EXPORT_SYMBOL(get_random_bytes);
EXPORT_SYMBOL(securebits); EXPORT_SYMBOL(securebits);
EXPORT_SYMBOL(cap_bset); EXPORT_SYMBOL(cap_bset);
EXPORT_SYMBOL(reparent_to_init);
EXPORT_SYMBOL(daemonize); EXPORT_SYMBOL(daemonize);
EXPORT_SYMBOL(csum_partial); /* for networking and md */ EXPORT_SYMBOL(csum_partial); /* for networking and md */
......
...@@ -107,6 +107,7 @@ static union { ...@@ -107,6 +107,7 @@ static union {
#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule #define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
struct kernel_stat kstat; struct kernel_stat kstat;
extern struct task_struct *child_reaper;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
...@@ -1215,6 +1216,59 @@ void show_state(void) ...@@ -1215,6 +1216,59 @@ void show_state(void)
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
} }
/**
* reparent_to_init() - Reparent the calling kernel thread to the init task.
*
* If a kernel thread is launched as a result of a system call, or if
* it ever exits, it should generally reparent itself to init so that
* it is correctly cleaned up on exit.
*
* The various task state such as scheduling policy and priority may have
* been inherited fro a user process, so we reset them to sane values here.
*
* NOTE that reparent_to_init() gives the caller full capabilities.
*/
void reparent_to_init(void)
{
struct task_struct *this_task = current;
write_lock_irq(&tasklist_lock);
/* Reparent to init */
REMOVE_LINKS(this_task);
this_task->p_pptr = child_reaper;
this_task->p_opptr = child_reaper;
SET_LINKS(this_task);
/* Set the exit signal to SIGCHLD so we signal init on exit */
if (this_task->exit_signal != 0) {
printk(KERN_ERR "task `%s' exit_signal %d in "
__FUNCTION__ "\n",
this_task->comm, this_task->exit_signal);
}
this_task->exit_signal = SIGCHLD;
/* We also take the runqueue_lock while altering task fields
* which affect scheduling decisions */
spin_lock(&runqueue_lock);
this_task->ptrace = 0;
this_task->nice = DEF_NICE;
this_task->policy = SCHED_OTHER;
/* cpus_allowed? */
/* rt_priority? */
/* signals? */
this_task->cap_effective = CAP_INIT_EFF_SET;
this_task->cap_inheritable = CAP_INIT_INH_SET;
this_task->cap_permitted = CAP_FULL_SET;
this_task->keep_capabilities = 0;
memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim)));
this_task->user = INIT_USER;
spin_unlock(&runqueue_lock);
write_unlock_irq(&tasklist_lock);
}
/* /*
* Put all the gunge required to become a kernel thread without * Put all the gunge required to become a kernel thread without
* attached user resources in one place where it belongs. * attached user resources in one place where it belongs.
......
...@@ -681,34 +681,6 @@ struct page * __find_get_page(struct address_space *mapping, ...@@ -681,34 +681,6 @@ struct page * __find_get_page(struct address_space *mapping,
return page; return page;
} }
/*
* Find a swapcache page (and get a reference) or return NULL.
* The SwapCache check is protected by the pagecache lock.
*/
struct page * __find_get_swapcache_page(struct address_space *mapping,
unsigned long offset, struct page **hash)
{
struct page *page;
/*
* We need the LRU lock to protect against page_launder().
*/
spin_lock(&pagecache_lock);
page = __find_page_nolock(mapping, offset, *hash);
if (page) {
spin_lock(&pagemap_lru_lock);
if (PageSwapCache(page))
page_cache_get(page);
else
page = NULL;
spin_unlock(&pagemap_lru_lock);
}
spin_unlock(&pagecache_lock);
return page;
}
/* /*
* Same as the above, but lock the page too, verifying that * Same as the above, but lock the page too, verifying that
* it's still valid once we own it. * it's still valid once we own it.
......
...@@ -148,6 +148,9 @@ void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr) ...@@ -148,6 +148,9 @@ void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
* *
* 08Jan98 Merged into one routine from several inline routines to reduce * 08Jan98 Merged into one routine from several inline routines to reduce
* variable count and make things faster. -jj * variable count and make things faster. -jj
*
* dst->page_table_lock is held on entry and exit,
* but may be dropped within pmd_alloc() and pte_alloc().
*/ */
int copy_page_range(struct mm_struct *dst, struct mm_struct *src, int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma) struct vm_area_struct *vma)
...@@ -159,8 +162,7 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, ...@@ -159,8 +162,7 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
src_pgd = pgd_offset(src, address)-1; src_pgd = pgd_offset(src, address)-1;
dst_pgd = pgd_offset(dst, address)-1; dst_pgd = pgd_offset(dst, address)-1;
spin_lock(&dst->page_table_lock);
for (;;) { for (;;) {
pmd_t * src_pmd, * dst_pmd; pmd_t * src_pmd, * dst_pmd;
...@@ -234,6 +236,7 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; ...@@ -234,6 +236,7 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK;
pte = pte_mkclean(pte); pte = pte_mkclean(pte);
pte = pte_mkold(pte); pte = pte_mkold(pte);
get_page(ptepage); get_page(ptepage);
dst->rss++;
cont_copy_pte_range: set_pte(dst_pte, pte); cont_copy_pte_range: set_pte(dst_pte, pte);
cont_copy_pte_range_noset: address += PAGE_SIZE; cont_copy_pte_range_noset: address += PAGE_SIZE;
...@@ -251,11 +254,8 @@ cont_copy_pmd_range: src_pmd++; ...@@ -251,11 +254,8 @@ cont_copy_pmd_range: src_pmd++;
out_unlock: out_unlock:
spin_unlock(&src->page_table_lock); spin_unlock(&src->page_table_lock);
out: out:
spin_unlock(&dst->page_table_lock);
return 0; return 0;
nomem: nomem:
spin_unlock(&dst->page_table_lock);
return -ENOMEM; return -ENOMEM;
} }
...@@ -999,7 +999,6 @@ static void vmtruncate_list(struct vm_area_struct *mpnt, unsigned long pgoff) ...@@ -999,7 +999,6 @@ static void vmtruncate_list(struct vm_area_struct *mpnt, unsigned long pgoff)
flush_tlb_range(mm, start, end); flush_tlb_range(mm, start, end);
} while ((mpnt = mpnt->vm_next_share) != NULL); } while ((mpnt = mpnt->vm_next_share) != NULL);
} }
/* /*
* Handle all mappings that got truncated by a "truncate()" * Handle all mappings that got truncated by a "truncate()"
...@@ -1057,8 +1056,6 @@ void vmtruncate(struct inode * inode, loff_t offset) ...@@ -1057,8 +1056,6 @@ void vmtruncate(struct inode * inode, loff_t offset)
return; return;
} }
/* /*
* Primitive swap readahead code. We simply read an aligned block of * Primitive swap readahead code. We simply read an aligned block of
* (1 << page_cluster) entries in the swap area. This method is chosen * (1 << page_cluster) entries in the swap area. This method is chosen
...@@ -1072,23 +1069,19 @@ void swapin_readahead(swp_entry_t entry) ...@@ -1072,23 +1069,19 @@ void swapin_readahead(swp_entry_t entry)
unsigned long offset; unsigned long offset;
/* /*
* Get the number of handles we should do readahead io to. Also, * Get the number of handles we should do readahead io to.
* grab temporary references on them, releasing them as io completes.
*/ */
num = valid_swaphandles(entry, &offset); num = valid_swaphandles(entry, &offset);
for (i = 0; i < num; offset++, i++) { for (i = 0; i < num; offset++, i++) {
/* Don't block on I/O for read-ahead */ /* Don't block on I/O for read-ahead */
if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster if (atomic_read(&nr_async_pages) >=
* (1 << page_cluster)) { pager_daemon.swap_cluster << page_cluster)
while (i++ < num)
swap_free(SWP_ENTRY(SWP_TYPE(entry), offset++));
break; break;
}
/* Ok, do the async read-ahead now */ /* Ok, do the async read-ahead now */
new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset)); new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset));
if (new_page != NULL) if (!new_page)
page_cache_release(new_page); break;
swap_free(SWP_ENTRY(SWP_TYPE(entry), offset)); page_cache_release(new_page);
} }
return; return;
} }
......
...@@ -29,7 +29,7 @@ static int swap_writepage(struct page *page) ...@@ -29,7 +29,7 @@ static int swap_writepage(struct page *page)
if (swap_count(page) > 1) if (swap_count(page) > 1)
goto in_use; goto in_use;
/* We could remove it here, but page_launder will do it anyway */ delete_from_swap_cache_nolock(page);
UnlockPage(page); UnlockPage(page);
return 0; return 0;
...@@ -79,40 +79,35 @@ void add_to_swap_cache(struct page *page, swp_entry_t entry) ...@@ -79,40 +79,35 @@ void add_to_swap_cache(struct page *page, swp_entry_t entry)
BUG(); BUG();
if (page->mapping) if (page->mapping)
BUG(); BUG();
flags = page->flags & ~((1 << PG_error) | (1 << PG_arch_1));
/* clear PG_dirty so a subsequent set_page_dirty takes effect */
flags = page->flags & ~((1 << PG_error) | (1 << PG_dirty) | (1 << PG_arch_1));
page->flags = flags | (1 << PG_uptodate); page->flags = flags | (1 << PG_uptodate);
page->age = PAGE_AGE_START; page->age = PAGE_AGE_START;
add_to_page_cache_locked(page, &swapper_space, entry.val); add_to_page_cache_locked(page, &swapper_space, entry.val);
} }
static inline void remove_from_swap_cache(struct page *page)
{
struct address_space *mapping = page->mapping;
if (mapping != &swapper_space)
BUG();
if (!PageSwapCache(page) || !PageLocked(page))
PAGE_BUG(page);
PageClearSwapCache(page);
ClearPageDirty(page);
__remove_inode_page(page);
}
/* /*
* This must be called only on pages that have * This must be called only on pages that have
* been verified to be in the swap cache. * been verified to be in the swap cache.
*/ */
void __delete_from_swap_cache(struct page *page) void __delete_from_swap_cache(struct page *page)
{ {
struct address_space *mapping = page->mapping;
swp_entry_t entry; swp_entry_t entry;
entry.val = page->index;
#ifdef SWAP_CACHE_INFO #ifdef SWAP_CACHE_INFO
swap_cache_del_total++; swap_cache_del_total++;
#endif #endif
remove_from_swap_cache(page); if (mapping != &swapper_space)
BUG();
if (!PageSwapCache(page) || !PageLocked(page))
BUG();
entry.val = page->index;
PageClearSwapCache(page);
ClearPageDirty(page);
__remove_inode_page(page);
swap_free(entry); swap_free(entry);
} }
...@@ -129,7 +124,6 @@ void delete_from_swap_cache_nolock(struct page *page) ...@@ -129,7 +124,6 @@ void delete_from_swap_cache_nolock(struct page *page)
lru_cache_del(page); lru_cache_del(page);
spin_lock(&pagecache_lock); spin_lock(&pagecache_lock);
ClearPageDirty(page);
__delete_from_swap_cache(page); __delete_from_swap_cache(page);
spin_unlock(&pagecache_lock); spin_unlock(&pagecache_lock);
page_cache_release(page); page_cache_release(page);
...@@ -169,14 +163,12 @@ void free_page_and_swap_cache(struct page *page) ...@@ -169,14 +163,12 @@ void free_page_and_swap_cache(struct page *page)
page_cache_release(page); page_cache_release(page);
} }
/* /*
* Lookup a swap entry in the swap cache. A found page will be returned * Lookup a swap entry in the swap cache. A found page will be returned
* unlocked and with its refcount incremented - we rely on the kernel * unlocked and with its refcount incremented - we rely on the kernel
* lock getting page table operations atomic even if we drop the page * lock getting page table operations atomic even if we drop the page
* lock before returning. * lock before returning.
*/ */
struct page * lookup_swap_cache(swp_entry_t entry) struct page * lookup_swap_cache(swp_entry_t entry)
{ {
struct page *found; struct page *found;
...@@ -184,59 +176,62 @@ struct page * lookup_swap_cache(swp_entry_t entry) ...@@ -184,59 +176,62 @@ struct page * lookup_swap_cache(swp_entry_t entry)
#ifdef SWAP_CACHE_INFO #ifdef SWAP_CACHE_INFO
swap_cache_find_total++; swap_cache_find_total++;
#endif #endif
while (1) { found = find_get_page(&swapper_space, entry.val);
/* /*
* Right now the pagecache is 32-bit only. But it's a 32 bit index. =) * Unsafe to assert PageSwapCache and mapping on page found:
*/ * if SMP nothing prevents swapoff from deleting this page from
found = find_get_swapcache_page(&swapper_space, entry.val); * the swap cache at this moment. find_lock_page would prevent
if (!found) * that, but no need to change: we _have_ got the right page.
return 0; */
if (!PageSwapCache(found))
BUG();
if (found->mapping != &swapper_space)
BUG();
#ifdef SWAP_CACHE_INFO #ifdef SWAP_CACHE_INFO
if (found)
swap_cache_find_success++; swap_cache_find_success++;
#endif #endif
return found; return found;
}
} }
/* /*
* Locate a page of swap in physical memory, reserving swap cache space * Locate a page of swap in physical memory, reserving swap cache space
* and reading the disk if it is not already cached. If wait==0, we are * and reading the disk if it is not already cached.
* only doing readahead, so don't worry if the page is already locked.
*
* A failure return means that either the page allocation failed or that * A failure return means that either the page allocation failed or that
* the swap entry is no longer in use. * the swap entry is no longer in use.
*/ */
struct page * read_swap_cache_async(swp_entry_t entry) struct page * read_swap_cache_async(swp_entry_t entry)
{ {
struct page *found_page = 0, *new_page; struct page *found_page, *new_page;
struct page **hash;
/* /*
* Make sure the swap entry is still in use. * Look for the page in the swap cache. Since we normally call
* this only after lookup_swap_cache() failed, re-calling that
* would confuse the statistics: use __find_get_page() directly.
*/ */
if (!swap_duplicate(entry)) /* Account for the swap cache */ hash = page_hash(&swapper_space, entry.val);
goto out; found_page = __find_get_page(&swapper_space, entry.val, hash);
/*
* Look for the page in the swap cache.
*/
found_page = lookup_swap_cache(entry);
if (found_page) if (found_page)
goto out_free_swap; goto out;
new_page = alloc_page(GFP_HIGHUSER); new_page = alloc_page(GFP_HIGHUSER);
if (!new_page) if (!new_page)
goto out_free_swap; /* Out of memory */ goto out; /* Out of memory */
/* /*
* Check the swap cache again, in case we stalled above. * Check the swap cache again, in case we stalled above.
* The BKL is guarding against races between this check
* and where the new page is added to the swap cache below.
*/ */
found_page = lookup_swap_cache(entry); found_page = __find_get_page(&swapper_space, entry.val, hash);
if (found_page) if (found_page)
goto out_free_page; goto out_free_page;
/*
* Make sure the swap entry is still in use. It could have gone
* while caller waited for BKL, or while allocating page above,
* or while allocating page in prior call via swapin_readahead.
*/
if (!swap_duplicate(entry)) /* Account for the swap cache */
goto out_free_page;
/* /*
* Add it to the swap cache and read its contents. * Add it to the swap cache and read its contents.
*/ */
...@@ -248,8 +243,6 @@ struct page * read_swap_cache_async(swp_entry_t entry) ...@@ -248,8 +243,6 @@ struct page * read_swap_cache_async(swp_entry_t entry)
out_free_page: out_free_page:
page_cache_release(new_page); page_cache_release(new_page);
out_free_swap:
swap_free(entry);
out: out:
return found_page; return found_page;
} }
...@@ -20,6 +20,12 @@ ...@@ -20,6 +20,12 @@
spinlock_t swaplock = SPIN_LOCK_UNLOCKED; spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
unsigned int nr_swapfiles; unsigned int nr_swapfiles;
int total_swap_pages; int total_swap_pages;
static int swap_overflow;
static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
static const char Unused_offset[] = "Unused swap offset entry ";
struct swap_list_t swap_list = {-1, -1}; struct swap_list_t swap_list = {-1, -1};
...@@ -202,21 +208,21 @@ void __swap_free(swp_entry_t entry, unsigned short count) ...@@ -202,21 +208,21 @@ void __swap_free(swp_entry_t entry, unsigned short count)
return; return;
bad_nofile: bad_nofile:
printk("swap_free: Trying to free nonexistent swap-page\n"); printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
goto out; goto out;
bad_device: bad_device:
printk("swap_free: Trying to free swap from unused swap-device\n"); printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
goto out; goto out;
bad_offset: bad_offset:
printk("swap_free: offset exceeds max\n"); printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
goto out; goto out;
bad_free: bad_free:
printk("VM: Bad swap entry %08lx\n", entry.val); printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
goto out; goto out;
bad_count: bad_count:
swap_device_unlock(p); swap_device_unlock(p);
swap_list_unlock(); swap_list_unlock();
printk(KERN_ERR "VM: Bad count %hd current count %hd\n", count, p->swap_map[offset]); printk(KERN_ERR "swap_free: Bad count %hd current count %hd\n", count, p->swap_map[offset]);
goto out; goto out;
} }
...@@ -229,33 +235,23 @@ void __swap_free(swp_entry_t entry, unsigned short count) ...@@ -229,33 +235,23 @@ void __swap_free(swp_entry_t entry, unsigned short count)
* share this swap entry, so be cautious and let do_wp_page work out * share this swap entry, so be cautious and let do_wp_page work out
* what to do if a write is requested later. * what to do if a write is requested later.
*/ */
/* tasklist_lock and vma->vm_mm->page_table_lock are held */ /* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address, static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
pte_t *dir, swp_entry_t entry, struct page* page) pte_t *dir, swp_entry_t entry, struct page* page)
{ {
pte_t pte = *dir; pte_t pte = *dir;
if (pte_none(pte))
return;
if (pte_present(pte)) {
/* If this entry is swap-cached, then page must already
hold the right address for any copies in physical
memory */
if (pte_page(pte) != page)
return;
/* We will be removing the swap cache in a moment, so... */
ptep_mkdirty(dir);
return;
}
if (pte_to_swp_entry(pte).val != entry.val) if (pte_to_swp_entry(pte).val != entry.val)
return; return;
set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); if (pte_none(pte) || pte_present(pte))
swap_free(entry); return;
get_page(page); get_page(page);
set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
swap_free(entry);
++vma->vm_mm->rss; ++vma->vm_mm->rss;
} }
/* tasklist_lock and vma->vm_mm->page_table_lock are held */ /* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
unsigned long address, unsigned long size, unsigned long offset, unsigned long address, unsigned long size, unsigned long offset,
swp_entry_t entry, struct page* page) swp_entry_t entry, struct page* page)
...@@ -283,7 +279,7 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, ...@@ -283,7 +279,7 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
} while (address && (address < end)); } while (address && (address < end));
} }
/* tasklist_lock and vma->vm_mm->page_table_lock are held */ /* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
unsigned long address, unsigned long size, unsigned long address, unsigned long size,
swp_entry_t entry, struct page* page) swp_entry_t entry, struct page* page)
...@@ -314,7 +310,7 @@ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, ...@@ -314,7 +310,7 @@ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
} while (address && (address < end)); } while (address && (address < end));
} }
/* tasklist_lock and vma->vm_mm->page_table_lock are held */ /* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir, static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
swp_entry_t entry, struct page* page) swp_entry_t entry, struct page* page)
{ {
...@@ -337,8 +333,6 @@ static void unuse_process(struct mm_struct * mm, ...@@ -337,8 +333,6 @@ static void unuse_process(struct mm_struct * mm,
/* /*
* Go through process' page directory. * Go through process' page directory.
*/ */
if (!mm)
return;
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
for (vma = mm->mmap; vma; vma = vma->vm_next) { for (vma = mm->mmap; vma; vma = vma->vm_next) {
pgd_t * pgd = pgd_offset(mm, vma->vm_start); pgd_t * pgd = pgd_offset(mm, vma->vm_start);
...@@ -349,53 +343,42 @@ static void unuse_process(struct mm_struct * mm, ...@@ -349,53 +343,42 @@ static void unuse_process(struct mm_struct * mm,
} }
/* /*
* this is called when we find a page in the swap list * Scan swap_map from current position to next entry still in use.
* all the locks have been dropped at this point which * Recycle to start on reaching the end, returning 0 when empty.
* isn't a problem because we rescan the swap map
* and we _don't_ clear the refrence count if for
* some reason it isn't 0
*/ */
static int find_next_to_unuse(struct swap_info_struct *si, int prev)
static inline int free_found_swap_entry(unsigned int type, int i)
{ {
struct task_struct *p; int max = si->max;
struct page *page; int i = prev;
swp_entry_t entry; int count;
entry = SWP_ENTRY(type, i);
/*
* Get a page for the entry, using the existing swap
* cache page if there is one. Otherwise, get a clean
* page and read the swap into it.
*/
page = read_swap_cache_async(entry);
if (!page) {
swap_free(entry);
return -ENOMEM;
}
lock_page(page);
if (PageSwapCache(page))
delete_from_swap_cache_nolock(page);
UnlockPage(page);
read_lock(&tasklist_lock);
for_each_task(p)
unuse_process(p->mm, entry, page);
read_unlock(&tasklist_lock);
shmem_unuse(entry, page);
/*
* Now get rid of the extra reference to the temporary
* page we've been using.
*/
page_cache_release(page);
/* /*
* Check for and clear any overflowed swap map counts. * No need for swap_device_lock(si) here: we're just looking
* for whether an entry is in use, not modifying it; false
* hits are okay, and sys_swapoff() has already prevented new
* allocations from this area (while holding swap_list_lock()).
*/ */
swap_free(entry); for (;;) {
return 0; if (++i >= max) {
if (!prev) {
i = 0;
break;
}
/*
* No entries in use at top of swap_map,
* loop back to start and recheck there.
*/
max = prev + 1;
prev = 0;
i = 1;
}
count = si->swap_map[i];
if (count && count != SWAP_MAP_BAD)
break;
}
return i;
} }
/* /*
* We completely avoid races by reading each swap page in advance, * We completely avoid races by reading each swap page in advance,
* and then search for the process using it. All the necessary * and then search for the process using it. All the necessary
...@@ -404,80 +387,175 @@ static inline int free_found_swap_entry(unsigned int type, int i) ...@@ -404,80 +387,175 @@ static inline int free_found_swap_entry(unsigned int type, int i)
static int try_to_unuse(unsigned int type) static int try_to_unuse(unsigned int type)
{ {
struct swap_info_struct * si = &swap_info[type]; struct swap_info_struct * si = &swap_info[type];
int ret, foundpage; struct mm_struct *start_mm;
unsigned short *swap_map;
unsigned short swcount;
struct page *page;
swp_entry_t entry;
int i = 0;
int retval = 0;
int reset_overflow = 0;
do { /*
int i; * When searching mms for an entry, a good strategy is to
* start at the first mm we freed the previous entry from
* (though actually we don't notice whether we or coincidence
* freed the entry). Initialize this start_mm with a hold.
*
* A simpler strategy would be to start at the last mm we
* freed the previous entry from; but that would take less
* advantage of mmlist ordering (now preserved by swap_out()),
* which clusters forked address spaces together, most recent
* child immediately after parent. If we race with dup_mmap(),
* we very much want to resolve parent before child, otherwise
* we may miss some entries: using last mm would invert that.
*/
start_mm = &init_mm;
atomic_inc(&init_mm.mm_users);
/* /*
* The algorithm is inefficient but seldomly used * Keep on scanning until all entries have gone. Usually,
* * one pass through swap_map is enough, but not necessarily:
* Find a swap page in use and read it in. * mmput() removes mm from mmlist before exit_mmap() and its
* zap_page_range(). That's not too bad, those entries are
* on their way out, and handled faster there than here.
* do_munmap() behaves similarly, taking the range out of mm's
* vma list before zap_page_range(). But unfortunately, when
* unmapping a part of a vma, it takes the whole out first,
* then reinserts what's left after (might even reschedule if
* open() method called) - so swap entries may be invisible
* to swapoff for a while, then reappear - but that is rare.
*/
while ((i = find_next_to_unuse(si, i))) {
/*
* Get a page for the entry, using the existing swap
* cache page if there is one. Otherwise, get a clean
* page and read the swap into it.
*/ */
foundpage = 0; swap_map = &si->swap_map[i];
swap_device_lock(si); entry = SWP_ENTRY(type, i);
for (i = 1; i < si->max ; i++) { page = read_swap_cache_async(entry);
int count = si->swap_map[i]; if (!page) {
if (!count || count == SWAP_MAP_BAD)
continue;
/* /*
* Prevent swaphandle from being completely * Either swap_duplicate() failed because entry
* unused by swap_free while we are trying * has been freed independently, and will not be
* to read in the page - this prevents warning * reused since sys_swapoff() already disabled
* messages from rw_swap_page_base. * allocation from here, or alloc_page() failed.
*/ */
foundpage = 1; if (!*swap_map)
if (count != SWAP_MAP_MAX) continue;
si->swap_map[i] = count + 1; retval = -ENOMEM;
break;
}
swap_device_unlock(si); /*
ret = free_found_swap_entry(type,i); * Don't hold on to start_mm if it looks like exiting.
if (ret) * Can mmput ever block? if so, then we cannot risk
return ret; * it between deleting the page from the swap cache,
* and completing the search through mms (and cannot
* use it to avoid the long hold on mmlist_lock there).
*/
if (atomic_read(&start_mm->mm_users) == 1) {
mmput(start_mm);
start_mm = &init_mm;
atomic_inc(&init_mm.mm_users);
}
/* /*
* we pick up the swap_list_lock() to guard the nr_swap_pages, * Wait for and lock page. Remove it from swap cache
* si->swap_map[] should only be changed if it is SWAP_MAP_MAX * so try_to_swap_out won't bump swap count. Mark dirty
* otherwise ugly stuff can happen with other people who are in * so try_to_swap_out will preserve it without us having
* the middle of a swap operation to this device. This kind of * to mark any present ptes as dirty: so we can skip
* operation can sometimes be detected with the undead swap * searching processes once swap count has all gone.
* check. Don't worry about these 'undead' entries for now */
* they will be caught the next time though the top loop. lock_page(page);
* Do worry, about the weak locking that allows this to happen if (PageSwapCache(page))
* because if it happens to a page that is SWAP_MAP_MAX delete_from_swap_cache_nolock(page);
* then bad stuff can happen. SetPageDirty(page);
*/ UnlockPage(page);
swap_list_lock(); flush_page_to_ram(page);
swap_device_lock(si);
if (si->swap_map[i] > 0) { /*
/* normally this would just kill the swap page if * Remove all references to entry, without blocking.
* it still existed, it appears though that the locks * Whenever we reach init_mm, there's no address space
* are a little fuzzy * to search, but use it as a reminder to search shmem.
*/ */
if (si->swap_map[i] != SWAP_MAP_MAX) { swcount = *swap_map;
printk("VM: Undead swap entry %08lx\n", if (swcount) {
SWP_ENTRY(type, i).val); if (start_mm == &init_mm)
} else { shmem_unuse(entry, page);
nr_swap_pages++; else
si->swap_map[i] = 0; unuse_process(start_mm, entry, page);
}
if (*swap_map) {
int set_start_mm = (*swap_map >= swcount);
struct list_head *p = &start_mm->mmlist;
struct mm_struct *new_start_mm = start_mm;
struct mm_struct *mm;
spin_lock(&mmlist_lock);
while (*swap_map && (p = p->next) != &start_mm->mmlist) {
mm = list_entry(p, struct mm_struct, mmlist);
swcount = *swap_map;
if (mm == &init_mm) {
set_start_mm = 1;
shmem_unuse(entry, page);
} else
unuse_process(mm, entry, page);
if (set_start_mm && *swap_map < swcount) {
new_start_mm = mm;
set_start_mm = 0;
} }
} }
atomic_inc(&new_start_mm->mm_users);
spin_unlock(&mmlist_lock);
mmput(start_mm);
start_mm = new_start_mm;
}
page_cache_release(page);
/*
* How could swap count reach 0x7fff when the maximum
* pid is 0x7fff, and there's no way to repeat a swap
* page within an mm (except in shmem, where it's the
* shared object which takes the reference count)?
* We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
*
* If that's wrong, then we should worry more about
* exit_mmap() and do_munmap() cases described above:
* we might be resetting SWAP_MAP_MAX too early here.
* We know "Undead"s can happen, they're okay, so don't
* report them; but do report if we reset SWAP_MAP_MAX.
*/
if (*swap_map == SWAP_MAP_MAX) {
swap_list_lock();
swap_device_lock(si);
nr_swap_pages++;
*swap_map = 0;
swap_device_unlock(si); swap_device_unlock(si);
swap_list_unlock(); swap_list_unlock();
reset_overflow = 1;
}
/* /*
* This lock stuff is ulgy! * Make sure that we aren't completely killing
* Make sure that we aren't completely killing * interactive performance. Interruptible check on
* interactive performance. * signal_pending() would be nice, but changes the spec?
*/ */
if (current->need_resched) if (current->need_resched)
schedule(); schedule();
swap_device_lock(si); else {
unlock_kernel();
lock_kernel();
} }
swap_device_unlock(si); }
} while (foundpage);
return 0; mmput(start_mm);
if (reset_overflow) {
printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
swap_overflow = 0;
}
return retval;
} }
asmlinkage long sys_swapoff(const char * specialfile) asmlinkage long sys_swapoff(const char * specialfile)
...@@ -528,8 +606,8 @@ asmlinkage long sys_swapoff(const char * specialfile) ...@@ -528,8 +606,8 @@ asmlinkage long sys_swapoff(const char * specialfile)
} }
nr_swap_pages -= p->pages; nr_swap_pages -= p->pages;
total_swap_pages -= p->pages; total_swap_pages -= p->pages;
swap_list_unlock();
p->flags = SWP_USED; p->flags = SWP_USED;
swap_list_unlock();
err = try_to_unuse(type); err = try_to_unuse(type);
if (err) { if (err) {
/* re-insert swap space back into swap_list */ /* re-insert swap space back into swap_list */
...@@ -544,8 +622,8 @@ asmlinkage long sys_swapoff(const char * specialfile) ...@@ -544,8 +622,8 @@ asmlinkage long sys_swapoff(const char * specialfile)
swap_info[prev].next = p - swap_info; swap_info[prev].next = p - swap_info;
nr_swap_pages += p->pages; nr_swap_pages += p->pages;
total_swap_pages += p->pages; total_swap_pages += p->pages;
swap_list_unlock();
p->flags = SWP_WRITEOK; p->flags = SWP_WRITEOK;
swap_list_unlock();
goto out_dput; goto out_dput;
} }
if (p->swap_device) if (p->swap_device)
...@@ -557,6 +635,7 @@ asmlinkage long sys_swapoff(const char * specialfile) ...@@ -557,6 +635,7 @@ asmlinkage long sys_swapoff(const char * specialfile)
nd.mnt = p->swap_vfsmnt; nd.mnt = p->swap_vfsmnt;
p->swap_vfsmnt = NULL; p->swap_vfsmnt = NULL;
p->swap_device = 0; p->swap_device = 0;
p->max = 0;
vfree(p->swap_map); vfree(p->swap_map);
p->swap_map = NULL; p->swap_map = NULL;
p->flags = 0; p->flags = 0;
...@@ -637,7 +716,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags) ...@@ -637,7 +716,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
union swap_header *swap_header = 0; union swap_header *swap_header = 0;
int swap_header_version; int swap_header_version;
int nr_good_pages = 0; int nr_good_pages = 0;
unsigned long maxpages; unsigned long maxpages = 1;
int swapfilesize; int swapfilesize;
struct block_device *bdev = NULL; struct block_device *bdev = NULL;
...@@ -662,7 +741,6 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags) ...@@ -662,7 +741,6 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
p->highest_bit = 0; p->highest_bit = 0;
p->cluster_nr = 0; p->cluster_nr = 0;
p->sdev_lock = SPIN_LOCK_UNLOCKED; p->sdev_lock = SPIN_LOCK_UNLOCKED;
p->max = 1;
p->next = -1; p->next = -1;
if (swap_flags & SWAP_FLAG_PREFER) { if (swap_flags & SWAP_FLAG_PREFER) {
p->prio = p->prio =
...@@ -752,17 +830,17 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags) ...@@ -752,17 +830,17 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
if (!p->lowest_bit) if (!p->lowest_bit)
p->lowest_bit = i; p->lowest_bit = i;
p->highest_bit = i; p->highest_bit = i;
p->max = i+1; maxpages = i+1;
j++; j++;
} }
} }
nr_good_pages = j; nr_good_pages = j;
p->swap_map = vmalloc(p->max * sizeof(short)); p->swap_map = vmalloc(maxpages * sizeof(short));
if (!p->swap_map) { if (!p->swap_map) {
error = -ENOMEM; error = -ENOMEM;
goto bad_swap; goto bad_swap;
} }
for (i = 1 ; i < p->max ; i++) { for (i = 1 ; i < maxpages ; i++) {
if (test_bit(i,(char *) swap_header)) if (test_bit(i,(char *) swap_header))
p->swap_map[i] = 0; p->swap_map[i] = 0;
else else
...@@ -783,24 +861,22 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags) ...@@ -783,24 +861,22 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
p->lowest_bit = 1; p->lowest_bit = 1;
p->highest_bit = swap_header->info.last_page - 1; p->highest_bit = swap_header->info.last_page - 1;
p->max = swap_header->info.last_page; maxpages = SWP_OFFSET(SWP_ENTRY(0,~0UL)) - 1;
if (maxpages > swap_header->info.last_page)
maxpages = SWP_OFFSET(SWP_ENTRY(0,~0UL)); maxpages = swap_header->info.last_page;
if (p->max >= maxpages)
p->max = maxpages-1;
error = -EINVAL; error = -EINVAL;
if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
goto bad_swap; goto bad_swap;
/* OK, set up the swap map and apply the bad block list */ /* OK, set up the swap map and apply the bad block list */
if (!(p->swap_map = vmalloc (p->max * sizeof(short)))) { if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) {
error = -ENOMEM; error = -ENOMEM;
goto bad_swap; goto bad_swap;
} }
error = 0; error = 0;
memset(p->swap_map, 0, p->max * sizeof(short)); memset(p->swap_map, 0, maxpages * sizeof(short));
for (i=0; i<swap_header->info.nr_badpages; i++) { for (i=0; i<swap_header->info.nr_badpages; i++) {
int page = swap_header->info.badpages[i]; int page = swap_header->info.badpages[i];
if (page <= 0 || page >= swap_header->info.last_page) if (page <= 0 || page >= swap_header->info.last_page)
...@@ -815,7 +891,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags) ...@@ -815,7 +891,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
goto bad_swap; goto bad_swap;
} }
if (swapfilesize && p->max > swapfilesize) { if (swapfilesize && maxpages > swapfilesize) {
printk(KERN_WARNING printk(KERN_WARNING
"Swap area shorter than signature indicates\n"); "Swap area shorter than signature indicates\n");
error = -EINVAL; error = -EINVAL;
...@@ -827,6 +903,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags) ...@@ -827,6 +903,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
goto bad_swap; goto bad_swap;
} }
p->swap_map[0] = SWAP_MAP_BAD; p->swap_map[0] = SWAP_MAP_BAD;
p->max = maxpages;
p->flags = SWP_WRITEOK; p->flags = SWP_WRITEOK;
p->pages = nr_good_pages; p->pages = nr_good_pages;
swap_list_lock(); swap_list_lock();
...@@ -903,7 +980,6 @@ void si_swapinfo(struct sysinfo *val) ...@@ -903,7 +980,6 @@ void si_swapinfo(struct sysinfo *val)
/* /*
* Verify that a swap entry is valid and increment its swap map count. * Verify that a swap entry is valid and increment its swap map count.
* Kernel_lock is held, which guarantees existance of swap device.
* *
* Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
* "permanent", but will be reclaimed by the next swapoff. * "permanent", but will be reclaimed by the next swapoff.
...@@ -933,9 +1009,8 @@ int swap_duplicate(swp_entry_t entry) ...@@ -933,9 +1009,8 @@ int swap_duplicate(swp_entry_t entry)
if (p->swap_map[offset] < SWAP_MAP_MAX) if (p->swap_map[offset] < SWAP_MAP_MAX)
p->swap_map[offset]++; p->swap_map[offset]++;
else { else {
static int overflow = 0; if (swap_overflow++ < 5)
if (overflow++ < 5) printk(KERN_WARNING "swap_dup: swap entry overflow\n");
printk("VM: swap entry overflow\n");
p->swap_map[offset] = SWAP_MAP_MAX; p->swap_map[offset] = SWAP_MAP_MAX;
} }
swap_device_unlock(p); swap_device_unlock(p);
...@@ -944,13 +1019,13 @@ int swap_duplicate(swp_entry_t entry) ...@@ -944,13 +1019,13 @@ int swap_duplicate(swp_entry_t entry)
return result; return result;
bad_file: bad_file:
printk("Bad swap file entry %08lx\n", entry.val); printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
goto out; goto out;
bad_offset: bad_offset:
printk("Bad swap offset entry %08lx\n", entry.val); /* Don't report: can happen in read_swap_cache_async after swapoff */
goto out; goto out;
bad_unused: bad_unused:
printk("Unused swap offset entry in swap_dup %08lx\n", entry.val); /* Don't report: can happen in read_swap_cache_async after blocking */
goto out; goto out;
} }
...@@ -985,13 +1060,13 @@ int swap_count(struct page *page) ...@@ -985,13 +1060,13 @@ int swap_count(struct page *page)
printk(KERN_ERR "swap_count: null entry!\n"); printk(KERN_ERR "swap_count: null entry!\n");
goto out; goto out;
bad_file: bad_file:
printk("Bad swap file entry %08lx\n", entry.val); printk(KERN_ERR "swap_count: %s%08lx\n", Bad_file, entry.val);
goto out; goto out;
bad_offset: bad_offset:
printk("Bad swap offset entry %08lx\n", entry.val); printk(KERN_ERR "swap_count: %s%08lx\n", Bad_offset, entry.val);
goto out; goto out;
bad_unused: bad_unused:
printk("Unused swap offset entry in swap_count %08lx\n", entry.val); printk(KERN_ERR "swap_count: %s%08lx\n", Unused_offset, entry.val);
goto out; goto out;
} }
...@@ -1006,23 +1081,22 @@ void get_swaphandle_info(swp_entry_t entry, unsigned long *offset, ...@@ -1006,23 +1081,22 @@ void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
type = SWP_TYPE(entry); type = SWP_TYPE(entry);
if (type >= nr_swapfiles) { if (type >= nr_swapfiles) {
printk("Internal error: bad swap-device\n"); printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_file, entry.val);
return; return;
} }
p = &swap_info[type]; p = &swap_info[type];
*offset = SWP_OFFSET(entry); *offset = SWP_OFFSET(entry);
if (*offset >= p->max) { if (*offset >= p->max && *offset != 0) {
printk("rw_swap_page: weirdness\n"); printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_offset, entry.val);
return; return;
} }
if (p->swap_map && !p->swap_map[*offset]) { if (p->swap_map && !p->swap_map[*offset]) {
printk("VM: Bad swap entry %08lx\n", entry.val); printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_offset, entry.val);
return; return;
} }
if (!(p->flags & SWP_USED)) { if (!(p->flags & SWP_USED)) {
printk(KERN_ERR "rw_swap_page: " printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_file, entry.val);
"Trying to swap to unused swap-device\n");
return; return;
} }
...@@ -1037,8 +1111,8 @@ void get_swaphandle_info(swp_entry_t entry, unsigned long *offset, ...@@ -1037,8 +1111,8 @@ void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
} }
/* /*
* Kernel_lock protects against swap device deletion. Grab an extra * Kernel_lock protects against swap device deletion. Don't grab an extra
* reference on the swaphandle so that it dos not become unused. * reference on the swaphandle, it doesn't matter if it becomes unused.
*/ */
int valid_swaphandles(swp_entry_t entry, unsigned long *offset) int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
{ {
...@@ -1049,7 +1123,6 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) ...@@ -1049,7 +1123,6 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
*offset = SWP_OFFSET(entry); *offset = SWP_OFFSET(entry);
toff = *offset = (*offset >> page_cluster) << page_cluster; toff = *offset = (*offset >> page_cluster) << page_cluster;
swap_device_lock(swapdev);
do { do {
/* Don't read-ahead past the end of the swap area */ /* Don't read-ahead past the end of the swap area */
if (toff >= swapdev->max) if (toff >= swapdev->max)
...@@ -1059,10 +1132,8 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) ...@@ -1059,10 +1132,8 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
break; break;
if (swapdev->swap_map[toff] == SWAP_MAP_BAD) if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
break; break;
swapdev->swap_map[toff]++;
toff++; toff++;
ret++; ret++;
} while (--i); } while (--i);
swap_device_unlock(swapdev);
return ret; return ret;
} }
...@@ -32,8 +32,6 @@ ...@@ -32,8 +32,6 @@
*/ */
#define DEF_PRIORITY (6) #define DEF_PRIORITY (6)
#define MAX(a,b) ((a) > (b) ? (a) : (b))
static inline void age_page_up(struct page *page) static inline void age_page_up(struct page *page)
{ {
unsigned age = page->age + PAGE_AGE_ADV; unsigned age = page->age + PAGE_AGE_ADV;
...@@ -111,6 +109,7 @@ static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, u ...@@ -111,6 +109,7 @@ static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, u
* is needed on CPUs which update the accessed and dirty * is needed on CPUs which update the accessed and dirty
* bits in hardware. * bits in hardware.
*/ */
flush_cache_page(vma, address);
pte = ptep_get_and_clear(page_table); pte = ptep_get_and_clear(page_table);
flush_tlb_page(vma, address); flush_tlb_page(vma, address);
...@@ -138,7 +137,8 @@ static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, u ...@@ -138,7 +137,8 @@ static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, u
/* /*
* Is it a clean page? Then it must be recoverable * Is it a clean page? Then it must be recoverable
* by just paging it in again, and we can just drop * by just paging it in again, and we can just drop
* it.. * it.. or if it's dirty but has backing store,
* just mark the page dirty and drop it.
* *
* However, this won't actually free any real * However, this won't actually free any real
* memory, as the page will just be in the page cache * memory, as the page will just be in the page cache
...@@ -148,20 +148,17 @@ static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, u ...@@ -148,20 +148,17 @@ static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, u
* Basically, this just makes it possible for us to do * Basically, this just makes it possible for us to do
* some real work in the future in "refill_inactive()". * some real work in the future in "refill_inactive()".
*/ */
flush_cache_page(vma, address); if (page->mapping) {
if (!pte_dirty(pte)) if (pte_dirty(pte))
set_page_dirty(page);
goto drop_pte; goto drop_pte;
}
/* /*
* Ok, it's really dirty. That means that * Check PageDirty as well as pte_dirty: page may
* we should either create a new swap cache * have been brought back from swap by swapoff.
* entry for it, or we should write it back
* to its own backing store.
*/ */
if (page->mapping) { if (!pte_dirty(pte) && !PageDirty(page))
set_page_dirty(page);
goto drop_pte; goto drop_pte;
}
/* /*
* This is a dirty, swappable page. First of all, * This is a dirty, swappable page. First of all,
...@@ -334,6 +331,9 @@ static inline int swap_amount(struct mm_struct *mm) ...@@ -334,6 +331,9 @@ static inline int swap_amount(struct mm_struct *mm)
return nr; return nr;
} }
/* Placeholder for swap_out(): may be updated by fork.c:mmput() */
struct mm_struct *swap_mm = &init_mm;
static void swap_out(unsigned int priority, int gfp_mask) static void swap_out(unsigned int priority, int gfp_mask)
{ {
int counter; int counter;
...@@ -347,17 +347,15 @@ static void swap_out(unsigned int priority, int gfp_mask) ...@@ -347,17 +347,15 @@ static void swap_out(unsigned int priority, int gfp_mask)
/* Then, look at the other mm's */ /* Then, look at the other mm's */
counter = (mmlist_nr << SWAP_MM_SHIFT) >> priority; counter = (mmlist_nr << SWAP_MM_SHIFT) >> priority;
do { do {
struct list_head *p;
spin_lock(&mmlist_lock); spin_lock(&mmlist_lock);
p = init_mm.mmlist.next; mm = swap_mm;
if (p == &init_mm.mmlist) if (mm == &init_mm) {
goto empty; mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
if (mm == &init_mm)
/* Move it to the back of the queue.. */ goto empty;
list_del(p); }
list_add_tail(p, &init_mm.mmlist); /* Set pointer for next call to next in the list */
mm = list_entry(p, struct mm_struct, mmlist); swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
/* Make sure the mm doesn't disappear when we drop the lock.. */ /* Make sure the mm doesn't disappear when we drop the lock.. */
atomic_inc(&mm->mm_users); atomic_inc(&mm->mm_users);
...@@ -539,8 +537,12 @@ int page_launder(int gfp_mask, int sync) ...@@ -539,8 +537,12 @@ int page_launder(int gfp_mask, int sync)
* last copy.. * last copy..
*/ */
if (PageDirty(page)) { if (PageDirty(page)) {
int (*writepage)(struct page *) = page->mapping->a_ops->writepage; int (*writepage)(struct page *);
/* Can a page get here without page->mapping? */
if (!page->mapping)
goto page_active;
writepage = page->mapping->a_ops->writepage;
if (!writepage) if (!writepage)
goto page_active; goto page_active;
...@@ -779,7 +781,7 @@ int inactive_shortage(void) ...@@ -779,7 +781,7 @@ int inactive_shortage(void)
{ {
pg_data_t *pgdat; pg_data_t *pgdat;
unsigned int global_target = freepages.high + inactive_target; unsigned int global_target = freepages.high + inactive_target;
unsigned int global_incative = 0; unsigned int global_inactive = 0;
pgdat = pgdat_list; pgdat = pgdat_list;
do { do {
...@@ -799,13 +801,13 @@ int inactive_shortage(void) ...@@ -799,13 +801,13 @@ int inactive_shortage(void)
if (inactive < zone->pages_high) if (inactive < zone->pages_high)
return 1; return 1;
global_incative += inactive; global_inactive += inactive;
} }
pgdat = pgdat->node_next; pgdat = pgdat->node_next;
} while (pgdat); } while (pgdat);
/* Global shortage? */ /* Global shortage? */
return global_incative < global_target; return global_inactive < global_target;
} }
/* /*
......
...@@ -66,5 +66,7 @@ loadkeys -V 2>&1 | awk \ ...@@ -66,5 +66,7 @@ loadkeys -V 2>&1 | awk \
expr --v 2>&1 | awk 'NR==1{print "Sh-utils ", $NF}' expr --v 2>&1 | awk 'NR==1{print "Sh-utils ", $NF}'
X=`cat /proc/modules | sed -e "s/ .*$//"` if [ -e /proc/modules ]; then
echo "Modules Loaded "$X X=`cat /proc/modules | sed -e "s/ .*$//"`
echo "Modules Loaded "$X
fi
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment